xref: /openbmc/qemu/fpu/softfloat.c (revision ec961b81)
1 /*
2  * QEMU float support
3  *
4  * The code in this source file is derived from release 2a of the SoftFloat
5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6  * some later contributions) are provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file after December 1st 2014 will be
14  * taken to be licensed under the Softfloat-2a license unless specifically
15  * indicated otherwise.
16  */
17 
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22 
23 Written by John R. Hauser.  This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704.  Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980.  The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32 
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43 
44 ===============================================================================
45 */
46 
47 /* BSD licensing:
48  * Copyright (c) 2006, Fabrice Bellard
49  * All rights reserved.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions are met:
53  *
54  * 1. Redistributions of source code must retain the above copyright notice,
55  * this list of conditions and the following disclaimer.
56  *
57  * 2. Redistributions in binary form must reproduce the above copyright notice,
58  * this list of conditions and the following disclaimer in the documentation
59  * and/or other materials provided with the distribution.
60  *
61  * 3. Neither the name of the copyright holder nor the names of its contributors
62  * may be used to endorse or promote products derived from this software without
63  * specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75  * THE POSSIBILITY OF SUCH DAMAGE.
76  */
77 
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79  * version 2 or later. See the COPYING file in the top-level directory.
80  */
81 
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83  * target-dependent and needs the TARGET_* macros.
84  */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89 
90 /* We only need stdlib for abort() */
91 
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations.  (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98 
99 /*
100  * Hardfloat
101  *
102  * Fast emulation of guest FP instructions is challenging for two reasons.
103  * First, FP instruction semantics are similar but not identical, particularly
104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
105  * exception flags is not trivial: reading the host's flags register with a
106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107  * and trapping on every FP exception is neither fast nor pleasant to work with.
108  *
109  * We address these challenges by leveraging the host FPU for a subset of the
110  * operations. To do this we expand on the idea presented in this paper:
111  *
112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114  *
115  * The idea is thus to leverage the host FPU to (1) compute FP operations
116  * and (2) identify whether FP exceptions occurred while avoiding
117  * expensive exception flag register accesses.
118  *
119  * An important optimization shown in the paper is that, since the guest
120  * rarely clears the exception flags, we can avoid recomputing some of them.
121  * This is particularly useful for the inexact flag, which is very frequently
122  * raised in floating-point workloads.
123  *
124  * We optimize the code further by deferring to soft-fp whenever FP exception
125  * detection might get hairy. Two examples: (1) when at least one operand is
126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127  * and the result is < the minimum normal.
128  */
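/*
 * In rough pseudocode (see float32_gen2()/float64_gen2() below for the
 * real implementation), the fast path for a two-operand op looks like:
 *
 *     if (can_use_fpu(s) && both inputs are zero or normal) {
 *         r = a OP b;                    // host FPU
 *         if (r is infinite) {
 *             float_raise(float_flag_overflow, s);
 *         } else if (|r| <= minimum normal && op-specific post() check) {
 *             return soft_op(a, b, s);   // possible underflow; be precise
 *         }
 *         return r;
 *     }
 *     return soft_op(a, b, s);
 */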
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
130     static inline void name(soft_t *a, float_status *s)                 \
131     {                                                                   \
132         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
133             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
134                                      soft_t ## _is_neg(*a));            \
135             float_raise(float_flag_input_denormal, s);                  \
136         }                                                               \
137     }
138 
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141 #undef GEN_INPUT_FLUSH__NOCHECK
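/*
 * For reference, the float32 instantiation above expands to roughly:
 *
 *     static inline void float32_input_flush__nocheck(float32 *a,
 *                                                     float_status *s)
 *     {
 *         if (unlikely(float32_is_denormal(*a))) {
 *             *a = float32_set_sign(float32_zero, float32_is_neg(*a));
 *             float_raise(float_flag_input_denormal, s);
 *         }
 *     }
 */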
142 
143 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
144     static inline void name(soft_t *a, float_status *s) \
145     {                                                   \
146         if (likely(!s->flush_inputs_to_zero)) {         \
147             return;                                     \
148         }                                               \
149         soft_t ## _input_flush__nocheck(a, s);          \
150     }
151 
152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154 #undef GEN_INPUT_FLUSH1
155 
156 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
157     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
158     {                                                                   \
159         if (likely(!s->flush_inputs_to_zero)) {                         \
160             return;                                                     \
161         }                                                               \
162         soft_t ## _input_flush__nocheck(a, s);                          \
163         soft_t ## _input_flush__nocheck(b, s);                          \
164     }
165 
166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168 #undef GEN_INPUT_FLUSH2
169 
170 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
171     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172     {                                                                   \
173         if (likely(!s->flush_inputs_to_zero)) {                         \
174             return;                                                     \
175         }                                                               \
176         soft_t ## _input_flush__nocheck(a, s);                          \
177         soft_t ## _input_flush__nocheck(b, s);                          \
178         soft_t ## _input_flush__nocheck(c, s);                          \
179     }
180 
181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183 #undef GEN_INPUT_FLUSH3
184 
185 /*
186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
187  * hardfloat functions. Each combination of number of inputs and float size
188  * gets its own value.
189  */
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
197 #else
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
204 #endif
205 
206 /*
207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208  * float{32,64}_is_infinity when !USE_FP.
209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210  * On power64, however, using isinf() reduces fp-bench performance by up to 50%.
211  */
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF   1
214 #else
215 # define QEMU_HARDFLOAT_USE_ISINF   0
216 #endif
217 
218 /*
219  * Some targets clear the FP flags before most FP operations. This prevents
220  * the use of hardfloat, since hardfloat relies on the inexact flag being
221  * already set.
222  */
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
226     IEEE implementation
227 # endif
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
230 #else
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
233 #endif
234 
235 static inline bool can_use_fpu(const float_status *s)
236 {
237     if (QEMU_NO_HARDFLOAT) {
238         return false;
239     }
240     return likely(s->float_exception_flags & float_flag_inexact &&
241                   s->float_rounding_mode == float_round_nearest_even);
242 }
243 
244 /*
245  * Hardfloat generation functions. Each operation can have two flavors:
246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247  * most condition checks, or native ones (e.g. fpclassify).
248  *
249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
250  * compiler to propagate constants and inline everything into the callers.
251  *
252  * We only generate functions for operations with two inputs, since only
253  * these are common enough to justify consolidating them into common code.
254  */
255 
256 typedef union {
257     float32 s;
258     float h;
259 } union_float32;
260 
261 typedef union {
262     float64 s;
263     double h;
264 } union_float64;
265 
266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268 
269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271 typedef float   (*hard_f32_op2_fn)(float a, float b);
272 typedef double  (*hard_f64_op2_fn)(double a, double b);
273 
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
278         /*
279          * Not using a temp variable for consecutive fpclassify calls ends up
280          * generating faster code.
281          */
282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284     }
285     return float32_is_zero_or_normal(a.s) &&
286            float32_is_zero_or_normal(b.s);
287 }
288 
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290 {
291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294     }
295     return float64_is_zero_or_normal(a.s) &&
296            float64_is_zero_or_normal(b.s);
297 }
298 
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307     }
308     return float32_is_zero_or_normal(a.s) &&
309            float32_is_zero_or_normal(b.s) &&
310            float32_is_zero_or_normal(c.s);
311 }
312 
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320     }
321     return float64_is_zero_or_normal(a.s) &&
322            float64_is_zero_or_normal(b.s) &&
323            float64_is_zero_or_normal(c.s);
324 }
325 
326 static inline bool f32_is_inf(union_float32 a)
327 {
328     if (QEMU_HARDFLOAT_USE_ISINF) {
329         return isinf(a.h);
330     }
331     return float32_is_infinity(a.s);
332 }
333 
334 static inline bool f64_is_inf(union_float64 a)
335 {
336     if (QEMU_HARDFLOAT_USE_ISINF) {
337         return isinf(a.h);
338     }
339     return float64_is_infinity(a.s);
340 }
341 
342 static inline float32
343 float32_gen2(float32 xa, float32 xb, float_status *s,
344              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
345              f32_check_fn pre, f32_check_fn post)
346 {
347     union_float32 ua, ub, ur;
348 
349     ua.s = xa;
350     ub.s = xb;
351 
352     if (unlikely(!can_use_fpu(s))) {
353         goto soft;
354     }
355 
356     float32_input_flush2(&ua.s, &ub.s, s);
357     if (unlikely(!pre(ua, ub))) {
358         goto soft;
359     }
360 
361     ur.h = hard(ua.h, ub.h);
362     if (unlikely(f32_is_inf(ur))) {
363         float_raise(float_flag_overflow, s);
364     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365         goto soft;
366     }
367     return ur.s;
368 
369  soft:
370     return soft(ua.s, ub.s, s);
371 }
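/*
 * For example, float32_add() further down effectively calls
 *     float32_gen2(a, b, s, hard_f32_add, soft_f32_add,
 *                  f32_is_zon2, f32_addsubmul_post);
 * i.e. "pre" rejects inputs that are denormal/inf/NaN and "post" sends
 * results that might have underflowed back to the softfloat code.
 */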
372 
373 static inline float64
374 float64_gen2(float64 xa, float64 xb, float_status *s,
375              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
376              f64_check_fn pre, f64_check_fn post)
377 {
378     union_float64 ua, ub, ur;
379 
380     ua.s = xa;
381     ub.s = xb;
382 
383     if (unlikely(!can_use_fpu(s))) {
384         goto soft;
385     }
386 
387     float64_input_flush2(&ua.s, &ub.s, s);
388     if (unlikely(!pre(ua, ub))) {
389         goto soft;
390     }
391 
392     ur.h = hard(ua.h, ub.h);
393     if (unlikely(f64_is_inf(ur))) {
394         float_raise(float_flag_overflow, s);
395     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396         goto soft;
397     }
398     return ur.s;
399 
400  soft:
401     return soft(ua.s, ub.s, s);
402 }
403 
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
407 
408 static inline uint32_t extractFloat32Frac(float32 a)
409 {
410     return float32_val(a) & 0x007FFFFF;
411 }
412 
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
416 
417 static inline int extractFloat32Exp(float32 a)
418 {
419     return (float32_val(a) >> 23) & 0xFF;
420 }
421 
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
425 
426 static inline bool extractFloat32Sign(float32 a)
427 {
428     return float32_val(a) >> 31;
429 }
430 
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
434 
435 static inline uint64_t extractFloat64Frac(float64 a)
436 {
437     return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
438 }
439 
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
443 
444 static inline int extractFloat64Exp(float64 a)
445 {
446     return (float64_val(a) >> 52) & 0x7FF;
447 }
448 
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
452 
453 static inline bool extractFloat64Sign(float64 a)
454 {
455     return float64_val(a) >> 63;
456 }
457 
458 /*
459  * Classify a floating point number. Everything above float_class_qnan
460  * is a NaN so cls >= float_class_qnan is any NaN.
461  */
462 
463 typedef enum __attribute__ ((__packed__)) {
464     float_class_unclassified,
465     float_class_zero,
466     float_class_normal,
467     float_class_inf,
468     float_class_qnan,  /* all NaNs from here */
469     float_class_snan,
470 } FloatClass;
471 
472 #define float_cmask(bit)  (1u << (bit))
473 
474 enum {
475     float_cmask_zero    = float_cmask(float_class_zero),
476     float_cmask_normal  = float_cmask(float_class_normal),
477     float_cmask_inf     = float_cmask(float_class_inf),
478     float_cmask_qnan    = float_cmask(float_class_qnan),
479     float_cmask_snan    = float_cmask(float_class_snan),
480 
481     float_cmask_infzero = float_cmask_zero | float_cmask_inf,
482     float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
483 };
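/*
 * The masks let callers test the classes of several operands at once,
 * along the lines of (sketch):
 *
 *     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 *     if (unlikely(ab_mask & float_cmask_anynan)) {
 *         ... propagate or silence a NaN ...
 *     } else if (ab_mask == float_cmask_infzero) {
 *         ... e.g. 0 * Inf inside muladd raises invalid ...
 *     }
 */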
484 
485 
486 /* Simple helpers for checking if, or what kind of, NaN we have */
487 static inline __attribute__((unused)) bool is_nan(FloatClass c)
488 {
489     return unlikely(c >= float_class_qnan);
490 }
491 
492 static inline __attribute__((unused)) bool is_snan(FloatClass c)
493 {
494     return c == float_class_snan;
495 }
496 
497 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
498 {
499     return c == float_class_qnan;
500 }
501 
502 /*
503  * Structure holding all of the decomposed parts of a float.
504  * The exponent is unbiased and the fraction is normalized.
505  *
506  * The fraction words are stored in big-endian word ordering,
507  * so that truncation from a larger format to a smaller format
508  * can be done simply by ignoring subsequent elements.
509  */
510 
511 typedef struct {
512     FloatClass cls;
513     bool sign;
514     int32_t exp;
515     union {
516         /* Routines that know the structure may reference the singular name. */
517         uint64_t frac;
518         /*
519          * Routines expanded with multiple structures reference "hi" and "lo"
520          * depending on the operation.  In FloatParts64, "hi" and "lo" are
521          * both the same word and aliased here.
522          */
523         uint64_t frac_hi;
524         uint64_t frac_lo;
525     };
526 } FloatParts64;
527 
528 typedef struct {
529     FloatClass cls;
530     bool sign;
531     int32_t exp;
532     uint64_t frac_hi;
533     uint64_t frac_lo;
534 } FloatParts128;
535 
536 typedef struct {
537     FloatClass cls;
538     bool sign;
539     int32_t exp;
540     uint64_t frac_hi;
541     uint64_t frac_hm;  /* high-middle */
542     uint64_t frac_lm;  /* low-middle */
543     uint64_t frac_lo;
544 } FloatParts256;
545 
546 /* These apply to the most significant word of each FloatPartsN. */
547 #define DECOMPOSED_BINARY_POINT    63
548 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
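/*
 * A canonical normal number is thus
 *     (-1)^sign * (frac / 2^DECOMPOSED_BINARY_POINT) * 2^exp
 * with DECOMPOSED_IMPLICIT_BIT set in the most significant fraction word,
 * i.e. the fractional part lies in [1.0, 2.0).
 */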
549 
550 /* Structure holding all of the relevant parameters for a format.
551  *   exp_size: the size of the exponent field
552  *   exp_bias: the offset applied to the exponent field
553  *   exp_max: the maximum normalised exponent
554  *   frac_size: the size of the fraction field
555  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
556  * The following are computed based on the size of the fraction
557  *   frac_lsb: least significant bit of fraction
558  *   frac_lsbm1: the bit below the least significant bit (for rounding)
559  *   round_mask/roundeven_mask: masks used for rounding
560  * The following optional modifiers are available:
561  *   arm_althp: handle ARM Alternative Half Precision
562  */
563 typedef struct {
564     int exp_size;
565     int exp_bias;
566     int exp_max;
567     int frac_size;
568     int frac_shift;
569     uint64_t frac_lsb;
570     uint64_t frac_lsbm1;
571     uint64_t round_mask;
572     uint64_t roundeven_mask;
573     bool arm_althp;
574 } FloatFmt;
575 
576 /* Expand fields based on the size of exponent and fraction */
577 #define FLOAT_PARAMS(E, F)                                           \
578     .exp_size       = E,                                             \
579     .exp_bias       = ((1 << E) - 1) >> 1,                           \
580     .exp_max        = (1 << E) - 1,                                  \
581     .frac_size      = F,                                             \
582     .frac_shift     = (-F - 1) & 63,                                 \
583     .frac_lsb       = 1ull << ((-F - 1) & 63),                       \
584     .frac_lsbm1     = 1ull << ((-F - 2) & 63),                       \
585     .round_mask     = (1ull << ((-F - 1) & 63)) - 1,                 \
586     .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1
587 
588 static const FloatFmt float16_params = {
589     FLOAT_PARAMS(5, 10)
590 };
591 
592 static const FloatFmt float16_params_ahp = {
593     FLOAT_PARAMS(5, 10),
594     .arm_althp = true
595 };
596 
597 static const FloatFmt bfloat16_params = {
598     FLOAT_PARAMS(8, 7)
599 };
600 
601 static const FloatFmt float32_params = {
602     FLOAT_PARAMS(8, 23)
603 };
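/*
 * As a worked example, FLOAT_PARAMS(8, 23) above yields
 *     exp_bias       = 127
 *     exp_max        = 255
 *     frac_shift     = 40    (places the 23 fraction bits at bits 62..40,
 *                             just below the implicit bit at bit 63)
 *     frac_lsb       = 1ull << 40
 *     frac_lsbm1     = 1ull << 39
 *     round_mask     = (1ull << 40) - 1
 *     roundeven_mask = (2ull << 40) - 1
 */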
604 
605 static const FloatFmt float64_params = {
606     FLOAT_PARAMS(11, 52)
607 };
608 
609 static const FloatFmt float128_params = {
610     FLOAT_PARAMS(15, 112)
611 };
612 
613 /* Unpack a float to parts, but do not canonicalize.  */
614 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
615 {
616     const int f_size = fmt->frac_size;
617     const int e_size = fmt->exp_size;
618 
619     *r = (FloatParts64) {
620         .cls = float_class_unclassified,
621         .sign = extract64(raw, f_size + e_size, 1),
622         .exp = extract64(raw, f_size, e_size),
623         .frac = extract64(raw, 0, f_size)
624     };
625 }
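/*
 * For instance, the float32 value 0x3fc00000 (1.5f) unpacks with
 * float32_params to sign = 0, exp = 0x7f and frac = 0x400000;
 * parts_canonicalize() later removes the bias and aligns the fraction
 * with DECOMPOSED_BINARY_POINT.
 */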
626 
627 static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
628 {
629     unpack_raw64(p, &float16_params, f);
630 }
631 
632 static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
633 {
634     unpack_raw64(p, &bfloat16_params, f);
635 }
636 
637 static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
638 {
639     unpack_raw64(p, &float32_params, f);
640 }
641 
642 static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
643 {
644     unpack_raw64(p, &float64_params, f);
645 }
646 
647 static void float128_unpack_raw(FloatParts128 *p, float128 f)
648 {
649     const int f_size = float128_params.frac_size - 64;
650     const int e_size = float128_params.exp_size;
651 
652     *p = (FloatParts128) {
653         .cls = float_class_unclassified,
654         .sign = extract64(f.high, f_size + e_size, 1),
655         .exp = extract64(f.high, f_size, e_size),
656         .frac_hi = extract64(f.high, 0, f_size),
657         .frac_lo = f.low,
658     };
659 }
660 
661 /* Pack a float from parts, but do not canonicalize.  */
662 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
663 {
664     const int f_size = fmt->frac_size;
665     const int e_size = fmt->exp_size;
666     uint64_t ret;
667 
668     ret = (uint64_t)p->sign << (f_size + e_size);
669     ret = deposit64(ret, f_size, e_size, p->exp);
670     ret = deposit64(ret, 0, f_size, p->frac);
671     return ret;
672 }
673 
674 static inline float16 float16_pack_raw(const FloatParts64 *p)
675 {
676     return make_float16(pack_raw64(p, &float16_params));
677 }
678 
679 static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
680 {
681     return pack_raw64(p, &bfloat16_params);
682 }
683 
684 static inline float32 float32_pack_raw(const FloatParts64 *p)
685 {
686     return make_float32(pack_raw64(p, &float32_params));
687 }
688 
689 static inline float64 float64_pack_raw(const FloatParts64 *p)
690 {
691     return make_float64(pack_raw64(p, &float64_params));
692 }
693 
694 static float128 float128_pack_raw(const FloatParts128 *p)
695 {
696     const int f_size = float128_params.frac_size - 64;
697     const int e_size = float128_params.exp_size;
698     uint64_t hi;
699 
700     hi = (uint64_t)p->sign << (f_size + e_size);
701     hi = deposit64(hi, f_size, e_size, p->exp);
702     hi = deposit64(hi, 0, f_size, p->frac_hi);
703     return make_float128(hi, p->frac_lo);
704 }
705 
706 /*----------------------------------------------------------------------------
707 | Functions and definitions to determine:  (1) whether tininess for underflow
708 | is detected before or after rounding by default, (2) what (if anything)
709 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
710 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
711 | are propagated from function inputs to output.  These details are target-
712 | specific.
713 *----------------------------------------------------------------------------*/
714 #include "softfloat-specialize.c.inc"
715 
716 #define PARTS_GENERIC_64_128(NAME, P) \
717     QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
718 
719 #define PARTS_GENERIC_64_128_256(NAME, P) \
720     QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
721                  (FloatParts128 *, parts128_##NAME), parts64_##NAME)
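/*
 * QEMU_GENERIC dispatches on the type of P, so the parts_* wrappers below
 * resolve to parts64_* when passed a FloatParts64 *, to parts128_* for a
 * FloatParts128 *, and (where provided) to parts256_* for a FloatParts256 *.
 */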
722 
723 #define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
724 #define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
725 
726 static void parts64_return_nan(FloatParts64 *a, float_status *s);
727 static void parts128_return_nan(FloatParts128 *a, float_status *s);
728 
729 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
730 
731 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
732                                       float_status *s);
733 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
734                                         float_status *s);
735 
736 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
737 
738 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
739                                              FloatParts64 *c, float_status *s,
740                                              int ab_mask, int abc_mask);
741 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
742                                                FloatParts128 *b,
743                                                FloatParts128 *c,
744                                                float_status *s,
745                                                int ab_mask, int abc_mask);
746 
747 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
748     PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
749 
750 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
751                                  const FloatFmt *fmt);
752 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
753                                   const FloatFmt *fmt);
754 
755 #define parts_canonicalize(A, S, F) \
756     PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
757 
758 static void parts64_uncanon(FloatParts64 *p, float_status *status,
759                             const FloatFmt *fmt);
760 static void parts128_uncanon(FloatParts128 *p, float_status *status,
761                              const FloatFmt *fmt);
762 
763 #define parts_uncanon(A, S, F) \
764     PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
765 
766 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
767 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
768 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
769 
770 #define parts_add_normal(A, B) \
771     PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
772 
773 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
774 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
775 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
776 
777 #define parts_sub_normal(A, B) \
778     PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
779 
780 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
781                                     float_status *s, bool subtract);
782 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
783                                       float_status *s, bool subtract);
784 
785 #define parts_addsub(A, B, S, Z) \
786     PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
787 
788 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
789                                  float_status *s);
790 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
791                                    float_status *s);
792 
793 #define parts_mul(A, B, S) \
794     PARTS_GENERIC_64_128(mul, A)(A, B, S)
795 
796 static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
797                                     FloatParts64 *c, int flags,
798                                     float_status *s);
799 static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
800                                       FloatParts128 *c, int flags,
801                                       float_status *s);
802 
803 #define parts_muladd(A, B, C, Z, S) \
804     PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
805 
806 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
807                                  float_status *s);
808 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
809                                    float_status *s);
810 
811 #define parts_div(A, B, S) \
812     PARTS_GENERIC_64_128(div, A)(A, B, S)
813 
814 /*
815  * Helper functions for softfloat-parts.c.inc, per-size operations.
816  */
817 
818 #define FRAC_GENERIC_64_128(NAME, P) \
819     QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
820 
821 #define FRAC_GENERIC_64_128_256(NAME, P) \
822     QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
823                  (FloatParts128 *, frac128_##NAME), frac64_##NAME)
824 
825 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
826 {
827     return uadd64_overflow(a->frac, b->frac, &r->frac);
828 }
829 
830 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
831 {
832     bool c = 0;
833     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
834     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
835     return c;
836 }
837 
838 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
839 {
840     bool c = 0;
841     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
842     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
843     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
844     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
845     return c;
846 }
847 
848 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
849 
850 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
851 {
852     return uadd64_overflow(a->frac, c, &r->frac);
853 }
854 
855 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
856 {
857     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
858     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
859 }
860 
861 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
862 
863 static void frac64_allones(FloatParts64 *a)
864 {
865     a->frac = -1;
866 }
867 
868 static void frac128_allones(FloatParts128 *a)
869 {
870     a->frac_hi = a->frac_lo = -1;
871 }
872 
873 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
874 
875 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
876 {
877     return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
878 }
879 
880 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
881 {
882     uint64_t ta = a->frac_hi, tb = b->frac_hi;
883     if (ta == tb) {
884         ta = a->frac_lo, tb = b->frac_lo;
885         if (ta == tb) {
886             return 0;
887         }
888     }
889     return ta < tb ? -1 : 1;
890 }
891 
892 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
893 
894 static void frac64_clear(FloatParts64 *a)
895 {
896     a->frac = 0;
897 }
898 
899 static void frac128_clear(FloatParts128 *a)
900 {
901     a->frac_hi = a->frac_lo = 0;
902 }
903 
904 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
905 
906 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
907 {
908     uint64_t n1, n0, r, q;
909     bool ret;
910 
911     /*
912      * We want a 2*N / N-bit division to produce exactly an N-bit
913      * result, so that we do not lose any precision and so that we
914      * do not have to renormalize afterward.  If A.frac < B.frac,
915      * then division would produce an (N-1)-bit result; shift A left
916      * by one to produce an N-bit result, and return true to
917      * decrement the exponent to match.
918      *
919      * The udiv_qrnnd algorithm that we're using requires normalization,
920      * i.e. the msb of the denominator must be set, which is already true.
921      */
922     ret = a->frac < b->frac;
923     if (ret) {
924         n1 = a->frac;
925         n0 = 0;
926     } else {
927         n1 = a->frac >> 1;
928         n0 = a->frac << 63;
929     }
930     q = udiv_qrnnd(&r, n1, n0, b->frac);
931 
932     /* Set lsb if there is a remainder, to set inexact. */
933     a->frac = q | (r != 0);
934 
935     return ret;
936 }
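/*
 * Worked example: with canonical fractions a->frac = 0x8000000000000000
 * (1.0) and b->frac = 0xc000000000000000 (1.5), a->frac < b->frac so the
 * numerator is a->frac * 2^64 and ret is true.  udiv_qrnnd() returns
 * q = 0xaaaaaaaaaaaaaaaa with a non-zero remainder, so a->frac becomes
 * 0xaaaaaaaaaaaaaaab (about 1.333 * 2^63) and the caller decrements the
 * exponent, giving 1.0 / 1.5.
 */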
937 
938 static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
939 {
940     uint64_t q0, q1, a0, a1, b0, b1;
941     uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
942     bool ret = false;
943 
944     a0 = a->frac_hi, a1 = a->frac_lo;
945     b0 = b->frac_hi, b1 = b->frac_lo;
946 
947     ret = lt128(a0, a1, b0, b1);
948     if (!ret) {
949         a1 = shr_double(a0, a1, 1);
950         a0 = a0 >> 1;
951     }
952 
953     /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
954     q0 = estimateDiv128To64(a0, a1, b0);
955 
956     /*
957      * Estimate is high because B1 was not included (unless B1 == 0).
958      * Reduce quotient and increase remainder until remainder is non-negative.
959      * This loop will execute 0 to 2 times.
960      */
961     mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
962     sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
963     while (r0 != 0) {
964         q0--;
965         add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
966     }
967 
968     /* Repeat using the remainder, producing a second word of quotient. */
969     q1 = estimateDiv128To64(r1, r2, b0);
970     mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
971     sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
972     while (r1 != 0) {
973         q1--;
974         add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
975     }
976 
977     /* Any remainder indicates inexact; set sticky bit. */
978     q1 |= (r2 | r3) != 0;
979 
980     a->frac_hi = q0;
981     a->frac_lo = q1;
982     return ret;
983 }
984 
985 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
986 
987 static bool frac64_eqz(FloatParts64 *a)
988 {
989     return a->frac == 0;
990 }
991 
992 static bool frac128_eqz(FloatParts128 *a)
993 {
994     return (a->frac_hi | a->frac_lo) == 0;
995 }
996 
997 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
998 
999 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1000 {
1001     mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1002 }
1003 
1004 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1005 {
1006     mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1007                 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1008 }
1009 
1010 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1011 
1012 static void frac64_neg(FloatParts64 *a)
1013 {
1014     a->frac = -a->frac;
1015 }
1016 
1017 static void frac128_neg(FloatParts128 *a)
1018 {
1019     bool c = 0;
1020     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1021     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1022 }
1023 
1024 static void frac256_neg(FloatParts256 *a)
1025 {
1026     bool c = 0;
1027     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1028     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1029     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1030     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1031 }
1032 
1033 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1034 
1035 static int frac64_normalize(FloatParts64 *a)
1036 {
1037     if (a->frac) {
1038         int shift = clz64(a->frac);
1039         a->frac <<= shift;
1040         return shift;
1041     }
1042     return 64;
1043 }
1044 
1045 static int frac128_normalize(FloatParts128 *a)
1046 {
1047     if (a->frac_hi) {
1048         int shl = clz64(a->frac_hi);
1049         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1050         a->frac_lo <<= shl;
1051         return shl;
1052     } else if (a->frac_lo) {
1053         int shl = clz64(a->frac_lo);
1054         a->frac_hi = a->frac_lo << shl;
1055         a->frac_lo = 0;
1056         return shl + 64;
1057     }
1058     return 128;
1059 }
1060 
1061 static int frac256_normalize(FloatParts256 *a)
1062 {
1063     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1064     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1065     int ret, shl;
1066 
1067     if (likely(a0)) {
1068         shl = clz64(a0);
1069         if (shl == 0) {
1070             return 0;
1071         }
1072         ret = shl;
1073     } else {
1074         if (a1) {
1075             ret = 64;
1076             a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1077         } else if (a2) {
1078             ret = 128;
1079             a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1080         } else if (a3) {
1081             ret = 192;
1082             a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1083         } else {
1084             ret = 256;
1085             a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1086             goto done;
1087         }
1088         shl = clz64(a0);
1089         if (shl == 0) {
1090             goto done;
1091         }
1092         ret += shl;
1093     }
1094 
1095     a0 = shl_double(a0, a1, shl);
1096     a1 = shl_double(a1, a2, shl);
1097     a2 = shl_double(a2, a3, shl);
1098     a3 <<= shl;
1099 
1100  done:
1101     a->frac_hi = a0;
1102     a->frac_hm = a1;
1103     a->frac_lm = a2;
1104     a->frac_lo = a3;
1105     return ret;
1106 }
1107 
1108 #define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1109 
1110 static void frac64_shl(FloatParts64 *a, int c)
1111 {
1112     a->frac <<= c;
1113 }
1114 
1115 static void frac128_shl(FloatParts128 *a, int c)
1116 {
1117     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1118 
1119     if (c & 64) {
1120         a0 = a1, a1 = 0;
1121     }
1122 
1123     c &= 63;
1124     if (c) {
1125         a0 = shl_double(a0, a1, c);
1126         a1 = a1 << c;
1127     }
1128 
1129     a->frac_hi = a0;
1130     a->frac_lo = a1;
1131 }
1132 
1133 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1134 
1135 static void frac64_shr(FloatParts64 *a, int c)
1136 {
1137     a->frac >>= c;
1138 }
1139 
1140 static void frac128_shr(FloatParts128 *a, int c)
1141 {
1142     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1143 
1144     if (c & 64) {
1145         a1 = a0, a0 = 0;
1146     }
1147 
1148     c &= 63;
1149     if (c) {
1150         a1 = shr_double(a0, a1, c);
1151         a0 = a0 >> c;
1152     }
1153 
1154     a->frac_hi = a0;
1155     a->frac_lo = a1;
1156 }
1157 
1158 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1159 
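/*
 * Shift right by C bits, "jamming" (OR-ing) any bits shifted out into the
 * least significant bit so that inexactness is not lost.  For example,
 * shifting 0x8000000000000003 right by 2 yields 0x2000000000000001: the
 * two dropped bits fold into the sticky lsb.
 */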
1160 static void frac64_shrjam(FloatParts64 *a, int c)
1161 {
1162     uint64_t a0 = a->frac;
1163 
1164     if (likely(c != 0)) {
1165         if (likely(c < 64)) {
1166             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1167         } else {
1168             a0 = a0 != 0;
1169         }
1170         a->frac = a0;
1171     }
1172 }
1173 
1174 static void frac128_shrjam(FloatParts128 *a, int c)
1175 {
1176     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1177     uint64_t sticky = 0;
1178 
1179     if (unlikely(c == 0)) {
1180         return;
1181     } else if (likely(c < 64)) {
1182         /* nothing */
1183     } else if (likely(c < 128)) {
1184         sticky = a1;
1185         a1 = a0;
1186         a0 = 0;
1187         c &= 63;
1188         if (c == 0) {
1189             goto done;
1190         }
1191     } else {
1192         sticky = a0 | a1;
1193         a0 = a1 = 0;
1194         goto done;
1195     }
1196 
1197     sticky |= shr_double(a1, 0, c);
1198     a1 = shr_double(a0, a1, c);
1199     a0 = a0 >> c;
1200 
1201  done:
1202     a->frac_lo = a1 | (sticky != 0);
1203     a->frac_hi = a0;
1204 }
1205 
1206 static void frac256_shrjam(FloatParts256 *a, int c)
1207 {
1208     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1209     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1210     uint64_t sticky = 0;
1211 
1212     if (unlikely(c == 0)) {
1213         return;
1214     } else if (likely(c < 64)) {
1215         /* nothing */
1216     } else if (likely(c < 256)) {
1217         if (unlikely(c & 128)) {
1218             sticky |= a2 | a3;
1219             a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1220         }
1221         if (unlikely(c & 64)) {
1222             sticky |= a3;
1223             a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1224         }
1225         c &= 63;
1226         if (c == 0) {
1227             goto done;
1228         }
1229     } else {
1230         sticky = a0 | a1 | a2 | a3;
1231         a0 = a1 = a2 = a3 = 0;
1232         goto done;
1233     }
1234 
1235     sticky |= shr_double(a3, 0, c);
1236     a3 = shr_double(a2, a3, c);
1237     a2 = shr_double(a1, a2, c);
1238     a1 = shr_double(a0, a1, c);
1239     a0 = a0 >> c;
1240 
1241  done:
1242     a->frac_lo = a3 | (sticky != 0);
1243     a->frac_lm = a2;
1244     a->frac_hm = a1;
1245     a->frac_hi = a0;
1246 }
1247 
1248 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1249 
1250 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1251 {
1252     return usub64_overflow(a->frac, b->frac, &r->frac);
1253 }
1254 
1255 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1256 {
1257     bool c = 0;
1258     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1259     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1260     return c;
1261 }
1262 
1263 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1264 {
1265     bool c = 0;
1266     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1267     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1268     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1269     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1270     return c;
1271 }
1272 
1273 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1274 
1275 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1276 {
1277     r->frac = a->frac_hi | (a->frac_lo != 0);
1278 }
1279 
1280 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1281 {
1282     r->frac_hi = a->frac_hi;
1283     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1284 }
1285 
1286 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1287 
1288 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1289 {
1290     r->frac_hi = a->frac;
1291     r->frac_lo = 0;
1292 }
1293 
1294 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1295 {
1296     r->frac_hi = a->frac_hi;
1297     r->frac_hm = a->frac_lo;
1298     r->frac_lm = 0;
1299     r->frac_lo = 0;
1300 }
1301 
1302 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1303 
1304 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1305 #define FloatPartsN    glue(FloatParts,N)
1306 #define FloatPartsW    glue(FloatParts,W)
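/*
 * With N defined as 64 below, partsN(addsub) expands to parts64_addsub and
 * FloatPartsN to FloatParts64; the .c.inc files are then included once per
 * width as a poor man's template instantiation.
 */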
1307 
1308 #define N 64
1309 #define W 128
1310 
1311 #include "softfloat-parts-addsub.c.inc"
1312 #include "softfloat-parts.c.inc"
1313 
1314 #undef  N
1315 #undef  W
1316 #define N 128
1317 #define W 256
1318 
1319 #include "softfloat-parts-addsub.c.inc"
1320 #include "softfloat-parts.c.inc"
1321 
1322 #undef  N
1323 #undef  W
1324 #define N            256
1325 
1326 #include "softfloat-parts-addsub.c.inc"
1327 
1328 #undef  N
1329 #undef  W
1330 #undef  partsN
1331 #undef  FloatPartsN
1332 #undef  FloatPartsW
1333 
1334 /*
1335  * Pack/unpack routines with a specific FloatFmt.
1336  */
1337 
1338 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1339                                       float_status *s, const FloatFmt *params)
1340 {
1341     float16_unpack_raw(p, f);
1342     parts_canonicalize(p, s, params);
1343 }
1344 
1345 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1346                                      float_status *s)
1347 {
1348     float16a_unpack_canonical(p, f, s, &float16_params);
1349 }
1350 
1351 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1352                                       float_status *s)
1353 {
1354     bfloat16_unpack_raw(p, f);
1355     parts_canonicalize(p, s, &bfloat16_params);
1356 }
1357 
1358 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1359                                              float_status *s,
1360                                              const FloatFmt *params)
1361 {
1362     parts_uncanon(p, s, params);
1363     return float16_pack_raw(p);
1364 }
1365 
1366 static float16 float16_round_pack_canonical(FloatParts64 *p,
1367                                             float_status *s)
1368 {
1369     return float16a_round_pack_canonical(p, s, &float16_params);
1370 }
1371 
1372 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1373                                               float_status *s)
1374 {
1375     parts_uncanon(p, s, &bfloat16_params);
1376     return bfloat16_pack_raw(p);
1377 }
1378 
1379 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1380                                      float_status *s)
1381 {
1382     float32_unpack_raw(p, f);
1383     parts_canonicalize(p, s, &float32_params);
1384 }
1385 
1386 static float32 float32_round_pack_canonical(FloatParts64 *p,
1387                                             float_status *s)
1388 {
1389     parts_uncanon(p, s, &float32_params);
1390     return float32_pack_raw(p);
1391 }
1392 
1393 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1394                                      float_status *s)
1395 {
1396     float64_unpack_raw(p, f);
1397     parts_canonicalize(p, s, &float64_params);
1398 }
1399 
1400 static float64 float64_round_pack_canonical(FloatParts64 *p,
1401                                             float_status *s)
1402 {
1403     parts_uncanon(p, s, &float64_params);
1404     return float64_pack_raw(p);
1405 }
1406 
1407 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1408                                       float_status *s)
1409 {
1410     float128_unpack_raw(p, f);
1411     parts_canonicalize(p, s, &float128_params);
1412 }
1413 
1414 static float128 float128_round_pack_canonical(FloatParts128 *p,
1415                                               float_status *s)
1416 {
1417     parts_uncanon(p, s, &float128_params);
1418     return float128_pack_raw(p);
1419 }
1420 
1421 /*
1422  * Addition and subtraction
1423  */
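/*
 * Typical use from target code is along these lines (sketch; "env" stands
 * for whatever per-CPU state holds the target's float_status):
 *
 *     float_status *s = &env->fp_status;
 *     set_float_rounding_mode(float_round_nearest_even, s);
 *     float32 r = float32_add(arg1, arg2, s);
 *     if (get_float_exception_flags(s) & float_flag_overflow) {
 *         ... deliver the target's FP exception ...
 *     }
 */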
1424 
1425 static float16 QEMU_FLATTEN
1426 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1427 {
1428     FloatParts64 pa, pb, *pr;
1429 
1430     float16_unpack_canonical(&pa, a, status);
1431     float16_unpack_canonical(&pb, b, status);
1432     pr = parts_addsub(&pa, &pb, status, subtract);
1433 
1434     return float16_round_pack_canonical(pr, status);
1435 }
1436 
1437 float16 float16_add(float16 a, float16 b, float_status *status)
1438 {
1439     return float16_addsub(a, b, status, false);
1440 }
1441 
1442 float16 float16_sub(float16 a, float16 b, float_status *status)
1443 {
1444     return float16_addsub(a, b, status, true);
1445 }
1446 
1447 static float32 QEMU_SOFTFLOAT_ATTR
1448 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1449 {
1450     FloatParts64 pa, pb, *pr;
1451 
1452     float32_unpack_canonical(&pa, a, status);
1453     float32_unpack_canonical(&pb, b, status);
1454     pr = parts_addsub(&pa, &pb, status, subtract);
1455 
1456     return float32_round_pack_canonical(pr, status);
1457 }
1458 
1459 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1460 {
1461     return soft_f32_addsub(a, b, status, false);
1462 }
1463 
1464 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1465 {
1466     return soft_f32_addsub(a, b, status, true);
1467 }
1468 
1469 static float64 QEMU_SOFTFLOAT_ATTR
1470 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1471 {
1472     FloatParts64 pa, pb, *pr;
1473 
1474     float64_unpack_canonical(&pa, a, status);
1475     float64_unpack_canonical(&pb, b, status);
1476     pr = parts_addsub(&pa, &pb, status, subtract);
1477 
1478     return float64_round_pack_canonical(pr, status);
1479 }
1480 
1481 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1482 {
1483     return soft_f64_addsub(a, b, status, false);
1484 }
1485 
1486 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1487 {
1488     return soft_f64_addsub(a, b, status, true);
1489 }
1490 
1491 static float hard_f32_add(float a, float b)
1492 {
1493     return a + b;
1494 }
1495 
1496 static float hard_f32_sub(float a, float b)
1497 {
1498     return a - b;
1499 }
1500 
1501 static double hard_f64_add(double a, double b)
1502 {
1503     return a + b;
1504 }
1505 
1506 static double hard_f64_sub(double a, double b)
1507 {
1508     return a - b;
1509 }
1510 
1511 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1512 {
1513     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1514         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1515     }
1516     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1517 }
1518 
1519 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1520 {
1521     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1522         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1523     } else {
1524         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1525     }
1526 }
1527 
1528 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1529                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1530 {
1531     return float32_gen2(a, b, s, hard, soft,
1532                         f32_is_zon2, f32_addsubmul_post);
1533 }
1534 
1535 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1536                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1537 {
1538     return float64_gen2(a, b, s, hard, soft,
1539                         f64_is_zon2, f64_addsubmul_post);
1540 }
1541 
1542 float32 QEMU_FLATTEN
1543 float32_add(float32 a, float32 b, float_status *s)
1544 {
1545     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1546 }
1547 
1548 float32 QEMU_FLATTEN
1549 float32_sub(float32 a, float32 b, float_status *s)
1550 {
1551     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1552 }
1553 
1554 float64 QEMU_FLATTEN
1555 float64_add(float64 a, float64 b, float_status *s)
1556 {
1557     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1558 }
1559 
1560 float64 QEMU_FLATTEN
1561 float64_sub(float64 a, float64 b, float_status *s)
1562 {
1563     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1564 }
1565 
1566 static bfloat16 QEMU_FLATTEN
1567 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
1568 {
1569     FloatParts64 pa, pb, *pr;
1570 
1571     bfloat16_unpack_canonical(&pa, a, status);
1572     bfloat16_unpack_canonical(&pb, b, status);
1573     pr = parts_addsub(&pa, &pb, status, subtract);
1574 
1575     return bfloat16_round_pack_canonical(pr, status);
1576 }
1577 
1578 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1579 {
1580     return bfloat16_addsub(a, b, status, false);
1581 }
1582 
1583 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1584 {
1585     return bfloat16_addsub(a, b, status, true);
1586 }
1587 
1588 static float128 QEMU_FLATTEN
1589 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1590 {
1591     FloatParts128 pa, pb, *pr;
1592 
1593     float128_unpack_canonical(&pa, a, status);
1594     float128_unpack_canonical(&pb, b, status);
1595     pr = parts_addsub(&pa, &pb, status, subtract);
1596 
1597     return float128_round_pack_canonical(pr, status);
1598 }
1599 
1600 float128 float128_add(float128 a, float128 b, float_status *status)
1601 {
1602     return float128_addsub(a, b, status, false);
1603 }
1604 
1605 float128 float128_sub(float128 a, float128 b, float_status *status)
1606 {
1607     return float128_addsub(a, b, status, true);
1608 }
1609 
1610 /*
1611  * Multiplication
1612  */
1613 
1614 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
1615 {
1616     FloatParts64 pa, pb, *pr;
1617 
1618     float16_unpack_canonical(&pa, a, status);
1619     float16_unpack_canonical(&pb, b, status);
1620     pr = parts_mul(&pa, &pb, status);
1621 
1622     return float16_round_pack_canonical(pr, status);
1623 }
1624 
1625 static float32 QEMU_SOFTFLOAT_ATTR
1626 soft_f32_mul(float32 a, float32 b, float_status *status)
1627 {
1628     FloatParts64 pa, pb, *pr;
1629 
1630     float32_unpack_canonical(&pa, a, status);
1631     float32_unpack_canonical(&pb, b, status);
1632     pr = parts_mul(&pa, &pb, status);
1633 
1634     return float32_round_pack_canonical(pr, status);
1635 }
1636 
1637 static float64 QEMU_SOFTFLOAT_ATTR
1638 soft_f64_mul(float64 a, float64 b, float_status *status)
1639 {
1640     FloatParts64 pa, pb, *pr;
1641 
1642     float64_unpack_canonical(&pa, a, status);
1643     float64_unpack_canonical(&pb, b, status);
1644     pr = parts_mul(&pa, &pb, status);
1645 
1646     return float64_round_pack_canonical(pr, status);
1647 }
1648 
1649 static float hard_f32_mul(float a, float b)
1650 {
1651     return a * b;
1652 }
1653 
1654 static double hard_f64_mul(double a, double b)
1655 {
1656     return a * b;
1657 }
1658 
1659 float32 QEMU_FLATTEN
1660 float32_mul(float32 a, float32 b, float_status *s)
1661 {
1662     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
1663                         f32_is_zon2, f32_addsubmul_post);
1664 }
1665 
1666 float64 QEMU_FLATTEN
1667 float64_mul(float64 a, float64 b, float_status *s)
1668 {
1669     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
1670                         f64_is_zon2, f64_addsubmul_post);
1671 }
1672 
1673 bfloat16 QEMU_FLATTEN
1674 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1675 {
1676     FloatParts64 pa, pb, *pr;
1677 
1678     bfloat16_unpack_canonical(&pa, a, status);
1679     bfloat16_unpack_canonical(&pb, b, status);
1680     pr = parts_mul(&pa, &pb, status);
1681 
1682     return bfloat16_round_pack_canonical(pr, status);
1683 }
1684 
1685 float128 QEMU_FLATTEN
1686 float128_mul(float128 a, float128 b, float_status *status)
1687 {
1688     FloatParts128 pa, pb, *pr;
1689 
1690     float128_unpack_canonical(&pa, a, status);
1691     float128_unpack_canonical(&pb, b, status);
1692     pr = parts_mul(&pa, &pb, status);
1693 
1694     return float128_round_pack_canonical(pr, status);
1695 }
1696 
1697 /*
1698  * Fused multiply-add
1699  */
1700 
1701 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
1702                                     int flags, float_status *status)
1703 {
1704     FloatParts64 pa, pb, pc, *pr;
1705 
1706     float16_unpack_canonical(&pa, a, status);
1707     float16_unpack_canonical(&pb, b, status);
1708     float16_unpack_canonical(&pc, c, status);
1709     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1710 
1711     return float16_round_pack_canonical(pr, status);
1712 }
1713 
1714 static float32 QEMU_SOFTFLOAT_ATTR
1715 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1716                 float_status *status)
1717 {
1718     FloatParts64 pa, pb, pc, *pr;
1719 
1720     float32_unpack_canonical(&pa, a, status);
1721     float32_unpack_canonical(&pb, b, status);
1722     float32_unpack_canonical(&pc, c, status);
1723     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1724 
1725     return float32_round_pack_canonical(pr, status);
1726 }
1727 
1728 static float64 QEMU_SOFTFLOAT_ATTR
1729 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1730                 float_status *status)
1731 {
1732     FloatParts64 pa, pb, pc, *pr;
1733 
1734     float64_unpack_canonical(&pa, a, status);
1735     float64_unpack_canonical(&pb, b, status);
1736     float64_unpack_canonical(&pc, c, status);
1737     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1738 
1739     return float64_round_pack_canonical(pr, status);
1740 }
1741 
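/*
 * When set, the hardfloat fast paths below are skipped and fused
 * multiply-add always uses the softfloat implementation.
 */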
1742 static bool force_soft_fma;
1743 
1744 float32 QEMU_FLATTEN
1745 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1746 {
1747     union_float32 ua, ub, uc, ur;
1748 
1749     ua.s = xa;
1750     ub.s = xb;
1751     uc.s = xc;
1752 
1753     if (unlikely(!can_use_fpu(s))) {
1754         goto soft;
1755     }
1756     if (unlikely(flags & float_muladd_halve_result)) {
1757         goto soft;
1758     }
1759 
1760     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1761     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1762         goto soft;
1763     }
1764 
1765     if (unlikely(force_soft_fma)) {
1766         goto soft;
1767     }
1768 
1769     /*
1770      * When a == 0 or b == 0, there's no need to check for under/overflow,
1771      * since we know the addend is normal or zero and the product is exactly 0.
1772      */
1773     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1774         union_float32 up;
1775         bool prod_sign;
1776 
1777         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1778         prod_sign ^= !!(flags & float_muladd_negate_product);
1779         up.s = float32_set_sign(float32_zero, prod_sign);
1780 
1781         if (flags & float_muladd_negate_c) {
1782             uc.h = -uc.h;
1783         }
1784         ur.h = up.h + uc.h;
1785     } else {
1786         union_float32 ua_orig = ua;
1787         union_float32 uc_orig = uc;
1788 
1789         if (flags & float_muladd_negate_product) {
1790             ua.h = -ua.h;
1791         }
1792         if (flags & float_muladd_negate_c) {
1793             uc.h = -uc.h;
1794         }
1795 
1796         ur.h = fmaf(ua.h, ub.h, uc.h);
1797 
1798         if (unlikely(f32_is_inf(ur))) {
1799             float_raise(float_flag_overflow, s);
1800         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
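            /*
             * The result is tiny enough that it may be subnormal or may
             * have underflowed; redo the operation in softfloat, with
             * the original operands, so the status flags come out right.
             */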
1801             ua = ua_orig;
1802             uc = uc_orig;
1803             goto soft;
1804         }
1805     }
1806     if (flags & float_muladd_negate_result) {
1807         return float32_chs(ur.s);
1808     }
1809     return ur.s;
1810 
1811  soft:
1812     return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1813 }
1814 
1815 float64 QEMU_FLATTEN
1816 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1817 {
1818     union_float64 ua, ub, uc, ur;
1819 
1820     ua.s = xa;
1821     ub.s = xb;
1822     uc.s = xc;
1823 
1824     if (unlikely(!can_use_fpu(s))) {
1825         goto soft;
1826     }
1827     if (unlikely(flags & float_muladd_halve_result)) {
1828         goto soft;
1829     }
1830 
1831     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1832     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1833         goto soft;
1834     }
1835 
1836     if (unlikely(force_soft_fma)) {
1837         goto soft;
1838     }
1839 
1840     /*
1841      * When a == 0 or b == 0, there's no need to check for under/overflow,
1842      * since we know the addend is normal or zero and the product is exactly 0.
1843      */
1844     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1845         union_float64 up;
1846         bool prod_sign;
1847 
1848         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1849         prod_sign ^= !!(flags & float_muladd_negate_product);
1850         up.s = float64_set_sign(float64_zero, prod_sign);
1851 
1852         if (flags & float_muladd_negate_c) {
1853             uc.h = -uc.h;
1854         }
1855         ur.h = up.h + uc.h;
1856     } else {
1857         union_float64 ua_orig = ua;
1858         union_float64 uc_orig = uc;
1859 
1860         if (flags & float_muladd_negate_product) {
1861             ua.h = -ua.h;
1862         }
1863         if (flags & float_muladd_negate_c) {
1864             uc.h = -uc.h;
1865         }
1866 
1867         ur.h = fma(ua.h, ub.h, uc.h);
1868 
1869         if (unlikely(f64_is_inf(ur))) {
1870             float_raise(float_flag_overflow, s);
1871         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
1872             ua = ua_orig;
1873             uc = uc_orig;
1874             goto soft;
1875         }
1876     }
1877     if (flags & float_muladd_negate_result) {
1878         return float64_chs(ur.s);
1879     }
1880     return ur.s;
1881 
1882  soft:
1883     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1884 }
1885 
1886 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1887                                       int flags, float_status *status)
1888 {
1889     FloatParts64 pa, pb, pc, *pr;
1890 
1891     bfloat16_unpack_canonical(&pa, a, status);
1892     bfloat16_unpack_canonical(&pb, b, status);
1893     bfloat16_unpack_canonical(&pc, c, status);
1894     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1895 
1896     return bfloat16_round_pack_canonical(pr, status);
1897 }
1898 
1899 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
1900                                       int flags, float_status *status)
1901 {
1902     FloatParts128 pa, pb, pc, *pr;
1903 
1904     float128_unpack_canonical(&pa, a, status);
1905     float128_unpack_canonical(&pb, b, status);
1906     float128_unpack_canonical(&pc, c, status);
1907     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1908 
1909     return float128_round_pack_canonical(pr, status);
1910 }
1911 
1912 /*
1913  * Division
1914  */
1915 
1916 float16 float16_div(float16 a, float16 b, float_status *status)
1917 {
1918     FloatParts64 pa, pb, *pr;
1919 
1920     float16_unpack_canonical(&pa, a, status);
1921     float16_unpack_canonical(&pb, b, status);
1922     pr = parts_div(&pa, &pb, status);
1923 
1924     return float16_round_pack_canonical(pr, status);
1925 }
1926 
1927 static float32 QEMU_SOFTFLOAT_ATTR
1928 soft_f32_div(float32 a, float32 b, float_status *status)
1929 {
1930     FloatParts64 pa, pb, *pr;
1931 
1932     float32_unpack_canonical(&pa, a, status);
1933     float32_unpack_canonical(&pb, b, status);
1934     pr = parts_div(&pa, &pb, status);
1935 
1936     return float32_round_pack_canonical(pr, status);
1937 }
1938 
1939 static float64 QEMU_SOFTFLOAT_ATTR
1940 soft_f64_div(float64 a, float64 b, float_status *status)
1941 {
1942     FloatParts64 pa, pb, *pr;
1943 
1944     float64_unpack_canonical(&pa, a, status);
1945     float64_unpack_canonical(&pb, b, status);
1946     pr = parts_div(&pa, &pb, status);
1947 
1948     return float64_round_pack_canonical(pr, status);
1949 }
1950 
1951 static float hard_f32_div(float a, float b)
1952 {
1953     return a / b;
1954 }
1955 
1956 static double hard_f64_div(double a, double b)
1957 {
1958     return a / b;
1959 }
1960 
1961 static bool f32_div_pre(union_float32 a, union_float32 b)
1962 {
1963     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1964         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1965                fpclassify(b.h) == FP_NORMAL;
1966     }
1967     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1968 }
1969 
1970 static bool f64_div_pre(union_float64 a, union_float64 b)
1971 {
1972     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1973         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1974                fpclassify(b.h) == FP_NORMAL;
1975     }
1976     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1977 }
1978 
1979 static bool f32_div_post(union_float32 a, union_float32 b)
1980 {
1981     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1982         return fpclassify(a.h) != FP_ZERO;
1983     }
1984     return !float32_is_zero(a.s);
1985 }
1986 
1987 static bool f64_div_post(union_float64 a, union_float64 b)
1988 {
1989     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1990         return fpclassify(a.h) != FP_ZERO;
1991     }
1992     return !float64_is_zero(a.s);
1993 }
1994 
1995 float32 QEMU_FLATTEN
1996 float32_div(float32 a, float32 b, float_status *s)
1997 {
1998     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
1999                         f32_div_pre, f32_div_post);
2000 }
2001 
2002 float64 QEMU_FLATTEN
2003 float64_div(float64 a, float64 b, float_status *s)
2004 {
2005     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2006                         f64_div_pre, f64_div_post);
2007 }
2008 
2009 bfloat16 QEMU_FLATTEN
2010 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2011 {
2012     FloatParts64 pa, pb, *pr;
2013 
2014     bfloat16_unpack_canonical(&pa, a, status);
2015     bfloat16_unpack_canonical(&pb, b, status);
2016     pr = parts_div(&pa, &pb, status);
2017 
2018     return bfloat16_round_pack_canonical(pr, status);
2019 }
2020 
2021 float128 QEMU_FLATTEN
2022 float128_div(float128 a, float128 b, float_status *status)
2023 {
2024     FloatParts128 pa, pb, *pr;
2025 
2026     float128_unpack_canonical(&pa, a, status);
2027     float128_unpack_canonical(&pb, b, status);
2028     pr = parts_div(&pa, &pb, status);
2029 
2030     return float128_round_pack_canonical(pr, status);
2031 }
2032 
2033 /*
2034  * Float to Float conversions
2035  *
2036  * Returns the result of converting one float format to another. The
2037  * conversion is performed according to the IEC/IEEE Standard for
2038  * Binary Floating-Point Arithmetic.
2039  *
2040  * The float_to_float helper only needs to take care of raising invalid
2041  * exceptions and converting NaN inputs (plus Inf, for arm_althp targets).
2042  */
2043 
2044 static FloatParts64 float_to_float(FloatParts64 a, const FloatFmt *dstf,
2045                                  float_status *s)
2046 {
2047     if (dstf->arm_althp) {
2048         switch (a.cls) {
2049         case float_class_qnan:
2050         case float_class_snan:
2051             /* There is no NaN in the destination format.  Raise Invalid
2052              * and return a zero with the sign of the input NaN.
2053              */
2054             float_raise(float_flag_invalid, s);
2055             a.cls = float_class_zero;
2056             a.frac = 0;
2057             a.exp = 0;
2058             break;
2059 
2060         case float_class_inf:
2061             /* There is no Inf in the destination format.  Raise Invalid
2062              * and return the maximum normal with the correct sign.
2063              */
2064             float_raise(float_flag_invalid, s);
2065             a.cls = float_class_normal;
2066             a.exp = dstf->exp_max;
2067             a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
2068             break;
2069 
2070         default:
2071             break;
2072         }
2073     } else if (is_nan(a.cls)) {
2074         parts_return_nan(&a, s);
2075     }
2076     return a;
2077 }
2078 
2079 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2080 {
2081     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2082     FloatParts64 pa, pr;
2083 
2084     float16a_unpack_canonical(&pa, a, s, fmt16);
2085     pr = float_to_float(pa, &float32_params, s);
2086     return float32_round_pack_canonical(&pr, s);
2087 }
2088 
2089 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2090 {
2091     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2092     FloatParts64 pa, pr;
2093 
2094     float16a_unpack_canonical(&pa, a, s, fmt16);
2095     pr = float_to_float(pa, &float64_params, s);
2096     return float64_round_pack_canonical(&pr, s);
2097 }
2098 
2099 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2100 {
2101     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2102     FloatParts64 pa, pr;
2103 
2104     float32_unpack_canonical(&pa, a, s);
2105     pr = float_to_float(pa, fmt16, s);
2106     return float16a_round_pack_canonical(&pr, s, fmt16);
2107 }
2108 
2109 static float64 QEMU_SOFTFLOAT_ATTR
2110 soft_float32_to_float64(float32 a, float_status *s)
2111 {
2112     FloatParts64 pa, pr;
2113 
2114     float32_unpack_canonical(&pa, a, s);
2115     pr = float_to_float(pa, &float64_params, s);
2116     return float64_round_pack_canonical(&pr, s);
2117 }
2118 
2119 float64 float32_to_float64(float32 a, float_status *s)
2120 {
2121     if (likely(float32_is_normal(a))) {
2122         /* Widening conversion can never produce inexact results.  */
2123         union_float32 uf;
2124         union_float64 ud;
2125         uf.s = a;
2126         ud.h = uf.h;
2127         return ud.s;
2128     } else if (float32_is_zero(a)) {
2129         return float64_set_sign(float64_zero, float32_is_neg(a));
2130     } else {
2131         return soft_float32_to_float64(a, s);
2132     }
2133 }
2134 
2135 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2136 {
2137     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2138     FloatParts64 pa, pr;
2139 
2140     float64_unpack_canonical(&pa, a, s);
2141     pr = float_to_float(pa, fmt16, s);
2142     return float16a_round_pack_canonical(&pr, s, fmt16);
2143 }
2144 
2145 float32 float64_to_float32(float64 a, float_status *s)
2146 {
2147     FloatParts64 pa, pr;
2148 
2149     float64_unpack_canonical(&pa, a, s);
2150     pr = float_to_float(pa, &float32_params, s);
2151     return float32_round_pack_canonical(&pr, s);
2152 }
2153 
2154 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2155 {
2156     FloatParts64 pa, pr;
2157 
2158     bfloat16_unpack_canonical(&pa, a, s);
2159     pr = float_to_float(pa, &float32_params, s);
2160     return float32_round_pack_canonical(&pr, s);
2161 }
2162 
2163 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2164 {
2165     FloatParts64 pa, pr;
2166 
2167     bfloat16_unpack_canonical(&pa, a, s);
2168     pr = float_to_float(pa, &float64_params, s);
2169     return float64_round_pack_canonical(&pr, s);
2170 }
2171 
2172 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2173 {
2174     FloatParts64 pa, pr;
2175 
2176     float32_unpack_canonical(&pa, a, s);
2177     pr = float_to_float(pa, &bfloat16_params, s);
2178     return bfloat16_round_pack_canonical(&pr, s);
2179 }
2180 
2181 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2182 {
2183     FloatParts64 pa, pr;
2184 
2185     float64_unpack_canonical(&pa, a, s);
2186     pr = float_to_float(pa, &bfloat16_params, s);
2187     return bfloat16_round_pack_canonical(&pr, s);
2188 }
2189 
2190 /*
2191  * Rounds the floating-point value `a' to an integer, and returns the
2192  * result as a floating-point value. The operation is performed
2193  * according to the IEC/IEEE Standard for Binary Floating-Point
2194  * Arithmetic.
2195  */
2196 
2197 static FloatParts64 round_to_int(FloatParts64 a, FloatRoundMode rmode,
2198                                int scale, float_status *s)
2199 {
2200     switch (a.cls) {
2201     case float_class_qnan:
2202     case float_class_snan:
2203         parts_return_nan(&a, s);
2204         break;
2205 
2206     case float_class_zero:
2207     case float_class_inf:
2208         /* already "integral" */
2209         break;
2210 
2211     case float_class_normal:
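        /*
         * The operation rounds a * 2^scale; clamping scale keeps the
         * exponent arithmetic well away from integer overflow without
         * changing the result for extreme scale values.
         */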
2212         scale = MIN(MAX(scale, -0x10000), 0x10000);
2213         a.exp += scale;
2214 
2215         if (a.exp >= DECOMPOSED_BINARY_POINT) {
2216             /* already integral */
2217             break;
2218         }
2219         if (a.exp < 0) {
2220             bool one;
2221             /* all fractional */
2222             float_raise(float_flag_inexact, s);
2223             switch (rmode) {
2224             case float_round_nearest_even:
2225                 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
2226                 break;
2227             case float_round_ties_away:
2228                 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
2229                 break;
2230             case float_round_to_zero:
2231                 one = false;
2232                 break;
2233             case float_round_up:
2234                 one = !a.sign;
2235                 break;
2236             case float_round_down:
2237                 one = a.sign;
2238                 break;
2239             case float_round_to_odd:
2240                 one = true;
2241                 break;
2242             default:
2243                 g_assert_not_reached();
2244             }
2245 
2246             if (one) {
2247                 a.frac = DECOMPOSED_IMPLICIT_BIT;
2248                 a.exp = 0;
2249             } else {
2250                 a.cls = float_class_zero;
2251             }
2252         } else {
2253             uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
2254             uint64_t frac_lsbm1 = frac_lsb >> 1;
2255             uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
2256             uint64_t rnd_mask = rnd_even_mask >> 1;
2257             uint64_t inc;
2258 
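            /*
             * With the implicit bit at DECOMPOSED_BINARY_POINT, frac_lsb
             * is the bit of frac with weight 1 for this exponent and
             * frac_lsbm1 has weight 1/2; rnd_mask covers every bit with
             * weight < 1.  Rounding adds `inc' and then clears the
             * fractional bits.
             */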
2259             switch (rmode) {
2260             case float_round_nearest_even:
2261                 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
2262                 break;
2263             case float_round_ties_away:
2264                 inc = frac_lsbm1;
2265                 break;
2266             case float_round_to_zero:
2267                 inc = 0;
2268                 break;
2269             case float_round_up:
2270                 inc = a.sign ? 0 : rnd_mask;
2271                 break;
2272             case float_round_down:
2273                 inc = a.sign ? rnd_mask : 0;
2274                 break;
2275             case float_round_to_odd:
2276                 inc = a.frac & frac_lsb ? 0 : rnd_mask;
2277                 break;
2278             default:
2279                 g_assert_not_reached();
2280             }
2281 
2282             if (a.frac & rnd_mask) {
2283                 float_raise(float_flag_inexact, s);
2284                 if (uadd64_overflow(a.frac, inc, &a.frac)) {
2285                     a.frac >>= 1;
2286                     a.frac |= DECOMPOSED_IMPLICIT_BIT;
2287                     a.exp++;
2288                 }
2289                 a.frac &= ~rnd_mask;
2290             }
2291         }
2292         break;
2293     default:
2294         g_assert_not_reached();
2295     }
2296     return a;
2297 }
2298 
2299 float16 float16_round_to_int(float16 a, float_status *s)
2300 {
2301     FloatParts64 pa, pr;
2302 
2303     float16_unpack_canonical(&pa, a, s);
2304     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2305     return float16_round_pack_canonical(&pr, s);
2306 }
2307 
2308 float32 float32_round_to_int(float32 a, float_status *s)
2309 {
2310     FloatParts64 pa, pr;
2311 
2312     float32_unpack_canonical(&pa, a, s);
2313     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2314     return float32_round_pack_canonical(&pr, s);
2315 }
2316 
2317 float64 float64_round_to_int(float64 a, float_status *s)
2318 {
2319     FloatParts64 pa, pr;
2320 
2321     float64_unpack_canonical(&pa, a, s);
2322     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2323     return float64_round_pack_canonical(&pr, s);
2324 }
2325 
2326 /*
2327  * Rounds the bfloat16 value `a' to an integer, and returns the
2328  * result as a bfloat16 value.
2329  */
2330 
2331 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2332 {
2333     FloatParts64 pa, pr;
2334 
2335     bfloat16_unpack_canonical(&pa, a, s);
2336     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2337     return bfloat16_round_pack_canonical(&pr, s);
2338 }
2339 
2340 /*
2341  * Returns the result of converting the floating-point value `a' to
2342  * the two's complement integer format. The conversion is performed
2343  * according to the IEC/IEEE Standard for Binary Floating-Point
2344  * Arithmetic---which means in particular that the conversion is
2345  * rounded according to the current rounding mode. If `a' is a NaN,
2346  * the largest positive integer is returned. Otherwise, if the
2347  * conversion overflows, the largest integer with the same sign as `a'
2348  * is returned.
2349  */
2350 
2351 static int64_t round_to_int_and_pack(FloatParts64 in, FloatRoundMode rmode,
2352                                      int scale, int64_t min, int64_t max,
2353                                      float_status *s)
2354 {
2355     uint64_t r;
2356     int orig_flags = get_float_exception_flags(s);
2357     FloatParts64 p = round_to_int(in, rmode, scale, s);
2358 
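    /*
     * On NaN, infinite or out-of-range inputs the flags are rewound to
     * orig_flags, so anything round_to_int() raised (e.g. inexact) is
     * discarded and only float_flag_invalid is reported.
     */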
2359     switch (p.cls) {
2360     case float_class_snan:
2361     case float_class_qnan:
2362         s->float_exception_flags = orig_flags | float_flag_invalid;
2363         return max;
2364     case float_class_inf:
2365         s->float_exception_flags = orig_flags | float_flag_invalid;
2366         return p.sign ? min : max;
2367     case float_class_zero:
2368         return 0;
2369     case float_class_normal:
2370         if (p.exp <= DECOMPOSED_BINARY_POINT) {
2371             r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2372         } else {
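            /*
             * p.exp is too large for the fraction to be shifted into a
             * 64-bit integer: saturate r so that the range checks below
             * raise invalid and return min or max.
             */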
2373             r = UINT64_MAX;
2374         }
2375         if (p.sign) {
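            /*
             * -(uint64_t)min is the magnitude of the most negative
             * representable value; the comparison and negation are done
             * in uint64_t to avoid signed overflow.
             */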
2376             if (r <= -(uint64_t) min) {
2377                 return -r;
2378             } else {
2379                 s->float_exception_flags = orig_flags | float_flag_invalid;
2380                 return min;
2381             }
2382         } else {
2383             if (r <= max) {
2384                 return r;
2385             } else {
2386                 s->float_exception_flags = orig_flags | float_flag_invalid;
2387                 return max;
2388             }
2389         }
2390     default:
2391         g_assert_not_reached();
2392     }
2393 }
2394 
2395 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2396                               float_status *s)
2397 {
2398     FloatParts64 p;
2399 
2400     float16_unpack_canonical(&p, a, s);
2401     return round_to_int_and_pack(p, rmode, scale, INT8_MIN, INT8_MAX, s);
2402 }
2403 
2404 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2405                                 float_status *s)
2406 {
2407     FloatParts64 p;
2408 
2409     float16_unpack_canonical(&p, a, s);
2410     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2411 }
2412 
2413 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2414                                 float_status *s)
2415 {
2416     FloatParts64 p;
2417 
2418     float16_unpack_canonical(&p, a, s);
2419     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2420 }
2421 
2422 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2423                                 float_status *s)
2424 {
2425     FloatParts64 p;
2426 
2427     float16_unpack_canonical(&p, a, s);
2428     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2429 }
2430 
2431 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2432                                 float_status *s)
2433 {
2434     FloatParts64 p;
2435 
2436     float32_unpack_canonical(&p, a, s);
2437     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2438 }
2439 
2440 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2441                                 float_status *s)
2442 {
2443     FloatParts64 p;
2444 
2445     float32_unpack_canonical(&p, a, s);
2446     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2447 }
2448 
2449 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2450                                 float_status *s)
2451 {
2452     FloatParts64 p;
2453 
2454     float32_unpack_canonical(&p, a, s);
2455     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2456 }
2457 
2458 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2459                                 float_status *s)
2460 {
2461     FloatParts64 p;
2462 
2463     float64_unpack_canonical(&p, a, s);
2464     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2465 }
2466 
2467 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2468                                 float_status *s)
2469 {
2470     FloatParts64 p;
2471 
2472     float64_unpack_canonical(&p, a, s);
2473     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2474 }
2475 
2476 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2477                                 float_status *s)
2478 {
2479     FloatParts64 p;
2480 
2481     float64_unpack_canonical(&p, a, s);
2482     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2483 }
2484 
2485 int8_t float16_to_int8(float16 a, float_status *s)
2486 {
2487     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2488 }
2489 
2490 int16_t float16_to_int16(float16 a, float_status *s)
2491 {
2492     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2493 }
2494 
2495 int32_t float16_to_int32(float16 a, float_status *s)
2496 {
2497     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2498 }
2499 
2500 int64_t float16_to_int64(float16 a, float_status *s)
2501 {
2502     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2503 }
2504 
2505 int16_t float32_to_int16(float32 a, float_status *s)
2506 {
2507     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2508 }
2509 
2510 int32_t float32_to_int32(float32 a, float_status *s)
2511 {
2512     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2513 }
2514 
2515 int64_t float32_to_int64(float32 a, float_status *s)
2516 {
2517     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2518 }
2519 
2520 int16_t float64_to_int16(float64 a, float_status *s)
2521 {
2522     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2523 }
2524 
2525 int32_t float64_to_int32(float64 a, float_status *s)
2526 {
2527     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2528 }
2529 
2530 int64_t float64_to_int64(float64 a, float_status *s)
2531 {
2532     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2533 }
2534 
2535 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2536 {
2537     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2538 }
2539 
2540 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2541 {
2542     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2543 }
2544 
2545 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2546 {
2547     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2548 }
2549 
2550 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2551 {
2552     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2553 }
2554 
2555 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2556 {
2557     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2558 }
2559 
2560 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2561 {
2562     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2563 }
2564 
2565 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2566 {
2567     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2568 }
2569 
2570 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2571 {
2572     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2573 }
2574 
2575 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2576 {
2577     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2578 }
2579 
2580 /*
2581  * Returns the result of converting the floating-point value `a' to
2582  * the two's complement integer format.
2583  */
2584 
2585 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2586                                  float_status *s)
2587 {
2588     FloatParts64 p;
2589 
2590     bfloat16_unpack_canonical(&p, a, s);
2591     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2592 }
2593 
2594 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2595                                  float_status *s)
2596 {
2597     FloatParts64 p;
2598 
2599     bfloat16_unpack_canonical(&p, a, s);
2600     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2601 }
2602 
2603 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2604                                  float_status *s)
2605 {
2606     FloatParts64 p;
2607 
2608     bfloat16_unpack_canonical(&p, a, s);
2609     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2610 }
2611 
2612 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2613 {
2614     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2615 }
2616 
2617 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2618 {
2619     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2620 }
2621 
2622 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2623 {
2624     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2625 }
2626 
2627 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2628 {
2629     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2630 }
2631 
2632 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2633 {
2634     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2635 }
2636 
2637 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2638 {
2639     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2640 }
2641 
2642 /*
2643  *  Returns the result of converting the floating-point value `a' to
2644  *  the unsigned integer format. The conversion is performed according
2645  *  to the IEC/IEEE Standard for Binary Floating-Point
2646  *  Arithmetic---which means in particular that the conversion is
2647  *  rounded according to the current rounding mode. If `a' is a NaN,
2648  *  the largest unsigned integer is returned. Otherwise, if the
2649  *  conversion overflows, the largest unsigned integer is returned. If
2650  *  `a' is negative, zero is returned; negative values that round to a
2651  *  nonzero integer raise the invalid exception flag, while those that
2652  *  round to zero raise at most the inexact exception flag.
2653  */
2654 
2655 static uint64_t round_to_uint_and_pack(FloatParts64 in, FloatRoundMode rmode,
2656                                        int scale, uint64_t max,
2657                                        float_status *s)
2658 {
2659     int orig_flags = get_float_exception_flags(s);
2660     FloatParts64 p = round_to_int(in, rmode, scale, s);
2661     uint64_t r;
2662 
2663     switch (p.cls) {
2664     case float_class_snan:
2665     case float_class_qnan:
2666         s->float_exception_flags = orig_flags | float_flag_invalid;
2667         return max;
2668     case float_class_inf:
2669         s->float_exception_flags = orig_flags | float_flag_invalid;
2670         return p.sign ? 0 : max;
2671     case float_class_zero:
2672         return 0;
2673     case float_class_normal:
2674         if (p.sign) {
2675             s->float_exception_flags = orig_flags | float_flag_invalid;
2676             return 0;
2677         }
2678 
2679         if (p.exp <= DECOMPOSED_BINARY_POINT) {
2680             r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2681         } else {
2682             s->float_exception_flags = orig_flags | float_flag_invalid;
2683             return max;
2684         }
2685 
2686         /* For max == UINT64_MAX this can never trip: if p.exp were too
2687          * large to shift the decomposed fraction into range, we would
2688          * already have returned max in the branch above.
2689          */
2690         if (r > max) {
2691             s->float_exception_flags = orig_flags | float_flag_invalid;
2692             return max;
2693         }
2694         return r;
2695     default:
2696         g_assert_not_reached();
2697     }
2698 }
2699 
2700 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2701                                 float_status *s)
2702 {
2703     FloatParts64 p;
2704 
2705     float16_unpack_canonical(&p, a, s);
2706     return round_to_uint_and_pack(p, rmode, scale, UINT8_MAX, s);
2707 }
2708 
2709 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2710                                   float_status *s)
2711 {
2712     FloatParts64 p;
2713 
2714     float16_unpack_canonical(&p, a, s);
2715     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2716 }
2717 
2718 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2719                                   float_status *s)
2720 {
2721     FloatParts64 p;
2722 
2723     float16_unpack_canonical(&p, a, s);
2724     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2725 }
2726 
2727 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2728                                   float_status *s)
2729 {
2730     FloatParts64 p;
2731 
2732     float16_unpack_canonical(&p, a, s);
2733     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2734 }
2735 
2736 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2737                                   float_status *s)
2738 {
2739     FloatParts64 p;
2740 
2741     float32_unpack_canonical(&p, a, s);
2742     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2743 }
2744 
2745 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2746                                   float_status *s)
2747 {
2748     FloatParts64 p;
2749 
2750     float32_unpack_canonical(&p, a, s);
2751     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2752 }
2753 
2754 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2755                                   float_status *s)
2756 {
2757     FloatParts64 p;
2758 
2759     float32_unpack_canonical(&p, a, s);
2760     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2761 }
2762 
2763 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2764                                   float_status *s)
2765 {
2766     FloatParts64 p;
2767 
2768     float64_unpack_canonical(&p, a, s);
2769     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2770 }
2771 
2772 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2773                                   float_status *s)
2774 {
2775     FloatParts64 p;
2776 
2777     float64_unpack_canonical(&p, a, s);
2778     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2779 }
2780 
2781 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2782                                   float_status *s)
2783 {
2784     FloatParts64 p;
2785 
2786     float64_unpack_canonical(&p, a, s);
2787     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2788 }
2789 
2790 uint8_t float16_to_uint8(float16 a, float_status *s)
2791 {
2792     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
2793 }
2794 
2795 uint16_t float16_to_uint16(float16 a, float_status *s)
2796 {
2797     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2798 }
2799 
2800 uint32_t float16_to_uint32(float16 a, float_status *s)
2801 {
2802     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2803 }
2804 
2805 uint64_t float16_to_uint64(float16 a, float_status *s)
2806 {
2807     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2808 }
2809 
2810 uint16_t float32_to_uint16(float32 a, float_status *s)
2811 {
2812     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2813 }
2814 
2815 uint32_t float32_to_uint32(float32 a, float_status *s)
2816 {
2817     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2818 }
2819 
2820 uint64_t float32_to_uint64(float32 a, float_status *s)
2821 {
2822     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2823 }
2824 
2825 uint16_t float64_to_uint16(float64 a, float_status *s)
2826 {
2827     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2828 }
2829 
2830 uint32_t float64_to_uint32(float64 a, float_status *s)
2831 {
2832     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2833 }
2834 
2835 uint64_t float64_to_uint64(float64 a, float_status *s)
2836 {
2837     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2838 }
2839 
2840 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2841 {
2842     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2843 }
2844 
2845 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2846 {
2847     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2848 }
2849 
2850 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2851 {
2852     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2853 }
2854 
2855 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2856 {
2857     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2858 }
2859 
2860 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2861 {
2862     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2863 }
2864 
2865 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2866 {
2867     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2868 }
2869 
2870 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2871 {
2872     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2873 }
2874 
2875 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2876 {
2877     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2878 }
2879 
2880 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2881 {
2882     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2883 }
2884 
2885 /*
2886  *  Returns the result of converting the bfloat16 value `a' to
2887  *  the unsigned integer format.
2888  */
2889 
2890 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2891                                    int scale, float_status *s)
2892 {
2893     FloatParts64 p;
2894 
2895     bfloat16_unpack_canonical(&p, a, s);
2896     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2897 }
2898 
2899 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
2900                                    int scale, float_status *s)
2901 {
2902     FloatParts64 p;
2903 
2904     bfloat16_unpack_canonical(&p, a, s);
2905     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2906 }
2907 
2908 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
2909                                    int scale, float_status *s)
2910 {
2911     FloatParts64 p;
2912 
2913     bfloat16_unpack_canonical(&p, a, s);
2914     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2915 }
2916 
2917 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
2918 {
2919     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2920 }
2921 
2922 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
2923 {
2924     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2925 }
2926 
2927 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
2928 {
2929     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2930 }
2931 
2932 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
2933 {
2934     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2935 }
2936 
2937 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
2938 {
2939     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2940 }
2941 
2942 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
2943 {
2944     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2945 }
2946 
2947 /*
2948  * Integer to float conversions
2949  *
2950  * Returns the result of converting the two's complement integer `a'
2951  * to the floating-point format. The conversion is performed according
2952  * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2953  */
2954 
2955 static FloatParts64 int_to_float(int64_t a, int scale, float_status *status)
2956 {
2957     FloatParts64 r = { .sign = false };
2958 
2959     if (a == 0) {
2960         r.cls = float_class_zero;
2961     } else {
2962         uint64_t f = a;
2963         int shift;
2964 
2965         r.cls = float_class_normal;
2966         if (a < 0) {
2967             f = -f;
2968             r.sign = true;
2969         }
2970         shift = clz64(f);
2971         scale = MIN(MAX(scale, -0x10000), 0x10000);
2972 
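        /*
         * Shift the most significant set bit of f up to
         * DECOMPOSED_IMPLICIT_BIT and compensate in the exponent, so the
         * value represented is still f * 2^scale.
         */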
2973         r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2974         r.frac = f << shift;
2975     }
2976 
2977     return r;
2978 }
2979 
2980 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
2981 {
2982     FloatParts64 pa = int_to_float(a, scale, status);
2983     return float16_round_pack_canonical(&pa, status);
2984 }
2985 
2986 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
2987 {
2988     return int64_to_float16_scalbn(a, scale, status);
2989 }
2990 
2991 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
2992 {
2993     return int64_to_float16_scalbn(a, scale, status);
2994 }
2995 
2996 float16 int64_to_float16(int64_t a, float_status *status)
2997 {
2998     return int64_to_float16_scalbn(a, 0, status);
2999 }
3000 
3001 float16 int32_to_float16(int32_t a, float_status *status)
3002 {
3003     return int64_to_float16_scalbn(a, 0, status);
3004 }
3005 
3006 float16 int16_to_float16(int16_t a, float_status *status)
3007 {
3008     return int64_to_float16_scalbn(a, 0, status);
3009 }
3010 
3011 float16 int8_to_float16(int8_t a, float_status *status)
3012 {
3013     return int64_to_float16_scalbn(a, 0, status);
3014 }
3015 
3016 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3017 {
3018     FloatParts64 pa = int_to_float(a, scale, status);
3019     return float32_round_pack_canonical(&pa, status);
3020 }
3021 
3022 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3023 {
3024     return int64_to_float32_scalbn(a, scale, status);
3025 }
3026 
3027 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3028 {
3029     return int64_to_float32_scalbn(a, scale, status);
3030 }
3031 
3032 float32 int64_to_float32(int64_t a, float_status *status)
3033 {
3034     return int64_to_float32_scalbn(a, 0, status);
3035 }
3036 
3037 float32 int32_to_float32(int32_t a, float_status *status)
3038 {
3039     return int64_to_float32_scalbn(a, 0, status);
3040 }
3041 
3042 float32 int16_to_float32(int16_t a, float_status *status)
3043 {
3044     return int64_to_float32_scalbn(a, 0, status);
3045 }
3046 
3047 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3048 {
3049     FloatParts64 pa = int_to_float(a, scale, status);
3050     return float64_round_pack_canonical(&pa, status);
3051 }
3052 
3053 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3054 {
3055     return int64_to_float64_scalbn(a, scale, status);
3056 }
3057 
3058 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3059 {
3060     return int64_to_float64_scalbn(a, scale, status);
3061 }
3062 
3063 float64 int64_to_float64(int64_t a, float_status *status)
3064 {
3065     return int64_to_float64_scalbn(a, 0, status);
3066 }
3067 
3068 float64 int32_to_float64(int32_t a, float_status *status)
3069 {
3070     return int64_to_float64_scalbn(a, 0, status);
3071 }
3072 
3073 float64 int16_to_float64(int16_t a, float_status *status)
3074 {
3075     return int64_to_float64_scalbn(a, 0, status);
3076 }
3077 
3078 /*
3079  * Returns the result of converting the two's complement integer `a'
3080  * to the bfloat16 format.
3081  */
3082 
3083 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3084 {
3085     FloatParts64 pa = int_to_float(a, scale, status);
3086     return bfloat16_round_pack_canonical(&pa, status);
3087 }
3088 
3089 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3090 {
3091     return int64_to_bfloat16_scalbn(a, scale, status);
3092 }
3093 
3094 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3095 {
3096     return int64_to_bfloat16_scalbn(a, scale, status);
3097 }
3098 
3099 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3100 {
3101     return int64_to_bfloat16_scalbn(a, 0, status);
3102 }
3103 
3104 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3105 {
3106     return int64_to_bfloat16_scalbn(a, 0, status);
3107 }
3108 
3109 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3110 {
3111     return int64_to_bfloat16_scalbn(a, 0, status);
3112 }
3113 
3114 /*
3115  * Unsigned Integer to float conversions
3116  *
3117  * Returns the result of converting the unsigned integer `a' to the
3118  * floating-point format. The conversion is performed according to the
3119  * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3120  */
3121 
3122 static FloatParts64 uint_to_float(uint64_t a, int scale, float_status *status)
3123 {
3124     FloatParts64 r = { .sign = false };
3125     int shift;
3126 
3127     if (a == 0) {
3128         r.cls = float_class_zero;
3129     } else {
3130         scale = MIN(MAX(scale, -0x10000), 0x10000);
3131         shift = clz64(a);
3132         r.cls = float_class_normal;
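        /*
         * As in int_to_float(): normalise so that the top set bit
         * becomes the implicit bit, adjusting the exponent to match.
         */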
3133         r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
3134         r.frac = a << shift;
3135     }
3136 
3137     return r;
3138 }
3139 
3140 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3141 {
3142     FloatParts64 pa = uint_to_float(a, scale, status);
3143     return float16_round_pack_canonical(&pa, status);
3144 }
3145 
3146 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3147 {
3148     return uint64_to_float16_scalbn(a, scale, status);
3149 }
3150 
3151 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3152 {
3153     return uint64_to_float16_scalbn(a, scale, status);
3154 }
3155 
3156 float16 uint64_to_float16(uint64_t a, float_status *status)
3157 {
3158     return uint64_to_float16_scalbn(a, 0, status);
3159 }
3160 
3161 float16 uint32_to_float16(uint32_t a, float_status *status)
3162 {
3163     return uint64_to_float16_scalbn(a, 0, status);
3164 }
3165 
3166 float16 uint16_to_float16(uint16_t a, float_status *status)
3167 {
3168     return uint64_to_float16_scalbn(a, 0, status);
3169 }
3170 
3171 float16 uint8_to_float16(uint8_t a, float_status *status)
3172 {
3173     return uint64_to_float16_scalbn(a, 0, status);
3174 }
3175 
3176 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
3177 {
3178     FloatParts64 pa = uint_to_float(a, scale, status);
3179     return float32_round_pack_canonical(&pa, status);
3180 }
3181 
3182 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3183 {
3184     return uint64_to_float32_scalbn(a, scale, status);
3185 }
3186 
3187 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3188 {
3189     return uint64_to_float32_scalbn(a, scale, status);
3190 }
3191 
3192 float32 uint64_to_float32(uint64_t a, float_status *status)
3193 {
3194     return uint64_to_float32_scalbn(a, 0, status);
3195 }
3196 
3197 float32 uint32_to_float32(uint32_t a, float_status *status)
3198 {
3199     return uint64_to_float32_scalbn(a, 0, status);
3200 }
3201 
3202 float32 uint16_to_float32(uint16_t a, float_status *status)
3203 {
3204     return uint64_to_float32_scalbn(a, 0, status);
3205 }
3206 
3207 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3208 {
3209     FloatParts64 pa = uint_to_float(a, scale, status);
3210     return float64_round_pack_canonical(&pa, status);
3211 }
3212 
3213 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3214 {
3215     return uint64_to_float64_scalbn(a, scale, status);
3216 }
3217 
3218 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3219 {
3220     return uint64_to_float64_scalbn(a, scale, status);
3221 }
3222 
3223 float64 uint64_to_float64(uint64_t a, float_status *status)
3224 {
3225     return uint64_to_float64_scalbn(a, 0, status);
3226 }
3227 
3228 float64 uint32_to_float64(uint32_t a, float_status *status)
3229 {
3230     return uint64_to_float64_scalbn(a, 0, status);
3231 }
3232 
3233 float64 uint16_to_float64(uint16_t a, float_status *status)
3234 {
3235     return uint64_to_float64_scalbn(a, 0, status);
3236 }
3237 
3238 /*
3239  * Returns the result of converting the unsigned integer `a' to the
3240  * bfloat16 format.
3241  */
3242 
3243 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3244 {
3245     FloatParts64 pa = uint_to_float(a, scale, status);
3246     return bfloat16_round_pack_canonical(&pa, status);
3247 }
3248 
3249 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3250 {
3251     return uint64_to_bfloat16_scalbn(a, scale, status);
3252 }
3253 
3254 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3255 {
3256     return uint64_to_bfloat16_scalbn(a, scale, status);
3257 }
3258 
3259 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3260 {
3261     return uint64_to_bfloat16_scalbn(a, 0, status);
3262 }
3263 
3264 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3265 {
3266     return uint64_to_bfloat16_scalbn(a, 0, status);
3267 }
3268 
3269 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3270 {
3271     return uint64_to_bfloat16_scalbn(a, 0, status);
3272 }
3273 
3274 /* Float Min/Max */
3275 /* min() and max() functions. These can't be implemented as
3276  * 'compare and pick one input' because that would mishandle
3277  * NaNs and +0 vs -0.
3278  *
3279  * minnum() and maxnum() functions. These are similar to the min()
3280  * and max() functions but if one of the arguments is a QNaN and
3281  * the other is numerical then the numerical argument is returned.
3282  * SNaNs will get quietened before being returned.
3283  * minnum() and maxnum() correspond to the IEEE 754-2008 minNum()
3284  * and maxNum() operations. min() and max() are the typical min/max
3285  * semantics provided by many CPUs which predate that specification.
3286  *
3287  * minnummag() and maxnummag() functions correspond to minNumMag()
3288  * and maxNumMag() from IEEE 754-2008.
3289  */
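/*
 * For example: min(-0.0, +0.0) is -0.0 and max(-0.0, +0.0) is +0.0;
 * minnum(QNaN, 1.0) is 1.0, whereas min(QNaN, 1.0) returns a NaN;
 * minnummag(-3.0, 2.0) is 2.0, the value of smaller magnitude.
 */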
3290 static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
3291                                 bool ieee, bool ismag, float_status *s)
3292 {
3293     if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
3294         if (ieee) {
3295             /* Takes two floating-point values `a' and `b', one of
3296              * which is a NaN, and returns the appropriate NaN
3297              * result. If either `a' or `b' is a signaling NaN,
3298              * the invalid exception is raised.
3299              */
3300             if (is_snan(a.cls) || is_snan(b.cls)) {
3301                 return *parts_pick_nan(&a, &b, s);
3302             } else if (is_nan(a.cls) && !is_nan(b.cls)) {
3303                 return b;
3304             } else if (is_nan(b.cls) && !is_nan(a.cls)) {
3305                 return a;
3306             }
3307         }
3308         return *parts_pick_nan(&a, &b, s);
3309     } else {
3310         int a_exp, b_exp;
3311 
3312         switch (a.cls) {
3313         case float_class_normal:
3314             a_exp = a.exp;
3315             break;
3316         case float_class_inf:
3317             a_exp = INT_MAX;
3318             break;
3319         case float_class_zero:
3320             a_exp = INT_MIN;
3321             break;
3322         default:
3323             g_assert_not_reached();
3324             break;
3325         }
3326         switch (b.cls) {
3327         case float_class_normal:
3328             b_exp = b.exp;
3329             break;
3330         case float_class_inf:
3331             b_exp = INT_MAX;
3332             break;
3333         case float_class_zero:
3334             b_exp = INT_MIN;
3335             break;
3336         default:
3337             g_assert_not_reached();
3338             break;
3339         }
3340 
3341         if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
3342             bool a_less = a_exp < b_exp;
3343             if (a_exp == b_exp) {
3344                 a_less = a.frac < b.frac;
3345             }
3346             return a_less ^ ismin ? b : a;
3347         }
3348 
3349         if (a.sign == b.sign) {
3350             bool a_less = a_exp < b_exp;
3351             if (a_exp == b_exp) {
3352                 a_less = a.frac < b.frac;
3353             }
3354             return a.sign ^ a_less ^ ismin ? b : a;
3355         } else {
3356             return a.sign ^ ismin ? b : a;
3357         }
3358     }
3359 }
3360 
3361 #define MINMAX(sz, name, ismin, isieee, ismag)                          \
3362 float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b,      \
3363                                      float_status *s)                   \
3364 {                                                                       \
3365     FloatParts64 pa, pb, pr;                                            \
3366     float ## sz ## _unpack_canonical(&pa, a, s);                        \
3367     float ## sz ## _unpack_canonical(&pb, b, s);                        \
3368     pr = minmax_floats(pa, pb, ismin, isiee, ismag, s);                 \
3369     return float ## sz ## _round_pack_canonical(&pr, s);                \
3370 }
3371 
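/*
 * For reference, MINMAX(32, minnum, true, true, false) below expands to:
 *
 *   float32 float32_minnum(float32 a, float32 b, float_status *s)
 *   {
 *       FloatParts64 pa, pb, pr;
 *       float32_unpack_canonical(&pa, a, s);
 *       float32_unpack_canonical(&pb, b, s);
 *       pr = minmax_floats(pa, pb, true, true, false, s);
 *       return float32_round_pack_canonical(&pr, s);
 *   }
 */
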
3372 MINMAX(16, min, true, false, false)
3373 MINMAX(16, minnum, true, true, false)
3374 MINMAX(16, minnummag, true, true, true)
3375 MINMAX(16, max, false, false, false)
3376 MINMAX(16, maxnum, false, true, false)
3377 MINMAX(16, maxnummag, false, true, true)
3378 
3379 MINMAX(32, min, true, false, false)
3380 MINMAX(32, minnum, true, true, false)
3381 MINMAX(32, minnummag, true, true, true)
3382 MINMAX(32, max, false, false, false)
3383 MINMAX(32, maxnum, false, true, false)
3384 MINMAX(32, maxnummag, false, true, true)
3385 
3386 MINMAX(64, min, true, false, false)
3387 MINMAX(64, minnum, true, true, false)
3388 MINMAX(64, minnummag, true, true, true)
3389 MINMAX(64, max, false, false, false)
3390 MINMAX(64, maxnum, false, true, false)
3391 MINMAX(64, maxnummag, false, true, true)
3392 
3393 #undef MINMAX
3394 
3395 #define BF16_MINMAX(name, ismin, isiee, ismag)                          \
3396 bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s)     \
3397 {                                                                       \
3398     FloatParts64 pa, pb, pr;                                            \
3399     bfloat16_unpack_canonical(&pa, a, s);                               \
3400     bfloat16_unpack_canonical(&pb, b, s);                               \
3401     pr = minmax_floats(pa, pb, ismin, isiee, ismag, s);                 \
3402     return bfloat16_round_pack_canonical(&pr, s);                       \
3403 }
3404 
3405 BF16_MINMAX(min, true, false, false)
3406 BF16_MINMAX(minnum, true, true, false)
3407 BF16_MINMAX(minnummag, true, true, true)
3408 BF16_MINMAX(max, false, false, false)
3409 BF16_MINMAX(maxnum, false, true, false)
3410 BF16_MINMAX(maxnummag, false, true, true)
3411 
3412 #undef BF16_MINMAX
3413 
3414 /* Floating point compare */
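/*
 * compare_floats() implements both the quiet and the signalling compare:
 * a signalling comparison (is_quiet == false) raises the invalid
 * exception for any NaN operand, while a quiet comparison raises it
 * only when a signalling NaN is involved.  Either way a NaN operand
 * makes the result float_relation_unordered.
 */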
3415 static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
3416                                     float_status *s)
3417 {
3418     if (is_nan(a.cls) || is_nan(b.cls)) {
3419         if (!is_quiet ||
3420             a.cls == float_class_snan ||
3421             b.cls == float_class_snan) {
3422             float_raise(float_flag_invalid, s);
3423         }
3424         return float_relation_unordered;
3425     }
3426 
3427     if (a.cls == float_class_zero) {
3428         if (b.cls == float_class_zero) {
3429             return float_relation_equal;
3430         }
3431         return b.sign ? float_relation_greater : float_relation_less;
3432     } else if (b.cls == float_class_zero) {
3433         return a.sign ? float_relation_less : float_relation_greater;
3434     }
3435 
3436     /* The only really important thing about infinity is its sign. If
3437      * both are infinities, the sign marks the smaller of the two.
3438      */
3439     if (a.cls == float_class_inf) {
3440         if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
3441             return float_relation_equal;
3442         }
3443         return a.sign ? float_relation_less : float_relation_greater;
3444     } else if (b.cls == float_class_inf) {
3445         return b.sign ? float_relation_greater : float_relation_less;
3446     }
3447 
3448     if (a.sign != b.sign) {
3449         return a.sign ? float_relation_less : float_relation_greater;
3450     }
3451 
3452     if (a.exp == b.exp) {
3453         if (a.frac == b.frac) {
3454             return float_relation_equal;
3455         }
3456         if (a.sign) {
3457             return a.frac > b.frac ?
3458                 float_relation_less : float_relation_greater;
3459         } else {
3460             return a.frac > b.frac ?
3461                 float_relation_greater : float_relation_less;
3462         }
3463     } else {
3464         if (a.sign) {
3465             return a.exp > b.exp ? float_relation_less : float_relation_greater;
3466         } else {
3467             return a.exp > b.exp ? float_relation_greater : float_relation_less;
3468         }
3469     }
3470 }
3471 
3472 #define COMPARE(name, attr, sz)                                         \
3473 static int attr                                                         \
3474 name(float ## sz a, float ## sz b, bool is_quiet, float_status *s)      \
3475 {                                                                       \
3476     FloatParts64 pa, pb;                                                \
3477     float ## sz ## _unpack_canonical(&pa, a, s);                        \
3478     float ## sz ## _unpack_canonical(&pb, b, s);                        \
3479     return compare_floats(pa, pb, is_quiet, s);                         \
3480 }
3481 
3482 COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
3483 COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
3484 COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
3485 
3486 #undef COMPARE
3487 
3488 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
3489 {
3490     return soft_f16_compare(a, b, false, s);
3491 }
3492 
3493 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
3494 {
3495     return soft_f16_compare(a, b, true, s);
3496 }
3497 
3498 static FloatRelation QEMU_FLATTEN
3499 f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
3500 {
3501     union_float32 ua, ub;
3502 
3503     ua.s = xa;
3504     ub.s = xb;
3505 
3506     if (QEMU_NO_HARDFLOAT) {
3507         goto soft;
3508     }
3509 
3510     float32_input_flush2(&ua.s, &ub.s, s);
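    /*
     * The isgreater()/isgreaterequal()/isless() macros from <math.h>
     * are the quiet comparison predicates, so they can be used here
     * without spuriously raising the host's invalid-operation exception
     * for quiet-NaN inputs; any unordered case falls through to the
     * softfloat path, which sets the QEMU flags.
     */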
3511     if (isgreaterequal(ua.h, ub.h)) {
3512         if (isgreater(ua.h, ub.h)) {
3513             return float_relation_greater;
3514         }
3515         return float_relation_equal;
3516     }
3517     if (likely(isless(ua.h, ub.h))) {
3518         return float_relation_less;
3519     }
3520     /* The only condition remaining is unordered.
3521      * Fall through to set flags.
3522      */
3523  soft:
3524     return soft_f32_compare(ua.s, ub.s, is_quiet, s);
3525 }
3526 
3527 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
3528 {
3529     return f32_compare(a, b, false, s);
3530 }
3531 
3532 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
3533 {
3534     return f32_compare(a, b, true, s);
3535 }
3536 
3537 static FloatRelation QEMU_FLATTEN
3538 f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
3539 {
3540     union_float64 ua, ub;
3541 
3542     ua.s = xa;
3543     ub.s = xb;
3544 
3545     if (QEMU_NO_HARDFLOAT) {
3546         goto soft;
3547     }
3548 
3549     float64_input_flush2(&ua.s, &ub.s, s);
3550     if (isgreaterequal(ua.h, ub.h)) {
3551         if (isgreater(ua.h, ub.h)) {
3552             return float_relation_greater;
3553         }
3554         return float_relation_equal;
3555     }
3556     if (likely(isless(ua.h, ub.h))) {
3557         return float_relation_less;
3558     }
3559     /* The only condition remaining is unordered.
3560      * Fall through to set flags.
3561      */
3562  soft:
3563     return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3564 }
3565 
3566 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
3567 {
3568     return f64_compare(a, b, false, s);
3569 }
3570 
3571 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
3572 {
3573     return f64_compare(a, b, true, s);
3574 }
3575 
3576 static FloatRelation QEMU_FLATTEN
3577 soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
3578 {
3579     FloatParts64 pa, pb;
3580 
3581     bfloat16_unpack_canonical(&pa, a, s);
3582     bfloat16_unpack_canonical(&pb, b, s);
3583     return compare_floats(pa, pb, is_quiet, s);
3584 }
3585 
3586 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3587 {
3588     return soft_bf16_compare(a, b, false, s);
3589 }
3590 
3591 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3592 {
3593     return soft_bf16_compare(a, b, true, s);
3594 }
3595 
3596 /* Multiply A by 2 raised to the power N.  */
3597 static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
3598 {
3599     if (unlikely(is_nan(a.cls))) {
3600         parts_return_nan(&a, s);
3601     }
3602     if (a.cls == float_class_normal) {
3603         /* The largest float type (even though not supported by FloatParts64)
3604          * is float128, which has a 15 bit exponent.  Bounding N to 16 bits
3605          * still allows rounding to infinity, without allowing overflow
3606          * within the int32_t that backs FloatParts64.exp.
3607          */
3608         n = MIN(MAX(n, -0x10000), 0x10000);
3609         a.exp += n;
3610     }
3611     return a;
3612 }
3613 
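/*
 * For the wrappers below: applying float32_scalbn() with n = 4 to the
 * float32 encoding of 1.5 yields the encoding of 24.0 (1.5 * 2^4),
 * while NaN inputs simply produce a NaN as handled by
 * scalbn_decomposed() above.
 */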
3614 float16 float16_scalbn(float16 a, int n, float_status *status)
3615 {
3616     FloatParts64 pa, pr;
3617 
3618     float16_unpack_canonical(&pa, a, status);
3619     pr = scalbn_decomposed(pa, n, status);
3620     return float16_round_pack_canonical(&pr, status);
3621 }
3622 
3623 float32 float32_scalbn(float32 a, int n, float_status *status)
3624 {
3625     FloatParts64 pa, pr;
3626 
3627     float32_unpack_canonical(&pa, a, status);
3628     pr = scalbn_decomposed(pa, n, status);
3629     return float32_round_pack_canonical(&pr, status);
3630 }
3631 
3632 float64 float64_scalbn(float64 a, int n, float_status *status)
3633 {
3634     FloatParts64 pa, pr;
3635 
3636     float64_unpack_canonical(&pa, a, status);
3637     pr = scalbn_decomposed(pa, n, status);
3638     return float64_round_pack_canonical(&pr, status);
3639 }
3640 
3641 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3642 {
3643     FloatParts64 pa, pr;
3644 
3645     bfloat16_unpack_canonical(&pa, a, status);
3646     pr = scalbn_decomposed(pa, n, status);
3647     return bfloat16_round_pack_canonical(&pr, status);
3648 }
3649 
3650 /*
3651  * Square Root
3652  *
3653  * The old softfloat code did an approximation step before zeroing in
3654  * on the final result. However, for simplicity we just compute the
3655  * square root by iterating down from the implicit bit to enough extra
3656  * bits to ensure we get a correctly rounded result.
3657  *
3658  * This does mean, however, that the calculation is slower than before,
3659  * especially for 64-bit floats.
3660  */
3661 
3662 static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
3663 {
3664     uint64_t a_frac, r_frac, s_frac;
3665     int bit, last_bit;
3666 
3667     if (is_nan(a.cls)) {
3668         parts_return_nan(&a, s);
3669         return a;
3670     }
3671     if (a.cls == float_class_zero) {
3672         return a;  /* sqrt(+-0) = +-0 */
3673     }
3674     if (a.sign) {
3675         float_raise(float_flag_invalid, s);
3676         parts_default_nan(&a, s);
3677         return a;
3678     }
3679     if (a.cls == float_class_inf) {
3680         return a;  /* sqrt(+inf) = +inf */
3681     }
3682 
3683     assert(a.cls == float_class_normal);
3684 
3685     /* We need two overflow bits at the top. Adding room for that is a
3686      * right shift. If the exponent is odd, we can discard the low bit
3687      * by multiplying the fraction by 2; that's a left shift. Combine
3688      * those and we shift right by 1 if the exponent is odd, otherwise 2.
3689      */
3690     a_frac = a.frac >> (2 - (a.exp & 1));
3691     a.exp >>= 1;
3692 
3693     /* Bit-by-bit computation of sqrt.  */
3694     r_frac = 0;
3695     s_frac = 0;
3696 
3697     /* Iterate from implicit bit down to the 3 extra bits to compute a
3698      * properly rounded result. Remember we've inserted two more bits
3699      * at the top, so these positions are two less.
3700      */
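    /*
     * In the loop below r_frac accumulates the root one bit at a time
     * and s_frac is kept equal to 2 * r_frac; because a_frac doubles on
     * every iteration while the trial bit q halves, the test
     * "t_frac <= a_frac" asks whether (r_frac + q)^2 still fits under
     * the input, i.e. the classic restoring bit-by-bit square root.
     */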
3701     bit = DECOMPOSED_BINARY_POINT - 2;
3702     last_bit = MAX(p->frac_shift - 4, 0);
3703     do {
3704         uint64_t q = 1ULL << bit;
3705         uint64_t t_frac = s_frac + q;
3706         if (t_frac <= a_frac) {
3707             s_frac = t_frac + q;
3708             a_frac -= t_frac;
3709             r_frac += q;
3710         }
3711         a_frac <<= 1;
3712     } while (--bit >= last_bit);
3713 
3714     /* Undo the right shift done above. If there is any remaining
3715      * fraction, the result is inexact. Set the sticky bit.
3716      */
3717     a.frac = (r_frac << 2) + (a_frac != 0);
3718 
3719     return a;
3720 }
3721 
3722 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
3723 {
3724     FloatParts64 pa, pr;
3725 
3726     float16_unpack_canonical(&pa, a, status);
3727     pr = sqrt_float(pa, status, &float16_params);
3728     return float16_round_pack_canonical(&pr, status);
3729 }
3730 
3731 static float32 QEMU_SOFTFLOAT_ATTR
3732 soft_f32_sqrt(float32 a, float_status *status)
3733 {
3734     FloatParts64 pa, pr;
3735 
3736     float32_unpack_canonical(&pa, a, status);
3737     pr = sqrt_float(pa, status, &float32_params);
3738     return float32_round_pack_canonical(&pr, status);
3739 }
3740 
3741 static float64 QEMU_SOFTFLOAT_ATTR
3742 soft_f64_sqrt(float64 a, float_status *status)
3743 {
3744     FloatParts64 pa, pr;
3745 
3746     float64_unpack_canonical(&pa, a, status);
3747     pr = sqrt_float(pa, status, &float64_params);
3748     return float64_round_pack_canonical(&pr, status);
3749 }
3750 
3751 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3752 {
3753     union_float32 ua, ur;
3754 
3755     ua.s = xa;
3756     if (unlikely(!can_use_fpu(s))) {
3757         goto soft;
3758     }
3759 
3760     float32_input_flush1(&ua.s, s);
3761     if (QEMU_HARDFLOAT_1F32_USE_FP) {
3762         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3763                        fpclassify(ua.h) == FP_ZERO) ||
3764                      signbit(ua.h))) {
3765             goto soft;
3766         }
3767     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3768                         float32_is_neg(ua.s))) {
3769         goto soft;
3770     }
3771     ur.h = sqrtf(ua.h);
3772     return ur.s;
3773 
3774  soft:
3775     return soft_f32_sqrt(ua.s, s);
3776 }
3777 
3778 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3779 {
3780     union_float64 ua, ur;
3781 
3782     ua.s = xa;
3783     if (unlikely(!can_use_fpu(s))) {
3784         goto soft;
3785     }
3786 
3787     float64_input_flush1(&ua.s, s);
3788     if (QEMU_HARDFLOAT_1F64_USE_FP) {
3789         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3790                        fpclassify(ua.h) == FP_ZERO) ||
3791                      signbit(ua.h))) {
3792             goto soft;
3793         }
3794     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3795                         float64_is_neg(ua.s))) {
3796         goto soft;
3797     }
3798     ur.h = sqrt(ua.h);
3799     return ur.s;
3800 
3801  soft:
3802     return soft_f64_sqrt(ua.s, s);
3803 }
3804 
3805 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3806 {
3807     FloatParts64 pa, pr;
3808 
3809     bfloat16_unpack_canonical(&pa, a, status);
3810     pr = sqrt_float(pa, status, &bfloat16_params);
3811     return bfloat16_round_pack_canonical(&pr, status);
3812 }
3813 
3814 /*----------------------------------------------------------------------------
3815 | The pattern for a default generated NaN.
3816 *----------------------------------------------------------------------------*/
3817 
3818 float16 float16_default_nan(float_status *status)
3819 {
3820     FloatParts64 p;
3821 
3822     parts_default_nan(&p, status);
3823     p.frac >>= float16_params.frac_shift;
3824     return float16_pack_raw(&p);
3825 }
3826 
3827 float32 float32_default_nan(float_status *status)
3828 {
3829     FloatParts64 p;
3830 
3831     parts_default_nan(&p, status);
3832     p.frac >>= float32_params.frac_shift;
3833     return float32_pack_raw(&p);
3834 }
3835 
3836 float64 float64_default_nan(float_status *status)
3837 {
3838     FloatParts64 p;
3839 
3840     parts_default_nan(&p, status);
3841     p.frac >>= float64_params.frac_shift;
3842     return float64_pack_raw(&p);
3843 }
3844 
3845 float128 float128_default_nan(float_status *status)
3846 {
3847     FloatParts128 p;
3848 
3849     parts_default_nan(&p, status);
3850     frac_shr(&p, float128_params.frac_shift);
3851     return float128_pack_raw(&p);
3852 }
3853 
3854 bfloat16 bfloat16_default_nan(float_status *status)
3855 {
3856     FloatParts64 p;
3857 
3858     parts_default_nan(&p, status);
3859     p.frac >>= bfloat16_params.frac_shift;
3860     return bfloat16_pack_raw(&p);
3861 }
3862 
3863 /*----------------------------------------------------------------------------
3864 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3865 *----------------------------------------------------------------------------*/
3866 
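/*
 * parts_silence_nan() works on the canonical, left-aligned fraction
 * layout used by FloatParts64/FloatParts128, so each helper below
 * shifts the raw fraction up to that position first and shifts it back
 * down before re-packing.
 */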
3867 float16 float16_silence_nan(float16 a, float_status *status)
3868 {
3869     FloatParts64 p;
3870 
3871     float16_unpack_raw(&p, a);
3872     p.frac <<= float16_params.frac_shift;
3873     parts_silence_nan(&p, status);
3874     p.frac >>= float16_params.frac_shift;
3875     return float16_pack_raw(&p);
3876 }
3877 
3878 float32 float32_silence_nan(float32 a, float_status *status)
3879 {
3880     FloatParts64 p;
3881 
3882     float32_unpack_raw(&p, a);
3883     p.frac <<= float32_params.frac_shift;
3884     parts_silence_nan(&p, status);
3885     p.frac >>= float32_params.frac_shift;
3886     return float32_pack_raw(&p);
3887 }
3888 
3889 float64 float64_silence_nan(float64 a, float_status *status)
3890 {
3891     FloatParts64 p;
3892 
3893     float64_unpack_raw(&p, a);
3894     p.frac <<= float64_params.frac_shift;
3895     parts_silence_nan(&p, status);
3896     p.frac >>= float64_params.frac_shift;
3897     return float64_pack_raw(&p);
3898 }
3899 
3900 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3901 {
3902     FloatParts64 p;
3903 
3904     bfloat16_unpack_raw(&p, a);
3905     p.frac <<= bfloat16_params.frac_shift;
3906     parts_silence_nan(&p, status);
3907     p.frac >>= bfloat16_params.frac_shift;
3908     return bfloat16_pack_raw(&p);
3909 }
3910 
3911 float128 float128_silence_nan(float128 a, float_status *status)
3912 {
3913     FloatParts128 p;
3914 
3915     float128_unpack_raw(&p, a);
3916     frac_shl(&p, float128_params.frac_shift);
3917     parts_silence_nan(&p, status);
3918     frac_shr(&p, float128_params.frac_shift);
3919     return float128_pack_raw(&p);
3920 }
3921 
3922 /*----------------------------------------------------------------------------
3923 | If `a' is denormal and we are in flush-to-zero mode then set the
3924 | input-denormal exception and return zero. Otherwise just return the value.
3925 *----------------------------------------------------------------------------*/
3926 
3927 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
3928 {
3929     if (p.exp == 0 && p.frac != 0) {
3930         float_raise(float_flag_input_denormal, status);
3931         return true;
3932     }
3933 
3934     return false;
3935 }
3936 
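/*
 * For example, with flush_inputs_to_zero set, passing the float32 bit
 * pattern 0x00000001 (the smallest positive denormal) to
 * float32_squash_input_denormal() raises float_flag_input_denormal and
 * returns +0; with the flag clear the input is returned unchanged.
 */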
3937 float16 float16_squash_input_denormal(float16 a, float_status *status)
3938 {
3939     if (status->flush_inputs_to_zero) {
3940         FloatParts64 p;
3941 
3942         float16_unpack_raw(&p, a);
3943         if (parts_squash_denormal(p, status)) {
3944             return float16_set_sign(float16_zero, p.sign);
3945         }
3946     }
3947     return a;
3948 }
3949 
3950 float32 float32_squash_input_denormal(float32 a, float_status *status)
3951 {
3952     if (status->flush_inputs_to_zero) {
3953         FloatParts64 p;
3954 
3955         float32_unpack_raw(&p, a);
3956         if (parts_squash_denormal(p, status)) {
3957             return float32_set_sign(float32_zero, p.sign);
3958         }
3959     }
3960     return a;
3961 }
3962 
3963 float64 float64_squash_input_denormal(float64 a, float_status *status)
3964 {
3965     if (status->flush_inputs_to_zero) {
3966         FloatParts64 p;
3967 
3968         float64_unpack_raw(&p, a);
3969         if (parts_squash_denormal(p, status)) {
3970             return float64_set_sign(float64_zero, p.sign);
3971         }
3972     }
3973     return a;
3974 }
3975 
3976 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3977 {
3978     if (status->flush_inputs_to_zero) {
3979         FloatParts64 p;
3980 
3981         bfloat16_unpack_raw(&p, a);
3982         if (parts_squash_denormal(p, status)) {
3983             return bfloat16_set_sign(bfloat16_zero, p.sign);
3984         }
3985     }
3986     return a;
3987 }
3988 
3989 /*----------------------------------------------------------------------------
3990 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3991 | and 7, and returns the properly rounded 32-bit integer corresponding to the
3992 | input.  If `zSign' is 1, the input is negated before being converted to an
3993 | integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
3994 | is simply rounded to an integer, with the inexact exception raised if the
3995 | input cannot be represented exactly as an integer.  However, if the fixed-
3996 | point input is too large, the invalid exception is raised and the largest
3997 | positive or negative integer is returned.
3998 *----------------------------------------------------------------------------*/
3999 
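/*
 * Worked example for the rounding below: with 7 fraction bits,
 * absZ = 0x140 represents 2.5.  Under round-to-nearest-even the 0x40
 * increment and the shift by 7 give 3, but since the round bits are
 * exactly 0x40 (a tie) the low bit is cleared, yielding 2; the
 * non-zero round bits also raise the inexact flag.
 */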
4000 static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
4001                                  float_status *status)
4002 {
4003     int8_t roundingMode;
4004     bool roundNearestEven;
4005     int8_t roundIncrement, roundBits;
4006     int32_t z;
4007 
4008     roundingMode = status->float_rounding_mode;
4009     roundNearestEven = ( roundingMode == float_round_nearest_even );
4010     switch (roundingMode) {
4011     case float_round_nearest_even:
4012     case float_round_ties_away:
4013         roundIncrement = 0x40;
4014         break;
4015     case float_round_to_zero:
4016         roundIncrement = 0;
4017         break;
4018     case float_round_up:
4019         roundIncrement = zSign ? 0 : 0x7f;
4020         break;
4021     case float_round_down:
4022         roundIncrement = zSign ? 0x7f : 0;
4023         break;
4024     case float_round_to_odd:
4025         roundIncrement = absZ & 0x80 ? 0 : 0x7f;
4026         break;
4027     default:
4028         abort();
4029     }
4030     roundBits = absZ & 0x7F;
4031     absZ = ( absZ + roundIncrement )>>7;
4032     if (!(roundBits ^ 0x40) && roundNearestEven) {
4033         absZ &= ~1;
4034     }
4035     z = absZ;
4036     if ( zSign ) z = - z;
4037     if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
4038         float_raise(float_flag_invalid, status);
4039         return zSign ? INT32_MIN : INT32_MAX;
4040     }
4041     if (roundBits) {
4042         float_raise(float_flag_inexact, status);
4043     }
4044     return z;
4045 
4046 }
4047 
4048 /*----------------------------------------------------------------------------
4049 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4050 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4051 | and returns the properly rounded 64-bit integer corresponding to the input.
4052 | If `zSign' is 1, the input is negated before being converted to an integer.
4053 | Ordinarily, the fixed-point input is simply rounded to an integer, with
4054 | the inexact exception raised if the input cannot be represented exactly as
4055 | an integer.  However, if the fixed-point input is too large, the invalid
4056 | exception is raised and the largest positive or negative integer is
4057 | returned.
4058 *----------------------------------------------------------------------------*/
4059 
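/*
 * Here absZ1 acts as a combined round/sticky word: its top bit being
 * set means the discarded fraction is at least one half, and any
 * non-zero value at all marks the conversion as inexact.
 */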
4060 static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
4061                                float_status *status)
4062 {
4063     int8_t roundingMode;
4064     bool roundNearestEven, increment;
4065     int64_t z;
4066 
4067     roundingMode = status->float_rounding_mode;
4068     roundNearestEven = ( roundingMode == float_round_nearest_even );
4069     switch (roundingMode) {
4070     case float_round_nearest_even:
4071     case float_round_ties_away:
4072         increment = ((int64_t) absZ1 < 0);
4073         break;
4074     case float_round_to_zero:
4075         increment = 0;
4076         break;
4077     case float_round_up:
4078         increment = !zSign && absZ1;
4079         break;
4080     case float_round_down:
4081         increment = zSign && absZ1;
4082         break;
4083     case float_round_to_odd:
4084         increment = !(absZ0 & 1) && absZ1;
4085         break;
4086     default:
4087         abort();
4088     }
4089     if ( increment ) {
4090         ++absZ0;
4091         if ( absZ0 == 0 ) goto overflow;
4092         if (!(absZ1 << 1) && roundNearestEven) {
4093             absZ0 &= ~1;
4094         }
4095     }
4096     z = absZ0;
4097     if ( zSign ) z = - z;
4098     if ( z && ( ( z < 0 ) ^ zSign ) ) {
4099  overflow:
4100         float_raise(float_flag_invalid, status);
4101         return zSign ? INT64_MIN : INT64_MAX;
4102     }
4103     if (absZ1) {
4104         float_raise(float_flag_inexact, status);
4105     }
4106     return z;
4107 
4108 }
4109 
4110 /*----------------------------------------------------------------------------
4111 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4112 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4113 | and returns the properly rounded 64-bit unsigned integer corresponding to the
4114 | input.  Ordinarily, the fixed-point input is simply rounded to an integer,
4115 | with the inexact exception raised if the input cannot be represented exactly
4116 | as an integer.  However, if the fixed-point input is too large, the invalid
4117 | exception is raised and the largest unsigned integer is returned.
4118 *----------------------------------------------------------------------------*/
4119 
4120 static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
4121                                 uint64_t absZ1, float_status *status)
4122 {
4123     int8_t roundingMode;
4124     bool roundNearestEven, increment;
4125 
4126     roundingMode = status->float_rounding_mode;
4127     roundNearestEven = (roundingMode == float_round_nearest_even);
4128     switch (roundingMode) {
4129     case float_round_nearest_even:
4130     case float_round_ties_away:
4131         increment = ((int64_t)absZ1 < 0);
4132         break;
4133     case float_round_to_zero:
4134         increment = 0;
4135         break;
4136     case float_round_up:
4137         increment = !zSign && absZ1;
4138         break;
4139     case float_round_down:
4140         increment = zSign && absZ1;
4141         break;
4142     case float_round_to_odd:
4143         increment = !(absZ0 & 1) && absZ1;
4144         break;
4145     default:
4146         abort();
4147     }
4148     if (increment) {
4149         ++absZ0;
4150         if (absZ0 == 0) {
4151             float_raise(float_flag_invalid, status);
4152             return UINT64_MAX;
4153         }
4154         if (!(absZ1 << 1) && roundNearestEven) {
4155             absZ0 &= ~1;
4156         }
4157     }
4158 
4159     if (zSign && absZ0) {
4160         float_raise(float_flag_invalid, status);
4161         return 0;
4162     }
4163 
4164     if (absZ1) {
4165         float_raise(float_flag_inexact, status);
4166     }
4167     return absZ0;
4168 }
4169 
4170 /*----------------------------------------------------------------------------
4171 | Normalizes the subnormal single-precision floating-point value represented
4172 | by the denormalized significand `aSig'.  The normalized exponent and
4173 | significand are stored at the locations pointed to by `zExpPtr' and
4174 | `zSigPtr', respectively.
4175 *----------------------------------------------------------------------------*/
4176 
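/*
 * For example, aSig = 0x00000001 has clz32() == 31, so shiftCount is
 * 23: the significand becomes 0x00800000 (implicit bit in position)
 * and the exponent returned is 1 - 23 = -22.
 */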
4177 static void
4178  normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
4179 {
4180     int8_t shiftCount;
4181 
4182     shiftCount = clz32(aSig) - 8;
4183     *zSigPtr = aSig<<shiftCount;
4184     *zExpPtr = 1 - shiftCount;
4185 
4186 }
4187 
4188 /*----------------------------------------------------------------------------
4189 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4190 | and significand `zSig', and returns the proper single-precision floating-
4191 | point value corresponding to the abstract input.  Ordinarily, the abstract
4192 | value is simply rounded and packed into the single-precision format, with
4193 | the inexact exception raised if the abstract input cannot be represented
4194 | exactly.  However, if the abstract value is too large, the overflow and
4195 | inexact exceptions are raised and an infinity or maximal finite value is
4196 | returned.  If the abstract value is too small, the input value is rounded to
4197 | a subnormal number, and the underflow and inexact exceptions are raised if
4198 | the abstract input cannot be represented exactly as a subnormal single-
4199 | precision floating-point number.
4200 |     The input significand `zSig' has its binary point between bits 30
4201 | and 29, which is 7 bits to the left of the usual location.  This shifted
4202 | significand must be normalized or smaller.  If `zSig' is not normalized,
4203 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4204 | and it must not require rounding.  In the usual case that `zSig' is
4205 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4206 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4207 | Binary Floating-Point Arithmetic.
4208 *----------------------------------------------------------------------------*/
4209 
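/*
 * In this layout the low 7 bits of zSig are the guard/round/sticky
 * bits: roundBits = zSig & 0x7F below, and adding the 0x40 increment
 * before shifting implements round-to-nearest, with the usual
 * clear-the-low-bit fixup on exact ties for round-to-nearest-even.
 */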
4210 static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4211                                    float_status *status)
4212 {
4213     int8_t roundingMode;
4214     bool roundNearestEven;
4215     int8_t roundIncrement, roundBits;
4216     bool isTiny;
4217 
4218     roundingMode = status->float_rounding_mode;
4219     roundNearestEven = ( roundingMode == float_round_nearest_even );
4220     switch (roundingMode) {
4221     case float_round_nearest_even:
4222     case float_round_ties_away:
4223         roundIncrement = 0x40;
4224         break;
4225     case float_round_to_zero:
4226         roundIncrement = 0;
4227         break;
4228     case float_round_up:
4229         roundIncrement = zSign ? 0 : 0x7f;
4230         break;
4231     case float_round_down:
4232         roundIncrement = zSign ? 0x7f : 0;
4233         break;
4234     case float_round_to_odd:
4235         roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4236         break;
4237     default:
4238         abort();
4239         break;
4240     }
4241     roundBits = zSig & 0x7F;
4242     if ( 0xFD <= (uint16_t) zExp ) {
4243         if (    ( 0xFD < zExp )
4244              || (    ( zExp == 0xFD )
4245                   && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
4246            ) {
4247             bool overflow_to_inf = roundingMode != float_round_to_odd &&
4248                                    roundIncrement != 0;
4249             float_raise(float_flag_overflow | float_flag_inexact, status);
4250             return packFloat32(zSign, 0xFF, -!overflow_to_inf);
4251         }
4252         if ( zExp < 0 ) {
4253             if (status->flush_to_zero) {
4254                 float_raise(float_flag_output_denormal, status);
4255                 return packFloat32(zSign, 0, 0);
4256             }
4257             isTiny = status->tininess_before_rounding
4258                   || (zExp < -1)
4259                   || (zSig + roundIncrement < 0x80000000);
4260             shift32RightJamming( zSig, - zExp, &zSig );
4261             zExp = 0;
4262             roundBits = zSig & 0x7F;
4263             if (isTiny && roundBits) {
4264                 float_raise(float_flag_underflow, status);
4265             }
4266             if (roundingMode == float_round_to_odd) {
4267                 /*
4268                  * For round-to-odd case, the roundIncrement depends on
4269                  * zSig which just changed.
4270                  */
4271                 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4272             }
4273         }
4274     }
4275     if (roundBits) {
4276         float_raise(float_flag_inexact, status);
4277     }
4278     zSig = ( zSig + roundIncrement )>>7;
4279     if (!(roundBits ^ 0x40) && roundNearestEven) {
4280         zSig &= ~1;
4281     }
4282     if ( zSig == 0 ) zExp = 0;
4283     return packFloat32( zSign, zExp, zSig );
4284 
4285 }
4286 
4287 /*----------------------------------------------------------------------------
4288 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4289 | and significand `zSig', and returns the proper single-precision floating-
4290 | point value corresponding to the abstract input.  This routine is just like
4291 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4292 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4293 | floating-point exponent.
4294 *----------------------------------------------------------------------------*/
4295 
4296 static float32
4297  normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4298                               float_status *status)
4299 {
4300     int8_t shiftCount;
4301 
4302     shiftCount = clz32(zSig) - 1;
4303     return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4304                                status);
4305 
4306 }
4307 
4308 /*----------------------------------------------------------------------------
4309 | Normalizes the subnormal double-precision floating-point value represented
4310 | by the denormalized significand `aSig'.  The normalized exponent and
4311 | significand are stored at the locations pointed to by `zExpPtr' and
4312 | `zSigPtr', respectively.
4313 *----------------------------------------------------------------------------*/
4314 
4315 static void
4316  normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
4317 {
4318     int8_t shiftCount;
4319 
4320     shiftCount = clz64(aSig) - 11;
4321     *zSigPtr = aSig<<shiftCount;
4322     *zExpPtr = 1 - shiftCount;
4323 
4324 }
4325 
4326 /*----------------------------------------------------------------------------
4327 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4328 | double-precision floating-point value, returning the result.  After being
4329 | shifted into the proper positions, the three fields are simply added
4330 | together to form the result.  This means that any integer portion of `zSig'
4331 | will be added into the exponent.  Since a properly normalized significand
4332 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4333 | than the desired result exponent whenever `zSig' is a complete, normalized
4334 | significand.
4335 *----------------------------------------------------------------------------*/
4336 
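/*
 * For example, packFloat64(0, 0x3FF, 0) produces 0x3FF0000000000000,
 * the encoding of 1.0.  Note the fields are added rather than OR-ed,
 * so a carry out of the significand intentionally bumps the exponent.
 */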
4337 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4338 {
4339 
4340     return make_float64(
4341         ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4342 
4343 }
4344 
4345 /*----------------------------------------------------------------------------
4346 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4347 | and significand `zSig', and returns the proper double-precision floating-
4348 | point value corresponding to the abstract input.  Ordinarily, the abstract
4349 | value is simply rounded and packed into the double-precision format, with
4350 | the inexact exception raised if the abstract input cannot be represented
4351 | exactly.  However, if the abstract value is too large, the overflow and
4352 | inexact exceptions are raised and an infinity or maximal finite value is
4353 | returned.  If the abstract value is too small, the input value is rounded to
4354 | a subnormal number, and the underflow and inexact exceptions are raised if
4355 | the abstract input cannot be represented exactly as a subnormal double-
4356 | precision floating-point number.
4357 |     The input significand `zSig' has its binary point between bits 62
4358 | and 61, which is 10 bits to the left of the usual location.  This shifted
4359 | significand must be normalized or smaller.  If `zSig' is not normalized,
4360 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4361 | and it must not require rounding.  In the usual case that `zSig' is
4362 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4363 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4364 | Binary Floating-Point Arithmetic.
4365 *----------------------------------------------------------------------------*/
4366 
4367 static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4368                                    float_status *status)
4369 {
4370     int8_t roundingMode;
4371     bool roundNearestEven;
4372     int roundIncrement, roundBits;
4373     bool isTiny;
4374 
4375     roundingMode = status->float_rounding_mode;
4376     roundNearestEven = ( roundingMode == float_round_nearest_even );
4377     switch (roundingMode) {
4378     case float_round_nearest_even:
4379     case float_round_ties_away:
4380         roundIncrement = 0x200;
4381         break;
4382     case float_round_to_zero:
4383         roundIncrement = 0;
4384         break;
4385     case float_round_up:
4386         roundIncrement = zSign ? 0 : 0x3ff;
4387         break;
4388     case float_round_down:
4389         roundIncrement = zSign ? 0x3ff : 0;
4390         break;
4391     case float_round_to_odd:
4392         roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4393         break;
4394     default:
4395         abort();
4396     }
4397     roundBits = zSig & 0x3FF;
4398     if ( 0x7FD <= (uint16_t) zExp ) {
4399         if (    ( 0x7FD < zExp )
4400              || (    ( zExp == 0x7FD )
4401                   && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
4402            ) {
4403             bool overflow_to_inf = roundingMode != float_round_to_odd &&
4404                                    roundIncrement != 0;
4405             float_raise(float_flag_overflow | float_flag_inexact, status);
4406             return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
4407         }
4408         if ( zExp < 0 ) {
4409             if (status->flush_to_zero) {
4410                 float_raise(float_flag_output_denormal, status);
4411                 return packFloat64(zSign, 0, 0);
4412             }
4413             isTiny = status->tininess_before_rounding
4414                   || (zExp < -1)
4415                   || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
4416             shift64RightJamming( zSig, - zExp, &zSig );
4417             zExp = 0;
4418             roundBits = zSig & 0x3FF;
4419             if (isTiny && roundBits) {
4420                 float_raise(float_flag_underflow, status);
4421             }
4422             if (roundingMode == float_round_to_odd) {
4423                 /*
4424                  * For round-to-odd case, the roundIncrement depends on
4425                  * zSig which just changed.
4426                  */
4427                 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4428             }
4429         }
4430     }
4431     if (roundBits) {
4432         float_raise(float_flag_inexact, status);
4433     }
4434     zSig = ( zSig + roundIncrement )>>10;
4435     if (!(roundBits ^ 0x200) && roundNearestEven) {
4436         zSig &= ~1;
4437     }
4438     if ( zSig == 0 ) zExp = 0;
4439     return packFloat64( zSign, zExp, zSig );
4440 
4441 }
4442 
4443 /*----------------------------------------------------------------------------
4444 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4445 | and significand `zSig', and returns the proper double-precision floating-
4446 | point value corresponding to the abstract input.  This routine is just like
4447 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4448 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4449 | floating-point exponent.
4450 *----------------------------------------------------------------------------*/
4451 
4452 static float64
4453  normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4454                               float_status *status)
4455 {
4456     int8_t shiftCount;
4457 
4458     shiftCount = clz64(zSig) - 1;
4459     return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4460                                status);
4461 
4462 }
4463 
4464 /*----------------------------------------------------------------------------
4465 | Normalizes the subnormal extended double-precision floating-point value
4466 | represented by the denormalized significand `aSig'.  The normalized exponent
4467 | and significand are stored at the locations pointed to by `zExpPtr' and
4468 | `zSigPtr', respectively.
4469 *----------------------------------------------------------------------------*/
4470 
4471 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4472                                 uint64_t *zSigPtr)
4473 {
4474     int8_t shiftCount;
4475 
4476     shiftCount = clz64(aSig);
4477     *zSigPtr = aSig<<shiftCount;
4478     *zExpPtr = 1 - shiftCount;
4479 }
4480 
4481 /*----------------------------------------------------------------------------
4482 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4483 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4484 | and returns the proper extended double-precision floating-point value
4485 | corresponding to the abstract input.  Ordinarily, the abstract value is
4486 | rounded and packed into the extended double-precision format, with the
4487 | inexact exception raised if the abstract input cannot be represented
4488 | exactly.  However, if the abstract value is too large, the overflow and
4489 | inexact exceptions are raised and an infinity or maximal finite value is
4490 | returned.  If the abstract value is too small, the input value is rounded to
4491 | a subnormal number, and the underflow and inexact exceptions are raised if
4492 | the abstract input cannot be represented exactly as a subnormal extended
4493 | double-precision floating-point number.
4494 |     If `roundingPrecision' is 32 or 64, the result is rounded to the same
4495 | number of bits as single or double precision, respectively.  Otherwise, the
4496 | result is rounded to the full precision of the extended double-precision
4497 | format.
4498 |     The input significand must be normalized or smaller.  If the input
4499 | significand is not normalized, `zExp' must be 0; in that case, the result
4500 | returned is a subnormal number, and it must not require rounding.  The
4501 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4502 | Floating-Point Arithmetic.
4503 *----------------------------------------------------------------------------*/
4504 
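/*
 * When roundingPrecision is 64 the low 11 bits of zSig0 are rounded
 * away (roundMask 0x7FF), leaving a 53-bit significand as for double
 * precision; when it is 32 the low 40 bits go (roundMask
 * 0x000000FFFFFFFFFF), leaving the 24-bit significand of single
 * precision.
 */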
4505 floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
4506                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4507                               float_status *status)
4508 {
4509     int8_t roundingMode;
4510     bool roundNearestEven, increment, isTiny;
4511     int64_t roundIncrement, roundMask, roundBits;
4512 
4513     roundingMode = status->float_rounding_mode;
4514     roundNearestEven = ( roundingMode == float_round_nearest_even );
4515     if ( roundingPrecision == 80 ) goto precision80;
4516     if ( roundingPrecision == 64 ) {
4517         roundIncrement = UINT64_C(0x0000000000000400);
4518         roundMask = UINT64_C(0x00000000000007FF);
4519     }
4520     else if ( roundingPrecision == 32 ) {
4521         roundIncrement = UINT64_C(0x0000008000000000);
4522         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4523     }
4524     else {
4525         goto precision80;
4526     }
4527     zSig0 |= ( zSig1 != 0 );
4528     switch (roundingMode) {
4529     case float_round_nearest_even:
4530     case float_round_ties_away:
4531         break;
4532     case float_round_to_zero:
4533         roundIncrement = 0;
4534         break;
4535     case float_round_up:
4536         roundIncrement = zSign ? 0 : roundMask;
4537         break;
4538     case float_round_down:
4539         roundIncrement = zSign ? roundMask : 0;
4540         break;
4541     default:
4542         abort();
4543     }
4544     roundBits = zSig0 & roundMask;
4545     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4546         if (    ( 0x7FFE < zExp )
4547              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4548            ) {
4549             goto overflow;
4550         }
4551         if ( zExp <= 0 ) {
4552             if (status->flush_to_zero) {
4553                 float_raise(float_flag_output_denormal, status);
4554                 return packFloatx80(zSign, 0, 0);
4555             }
4556             isTiny = status->tininess_before_rounding
4557                   || (zExp < 0 )
4558                   || (zSig0 <= zSig0 + roundIncrement);
4559             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4560             zExp = 0;
4561             roundBits = zSig0 & roundMask;
4562             if (isTiny && roundBits) {
4563                 float_raise(float_flag_underflow, status);
4564             }
4565             if (roundBits) {
4566                 float_raise(float_flag_inexact, status);
4567             }
4568             zSig0 += roundIncrement;
4569             if ( (int64_t) zSig0 < 0 ) zExp = 1;
4570             roundIncrement = roundMask + 1;
4571             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4572                 roundMask |= roundIncrement;
4573             }
4574             zSig0 &= ~ roundMask;
4575             return packFloatx80( zSign, zExp, zSig0 );
4576         }
4577     }
4578     if (roundBits) {
4579         float_raise(float_flag_inexact, status);
4580     }
4581     zSig0 += roundIncrement;
4582     if ( zSig0 < roundIncrement ) {
4583         ++zExp;
4584         zSig0 = UINT64_C(0x8000000000000000);
4585     }
4586     roundIncrement = roundMask + 1;
4587     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4588         roundMask |= roundIncrement;
4589     }
4590     zSig0 &= ~ roundMask;
4591     if ( zSig0 == 0 ) zExp = 0;
4592     return packFloatx80( zSign, zExp, zSig0 );
4593  precision80:
4594     switch (roundingMode) {
4595     case float_round_nearest_even:
4596     case float_round_ties_away:
4597         increment = ((int64_t)zSig1 < 0);
4598         break;
4599     case float_round_to_zero:
4600         increment = 0;
4601         break;
4602     case float_round_up:
4603         increment = !zSign && zSig1;
4604         break;
4605     case float_round_down:
4606         increment = zSign && zSig1;
4607         break;
4608     default:
4609         abort();
4610     }
4611     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4612         if (    ( 0x7FFE < zExp )
4613              || (    ( zExp == 0x7FFE )
4614                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4615                   && increment
4616                 )
4617            ) {
4618             roundMask = 0;
4619  overflow:
4620             float_raise(float_flag_overflow | float_flag_inexact, status);
4621             if (    ( roundingMode == float_round_to_zero )
4622                  || ( zSign && ( roundingMode == float_round_up ) )
4623                  || ( ! zSign && ( roundingMode == float_round_down ) )
4624                ) {
4625                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4626             }
4627             return packFloatx80(zSign,
4628                                 floatx80_infinity_high,
4629                                 floatx80_infinity_low);
4630         }
4631         if ( zExp <= 0 ) {
4632             isTiny = status->tininess_before_rounding
4633                   || (zExp < 0)
4634                   || !increment
4635                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4636             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4637             zExp = 0;
4638             if (isTiny && zSig1) {
4639                 float_raise(float_flag_underflow, status);
4640             }
4641             if (zSig1) {
4642                 float_raise(float_flag_inexact, status);
4643             }
4644             switch (roundingMode) {
4645             case float_round_nearest_even:
4646             case float_round_ties_away:
4647                 increment = ((int64_t)zSig1 < 0);
4648                 break;
4649             case float_round_to_zero:
4650                 increment = 0;
4651                 break;
4652             case float_round_up:
4653                 increment = !zSign && zSig1;
4654                 break;
4655             case float_round_down:
4656                 increment = zSign && zSig1;
4657                 break;
4658             default:
4659                 abort();
4660             }
4661             if ( increment ) {
4662                 ++zSig0;
4663                 if (!(zSig1 << 1) && roundNearestEven) {
4664                     zSig0 &= ~1;
4665                 }
4666                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4667             }
4668             return packFloatx80( zSign, zExp, zSig0 );
4669         }
4670     }
4671     if (zSig1) {
4672         float_raise(float_flag_inexact, status);
4673     }
4674     if ( increment ) {
4675         ++zSig0;
4676         if ( zSig0 == 0 ) {
4677             ++zExp;
4678             zSig0 = UINT64_C(0x8000000000000000);
4679         }
4680         else {
4681             if (!(zSig1 << 1) && roundNearestEven) {
4682                 zSig0 &= ~1;
4683             }
4684         }
4685     }
4686     else {
4687         if ( zSig0 == 0 ) zExp = 0;
4688     }
4689     return packFloatx80( zSign, zExp, zSig0 );
4690 
4691 }
4692 
4693 /*----------------------------------------------------------------------------
4694 | Takes an abstract floating-point value having sign `zSign', exponent
4695 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4696 | and returns the proper extended double-precision floating-point value
4697 | corresponding to the abstract input.  This routine is just like
4698 | `roundAndPackFloatx80' except that the input significand does not have to be
4699 | normalized.
4700 *----------------------------------------------------------------------------*/
4701 
4702 floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4703                                        bool zSign, int32_t zExp,
4704                                        uint64_t zSig0, uint64_t zSig1,
4705                                        float_status *status)
4706 {
4707     int8_t shiftCount;
4708 
4709     if ( zSig0 == 0 ) {
4710         zSig0 = zSig1;
4711         zSig1 = 0;
4712         zExp -= 64;
4713     }
4714     shiftCount = clz64(zSig0);
4715     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4716     zExp -= shiftCount;
4717     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4718                                 zSig0, zSig1, status);
4719 
4720 }
4721 
4722 /*----------------------------------------------------------------------------
4723 | Returns the least-significant 64 fraction bits of the quadruple-precision
4724 | floating-point value `a'.
4725 *----------------------------------------------------------------------------*/
4726 
4727 static inline uint64_t extractFloat128Frac1( float128 a )
4728 {
4729 
4730     return a.low;
4731 
4732 }
4733 
4734 /*----------------------------------------------------------------------------
4735 | Returns the most-significant 48 fraction bits of the quadruple-precision
4736 | floating-point value `a'.
4737 *----------------------------------------------------------------------------*/
4738 
4739 static inline uint64_t extractFloat128Frac0( float128 a )
4740 {
4741 
4742     return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4743 
4744 }
4745 
4746 /*----------------------------------------------------------------------------
4747 | Returns the exponent bits of the quadruple-precision floating-point value
4748 | `a'.
4749 *----------------------------------------------------------------------------*/
4750 
4751 static inline int32_t extractFloat128Exp( float128 a )
4752 {
4753 
4754     return ( a.high>>48 ) & 0x7FFF;
4755 
4756 }
4757 
4758 /*----------------------------------------------------------------------------
4759 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4760 *----------------------------------------------------------------------------*/
4761 
4762 static inline bool extractFloat128Sign(float128 a)
4763 {
4764     return a.high >> 63;
4765 }
4766 
4767 /*----------------------------------------------------------------------------
4768 | Normalizes the subnormal quadruple-precision floating-point value
4769 | represented by the denormalized significand formed by the concatenation of
4770 | `aSig0' and `aSig1'.  The normalized exponent is stored at the location
4771 | pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
4772 | significand are stored at the location pointed to by `zSig0Ptr', and the
4773 | least significant 64 bits of the normalized significand are stored at the
4774 | location pointed to by `zSig1Ptr'.
4775 *----------------------------------------------------------------------------*/
4776 
4777 static void
4778  normalizeFloat128Subnormal(
4779      uint64_t aSig0,
4780      uint64_t aSig1,
4781      int32_t *zExpPtr,
4782      uint64_t *zSig0Ptr,
4783      uint64_t *zSig1Ptr
4784  )
4785 {
4786     int8_t shiftCount;
4787 
4788     if ( aSig0 == 0 ) {
4789         shiftCount = clz64(aSig1) - 15;
4790         if ( shiftCount < 0 ) {
4791             *zSig0Ptr = aSig1>>( - shiftCount );
4792             *zSig1Ptr = aSig1<<( shiftCount & 63 );
4793         }
4794         else {
4795             *zSig0Ptr = aSig1<<shiftCount;
4796             *zSig1Ptr = 0;
4797         }
4798         *zExpPtr = - shiftCount - 63;
4799     }
4800     else {
4801         shiftCount = clz64(aSig0) - 15;
4802         shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
4803         *zExpPtr = 1 - shiftCount;
4804     }
4805 
4806 }
4807 
4808 /*----------------------------------------------------------------------------
4809 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4810 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4811 | floating-point value, returning the result.  After being shifted into the
4812 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4813 | added together to form the most significant 32 bits of the result.  This
4814 | means that any integer portion of `zSig0' will be added into the exponent.
4815 | Since a properly normalized significand will have an integer portion equal
4816 | to 1, the `zExp' input should be 1 less than the desired result exponent
4817 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4818 | significand.
4819 *----------------------------------------------------------------------------*/
4820 
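/*
 * For example, packFloat128(0, 0x3FFF, 0, 0) produces the high word
 * 0x3FFF000000000000 and a zero low word, which is the quadruple-
 * precision encoding of 1.0.
 */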
4821 static inline float128
4822 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4823 {
4824     float128 z;
4825 
4826     z.low = zSig1;
4827     z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4828     return z;
4829 }
4830 
4831 /*----------------------------------------------------------------------------
4832 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4833 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4834 | and `zSig2', and returns the proper quadruple-precision floating-point value
4835 | corresponding to the abstract input.  Ordinarily, the abstract value is
4836 | simply rounded and packed into the quadruple-precision format, with the
4837 | inexact exception raised if the abstract input cannot be represented
4838 | exactly.  However, if the abstract value is too large, the overflow and
4839 | inexact exceptions are raised and an infinity or maximal finite value is
4840 | returned.  If the abstract value is too small, the input value is rounded to
4841 | a subnormal number, and the underflow and inexact exceptions are raised if
4842 | the abstract input cannot be represented exactly as a subnormal quadruple-
4843 | precision floating-point number.
4844 |     The input significand must be normalized or smaller.  If the input
4845 | significand is not normalized, `zExp' must be 0; in that case, the result
4846 | returned is a subnormal number, and it must not require rounding.  In the
4847 | usual case that the input significand is normalized, `zExp' must be 1 less
4848 | than the ``true'' floating-point exponent.  The handling of underflow and
4849 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4850 *----------------------------------------------------------------------------*/
4851 
4852 static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
4853                                      uint64_t zSig0, uint64_t zSig1,
4854                                      uint64_t zSig2, float_status *status)
4855 {
4856     int8_t roundingMode;
4857     bool roundNearestEven, increment, isTiny;
4858 
4859     roundingMode = status->float_rounding_mode;
4860     roundNearestEven = ( roundingMode == float_round_nearest_even );
4861     switch (roundingMode) {
4862     case float_round_nearest_even:
4863     case float_round_ties_away:
4864         increment = ((int64_t)zSig2 < 0);
4865         break;
4866     case float_round_to_zero:
4867         increment = 0;
4868         break;
4869     case float_round_up:
4870         increment = !zSign && zSig2;
4871         break;
4872     case float_round_down:
4873         increment = zSign && zSig2;
4874         break;
4875     case float_round_to_odd:
4876         increment = !(zSig1 & 0x1) && zSig2;
4877         break;
4878     default:
4879         abort();
4880     }
4881     if ( 0x7FFD <= (uint32_t) zExp ) {
4882         if (    ( 0x7FFD < zExp )
4883              || (    ( zExp == 0x7FFD )
4884                   && eq128(
4885                          UINT64_C(0x0001FFFFFFFFFFFF),
4886                          UINT64_C(0xFFFFFFFFFFFFFFFF),
4887                          zSig0,
4888                          zSig1
4889                      )
4890                   && increment
4891                 )
4892            ) {
4893             float_raise(float_flag_overflow | float_flag_inexact, status);
4894             if (    ( roundingMode == float_round_to_zero )
4895                  || ( zSign && ( roundingMode == float_round_up ) )
4896                  || ( ! zSign && ( roundingMode == float_round_down ) )
4897                  || (roundingMode == float_round_to_odd)
4898                ) {
4899                 return
4900                     packFloat128(
4901                         zSign,
4902                         0x7FFE,
4903                         UINT64_C(0x0000FFFFFFFFFFFF),
4904                         UINT64_C(0xFFFFFFFFFFFFFFFF)
4905                     );
4906             }
4907             return packFloat128( zSign, 0x7FFF, 0, 0 );
4908         }
4909         if ( zExp < 0 ) {
4910             if (status->flush_to_zero) {
4911                 float_raise(float_flag_output_denormal, status);
4912                 return packFloat128(zSign, 0, 0, 0);
4913             }
4914             isTiny = status->tininess_before_rounding
4915                   || (zExp < -1)
4916                   || !increment
4917                   || lt128(zSig0, zSig1,
4918                            UINT64_C(0x0001FFFFFFFFFFFF),
4919                            UINT64_C(0xFFFFFFFFFFFFFFFF));
4920             shift128ExtraRightJamming(
4921                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4922             zExp = 0;
4923             if (isTiny && zSig2) {
4924                 float_raise(float_flag_underflow, status);
4925             }
4926             switch (roundingMode) {
4927             case float_round_nearest_even:
4928             case float_round_ties_away:
4929                 increment = ((int64_t)zSig2 < 0);
4930                 break;
4931             case float_round_to_zero:
4932                 increment = 0;
4933                 break;
4934             case float_round_up:
4935                 increment = !zSign && zSig2;
4936                 break;
4937             case float_round_down:
4938                 increment = zSign && zSig2;
4939                 break;
4940             case float_round_to_odd:
4941                 increment = !(zSig1 & 0x1) && zSig2;
4942                 break;
4943             default:
4944                 abort();
4945             }
4946         }
4947     }
4948     if (zSig2) {
4949         float_raise(float_flag_inexact, status);
4950     }
4951     if ( increment ) {
4952         add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
4953         if ((zSig2 + zSig2 == 0) && roundNearestEven) {
4954             zSig1 &= ~1;
4955         }
4956     }
4957     else {
4958         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4959     }
4960     return packFloat128( zSign, zExp, zSig0, zSig1 );
4961 
4962 }
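/*----------------------------------------------------------------------------
| Editorial sketch (not part of the SoftFloat sources): the nearest-even
| decision used above, reduced to a toy 8-bit significand.  `extra' plays the
| role of `zSig2': its top bit means the discarded part is at least one half,
| and an exact tie (only that bit set) is resolved by forcing the result
| even, just as the `zSig2 + zSig2 == 0' test above clears the low bit of
| `zSig1'.  Carry out of the significand is ignored here; the real routine
| deals with it through the exponent.
*----------------------------------------------------------------------------*/

#include <stdint.h>

static uint8_t round_nearest_even_toy(uint8_t sig, uint8_t extra)
{
    if (extra & 0x80) {                       /* discarded bits >= 1/2 */
        sig++;
        if ((uint8_t)(extra << 1) == 0) {     /* exact tie: make it even */
            sig &= ~1;
        }
    }
    return sig;
}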
4963 
4964 /*----------------------------------------------------------------------------
4965 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4966 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4967 | returns the proper quadruple-precision floating-point value corresponding
4968 | to the abstract input.  This routine is just like `roundAndPackFloat128'
4969 | except that the input significand has fewer bits and does not have to be
4970 | normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
4971 | point exponent.
4972 *----------------------------------------------------------------------------*/
4973 
4974 static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
4975                                               uint64_t zSig0, uint64_t zSig1,
4976                                               float_status *status)
4977 {
4978     int8_t shiftCount;
4979     uint64_t zSig2;
4980 
4981     if ( zSig0 == 0 ) {
4982         zSig0 = zSig1;
4983         zSig1 = 0;
4984         zExp -= 64;
4985     }
4986     shiftCount = clz64(zSig0) - 15;
4987     if ( 0 <= shiftCount ) {
4988         zSig2 = 0;
4989         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4990     }
4991     else {
4992         shift128ExtraRightJamming(
4993             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4994     }
4995     zExp -= shiftCount;
4996     return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
4997 
4998 }
4999 
5000 
5001 /*----------------------------------------------------------------------------
5002 | Returns the result of converting the 32-bit two's complement integer `a'
5003 | to the extended double-precision floating-point format.  The conversion
5004 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5005 | Arithmetic.
5006 *----------------------------------------------------------------------------*/
5007 
5008 floatx80 int32_to_floatx80(int32_t a, float_status *status)
5009 {
5010     bool zSign;
5011     uint32_t absA;
5012     int8_t shiftCount;
5013     uint64_t zSig;
5014 
5015     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5016     zSign = ( a < 0 );
5017     absA = zSign ? - a : a;
5018     shiftCount = clz32(absA) + 32;
5019     zSig = absA;
5020     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
5021 
5022 }
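/*----------------------------------------------------------------------------
| Editorial note (not part of the SoftFloat sources): tracing the routine
| above for a = 1: clz32(1) = 31, so shiftCount = 63 and
| zSig << shiftCount = 0x8000000000000000, the explicit integer bit of the
| floatx80 format, while the exponent becomes 0x403E - 63 = 0x3FFF, i.e. +1.0.
*----------------------------------------------------------------------------*/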
5023 
5024 /*----------------------------------------------------------------------------
5025 | Returns the result of converting the 32-bit two's complement integer `a' to
5026 | the quadruple-precision floating-point format.  The conversion is performed
5027 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5028 *----------------------------------------------------------------------------*/
5029 
5030 float128 int32_to_float128(int32_t a, float_status *status)
5031 {
5032     bool zSign;
5033     uint32_t absA;
5034     int8_t shiftCount;
5035     uint64_t zSig0;
5036 
5037     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
5038     zSign = ( a < 0 );
5039     absA = zSign ? - a : a;
5040     shiftCount = clz32(absA) + 17;
5041     zSig0 = absA;
5042     return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
5043 
5044 }
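/*----------------------------------------------------------------------------
| Editorial note (not part of the SoftFloat sources): for a = 1 the routine
| above computes shiftCount = clz32(1) + 17 = 48, so zSig0 << shiftCount puts
| the integer bit in bit 48 and the call becomes
| packFloat128(0, 0x402E - 48, 0x0001000000000000, 0), whose high word is
| 0x3FFF000000000000, i.e. +1.0.
*----------------------------------------------------------------------------*/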
5045 
5046 /*----------------------------------------------------------------------------
5047 | Returns the result of converting the 64-bit two's complement integer `a'
5048 | to the extended double-precision floating-point format.  The conversion
5049 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5050 | Arithmetic.
5051 *----------------------------------------------------------------------------*/
5052 
5053 floatx80 int64_to_floatx80(int64_t a, float_status *status)
5054 {
5055     bool zSign;
5056     uint64_t absA;
5057     int8_t shiftCount;
5058 
5059     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5060     zSign = ( a < 0 );
5061     absA = zSign ? - a : a;
5062     shiftCount = clz64(absA);
5063     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
5064 
5065 }
5066 
5067 /*----------------------------------------------------------------------------
5068 | Returns the result of converting the 64-bit two's complement integer `a' to
5069 | the quadruple-precision floating-point format.  The conversion is performed
5070 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5071 *----------------------------------------------------------------------------*/
5072 
5073 float128 int64_to_float128(int64_t a, float_status *status)
5074 {
5075     bool zSign;
5076     uint64_t absA;
5077     int8_t shiftCount;
5078     int32_t zExp;
5079     uint64_t zSig0, zSig1;
5080 
5081     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
5082     zSign = ( a < 0 );
5083     absA = zSign ? - a : a;
5084     shiftCount = clz64(absA) + 49;
5085     zExp = 0x406E - shiftCount;
5086     if ( 64 <= shiftCount ) {
5087         zSig1 = 0;
5088         zSig0 = absA;
5089         shiftCount -= 64;
5090     }
5091     else {
5092         zSig1 = absA;
5093         zSig0 = 0;
5094     }
5095     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5096     return packFloat128( zSign, zExp, zSig0, zSig1 );
5097 
5098 }
5099 
5100 /*----------------------------------------------------------------------------
5101 | Returns the result of converting the 64-bit unsigned integer `a'
5102 | to the quadruple-precision floating-point format.  The conversion is performed
5103 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5104 *----------------------------------------------------------------------------*/
5105 
5106 float128 uint64_to_float128(uint64_t a, float_status *status)
5107 {
5108     if (a == 0) {
5109         return float128_zero;
5110     }
5111     return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
5112 }
5113 
5114 /*----------------------------------------------------------------------------
5115 | Returns the result of converting the single-precision floating-point value
5116 | `a' to the extended double-precision floating-point format.  The conversion
5117 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5118 | Arithmetic.
5119 *----------------------------------------------------------------------------*/
5120 
5121 floatx80 float32_to_floatx80(float32 a, float_status *status)
5122 {
5123     bool aSign;
5124     int aExp;
5125     uint32_t aSig;
5126 
5127     a = float32_squash_input_denormal(a, status);
5128     aSig = extractFloat32Frac( a );
5129     aExp = extractFloat32Exp( a );
5130     aSign = extractFloat32Sign( a );
5131     if ( aExp == 0xFF ) {
5132         if (aSig) {
5133             floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
5134                                                status);
5135             return floatx80_silence_nan(res, status);
5136         }
5137         return packFloatx80(aSign,
5138                             floatx80_infinity_high,
5139                             floatx80_infinity_low);
5140     }
5141     if ( aExp == 0 ) {
5142         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5143         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5144     }
5145     aSig |= 0x00800000;
5146     return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
5147 
5148 }
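/*----------------------------------------------------------------------------
| Editorial note (not part of the SoftFloat sources): in the conversion above
| the OR of 0x00800000 makes the integer bit explicit, the shift by 40 moves
| it from bit 23 to bit 63 as the floatx80 format requires, and adding 0x3F80
| rebiases the exponent from the single-precision bias 0x7F to the extended
| bias 0x3FFF (0x3FFF - 0x7F = 0x3F80).
*----------------------------------------------------------------------------*/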
5149 
5150 /*----------------------------------------------------------------------------
5151 | Returns the result of converting the single-precision floating-point value
5152 | `a' to the quadruple-precision floating-point format.  The conversion is
5153 | performed according to the IEC/IEEE Standard for Binary Floating-Point
5154 | Arithmetic.
5155 *----------------------------------------------------------------------------*/
5156 
5157 float128 float32_to_float128(float32 a, float_status *status)
5158 {
5159     bool aSign;
5160     int aExp;
5161     uint32_t aSig;
5162 
5163     a = float32_squash_input_denormal(a, status);
5164     aSig = extractFloat32Frac( a );
5165     aExp = extractFloat32Exp( a );
5166     aSign = extractFloat32Sign( a );
5167     if ( aExp == 0xFF ) {
5168         if (aSig) {
5169             return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
5170         }
5171         return packFloat128( aSign, 0x7FFF, 0, 0 );
5172     }
5173     if ( aExp == 0 ) {
5174         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5175         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5176         --aExp;
5177     }
5178     return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
5179 
5180 }
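/*----------------------------------------------------------------------------
| Editorial note (not part of the SoftFloat sources): here the integer bit is
| left implicit for normal inputs; the 23 fraction bits shifted left by 25
| fill the top of the 48-bit `zSig0' fraction field (23 + 25 = 48), and
| adding 0x3F80 = 0x3FFF - 0x7F rebiases the exponent.  In the subnormal path
| the normalized `aSig' does carry an explicit integer bit, which packFloat128
| adds into the exponent field, and the `--aExp' compensates for exactly that.
*----------------------------------------------------------------------------*/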
5181 
5182 /*----------------------------------------------------------------------------
5183 | Returns the remainder of the single-precision floating-point value `a'
5184 | with respect to the corresponding value `b'.  The operation is performed
5185 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5186 *----------------------------------------------------------------------------*/
5187 
5188 float32 float32_rem(float32 a, float32 b, float_status *status)
5189 {
5190     bool aSign, zSign;
5191     int aExp, bExp, expDiff;
5192     uint32_t aSig, bSig;
5193     uint32_t q;
5194     uint64_t aSig64, bSig64, q64;
5195     uint32_t alternateASig;
5196     int32_t sigMean;
5197     a = float32_squash_input_denormal(a, status);
5198     b = float32_squash_input_denormal(b, status);
5199 
5200     aSig = extractFloat32Frac( a );
5201     aExp = extractFloat32Exp( a );
5202     aSign = extractFloat32Sign( a );
5203     bSig = extractFloat32Frac( b );
5204     bExp = extractFloat32Exp( b );
5205     if ( aExp == 0xFF ) {
5206         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
5207             return propagateFloat32NaN(a, b, status);
5208         }
5209         float_raise(float_flag_invalid, status);
5210         return float32_default_nan(status);
5211     }
5212     if ( bExp == 0xFF ) {
5213         if (bSig) {
5214             return propagateFloat32NaN(a, b, status);
5215         }
5216         return a;
5217     }
5218     if ( bExp == 0 ) {
5219         if ( bSig == 0 ) {
5220             float_raise(float_flag_invalid, status);
5221             return float32_default_nan(status);
5222         }
5223         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
5224     }
5225     if ( aExp == 0 ) {
5226         if ( aSig == 0 ) return a;
5227         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5228     }
5229     expDiff = aExp - bExp;
5230     aSig |= 0x00800000;
5231     bSig |= 0x00800000;
5232     if ( expDiff < 32 ) {
5233         aSig <<= 8;
5234         bSig <<= 8;
5235         if ( expDiff < 0 ) {
5236             if ( expDiff < -1 ) return a;
5237             aSig >>= 1;
5238         }
5239         q = ( bSig <= aSig );
5240         if ( q ) aSig -= bSig;
5241         if ( 0 < expDiff ) {
5242             q = ( ( (uint64_t) aSig )<<32 ) / bSig;
5243             q >>= 32 - expDiff;
5244             bSig >>= 2;
5245             aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5246         }
5247         else {
5248             aSig >>= 2;
5249             bSig >>= 2;
5250         }
5251     }
5252     else {
5253         if ( bSig <= aSig ) aSig -= bSig;
5254         aSig64 = ( (uint64_t) aSig )<<40;
5255         bSig64 = ( (uint64_t) bSig )<<40;
5256         expDiff -= 64;
5257         while ( 0 < expDiff ) {
5258             q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5259             q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5260             aSig64 = - ( ( bSig * q64 )<<38 );
5261             expDiff -= 62;
5262         }
5263         expDiff += 64;
5264         q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5265         q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5266         q = q64>>( 64 - expDiff );
5267         bSig <<= 6;
5268         aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5269     }
5270     do {
5271         alternateASig = aSig;
5272         ++q;
5273         aSig -= bSig;
5274     } while ( 0 <= (int32_t) aSig );
5275     sigMean = aSig + alternateASig;
5276     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5277         aSig = alternateASig;
5278     }
5279     zSign = ( (int32_t) aSig < 0 );
5280     if ( zSign ) aSig = - aSig;
5281     return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
5282 }
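/*----------------------------------------------------------------------------
| Editorial sketch (not part of the SoftFloat sources): the IEEE remainder
| computed above uses a round-to-nearest quotient, so the result can be
| negative even for positive operands.  Compiled on its own, the program
| below shows the expected values using the host's C99 remainderf(), which
| implements the same IEC 60559 operation (exception flags aside).
*----------------------------------------------------------------------------*/

#include <math.h>
#include <stdio.h>

int main(void)
{
    /* 5/2 = 2.5 rounds to the even quotient 2, so the remainder is 5 - 4 = 1;
     * 7/2 = 3.5 rounds to the even quotient 4, so the remainder is 7 - 8 = -1. */
    printf("%g %g\n", remainderf(5.0f, 2.0f), remainderf(7.0f, 2.0f));
    return 0;
}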
5283 
5284 
5285 
5286 /*----------------------------------------------------------------------------
5287 | Returns the binary exponential of the single-precision floating-point value
5288 | `a'. The operation is performed according to the IEC/IEEE Standard for
5289 | Binary Floating-Point Arithmetic.
5290 |
5291 | Uses the following identities:
5292 |
5293 | 1. -------------------------------------------------------------------------
5294 |      x    x*ln(2)
5295 |     2  = e
5296 |
5297 | 2. -------------------------------------------------------------------------
5298 |                      2     3     4     5           n
5299 |      x        x     x     x     x     x           x
5300 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5301 |               1!    2!    3!    4!    5!          n!
5302 *----------------------------------------------------------------------------*/
5303 
5304 static const float64 float32_exp2_coefficients[15] =
5305 {
5306     const_float64( 0x3ff0000000000000ll ), /*  1 */
5307     const_float64( 0x3fe0000000000000ll ), /*  2 */
5308     const_float64( 0x3fc5555555555555ll ), /*  3 */
5309     const_float64( 0x3fa5555555555555ll ), /*  4 */
5310     const_float64( 0x3f81111111111111ll ), /*  5 */
5311     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
5312     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
5313     const_float64( 0x3efa01a01a01a01all ), /*  8 */
5314     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
5315     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5316     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5317     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5318     const_float64( 0x3de6124613a86d09ll ), /* 13 */
5319     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5320     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5321 };
5322 
5323 float32 float32_exp2(float32 a, float_status *status)
5324 {
5325     bool aSign;
5326     int aExp;
5327     uint32_t aSig;
5328     float64 r, x, xn;
5329     int i;
5330     a = float32_squash_input_denormal(a, status);
5331 
5332     aSig = extractFloat32Frac( a );
5333     aExp = extractFloat32Exp( a );
5334     aSign = extractFloat32Sign( a );
5335 
5336     if ( aExp == 0xFF) {
5337         if (aSig) {
5338             return propagateFloat32NaN(a, float32_zero, status);
5339         }
5340         return (aSign) ? float32_zero : a;
5341     }
5342     if (aExp == 0) {
5343         if (aSig == 0) return float32_one;
5344     }
5345 
5346     float_raise(float_flag_inexact, status);
5347 
5348     /* ******************************* */
5349     /* using float64 for approximation */
5350     /* ******************************* */
5351     x = float32_to_float64(a, status);
5352     x = float64_mul(x, float64_ln2, status);
5353 
5354     xn = x;
5355     r = float64_one;
5356     for (i = 0 ; i < 15 ; i++) {
5357         float64 f;
5358 
5359         f = float64_mul(xn, float32_exp2_coefficients[i], status);
5360         r = float64_add(r, f, status);
5361 
5362         xn = float64_mul(xn, x, status);
5363     }
5364 
5365     return float64_to_float32(r, status);
5366 }
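/*----------------------------------------------------------------------------
| Editorial sketch (not part of the SoftFloat sources): the same truncated
| Maclaurin series in plain double arithmetic.  The coefficient table above
| stores 1/n! for n = 1..15; here the factorial is built up in the loop
| instead.  Special values are not handled; this only shows the shape of the
| computation, not a bit-for-bit match with float32_exp2.
*----------------------------------------------------------------------------*/

static double exp2_series_sketch(double a)
{
    double x = a * 0.69314718055994530942;  /* a * ln(2), identity 1 above */
    double xn = x, fact = 1.0, r = 1.0;
    int i;

    for (i = 1; i <= 15; i++) {
        fact *= i;
        r += xn / fact;                     /* x^i / i!, identity 2 above */
        xn *= x;
    }
    return r;
}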
5367 
5368 /*----------------------------------------------------------------------------
5369 | Returns the binary log of the single-precision floating-point value `a'.
5370 | The operation is performed according to the IEC/IEEE Standard for Binary
5371 | Floating-Point Arithmetic.
5372 *----------------------------------------------------------------------------*/
5373 float32 float32_log2(float32 a, float_status *status)
5374 {
5375     bool aSign, zSign;
5376     int aExp;
5377     uint32_t aSig, zSig, i;
5378 
5379     a = float32_squash_input_denormal(a, status);
5380     aSig = extractFloat32Frac( a );
5381     aExp = extractFloat32Exp( a );
5382     aSign = extractFloat32Sign( a );
5383 
5384     if ( aExp == 0 ) {
5385         if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5386         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5387     }
5388     if ( aSign ) {
5389         float_raise(float_flag_invalid, status);
5390         return float32_default_nan(status);
5391     }
5392     if ( aExp == 0xFF ) {
5393         if (aSig) {
5394             return propagateFloat32NaN(a, float32_zero, status);
5395         }
5396         return a;
5397     }
5398 
5399     aExp -= 0x7F;
5400     aSig |= 0x00800000;
5401     zSign = aExp < 0;
5402     zSig = aExp << 23;
5403 
5404     for (i = 1 << 22; i > 0; i >>= 1) {
5405         aSig = ( (uint64_t)aSig * aSig ) >> 23;
5406         if ( aSig & 0x01000000 ) {
5407             aSig >>= 1;
5408             zSig |= i;
5409         }
5410     }
5411 
5412     if ( zSign )
5413         zSig = -zSig;
5414 
5415     return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
5416 }
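/*----------------------------------------------------------------------------
| Editorial sketch (not part of the SoftFloat sources): the digit-by-digit
| log2 used above, in plain double arithmetic.  Repeatedly squaring a
| mantissa in [1, 2) either leaves it in [1, 2) (next fraction bit 0) or
| pushes it into [2, 4) (bit 1, then halve to renormalize); the integer part
| comes from the exponent, which is what `zSig = aExp << 23' contributes
| above.
*----------------------------------------------------------------------------*/

static double log2_fraction_sketch(double m /* in [1, 2) */, int bits)
{
    double result = 0.0, weight = 0.5;
    int i;

    for (i = 0; i < bits; i++) {
        m *= m;
        if (m >= 2.0) {
            m *= 0.5;
            result += weight;
        }
        weight *= 0.5;
    }
    return result;   /* fractional part of log2 of the original mantissa */
}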
5417 
5418 /*----------------------------------------------------------------------------
5419 | Returns the result of converting the double-precision floating-point value
5420 | `a' to the extended double-precision floating-point format.  The conversion
5421 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5422 | Arithmetic.
5423 *----------------------------------------------------------------------------*/
5424 
5425 floatx80 float64_to_floatx80(float64 a, float_status *status)
5426 {
5427     bool aSign;
5428     int aExp;
5429     uint64_t aSig;
5430 
5431     a = float64_squash_input_denormal(a, status);
5432     aSig = extractFloat64Frac( a );
5433     aExp = extractFloat64Exp( a );
5434     aSign = extractFloat64Sign( a );
5435     if ( aExp == 0x7FF ) {
5436         if (aSig) {
5437             floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5438                                                status);
5439             return floatx80_silence_nan(res, status);
5440         }
5441         return packFloatx80(aSign,
5442                             floatx80_infinity_high,
5443                             floatx80_infinity_low);
5444     }
5445     if ( aExp == 0 ) {
5446         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5447         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5448     }
5449     return
5450         packFloatx80(
5451             aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
5452 
5453 }
5454 
5455 /*----------------------------------------------------------------------------
5456 | Returns the result of converting the double-precision floating-point value
5457 | `a' to the quadruple-precision floating-point format.  The conversion is
5458 | performed according to the IEC/IEEE Standard for Binary Floating-Point
5459 | Arithmetic.
5460 *----------------------------------------------------------------------------*/
5461 
5462 float128 float64_to_float128(float64 a, float_status *status)
5463 {
5464     bool aSign;
5465     int aExp;
5466     uint64_t aSig, zSig0, zSig1;
5467 
5468     a = float64_squash_input_denormal(a, status);
5469     aSig = extractFloat64Frac( a );
5470     aExp = extractFloat64Exp( a );
5471     aSign = extractFloat64Sign( a );
5472     if ( aExp == 0x7FF ) {
5473         if (aSig) {
5474             return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
5475         }
5476         return packFloat128( aSign, 0x7FFF, 0, 0 );
5477     }
5478     if ( aExp == 0 ) {
5479         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5480         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5481         --aExp;
5482     }
5483     shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
5484     return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
5485 
5486 }
5487 
5488 
5489 /*----------------------------------------------------------------------------
5490 | Returns the remainder of the double-precision floating-point value `a'
5491 | with respect to the corresponding value `b'.  The operation is performed
5492 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5493 *----------------------------------------------------------------------------*/
5494 
5495 float64 float64_rem(float64 a, float64 b, float_status *status)
5496 {
5497     bool aSign, zSign;
5498     int aExp, bExp, expDiff;
5499     uint64_t aSig, bSig;
5500     uint64_t q, alternateASig;
5501     int64_t sigMean;
5502 
5503     a = float64_squash_input_denormal(a, status);
5504     b = float64_squash_input_denormal(b, status);
5505     aSig = extractFloat64Frac( a );
5506     aExp = extractFloat64Exp( a );
5507     aSign = extractFloat64Sign( a );
5508     bSig = extractFloat64Frac( b );
5509     bExp = extractFloat64Exp( b );
5510     if ( aExp == 0x7FF ) {
5511         if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
5512             return propagateFloat64NaN(a, b, status);
5513         }
5514         float_raise(float_flag_invalid, status);
5515         return float64_default_nan(status);
5516     }
5517     if ( bExp == 0x7FF ) {
5518         if (bSig) {
5519             return propagateFloat64NaN(a, b, status);
5520         }
5521         return a;
5522     }
5523     if ( bExp == 0 ) {
5524         if ( bSig == 0 ) {
5525             float_raise(float_flag_invalid, status);
5526             return float64_default_nan(status);
5527         }
5528         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5529     }
5530     if ( aExp == 0 ) {
5531         if ( aSig == 0 ) return a;
5532         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5533     }
5534     expDiff = aExp - bExp;
5535     aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5536     bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
5537     if ( expDiff < 0 ) {
5538         if ( expDiff < -1 ) return a;
5539         aSig >>= 1;
5540     }
5541     q = ( bSig <= aSig );
5542     if ( q ) aSig -= bSig;
5543     expDiff -= 64;
5544     while ( 0 < expDiff ) {
5545         q = estimateDiv128To64( aSig, 0, bSig );
5546         q = ( 2 < q ) ? q - 2 : 0;
5547         aSig = - ( ( bSig>>2 ) * q );
5548         expDiff -= 62;
5549     }
5550     expDiff += 64;
5551     if ( 0 < expDiff ) {
5552         q = estimateDiv128To64( aSig, 0, bSig );
5553         q = ( 2 < q ) ? q - 2 : 0;
5554         q >>= 64 - expDiff;
5555         bSig >>= 2;
5556         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5557     }
5558     else {
5559         aSig >>= 2;
5560         bSig >>= 2;
5561     }
5562     do {
5563         alternateASig = aSig;
5564         ++q;
5565         aSig -= bSig;
5566     } while ( 0 <= (int64_t) aSig );
5567     sigMean = aSig + alternateASig;
5568     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5569         aSig = alternateASig;
5570     }
5571     zSign = ( (int64_t) aSig < 0 );
5572     if ( zSign ) aSig = - aSig;
5573     return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
5574 
5575 }
5576 
5577 /*----------------------------------------------------------------------------
5578 | Returns the binary log of the double-precision floating-point value `a'.
5579 | The operation is performed according to the IEC/IEEE Standard for Binary
5580 | Floating-Point Arithmetic.
5581 *----------------------------------------------------------------------------*/
5582 float64 float64_log2(float64 a, float_status *status)
5583 {
5584     bool aSign, zSign;
5585     int aExp;
5586     uint64_t aSig, aSig0, aSig1, zSig, i;
5587     a = float64_squash_input_denormal(a, status);
5588 
5589     aSig = extractFloat64Frac( a );
5590     aExp = extractFloat64Exp( a );
5591     aSign = extractFloat64Sign( a );
5592 
5593     if ( aExp == 0 ) {
5594         if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5595         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5596     }
5597     if ( aSign ) {
5598         float_raise(float_flag_invalid, status);
5599         return float64_default_nan(status);
5600     }
5601     if ( aExp == 0x7FF ) {
5602         if (aSig) {
5603             return propagateFloat64NaN(a, float64_zero, status);
5604         }
5605         return a;
5606     }
5607 
5608     aExp -= 0x3FF;
5609     aSig |= UINT64_C(0x0010000000000000);
5610     zSign = aExp < 0;
5611     zSig = (uint64_t)aExp << 52;
5612     for (i = 1LL << 51; i > 0; i >>= 1) {
5613         mul64To128( aSig, aSig, &aSig0, &aSig1 );
5614         aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
5615         if ( aSig & UINT64_C(0x0020000000000000) ) {
5616             aSig >>= 1;
5617             zSig |= i;
5618         }
5619     }
5620 
5621     if ( zSign )
5622         zSig = -zSig;
5623     return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
5624 }
5625 
5626 /*----------------------------------------------------------------------------
5627 | Returns the result of converting the extended double-precision floating-
5628 | point value `a' to the 32-bit two's complement integer format.  The
5629 | conversion is performed according to the IEC/IEEE Standard for Binary
5630 | Floating-Point Arithmetic---which means in particular that the conversion
5631 | is rounded according to the current rounding mode.  If `a' is a NaN, the
5632 | largest positive integer is returned.  Otherwise, if the conversion
5633 | overflows, the largest integer with the same sign as `a' is returned.
5634 *----------------------------------------------------------------------------*/
5635 
5636 int32_t floatx80_to_int32(floatx80 a, float_status *status)
5637 {
5638     bool aSign;
5639     int32_t aExp, shiftCount;
5640     uint64_t aSig;
5641 
5642     if (floatx80_invalid_encoding(a)) {
5643         float_raise(float_flag_invalid, status);
5644         return 1 << 31;
5645     }
5646     aSig = extractFloatx80Frac( a );
5647     aExp = extractFloatx80Exp( a );
5648     aSign = extractFloatx80Sign( a );
5649     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5650     shiftCount = 0x4037 - aExp;
5651     if ( shiftCount <= 0 ) shiftCount = 1;
5652     shift64RightJamming( aSig, shiftCount, &aSig );
5653     return roundAndPackInt32(aSign, aSig, status);
5654 
5655 }
5656 
5657 /*----------------------------------------------------------------------------
5658 | Returns the result of converting the extended double-precision floating-
5659 | point value `a' to the 32-bit two's complement integer format.  The
5660 | conversion is performed according to the IEC/IEEE Standard for Binary
5661 | Floating-Point Arithmetic, except that the conversion is always rounded
5662 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5663 | Otherwise, if the conversion overflows, the largest integer with the same
5664 | sign as `a' is returned.
5665 *----------------------------------------------------------------------------*/
5666 
5667 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5668 {
5669     bool aSign;
5670     int32_t aExp, shiftCount;
5671     uint64_t aSig, savedASig;
5672     int32_t z;
5673 
5674     if (floatx80_invalid_encoding(a)) {
5675         float_raise(float_flag_invalid, status);
5676         return 1 << 31;
5677     }
5678     aSig = extractFloatx80Frac( a );
5679     aExp = extractFloatx80Exp( a );
5680     aSign = extractFloatx80Sign( a );
5681     if ( 0x401E < aExp ) {
5682         if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5683         goto invalid;
5684     }
5685     else if ( aExp < 0x3FFF ) {
5686         if (aExp || aSig) {
5687             float_raise(float_flag_inexact, status);
5688         }
5689         return 0;
5690     }
5691     shiftCount = 0x403E - aExp;
5692     savedASig = aSig;
5693     aSig >>= shiftCount;
5694     z = aSig;
5695     if ( aSign ) z = - z;
5696     if ( ( z < 0 ) ^ aSign ) {
5697  invalid:
5698         float_raise(float_flag_invalid, status);
5699         return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5700     }
5701     if ( ( aSig<<shiftCount ) != savedASig ) {
5702         float_raise(float_flag_inexact, status);
5703     }
5704     return z;
5705 
5706 }
5707 
5708 /*----------------------------------------------------------------------------
5709 | Returns the result of converting the extended double-precision floating-
5710 | point value `a' to the 64-bit two's complement integer format.  The
5711 | conversion is performed according to the IEC/IEEE Standard for Binary
5712 | Floating-Point Arithmetic---which means in particular that the conversion
5713 | is rounded according to the current rounding mode.  If `a' is a NaN,
5714 | the largest positive integer is returned.  Otherwise, if the conversion
5715 | overflows, the largest integer with the same sign as `a' is returned.
5716 *----------------------------------------------------------------------------*/
5717 
5718 int64_t floatx80_to_int64(floatx80 a, float_status *status)
5719 {
5720     bool aSign;
5721     int32_t aExp, shiftCount;
5722     uint64_t aSig, aSigExtra;
5723 
5724     if (floatx80_invalid_encoding(a)) {
5725         float_raise(float_flag_invalid, status);
5726         return 1ULL << 63;
5727     }
5728     aSig = extractFloatx80Frac( a );
5729     aExp = extractFloatx80Exp( a );
5730     aSign = extractFloatx80Sign( a );
5731     shiftCount = 0x403E - aExp;
5732     if ( shiftCount <= 0 ) {
5733         if ( shiftCount ) {
5734             float_raise(float_flag_invalid, status);
5735             if (!aSign || floatx80_is_any_nan(a)) {
5736                 return INT64_MAX;
5737             }
5738             return INT64_MIN;
5739         }
5740         aSigExtra = 0;
5741     }
5742     else {
5743         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5744     }
5745     return roundAndPackInt64(aSign, aSig, aSigExtra, status);
5746 
5747 }
5748 
5749 /*----------------------------------------------------------------------------
5750 | Returns the result of converting the extended double-precision floating-
5751 | point value `a' to the 64-bit two's complement integer format.  The
5752 | conversion is performed according to the IEC/IEEE Standard for Binary
5753 | Floating-Point Arithmetic, except that the conversion is always rounded
5754 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5755 | Otherwise, if the conversion overflows, the largest integer with the same
5756 | sign as `a' is returned.
5757 *----------------------------------------------------------------------------*/
5758 
5759 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
5760 {
5761     bool aSign;
5762     int32_t aExp, shiftCount;
5763     uint64_t aSig;
5764     int64_t z;
5765 
5766     if (floatx80_invalid_encoding(a)) {
5767         float_raise(float_flag_invalid, status);
5768         return 1ULL << 63;
5769     }
5770     aSig = extractFloatx80Frac( a );
5771     aExp = extractFloatx80Exp( a );
5772     aSign = extractFloatx80Sign( a );
5773     shiftCount = aExp - 0x403E;
5774     if ( 0 <= shiftCount ) {
5775         aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
5776         if ( ( a.high != 0xC03E ) || aSig ) {
5777             float_raise(float_flag_invalid, status);
5778             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5779                 return INT64_MAX;
5780             }
5781         }
5782         return INT64_MIN;
5783     }
5784     else if ( aExp < 0x3FFF ) {
5785         if (aExp | aSig) {
5786             float_raise(float_flag_inexact, status);
5787         }
5788         return 0;
5789     }
5790     z = aSig>>( - shiftCount );
5791     if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
5792         float_raise(float_flag_inexact, status);
5793     }
5794     if ( aSign ) z = - z;
5795     return z;
5796 
5797 }
5798 
5799 /*----------------------------------------------------------------------------
5800 | Returns the result of converting the extended double-precision floating-
5801 | point value `a' to the single-precision floating-point format.  The
5802 | conversion is performed according to the IEC/IEEE Standard for Binary
5803 | Floating-Point Arithmetic.
5804 *----------------------------------------------------------------------------*/
5805 
5806 float32 floatx80_to_float32(floatx80 a, float_status *status)
5807 {
5808     bool aSign;
5809     int32_t aExp;
5810     uint64_t aSig;
5811 
5812     if (floatx80_invalid_encoding(a)) {
5813         float_raise(float_flag_invalid, status);
5814         return float32_default_nan(status);
5815     }
5816     aSig = extractFloatx80Frac( a );
5817     aExp = extractFloatx80Exp( a );
5818     aSign = extractFloatx80Sign( a );
5819     if ( aExp == 0x7FFF ) {
5820         if ( (uint64_t) ( aSig<<1 ) ) {
5821             float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5822                                              status);
5823             return float32_silence_nan(res, status);
5824         }
5825         return packFloat32( aSign, 0xFF, 0 );
5826     }
5827     shift64RightJamming( aSig, 33, &aSig );
5828     if ( aExp || aSig ) aExp -= 0x3F81;
5829     return roundAndPackFloat32(aSign, aExp, aSig, status);
5830 
5831 }
5832 
5833 /*----------------------------------------------------------------------------
5834 | Returns the result of converting the extended double-precision floating-
5835 | point value `a' to the double-precision floating-point format.  The
5836 | conversion is performed according to the IEC/IEEE Standard for Binary
5837 | Floating-Point Arithmetic.
5838 *----------------------------------------------------------------------------*/
5839 
5840 float64 floatx80_to_float64(floatx80 a, float_status *status)
5841 {
5842     bool aSign;
5843     int32_t aExp;
5844     uint64_t aSig, zSig;
5845 
5846     if (floatx80_invalid_encoding(a)) {
5847         float_raise(float_flag_invalid, status);
5848         return float64_default_nan(status);
5849     }
5850     aSig = extractFloatx80Frac( a );
5851     aExp = extractFloatx80Exp( a );
5852     aSign = extractFloatx80Sign( a );
5853     if ( aExp == 0x7FFF ) {
5854         if ( (uint64_t) ( aSig<<1 ) ) {
5855             float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5856                                              status);
5857             return float64_silence_nan(res, status);
5858         }
5859         return packFloat64( aSign, 0x7FF, 0 );
5860     }
5861     shift64RightJamming( aSig, 1, &zSig );
5862     if ( aExp || aSig ) aExp -= 0x3C01;
5863     return roundAndPackFloat64(aSign, aExp, zSig, status);
5864 
5865 }
5866 
5867 /*----------------------------------------------------------------------------
5868 | Returns the result of converting the extended double-precision floating-
5869 | point value `a' to the quadruple-precision floating-point format.  The
5870 | conversion is performed according to the IEC/IEEE Standard for Binary
5871 | Floating-Point Arithmetic.
5872 *----------------------------------------------------------------------------*/
5873 
5874 float128 floatx80_to_float128(floatx80 a, float_status *status)
5875 {
5876     bool aSign;
5877     int aExp;
5878     uint64_t aSig, zSig0, zSig1;
5879 
5880     if (floatx80_invalid_encoding(a)) {
5881         float_raise(float_flag_invalid, status);
5882         return float128_default_nan(status);
5883     }
5884     aSig = extractFloatx80Frac( a );
5885     aExp = extractFloatx80Exp( a );
5886     aSign = extractFloatx80Sign( a );
5887     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5888         float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5889                                            status);
5890         return float128_silence_nan(res, status);
5891     }
5892     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5893     return packFloat128( aSign, aExp, zSig0, zSig1 );
5894 
5895 }
5896 
5897 /*----------------------------------------------------------------------------
5898 | Rounds the extended double-precision floating-point value `a'
5899 | to the precision provided by floatx80_rounding_precision and returns the
5900 | result as an extended double-precision floating-point value.
5901 | The operation is performed according to the IEC/IEEE Standard for Binary
5902 | Floating-Point Arithmetic.
5903 *----------------------------------------------------------------------------*/
5904 
5905 floatx80 floatx80_round(floatx80 a, float_status *status)
5906 {
5907     return roundAndPackFloatx80(status->floatx80_rounding_precision,
5908                                 extractFloatx80Sign(a),
5909                                 extractFloatx80Exp(a),
5910                                 extractFloatx80Frac(a), 0, status);
5911 }
5912 
5913 /*----------------------------------------------------------------------------
5914 | Rounds the extended double-precision floating-point value `a' to an integer,
5915 | and returns the result as an extended double-precision floating-point
5916 | value.  The operation is performed according to the IEC/IEEE Standard for
5917 | Binary Floating-Point Arithmetic.
5918 *----------------------------------------------------------------------------*/
5919 
5920 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5921 {
5922     bool aSign;
5923     int32_t aExp;
5924     uint64_t lastBitMask, roundBitsMask;
5925     floatx80 z;
5926 
5927     if (floatx80_invalid_encoding(a)) {
5928         float_raise(float_flag_invalid, status);
5929         return floatx80_default_nan(status);
5930     }
5931     aExp = extractFloatx80Exp( a );
5932     if ( 0x403E <= aExp ) {
5933         if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5934             return propagateFloatx80NaN(a, a, status);
5935         }
5936         return a;
5937     }
5938     if ( aExp < 0x3FFF ) {
5939         if (    ( aExp == 0 )
5940              && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
5941             return a;
5942         }
5943         float_raise(float_flag_inexact, status);
5944         aSign = extractFloatx80Sign( a );
5945         switch (status->float_rounding_mode) {
5946          case float_round_nearest_even:
5947             if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5948                ) {
5949                 return
5950                     packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5951             }
5952             break;
5953         case float_round_ties_away:
5954             if (aExp == 0x3FFE) {
5955                 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5956             }
5957             break;
5958          case float_round_down:
5959             return
5960                   aSign ?
5961                       packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5962                 : packFloatx80( 0, 0, 0 );
5963          case float_round_up:
5964             return
5965                   aSign ? packFloatx80( 1, 0, 0 )
5966                 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5967 
5968         case float_round_to_zero:
5969             break;
5970         default:
5971             g_assert_not_reached();
5972         }
5973         return packFloatx80( aSign, 0, 0 );
5974     }
5975     lastBitMask = 1;
5976     lastBitMask <<= 0x403E - aExp;
5977     roundBitsMask = lastBitMask - 1;
5978     z = a;
5979     switch (status->float_rounding_mode) {
5980     case float_round_nearest_even:
5981         z.low += lastBitMask>>1;
5982         if ((z.low & roundBitsMask) == 0) {
5983             z.low &= ~lastBitMask;
5984         }
5985         break;
5986     case float_round_ties_away:
5987         z.low += lastBitMask >> 1;
5988         break;
5989     case float_round_to_zero:
5990         break;
5991     case float_round_up:
5992         if (!extractFloatx80Sign(z)) {
5993             z.low += roundBitsMask;
5994         }
5995         break;
5996     case float_round_down:
5997         if (extractFloatx80Sign(z)) {
5998             z.low += roundBitsMask;
5999         }
6000         break;
6001     default:
6002         abort();
6003     }
6004     z.low &= ~ roundBitsMask;
6005     if ( z.low == 0 ) {
6006         ++z.high;
6007         z.low = UINT64_C(0x8000000000000000);
6008     }
6009     if (z.low != a.low) {
6010         float_raise(float_flag_inexact, status);
6011     }
6012     return z;
6013 
6014 }
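/*----------------------------------------------------------------------------
| Editorial sketch (not part of the SoftFloat sources): the mask-based
| nearest-even rounding used above, applied to a bare 64-bit value.
| `lastBitMask' is the lowest bit that survives rounding; adding half of it
| rounds up when the discarded bits exceed one half, and an exact tie is
| detected afterwards and snapped to even.  Only the nearest-even case is
| shown, and the carry into the exponent that the real routine handles
| (`++z.high') is ignored.
*----------------------------------------------------------------------------*/

#include <stdint.h>

static uint64_t round_to_multiple_nearest_even(uint64_t v, uint64_t lastBitMask)
{
    uint64_t roundBitsMask = lastBitMask - 1;

    v += lastBitMask >> 1;
    if ((v & roundBitsMask) == 0) {     /* exact tie: clear the unit bit */
        v &= ~lastBitMask;
    }
    return v & ~roundBitsMask;          /* drop the discarded bits */
}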
6015 
6016 /*----------------------------------------------------------------------------
6017 | Returns the result of adding the absolute values of the extended double-
6018 | precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
6019 | negated before being returned.  `zSign' is ignored if the result is a NaN.
6020 | The addition is performed according to the IEC/IEEE Standard for Binary
6021 | Floating-Point Arithmetic.
6022 *----------------------------------------------------------------------------*/
6023 
6024 static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
6025                                 float_status *status)
6026 {
6027     int32_t aExp, bExp, zExp;
6028     uint64_t aSig, bSig, zSig0, zSig1;
6029     int32_t expDiff;
6030 
6031     aSig = extractFloatx80Frac( a );
6032     aExp = extractFloatx80Exp( a );
6033     bSig = extractFloatx80Frac( b );
6034     bExp = extractFloatx80Exp( b );
6035     expDiff = aExp - bExp;
6036     if ( 0 < expDiff ) {
6037         if ( aExp == 0x7FFF ) {
6038             if ((uint64_t)(aSig << 1)) {
6039                 return propagateFloatx80NaN(a, b, status);
6040             }
6041             return a;
6042         }
6043         if ( bExp == 0 ) --expDiff;
6044         shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
6045         zExp = aExp;
6046     }
6047     else if ( expDiff < 0 ) {
6048         if ( bExp == 0x7FFF ) {
6049             if ((uint64_t)(bSig << 1)) {
6050                 return propagateFloatx80NaN(a, b, status);
6051             }
6052             return packFloatx80(zSign,
6053                                 floatx80_infinity_high,
6054                                 floatx80_infinity_low);
6055         }
6056         if ( aExp == 0 ) ++expDiff;
6057         shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
6058         zExp = bExp;
6059     }
6060     else {
6061         if ( aExp == 0x7FFF ) {
6062             if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
6063                 return propagateFloatx80NaN(a, b, status);
6064             }
6065             return a;
6066         }
6067         zSig1 = 0;
6068         zSig0 = aSig + bSig;
6069         if ( aExp == 0 ) {
6070             if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
6071                 /* At least one of the values is a pseudo-denormal,
6072                  * and there is a carry out of the result.  */
6073                 zExp = 1;
6074                 goto shiftRight1;
6075             }
6076             if (zSig0 == 0) {
6077                 return packFloatx80(zSign, 0, 0);
6078             }
6079             normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
6080             goto roundAndPack;
6081         }
6082         zExp = aExp;
6083         goto shiftRight1;
6084     }
6085     zSig0 = aSig + bSig;
6086     if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
6087  shiftRight1:
6088     shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
6089     zSig0 |= UINT64_C(0x8000000000000000);
6090     ++zExp;
6091  roundAndPack:
6092     return roundAndPackFloatx80(status->floatx80_rounding_precision,
6093                                 zSign, zExp, zSig0, zSig1, status);
6094 }
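/*----------------------------------------------------------------------------
| Editorial note (not part of the SoftFloat sources): after the exponents
| have been aligned, the unequal-exponent paths above end with
| `zSig0 = aSig + bSig'.  A set bit 63 ((int64_t) zSig0 < 0) means the sum
| did not carry out of 64 bits and is already normalized, so it is rounded
| directly; otherwise the carry was lost to wrap-around, and the
| `shiftRight1' path shifts right by one, restores the top bit with the OR
| and increments the exponent.
*----------------------------------------------------------------------------*/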
6095 
6096 /*----------------------------------------------------------------------------
6097 | Returns the result of subtracting the absolute values of the extended
6098 | double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
6099 | difference is negated before being returned.  `zSign' is ignored if the
6100 | result is a NaN.  The subtraction is performed according to the IEC/IEEE
6101 | Standard for Binary Floating-Point Arithmetic.
6102 *----------------------------------------------------------------------------*/
6103 
6104 static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
6105                                 float_status *status)
6106 {
6107     int32_t aExp, bExp, zExp;
6108     uint64_t aSig, bSig, zSig0, zSig1;
6109     int32_t expDiff;
6110 
6111     aSig = extractFloatx80Frac( a );
6112     aExp = extractFloatx80Exp( a );
6113     bSig = extractFloatx80Frac( b );
6114     bExp = extractFloatx80Exp( b );
6115     expDiff = aExp - bExp;
6116     if ( 0 < expDiff ) goto aExpBigger;
6117     if ( expDiff < 0 ) goto bExpBigger;
6118     if ( aExp == 0x7FFF ) {
6119         if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
6120             return propagateFloatx80NaN(a, b, status);
6121         }
6122         float_raise(float_flag_invalid, status);
6123         return floatx80_default_nan(status);
6124     }
6125     if ( aExp == 0 ) {
6126         aExp = 1;
6127         bExp = 1;
6128     }
6129     zSig1 = 0;
6130     if ( bSig < aSig ) goto aBigger;
6131     if ( aSig < bSig ) goto bBigger;
6132     return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
6133  bExpBigger:
6134     if ( bExp == 0x7FFF ) {
6135         if ((uint64_t)(bSig << 1)) {
6136             return propagateFloatx80NaN(a, b, status);
6137         }
6138         return packFloatx80(zSign ^ 1, floatx80_infinity_high,
6139                             floatx80_infinity_low);
6140     }
6141     if ( aExp == 0 ) ++expDiff;
6142     shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
6143  bBigger:
6144     sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
6145     zExp = bExp;
6146     zSign ^= 1;
6147     goto normalizeRoundAndPack;
6148  aExpBigger:
6149     if ( aExp == 0x7FFF ) {
6150         if ((uint64_t)(aSig << 1)) {
6151             return propagateFloatx80NaN(a, b, status);
6152         }
6153         return a;
6154     }
6155     if ( bExp == 0 ) --expDiff;
6156     shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
6157  aBigger:
6158     sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
6159     zExp = aExp;
6160  normalizeRoundAndPack:
6161     return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
6162                                          zSign, zExp, zSig0, zSig1, status);
6163 }
6164 
6165 /*----------------------------------------------------------------------------
6166 | Returns the result of adding the extended double-precision floating-point
6167 | values `a' and `b'.  The operation is performed according to the IEC/IEEE
6168 | Standard for Binary Floating-Point Arithmetic.
6169 *----------------------------------------------------------------------------*/
6170 
6171 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
6172 {
6173     bool aSign, bSign;
6174 
6175     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6176         float_raise(float_flag_invalid, status);
6177         return floatx80_default_nan(status);
6178     }
6179     aSign = extractFloatx80Sign( a );
6180     bSign = extractFloatx80Sign( b );
6181     if ( aSign == bSign ) {
6182         return addFloatx80Sigs(a, b, aSign, status);
6183     }
6184     else {
6185         return subFloatx80Sigs(a, b, aSign, status);
6186     }
6187 
6188 }
6189 
6190 /*----------------------------------------------------------------------------
6191 | Returns the result of subtracting the extended double-precision floating-
6192 | point values `a' and `b'.  The operation is performed according to the
6193 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6194 *----------------------------------------------------------------------------*/
6195 
6196 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
6197 {
6198     bool aSign, bSign;
6199 
6200     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6201         float_raise(float_flag_invalid, status);
6202         return floatx80_default_nan(status);
6203     }
6204     aSign = extractFloatx80Sign( a );
6205     bSign = extractFloatx80Sign( b );
6206     if ( aSign == bSign ) {
6207         return subFloatx80Sigs(a, b, aSign, status);
6208     }
6209     else {
6210         return addFloatx80Sigs(a, b, aSign, status);
6211     }
6212 
6213 }
6214 
6215 /*----------------------------------------------------------------------------
6216 | Returns the result of multiplying the extended double-precision floating-
6217 | point values `a' and `b'.  The operation is performed according to the
6218 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6219 *----------------------------------------------------------------------------*/
6220 
6221 floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
6222 {
6223     bool aSign, bSign, zSign;
6224     int32_t aExp, bExp, zExp;
6225     uint64_t aSig, bSig, zSig0, zSig1;
6226 
6227     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6228         float_raise(float_flag_invalid, status);
6229         return floatx80_default_nan(status);
6230     }
6231     aSig = extractFloatx80Frac( a );
6232     aExp = extractFloatx80Exp( a );
6233     aSign = extractFloatx80Sign( a );
6234     bSig = extractFloatx80Frac( b );
6235     bExp = extractFloatx80Exp( b );
6236     bSign = extractFloatx80Sign( b );
6237     zSign = aSign ^ bSign;
6238     if ( aExp == 0x7FFF ) {
6239         if (    (uint64_t) ( aSig<<1 )
6240              || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
6241             return propagateFloatx80NaN(a, b, status);
6242         }
6243         if ( ( bExp | bSig ) == 0 ) goto invalid;
6244         return packFloatx80(zSign, floatx80_infinity_high,
6245                                    floatx80_infinity_low);
6246     }
6247     if ( bExp == 0x7FFF ) {
6248         if ((uint64_t)(bSig << 1)) {
6249             return propagateFloatx80NaN(a, b, status);
6250         }
6251         if ( ( aExp | aSig ) == 0 ) {
6252  invalid:
6253             float_raise(float_flag_invalid, status);
6254             return floatx80_default_nan(status);
6255         }
6256         return packFloatx80(zSign, floatx80_infinity_high,
6257                                    floatx80_infinity_low);
6258     }
6259     if ( aExp == 0 ) {
6260         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6261         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6262     }
6263     if ( bExp == 0 ) {
6264         if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
6265         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6266     }
6267     zExp = aExp + bExp - 0x3FFE;
6268     mul64To128( aSig, bSig, &zSig0, &zSig1 );
6269     if ( 0 < (int64_t) zSig0 ) {
6270         shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
6271         --zExp;
6272     }
6273     return roundAndPackFloatx80(status->floatx80_rounding_precision,
6274                                 zSign, zExp, zSig0, zSig1, status);
6275 }
6276 
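/*
 * Illustrative sketch, not part of the original SoftFloat code: the product
 * of two significands in [1, 2) lies in [1, 4), so floatx80_mul() starts from
 * the tentative biased exponent aExp + bExp - 0x3FFE and shifts the 128-bit
 * product left once (decrementing the exponent) when it is below 2^127.
 * A concrete check: 1.5 * 2.0 must give exactly 3.0.
 */
static bool __attribute__((unused)) floatx80_mul_example(void)
{
    float_status st = { };
    floatx80 r;

    set_float_rounding_mode(float_round_nearest_even, &st);
    st.floatx80_rounding_precision = 80;
    r = floatx80_mul(packFloatx80(0, 0x3FFF, UINT64_C(0xC000000000000000)),
                     packFloatx80(0, 0x4000, UINT64_C(0x8000000000000000)),
                     &st);
    /* 3.0: biased exponent 0x4000, significand 0xC000000000000000 */
    return r.high == 0x4000 && r.low == UINT64_C(0xC000000000000000);
}
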
6277 /*----------------------------------------------------------------------------
6278 | Returns the result of dividing the extended double-precision floating-point
6279 | value `a' by the corresponding value `b'.  The operation is performed
6280 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6281 *----------------------------------------------------------------------------*/
6282 
6283 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
6284 {
6285     bool aSign, bSign, zSign;
6286     int32_t aExp, bExp, zExp;
6287     uint64_t aSig, bSig, zSig0, zSig1;
6288     uint64_t rem0, rem1, rem2, term0, term1, term2;
6289 
6290     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6291         float_raise(float_flag_invalid, status);
6292         return floatx80_default_nan(status);
6293     }
6294     aSig = extractFloatx80Frac( a );
6295     aExp = extractFloatx80Exp( a );
6296     aSign = extractFloatx80Sign( a );
6297     bSig = extractFloatx80Frac( b );
6298     bExp = extractFloatx80Exp( b );
6299     bSign = extractFloatx80Sign( b );
6300     zSign = aSign ^ bSign;
6301     if ( aExp == 0x7FFF ) {
6302         if ((uint64_t)(aSig << 1)) {
6303             return propagateFloatx80NaN(a, b, status);
6304         }
6305         if ( bExp == 0x7FFF ) {
6306             if ((uint64_t)(bSig << 1)) {
6307                 return propagateFloatx80NaN(a, b, status);
6308             }
6309             goto invalid;
6310         }
6311         return packFloatx80(zSign, floatx80_infinity_high,
6312                                    floatx80_infinity_low);
6313     }
6314     if ( bExp == 0x7FFF ) {
6315         if ((uint64_t)(bSig << 1)) {
6316             return propagateFloatx80NaN(a, b, status);
6317         }
6318         return packFloatx80( zSign, 0, 0 );
6319     }
6320     if ( bExp == 0 ) {
6321         if ( bSig == 0 ) {
6322             if ( ( aExp | aSig ) == 0 ) {
6323  invalid:
6324                 float_raise(float_flag_invalid, status);
6325                 return floatx80_default_nan(status);
6326             }
6327             float_raise(float_flag_divbyzero, status);
6328             return packFloatx80(zSign, floatx80_infinity_high,
6329                                        floatx80_infinity_low);
6330         }
6331         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6332     }
6333     if ( aExp == 0 ) {
6334         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6335         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6336     }
6337     zExp = aExp - bExp + 0x3FFE;
6338     rem1 = 0;
6339     if ( bSig <= aSig ) {
6340         shift128Right( aSig, 0, 1, &aSig, &rem1 );
6341         ++zExp;
6342     }
6343     zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6344     mul64To128( bSig, zSig0, &term0, &term1 );
6345     sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
6346     while ( (int64_t) rem0 < 0 ) {
6347         --zSig0;
6348         add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6349     }
6350     zSig1 = estimateDiv128To64( rem1, 0, bSig );
6351     if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
6352         mul64To128( bSig, zSig1, &term1, &term2 );
6353         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6354         while ( (int64_t) rem1 < 0 ) {
6355             --zSig1;
6356             add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6357         }
6358         zSig1 |= ( ( rem1 | rem2 ) != 0 );
6359     }
6360     return roundAndPackFloatx80(status->floatx80_rounding_precision,
6361                                 zSign, zExp, zSig0, zSig1, status);
6362 }
6363 
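/*
 * Illustrative sketch, not part of the original SoftFloat code: the two
 * zero-divisor cases of floatx80_div().  A finite non-zero dividend raises
 * float_flag_divbyzero and returns a correctly signed infinity, while 0/0
 * raises float_flag_invalid and returns the default NaN.
 */
static void __attribute__((unused)) floatx80_div_example(float_status *st)
{
    floatx80 zero = packFloatx80(0, 0, 0);
    floatx80 one = packFloatx80(0, 0x3FFF, UINT64_C(0x8000000000000000));

    floatx80_div(one, zero, st);    /* div-by-zero flag; returns +infinity */
    floatx80_div(zero, zero, st);   /* invalid flag; returns default NaN */
}
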
6364 /*----------------------------------------------------------------------------
6365 | Returns the remainder of the extended double-precision floating-point value
6366 | `a' with respect to the corresponding value `b'.  The operation is performed
6367 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic
6368 | if `mod' is false; if `mod' is true, the remainder is instead computed
6369 | with the quotient truncated toward zero.  `*quotient' is set to the low
6370 | 64 bits of the absolute value of the integer quotient.
6371 *----------------------------------------------------------------------------*/
6372 
6373 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
6374                          float_status *status)
6375 {
6376     bool aSign, zSign;
6377     int32_t aExp, bExp, expDiff, aExpOrig;
6378     uint64_t aSig0, aSig1, bSig;
6379     uint64_t q, term0, term1, alternateASig0, alternateASig1;
6380 
6381     *quotient = 0;
6382     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6383         float_raise(float_flag_invalid, status);
6384         return floatx80_default_nan(status);
6385     }
6386     aSig0 = extractFloatx80Frac( a );
6387     aExpOrig = aExp = extractFloatx80Exp( a );
6388     aSign = extractFloatx80Sign( a );
6389     bSig = extractFloatx80Frac( b );
6390     bExp = extractFloatx80Exp( b );
6391     if ( aExp == 0x7FFF ) {
6392         if (    (uint64_t) ( aSig0<<1 )
6393              || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
6394             return propagateFloatx80NaN(a, b, status);
6395         }
6396         goto invalid;
6397     }
6398     if ( bExp == 0x7FFF ) {
6399         if ((uint64_t)(bSig << 1)) {
6400             return propagateFloatx80NaN(a, b, status);
6401         }
6402         if (aExp == 0 && aSig0 >> 63) {
6403             /*
6404              * Pseudo-denormal argument must be returned in normalized
6405              * form.
6406              */
6407             return packFloatx80(aSign, 1, aSig0);
6408         }
6409         return a;
6410     }
6411     if ( bExp == 0 ) {
6412         if ( bSig == 0 ) {
6413  invalid:
6414             float_raise(float_flag_invalid, status);
6415             return floatx80_default_nan(status);
6416         }
6417         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6418     }
6419     if ( aExp == 0 ) {
6420         if ( aSig0 == 0 ) return a;
6421         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6422     }
6423     zSign = aSign;
6424     expDiff = aExp - bExp;
6425     aSig1 = 0;
6426     if ( expDiff < 0 ) {
6427         if ( mod || expDiff < -1 ) {
6428             if (aExp == 1 && aExpOrig == 0) {
6429                 /*
6430                  * Pseudo-denormal argument must be returned in
6431                  * normalized form.
6432                  */
6433                 return packFloatx80(aSign, aExp, aSig0);
6434             }
6435             return a;
6436         }
6437         shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6438         expDiff = 0;
6439     }
6440     *quotient = q = ( bSig <= aSig0 );
6441     if ( q ) aSig0 -= bSig;
6442     expDiff -= 64;
6443     while ( 0 < expDiff ) {
6444         q = estimateDiv128To64( aSig0, aSig1, bSig );
6445         q = ( 2 < q ) ? q - 2 : 0;
6446         mul64To128( bSig, q, &term0, &term1 );
6447         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6448         shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6449         expDiff -= 62;
6450         *quotient <<= 62;
6451         *quotient += q;
6452     }
6453     expDiff += 64;
6454     if ( 0 < expDiff ) {
6455         q = estimateDiv128To64( aSig0, aSig1, bSig );
6456         q = ( 2 < q ) ? q - 2 : 0;
6457         q >>= 64 - expDiff;
6458         mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6459         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6460         shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6461         while ( le128( term0, term1, aSig0, aSig1 ) ) {
6462             ++q;
6463             sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6464         }
6465         if (expDiff < 64) {
6466             *quotient <<= expDiff;
6467         } else {
6468             *quotient = 0;
6469         }
6470         *quotient += q;
6471     }
6472     else {
6473         term1 = 0;
6474         term0 = bSig;
6475     }
6476     if (!mod) {
6477         sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6478         if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6479                 || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6480                         && ( q & 1 ) )
6481             ) {
6482             aSig0 = alternateASig0;
6483             aSig1 = alternateASig1;
6484             zSign = ! zSign;
6485             ++*quotient;
6486         }
6487     }
6488     return
6489         normalizeRoundAndPackFloatx80(
6490             80, zSign, bExp + expDiff, aSig0, aSig1, status);
6491 
6492 }
6493 
6494 /*----------------------------------------------------------------------------
6495 | Returns the remainder of the extended double-precision floating-point value
6496 | `a' with respect to the corresponding value `b'.  The operation is performed
6497 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6498 *----------------------------------------------------------------------------*/
6499 
6500 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6501 {
6502     uint64_t quotient;
6503     return floatx80_modrem(a, b, false, &quotient, status);
6504 }
6505 
6506 /*----------------------------------------------------------------------------
6507 | Returns the remainder of the extended double-precision floating-point value
6508 | `a' with respect to the corresponding value `b', with the quotient truncated
6509 | toward zero.
6510 *----------------------------------------------------------------------------*/
6511 
6512 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6513 {
6514     uint64_t quotient;
6515     return floatx80_modrem(a, b, true, &quotient, status);
6516 }
6517 
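/*
 * Illustrative sketch, not part of the original SoftFloat code: the
 * difference between floatx80_rem() (IEEE remainder, quotient rounded to
 * nearest) and floatx80_mod() (quotient truncated toward zero), mirroring
 * the x87 FPREM1/FPREM distinction.  For a = 5.0 and b = 3.0 the rounded
 * quotient is 2, giving a remainder of -1.0, while the truncated quotient
 * is 1, giving a modulus of 2.0.
 */
static void __attribute__((unused)) floatx80_modrem_example(float_status *st)
{
    floatx80 a = packFloatx80(0, 0x4001, UINT64_C(0xA000000000000000)); /* 5.0 */
    floatx80 b = packFloatx80(0, 0x4000, UINT64_C(0xC000000000000000)); /* 3.0 */

    floatx80_rem(a, b, st);     /* -1.0 */
    floatx80_mod(a, b, st);     /*  2.0 */
}
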
6518 /*----------------------------------------------------------------------------
6519 | Returns the square root of the extended double-precision floating-point
6520 | value `a'.  The operation is performed according to the IEC/IEEE Standard
6521 | for Binary Floating-Point Arithmetic.
6522 *----------------------------------------------------------------------------*/
6523 
6524 floatx80 floatx80_sqrt(floatx80 a, float_status *status)
6525 {
6526     bool aSign;
6527     int32_t aExp, zExp;
6528     uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6529     uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6530 
6531     if (floatx80_invalid_encoding(a)) {
6532         float_raise(float_flag_invalid, status);
6533         return floatx80_default_nan(status);
6534     }
6535     aSig0 = extractFloatx80Frac( a );
6536     aExp = extractFloatx80Exp( a );
6537     aSign = extractFloatx80Sign( a );
6538     if ( aExp == 0x7FFF ) {
6539         if ((uint64_t)(aSig0 << 1)) {
6540             return propagateFloatx80NaN(a, a, status);
6541         }
6542         if ( ! aSign ) return a;
6543         goto invalid;
6544     }
6545     if ( aSign ) {
6546         if ( ( aExp | aSig0 ) == 0 ) return a;
6547  invalid:
6548         float_raise(float_flag_invalid, status);
6549         return floatx80_default_nan(status);
6550     }
6551     if ( aExp == 0 ) {
6552         if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6553         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6554     }
6555     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6556     zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6557     shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6558     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6559     doubleZSig0 = zSig0<<1;
6560     mul64To128( zSig0, zSig0, &term0, &term1 );
6561     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
6562     while ( (int64_t) rem0 < 0 ) {
6563         --zSig0;
6564         doubleZSig0 -= 2;
6565         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6566     }
6567     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6568     if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
6569         if ( zSig1 == 0 ) zSig1 = 1;
6570         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6571         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6572         mul64To128( zSig1, zSig1, &term2, &term3 );
6573         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
6574         while ( (int64_t) rem1 < 0 ) {
6575             --zSig1;
6576             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6577             term3 |= 1;
6578             term2 |= doubleZSig0;
6579             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6580         }
6581         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6582     }
6583     shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6584     zSig0 |= doubleZSig0;
6585     return roundAndPackFloatx80(status->floatx80_rounding_precision,
6586                                 0, zExp, zSig0, zSig1, status);
6587 }
6588 
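/*
 * Illustrative sketch, not part of the original SoftFloat code: edge cases of
 * floatx80_sqrt().  Any negative operand other than -0 raises
 * float_flag_invalid and returns the default NaN; -0 and +infinity are
 * returned unchanged; sqrt(4.0) is exactly 2.0.
 */
static bool __attribute__((unused)) floatx80_sqrt_example(float_status *st)
{
    floatx80 r = floatx80_sqrt(packFloatx80(0, 0x4001,
                                            UINT64_C(0x8000000000000000)), st);

    /* sqrt(-1.0): raises invalid and yields the default NaN */
    floatx80_sqrt(packFloatx80(1, 0x3FFF, UINT64_C(0x8000000000000000)), st);
    /* 2.0: biased exponent 0x4000, significand 0x8000000000000000 */
    return r.high == 0x4000 && r.low == UINT64_C(0x8000000000000000);
}
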
6589 /*----------------------------------------------------------------------------
6590 | Returns the result of converting the quadruple-precision floating-point
6591 | value `a' to the 32-bit two's complement integer format.  The conversion
6592 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6593 | Arithmetic---which means in particular that the conversion is rounded
6594 | according to the current rounding mode.  If `a' is a NaN, the largest
6595 | positive integer is returned.  Otherwise, if the conversion overflows, the
6596 | largest integer with the same sign as `a' is returned.
6597 *----------------------------------------------------------------------------*/
6598 
6599 int32_t float128_to_int32(float128 a, float_status *status)
6600 {
6601     bool aSign;
6602     int32_t aExp, shiftCount;
6603     uint64_t aSig0, aSig1;
6604 
6605     aSig1 = extractFloat128Frac1( a );
6606     aSig0 = extractFloat128Frac0( a );
6607     aExp = extractFloat128Exp( a );
6608     aSign = extractFloat128Sign( a );
6609     if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
6610     if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
6611     aSig0 |= ( aSig1 != 0 );
6612     shiftCount = 0x4028 - aExp;
6613     if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
6614     return roundAndPackInt32(aSign, aSig0, status);
6615 
6616 }
6617 
6618 /*----------------------------------------------------------------------------
6619 | Returns the result of converting the quadruple-precision floating-point
6620 | value `a' to the 32-bit two's complement integer format.  The conversion
6621 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6622 | Arithmetic, except that the conversion is always rounded toward zero.  If
6623 | `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
6624 | conversion overflows, the largest integer with the same sign as `a' is
6625 | returned.
6626 *----------------------------------------------------------------------------*/
6627 
6628 int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
6629 {
6630     bool aSign;
6631     int32_t aExp, shiftCount;
6632     uint64_t aSig0, aSig1, savedASig;
6633     int32_t z;
6634 
6635     aSig1 = extractFloat128Frac1( a );
6636     aSig0 = extractFloat128Frac0( a );
6637     aExp = extractFloat128Exp( a );
6638     aSign = extractFloat128Sign( a );
6639     aSig0 |= ( aSig1 != 0 );
6640     if ( 0x401E < aExp ) {
6641         if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
6642         goto invalid;
6643     }
6644     else if ( aExp < 0x3FFF ) {
6645         if (aExp || aSig0) {
6646             float_raise(float_flag_inexact, status);
6647         }
6648         return 0;
6649     }
6650     aSig0 |= UINT64_C(0x0001000000000000);
6651     shiftCount = 0x402F - aExp;
6652     savedASig = aSig0;
6653     aSig0 >>= shiftCount;
6654     z = aSig0;
6655     if ( aSign ) z = - z;
6656     if ( ( z < 0 ) ^ aSign ) {
6657  invalid:
6658         float_raise(float_flag_invalid, status);
6659         return aSign ? INT32_MIN : INT32_MAX;
6660     }
6661     if ( ( aSig0<<shiftCount ) != savedASig ) {
6662         float_raise(float_flag_inexact, status);
6663     }
6664     return z;
6665 
6666 }
6667 
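/*
 * Illustrative sketch, not part of the original SoftFloat code: the
 * difference between float128_to_int32(), which honours the current rounding
 * mode, and float128_to_int32_round_to_zero(), which always truncates.  With
 * a = -1.5, round-to-nearest-even yields -2 while truncation yields -1; both
 * conversions raise float_flag_inexact.
 */
static void __attribute__((unused)) float128_to_int32_example(void)
{
    float_status st = { };
    float128 a = packFloat128(1, 0x3FFF, UINT64_C(0x0000800000000000), 0);

    set_float_rounding_mode(float_round_nearest_even, &st);
    float128_to_int32(a, &st);                  /* -2 */
    float128_to_int32_round_to_zero(a, &st);    /* -1 */
}
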
6668 /*----------------------------------------------------------------------------
6669 | Returns the result of converting the quadruple-precision floating-point
6670 | value `a' to the 64-bit two's complement integer format.  The conversion
6671 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6672 | Arithmetic---which means in particular that the conversion is rounded
6673 | according to the current rounding mode.  If `a' is a NaN, the largest
6674 | positive integer is returned.  Otherwise, if the conversion overflows, the
6675 | largest integer with the same sign as `a' is returned.
6676 *----------------------------------------------------------------------------*/
6677 
6678 int64_t float128_to_int64(float128 a, float_status *status)
6679 {
6680     bool aSign;
6681     int32_t aExp, shiftCount;
6682     uint64_t aSig0, aSig1;
6683 
6684     aSig1 = extractFloat128Frac1( a );
6685     aSig0 = extractFloat128Frac0( a );
6686     aExp = extractFloat128Exp( a );
6687     aSign = extractFloat128Sign( a );
6688     if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
6689     shiftCount = 0x402F - aExp;
6690     if ( shiftCount <= 0 ) {
6691         if ( 0x403E < aExp ) {
6692             float_raise(float_flag_invalid, status);
6693             if (    ! aSign
6694                  || (    ( aExp == 0x7FFF )
6695                       && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
6696                     )
6697                ) {
6698                 return INT64_MAX;
6699             }
6700             return INT64_MIN;
6701         }
6702         shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6703     }
6704     else {
6705         shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6706     }
6707     return roundAndPackInt64(aSign, aSig0, aSig1, status);
6708 
6709 }
6710 
6711 /*----------------------------------------------------------------------------
6712 | Returns the result of converting the quadruple-precision floating-point
6713 | value `a' to the 64-bit two's complement integer format.  The conversion
6714 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6715 | Arithmetic, except that the conversion is always rounded toward zero.
6716 | If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
6717 | the conversion overflows, the largest integer with the same sign as `a' is
6718 | returned.
6719 *----------------------------------------------------------------------------*/
6720 
6721 int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
6722 {
6723     bool aSign;
6724     int32_t aExp, shiftCount;
6725     uint64_t aSig0, aSig1;
6726     int64_t z;
6727 
6728     aSig1 = extractFloat128Frac1( a );
6729     aSig0 = extractFloat128Frac0( a );
6730     aExp = extractFloat128Exp( a );
6731     aSign = extractFloat128Sign( a );
6732     if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
6733     shiftCount = aExp - 0x402F;
6734     if ( 0 < shiftCount ) {
6735         if ( 0x403E <= aExp ) {
6736             aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
6737             if (    ( a.high == UINT64_C(0xC03E000000000000) )
6738                  && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
6739                 if (aSig1) {
6740                     float_raise(float_flag_inexact, status);
6741                 }
6742             }
6743             else {
6744                 float_raise(float_flag_invalid, status);
6745                 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
6746                     return INT64_MAX;
6747                 }
6748             }
6749             return INT64_MIN;
6750         }
6751         z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
6752         if ( (uint64_t) ( aSig1<<shiftCount ) ) {
6753             float_raise(float_flag_inexact, status);
6754         }
6755     }
6756     else {
6757         if ( aExp < 0x3FFF ) {
6758             if ( aExp | aSig0 | aSig1 ) {
6759                 float_raise(float_flag_inexact, status);
6760             }
6761             return 0;
6762         }
6763         z = aSig0>>( - shiftCount );
6764         if (    aSig1
6765              || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
6766             float_raise(float_flag_inexact, status);
6767         }
6768     }
6769     if ( aSign ) z = - z;
6770     return z;
6771 
6772 }
6773 
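/*
 * Illustrative sketch, not part of the original SoftFloat code: the special
 * case singled out above.  -2^63 is representable as an int64_t, so
 * converting it returns INT64_MIN without raising the invalid exception;
 * any larger magnitude saturates to INT64_MIN/INT64_MAX and raises invalid.
 */
static int64_t __attribute__((unused))
float128_to_int64_rtz_example(float_status *st)
{
    /* -2^63: biased exponent 0x3FFF + 63 = 0x403E, all fraction bits clear */
    return float128_to_int64_round_to_zero(packFloat128(1, 0x403E, 0, 0), st);
}
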
6774 /*----------------------------------------------------------------------------
6775 | Returns the result of converting the quadruple-precision floating-point value
6776 | `a' to the 64-bit unsigned integer format.  The conversion is
6777 | performed according to the IEC/IEEE Standard for Binary Floating-Point
6778 | Arithmetic---which means in particular that the conversion is rounded
6779 | according to the current rounding mode.  If `a' is a NaN, the largest
6780 | positive integer is returned.  If the conversion overflows, the
6781 | largest unsigned integer is returned.  If 'a' is negative, the value is
6782 | rounded and zero is returned; negative values that do not round to zero
6783 | will raise the invalid exception.
6784 *----------------------------------------------------------------------------*/
6785 
6786 uint64_t float128_to_uint64(float128 a, float_status *status)
6787 {
6788     bool aSign;
6789     int aExp;
6790     int shiftCount;
6791     uint64_t aSig0, aSig1;
6792 
6793     aSig0 = extractFloat128Frac0(a);
6794     aSig1 = extractFloat128Frac1(a);
6795     aExp = extractFloat128Exp(a);
6796     aSign = extractFloat128Sign(a);
6797     if (aSign && (aExp > 0x3FFE)) {
6798         float_raise(float_flag_invalid, status);
6799         if (float128_is_any_nan(a)) {
6800             return UINT64_MAX;
6801         } else {
6802             return 0;
6803         }
6804     }
6805     if (aExp) {
6806         aSig0 |= UINT64_C(0x0001000000000000);
6807     }
6808     shiftCount = 0x402F - aExp;
6809     if (shiftCount <= 0) {
6810         if (0x403E < aExp) {
6811             float_raise(float_flag_invalid, status);
6812             return UINT64_MAX;
6813         }
6814         shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6815     } else {
6816         shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6817     }
6818     return roundAndPackUint64(aSign, aSig0, aSig1, status);
6819 }
6820 
6821 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6822 {
6823     uint64_t v;
6824     signed char current_rounding_mode = status->float_rounding_mode;
6825 
6826     set_float_rounding_mode(float_round_to_zero, status);
6827     v = float128_to_uint64(a, status);
6828     set_float_rounding_mode(current_rounding_mode, status);
6829 
6830     return v;
6831 }
6832 
6833 /*----------------------------------------------------------------------------
6834 | Returns the result of converting the quadruple-precision floating-point
6835 | value `a' to the 32-bit unsigned integer format.  The conversion
6836 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6837 | Arithmetic except that the conversion is always rounded toward zero.
6838 | If `a' is a NaN, the largest positive integer is returned.  Otherwise,
6839 | if the conversion overflows, the largest unsigned integer is returned.
6840 | If 'a' is negative, the value is rounded and zero is returned; negative
6841 | values that do not round to zero will raise the invalid exception.
6842 *----------------------------------------------------------------------------*/
6843 
6844 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6845 {
6846     uint64_t v;
6847     uint32_t res;
6848     int old_exc_flags = get_float_exception_flags(status);
6849 
6850     v = float128_to_uint64_round_to_zero(a, status);
6851     if (v > 0xffffffff) {
6852         res = 0xffffffff;
6853     } else {
6854         return v;
6855     }
6856     set_float_exception_flags(old_exc_flags, status);
6857     float_raise(float_flag_invalid, status);
6858     return res;
6859 }
6860 
6861 /*----------------------------------------------------------------------------
6862 | Returns the result of converting the quadruple-precision floating-point value
6863 | `a' to the 32-bit unsigned integer format.  The conversion is
6864 | performed according to the IEC/IEEE Standard for Binary Floating-Point
6865 | Arithmetic---which means in particular that the conversion is rounded
6866 | according to the current rounding mode.  If `a' is a NaN, the largest
6867 | positive integer is returned.  If the conversion overflows, the
6868 | largest unsigned integer is returned.  If 'a' is negative, the value is
6869 | rounded and zero is returned; negative values that do not round to zero
6870 | will raise the invalid exception.
6871 *----------------------------------------------------------------------------*/
6872 
6873 uint32_t float128_to_uint32(float128 a, float_status *status)
6874 {
6875     uint64_t v;
6876     uint32_t res;
6877     int old_exc_flags = get_float_exception_flags(status);
6878 
6879     v = float128_to_uint64(a, status);
6880     if (v > 0xffffffff) {
6881         res = 0xffffffff;
6882     } else {
6883         return v;
6884     }
6885     set_float_exception_flags(old_exc_flags, status);
6886     float_raise(float_flag_invalid, status);
6887     return res;
6888 }
6889 
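/*
 * Illustrative sketch, not part of the original SoftFloat code: the 32-bit
 * unsigned conversions above narrow through their 64-bit counterparts and
 * then saturate.  Converting 2^40 therefore returns 0xffffffff with
 * float_flag_invalid raised; any flags raised by the discarded 64-bit
 * conversion are first restored to their previous state.
 */
static uint32_t __attribute__((unused)) float128_to_uint32_example(void)
{
    float_status st = { };
    float128 big = packFloat128(0, 0x3FFF + 40, 0, 0);  /* 2^40 */

    set_float_rounding_mode(float_round_nearest_even, &st);
    return float128_to_uint32(big, &st);    /* 0xffffffff, invalid raised */
}
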
6890 /*----------------------------------------------------------------------------
6891 | Returns the result of converting the quadruple-precision floating-point
6892 | value `a' to the single-precision floating-point format.  The conversion
6893 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6894 | Arithmetic.
6895 *----------------------------------------------------------------------------*/
6896 
6897 float32 float128_to_float32(float128 a, float_status *status)
6898 {
6899     bool aSign;
6900     int32_t aExp;
6901     uint64_t aSig0, aSig1;
6902     uint32_t zSig;
6903 
6904     aSig1 = extractFloat128Frac1( a );
6905     aSig0 = extractFloat128Frac0( a );
6906     aExp = extractFloat128Exp( a );
6907     aSign = extractFloat128Sign( a );
6908     if ( aExp == 0x7FFF ) {
6909         if ( aSig0 | aSig1 ) {
6910             return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
6911         }
6912         return packFloat32( aSign, 0xFF, 0 );
6913     }
6914     aSig0 |= ( aSig1 != 0 );
6915     shift64RightJamming( aSig0, 18, &aSig0 );
6916     zSig = aSig0;
6917     if ( aExp || zSig ) {
6918         zSig |= 0x40000000;
6919         aExp -= 0x3F81;
6920     }
6921     return roundAndPackFloat32(aSign, aExp, zSig, status);
6922 
6923 }
6924 
6925 /*----------------------------------------------------------------------------
6926 | Returns the result of converting the quadruple-precision floating-point
6927 | value `a' to the double-precision floating-point format.  The conversion
6928 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6929 | Arithmetic.
6930 *----------------------------------------------------------------------------*/
6931 
6932 float64 float128_to_float64(float128 a, float_status *status)
6933 {
6934     bool aSign;
6935     int32_t aExp;
6936     uint64_t aSig0, aSig1;
6937 
6938     aSig1 = extractFloat128Frac1( a );
6939     aSig0 = extractFloat128Frac0( a );
6940     aExp = extractFloat128Exp( a );
6941     aSign = extractFloat128Sign( a );
6942     if ( aExp == 0x7FFF ) {
6943         if ( aSig0 | aSig1 ) {
6944             return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
6945         }
6946         return packFloat64( aSign, 0x7FF, 0 );
6947     }
6948     shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6949     aSig0 |= ( aSig1 != 0 );
6950     if ( aExp || aSig0 ) {
6951         aSig0 |= UINT64_C(0x4000000000000000);
6952         aExp -= 0x3C01;
6953     }
6954     return roundAndPackFloat64(aSign, aExp, aSig0, status);
6955 
6956 }
6957 
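/*
 * Illustrative sketch, not part of the original SoftFloat code: because the
 * quadruple format has both a wider exponent range and a wider significand
 * than double precision, widening a finite double with float64_to_float128()
 * (declared in softfloat.h) and narrowing it back through
 * float128_to_float64() reproduces the original value exactly and raises no
 * exceptions.
 */
static float64 __attribute__((unused))
float128_roundtrip_example(float64 d, float_status *st)
{
    return float128_to_float64(float64_to_float128(d, st), st);
}
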
6958 /*----------------------------------------------------------------------------
6959 | Returns the result of converting the quadruple-precision floating-point
6960 | value `a' to the extended double-precision floating-point format.  The
6961 | conversion is performed according to the IEC/IEEE Standard for Binary
6962 | Floating-Point Arithmetic.
6963 *----------------------------------------------------------------------------*/
6964 
6965 floatx80 float128_to_floatx80(float128 a, float_status *status)
6966 {
6967     bool aSign;
6968     int32_t aExp;
6969     uint64_t aSig0, aSig1;
6970 
6971     aSig1 = extractFloat128Frac1( a );
6972     aSig0 = extractFloat128Frac0( a );
6973     aExp = extractFloat128Exp( a );
6974     aSign = extractFloat128Sign( a );
6975     if ( aExp == 0x7FFF ) {
6976         if ( aSig0 | aSig1 ) {
6977             floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6978                                                status);
6979             return floatx80_silence_nan(res, status);
6980         }
6981         return packFloatx80(aSign, floatx80_infinity_high,
6982                                    floatx80_infinity_low);
6983     }
6984     if ( aExp == 0 ) {
6985         if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6986         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6987     }
6988     else {
6989         aSig0 |= UINT64_C(0x0001000000000000);
6990     }
6991     shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
6992     return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
6993 
6994 }
6995 
6996 /*----------------------------------------------------------------------------
6997 | Rounds the quadruple-precision floating-point value `a' to an integer, and
6998 | returns the result as a quadruple-precision floating-point value.  The
6999 | operation is performed according to the IEC/IEEE Standard for Binary
7000 | Floating-Point Arithmetic.
7001 *----------------------------------------------------------------------------*/
7002 
7003 float128 float128_round_to_int(float128 a, float_status *status)
7004 {
7005     bool aSign;
7006     int32_t aExp;
7007     uint64_t lastBitMask, roundBitsMask;
7008     float128 z;
7009 
7010     aExp = extractFloat128Exp( a );
7011     if ( 0x402F <= aExp ) {
7012         if ( 0x406F <= aExp ) {
7013             if (    ( aExp == 0x7FFF )
7014                  && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
7015                ) {
7016                 return propagateFloat128NaN(a, a, status);
7017             }
7018             return a;
7019         }
7020         lastBitMask = 1;
7021         lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
7022         roundBitsMask = lastBitMask - 1;
7023         z = a;
7024         switch (status->float_rounding_mode) {
7025         case float_round_nearest_even:
7026             if ( lastBitMask ) {
7027                 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
7028                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
7029             }
7030             else {
7031                 if ( (int64_t) z.low < 0 ) {
7032                     ++z.high;
7033                     if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
7034                 }
7035             }
7036             break;
7037         case float_round_ties_away:
7038             if (lastBitMask) {
7039                 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
7040             } else {
7041                 if ((int64_t) z.low < 0) {
7042                     ++z.high;
7043                 }
7044             }
7045             break;
7046         case float_round_to_zero:
7047             break;
7048         case float_round_up:
7049             if (!extractFloat128Sign(z)) {
7050                 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
7051             }
7052             break;
7053         case float_round_down:
7054             if (extractFloat128Sign(z)) {
7055                 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
7056             }
7057             break;
7058         case float_round_to_odd:
7059             /*
7060              * Note that if lastBitMask == 0, the last bit is the lsb
7061              * of high, and roundBitsMask == -1.
7062              */
7063             if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
7064                 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
7065             }
7066             break;
7067         default:
7068             abort();
7069         }
7070         z.low &= ~ roundBitsMask;
7071     }
7072     else {
7073         if ( aExp < 0x3FFF ) {
7074             if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
7075             float_raise(float_flag_inexact, status);
7076             aSign = extractFloat128Sign( a );
7077             switch (status->float_rounding_mode) {
7078             case float_round_nearest_even:
7079                 if (    ( aExp == 0x3FFE )
7080                      && (   extractFloat128Frac0( a )
7081                           | extractFloat128Frac1( a ) )
7082                    ) {
7083                     return packFloat128( aSign, 0x3FFF, 0, 0 );
7084                 }
7085                 break;
7086             case float_round_ties_away:
7087                 if (aExp == 0x3FFE) {
7088                     return packFloat128(aSign, 0x3FFF, 0, 0);
7089                 }
7090                 break;
7091             case float_round_down:
7092                 return
7093                       aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
7094                     : packFloat128( 0, 0, 0, 0 );
7095             case float_round_up:
7096                 return
7097                       aSign ? packFloat128( 1, 0, 0, 0 )
7098                     : packFloat128( 0, 0x3FFF, 0, 0 );
7099 
7100             case float_round_to_odd:
7101                 return packFloat128(aSign, 0x3FFF, 0, 0);
7102 
7103             case float_round_to_zero:
7104                 break;
7105             }
7106             return packFloat128( aSign, 0, 0, 0 );
7107         }
7108         lastBitMask = 1;
7109         lastBitMask <<= 0x402F - aExp;
7110         roundBitsMask = lastBitMask - 1;
7111         z.low = 0;
7112         z.high = a.high;
7113         switch (status->float_rounding_mode) {
7114         case float_round_nearest_even:
7115             z.high += lastBitMask>>1;
7116             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
7117                 z.high &= ~ lastBitMask;
7118             }
7119             break;
7120         case float_round_ties_away:
7121             z.high += lastBitMask>>1;
7122             break;
7123         case float_round_to_zero:
7124             break;
7125         case float_round_up:
7126             if (!extractFloat128Sign(z)) {
7127                 z.high |= ( a.low != 0 );
7128                 z.high += roundBitsMask;
7129             }
7130             break;
7131         case float_round_down:
7132             if (extractFloat128Sign(z)) {
7133                 z.high |= (a.low != 0);
7134                 z.high += roundBitsMask;
7135             }
7136             break;
7137         case float_round_to_odd:
7138             if ((z.high & lastBitMask) == 0) {
7139                 z.high |= (a.low != 0);
7140                 z.high += roundBitsMask;
7141             }
7142             break;
7143         default:
7144             abort();
7145         }
7146         z.high &= ~ roundBitsMask;
7147     }
7148     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
7149         float_raise(float_flag_inexact, status);
7150     }
7151     return z;
7152 
7153 }
7154 
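/*
 * Illustrative sketch, not part of the original SoftFloat code: how the
 * rounding mode steers float128_round_to_int() on the tie case 2.5.
 * Nearest-even, round-down and round-to-zero give 2.0; ties-away, round-up
 * and round-to-odd give 3.0.  Every case raises float_flag_inexact.
 */
static void __attribute__((unused)) float128_round_to_int_example(void)
{
    float_status st = { };
    float128 a = packFloat128(0, 0x4000, UINT64_C(0x0000400000000000), 0);

    set_float_rounding_mode(float_round_nearest_even, &st);
    float128_round_to_int(a, &st);              /* 2.0 */
    set_float_rounding_mode(float_round_ties_away, &st);
    float128_round_to_int(a, &st);              /* 3.0 */
    set_float_rounding_mode(float_round_to_odd, &st);
    float128_round_to_int(a, &st);              /* 3.0 */
}
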
7155 /*----------------------------------------------------------------------------
7156 | Returns the remainder of the quadruple-precision floating-point value `a'
7157 | with respect to the corresponding value `b'.  The operation is performed
7158 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7159 *----------------------------------------------------------------------------*/
7160 
7161 float128 float128_rem(float128 a, float128 b, float_status *status)
7162 {
7163     bool aSign, zSign;
7164     int32_t aExp, bExp, expDiff;
7165     uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
7166     uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
7167     int64_t sigMean0;
7168 
7169     aSig1 = extractFloat128Frac1( a );
7170     aSig0 = extractFloat128Frac0( a );
7171     aExp = extractFloat128Exp( a );
7172     aSign = extractFloat128Sign( a );
7173     bSig1 = extractFloat128Frac1( b );
7174     bSig0 = extractFloat128Frac0( b );
7175     bExp = extractFloat128Exp( b );
7176     if ( aExp == 0x7FFF ) {
7177         if (    ( aSig0 | aSig1 )
7178              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
7179             return propagateFloat128NaN(a, b, status);
7180         }
7181         goto invalid;
7182     }
7183     if ( bExp == 0x7FFF ) {
7184         if (bSig0 | bSig1) {
7185             return propagateFloat128NaN(a, b, status);
7186         }
7187         return a;
7188     }
7189     if ( bExp == 0 ) {
7190         if ( ( bSig0 | bSig1 ) == 0 ) {
7191  invalid:
7192             float_raise(float_flag_invalid, status);
7193             return float128_default_nan(status);
7194         }
7195         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7196     }
7197     if ( aExp == 0 ) {
7198         if ( ( aSig0 | aSig1 ) == 0 ) return a;
7199         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7200     }
7201     expDiff = aExp - bExp;
7202     if ( expDiff < -1 ) return a;
7203     shortShift128Left(
7204         aSig0 | UINT64_C(0x0001000000000000),
7205         aSig1,
7206         15 - ( expDiff < 0 ),
7207         &aSig0,
7208         &aSig1
7209     );
7210     shortShift128Left(
7211         bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
7212     q = le128( bSig0, bSig1, aSig0, aSig1 );
7213     if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7214     expDiff -= 64;
7215     while ( 0 < expDiff ) {
7216         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7217         q = ( 4 < q ) ? q - 4 : 0;
7218         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7219         shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
7220         shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
7221         sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
7222         expDiff -= 61;
7223     }
7224     if ( -64 < expDiff ) {
7225         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7226         q = ( 4 < q ) ? q - 4 : 0;
7227         q >>= - expDiff;
7228         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7229         expDiff += 52;
7230         if ( expDiff < 0 ) {
7231             shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7232         }
7233         else {
7234             shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
7235         }
7236         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7237         sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
7238     }
7239     else {
7240         shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
7241         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7242     }
7243     do {
7244         alternateASig0 = aSig0;
7245         alternateASig1 = aSig1;
7246         ++q;
7247         sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7248     } while ( 0 <= (int64_t) aSig0 );
7249     add128(
7250         aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
7251     if (    ( sigMean0 < 0 )
7252          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
7253         aSig0 = alternateASig0;
7254         aSig1 = alternateASig1;
7255     }
7256     zSign = ( (int64_t) aSig0 < 0 );
7257     if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
7258     return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
7259                                          status);
7260 }
7261 
7262 /*----------------------------------------------------------------------------
7263 | Returns the square root of the quadruple-precision floating-point value `a'.
7264 | The operation is performed according to the IEC/IEEE Standard for Binary
7265 | Floating-Point Arithmetic.
7266 *----------------------------------------------------------------------------*/
7267 
7268 float128 float128_sqrt(float128 a, float_status *status)
7269 {
7270     bool aSign;
7271     int32_t aExp, zExp;
7272     uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
7273     uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
7274 
7275     aSig1 = extractFloat128Frac1( a );
7276     aSig0 = extractFloat128Frac0( a );
7277     aExp = extractFloat128Exp( a );
7278     aSign = extractFloat128Sign( a );
7279     if ( aExp == 0x7FFF ) {
7280         if (aSig0 | aSig1) {
7281             return propagateFloat128NaN(a, a, status);
7282         }
7283         if ( ! aSign ) return a;
7284         goto invalid;
7285     }
7286     if ( aSign ) {
7287         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
7288  invalid:
7289         float_raise(float_flag_invalid, status);
7290         return float128_default_nan(status);
7291     }
7292     if ( aExp == 0 ) {
7293         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
7294         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7295     }
7296     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
7297     aSig0 |= UINT64_C(0x0001000000000000);
7298     zSig0 = estimateSqrt32( aExp, aSig0>>17 );
7299     shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
7300     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
7301     doubleZSig0 = zSig0<<1;
7302     mul64To128( zSig0, zSig0, &term0, &term1 );
7303     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
7304     while ( (int64_t) rem0 < 0 ) {
7305         --zSig0;
7306         doubleZSig0 -= 2;
7307         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
7308     }
7309     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
7310     if ( ( zSig1 & 0x1FFF ) <= 5 ) {
7311         if ( zSig1 == 0 ) zSig1 = 1;
7312         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
7313         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
7314         mul64To128( zSig1, zSig1, &term2, &term3 );
7315         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
7316         while ( (int64_t) rem1 < 0 ) {
7317             --zSig1;
7318             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
7319             term3 |= 1;
7320             term2 |= doubleZSig0;
7321             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
7322         }
7323         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7324     }
7325     shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
7326     return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
7327 
7328 }
7329 
7330 static inline FloatRelation
7331 floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
7332                           float_status *status)
7333 {
7334     bool aSign, bSign;
7335 
7336     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7337         float_raise(float_flag_invalid, status);
7338         return float_relation_unordered;
7339     }
7340     if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7341           ( extractFloatx80Frac( a )<<1 ) ) ||
7342         ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7343           ( extractFloatx80Frac( b )<<1 ) )) {
7344         if (!is_quiet ||
7345             floatx80_is_signaling_nan(a, status) ||
7346             floatx80_is_signaling_nan(b, status)) {
7347             float_raise(float_flag_invalid, status);
7348         }
7349         return float_relation_unordered;
7350     }
7351     aSign = extractFloatx80Sign( a );
7352     bSign = extractFloatx80Sign( b );
7353     if ( aSign != bSign ) {
7355         if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7356              ( ( a.low | b.low ) == 0 ) ) {
7357             /* zero case */
7358             return float_relation_equal;
7359         } else {
7360             return 1 - (2 * aSign);
7361         }
7362     } else {
7363         /* Normalize pseudo-denormals before comparison.  */
7364         if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
7365             ++a.high;
7366         }
7367         if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
7368             ++b.high;
7369         }
7370         if (a.low == b.low && a.high == b.high) {
7371             return float_relation_equal;
7372         } else {
7373             return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7374         }
7375     }
7376 }
7377 
7378 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
7379 {
7380     return floatx80_compare_internal(a, b, 0, status);
7381 }
7382 
7383 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
7384                                      float_status *status)
7385 {
7386     return floatx80_compare_internal(a, b, 1, status);
7387 }
7388 
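/*
 * Illustrative sketch, not part of the original SoftFloat code: the ordered
 * and quiet flavours of the comparison above.  With a quiet NaN operand both
 * return float_relation_unordered, but only floatx80_compare() raises
 * float_flag_invalid; floatx80_compare_quiet() signals only for signaling
 * NaNs and invalid encodings.
 */
static void __attribute__((unused)) floatx80_compare_example(float_status *st)
{
    floatx80 one = packFloatx80(0, 0x3FFF, UINT64_C(0x8000000000000000));
    floatx80 qnan = floatx80_default_nan(st);

    floatx80_compare(one, qnan, st);        /* unordered, raises invalid */
    floatx80_compare_quiet(one, qnan, st);  /* unordered, no exception */
}
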
7389 static inline FloatRelation
7390 float128_compare_internal(float128 a, float128 b, bool is_quiet,
7391                           float_status *status)
7392 {
7393     bool aSign, bSign;
7394 
7395     if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7396           ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7397         ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7398           ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7399         if (!is_quiet ||
7400             float128_is_signaling_nan(a, status) ||
7401             float128_is_signaling_nan(b, status)) {
7402             float_raise(float_flag_invalid, status);
7403         }
7404         return float_relation_unordered;
7405     }
7406     aSign = extractFloat128Sign( a );
7407     bSign = extractFloat128Sign( b );
7408     if ( aSign != bSign ) {
7409         if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7410             /* zero case */
7411             return float_relation_equal;
7412         } else {
7413             return 1 - (2 * aSign);
7414         }
7415     } else {
7416         if (a.low == b.low && a.high == b.high) {
7417             return float_relation_equal;
7418         } else {
7419             return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7420         }
7421     }
7422 }
7423 
7424 FloatRelation float128_compare(float128 a, float128 b, float_status *status)
7425 {
7426     return float128_compare_internal(a, b, 0, status);
7427 }
7428 
7429 FloatRelation float128_compare_quiet(float128 a, float128 b,
7430                                      float_status *status)
7431 {
7432     return float128_compare_internal(a, b, 1, status);
7433 }
7434 
7435 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
7436 {
7437     bool aSign;
7438     int32_t aExp;
7439     uint64_t aSig;
7440 
7441     if (floatx80_invalid_encoding(a)) {
7442         float_raise(float_flag_invalid, status);
7443         return floatx80_default_nan(status);
7444     }
7445     aSig = extractFloatx80Frac( a );
7446     aExp = extractFloatx80Exp( a );
7447     aSign = extractFloatx80Sign( a );
7448 
7449     if ( aExp == 0x7FFF ) {
7450         if ( aSig<<1 ) {
7451             return propagateFloatx80NaN(a, a, status);
7452         }
7453         return a;
7454     }
7455 
7456     if (aExp == 0) {
7457         if (aSig == 0) {
7458             return a;
7459         }
7460         aExp++;
7461     }
7462 
7463     if (n > 0x10000) {
7464         n = 0x10000;
7465     } else if (n < -0x10000) {
7466         n = -0x10000;
7467     }
7468 
7469     aExp += n;
7470     return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7471                                          aSign, aExp, aSig, 0, status);
7472 }
7473 
7474 float128 float128_scalbn(float128 a, int n, float_status *status)
7475 {
7476     bool aSign;
7477     int32_t aExp;
7478     uint64_t aSig0, aSig1;
7479 
7480     aSig1 = extractFloat128Frac1( a );
7481     aSig0 = extractFloat128Frac0( a );
7482     aExp = extractFloat128Exp( a );
7483     aSign = extractFloat128Sign( a );
7484     if ( aExp == 0x7FFF ) {
7485         if ( aSig0 | aSig1 ) {
7486             return propagateFloat128NaN(a, a, status);
7487         }
7488         return a;
7489     }
7490     if (aExp != 0) {
7491         aSig0 |= UINT64_C(0x0001000000000000);
7492     } else if (aSig0 == 0 && aSig1 == 0) {
7493         return a;
7494     } else {
7495         aExp++;
7496     }
7497 
7498     if (n > 0x10000) {
7499         n = 0x10000;
7500     } else if (n < -0x10000) {
7501         n = -0x10000;
7502     }
7503 
7504     aExp += n - 1;
7505     return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
7506                                          , status);
7507 
7508 }
7509 
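/*
 * Illustrative sketch, not part of the original SoftFloat code: the scalbn
 * routines above add 'n' to the exponent and renormalize, so the scaling is
 * exact whenever the result stays in range; 1.5 * 2^3 is exactly 12.0.  The
 * clamping of 'n' to +/- 0x10000 only guards against integer overflow, since
 * a shift of that size already overflows or underflows the format.
 */
static floatx80 __attribute__((unused)) floatx80_scalbn_example(float_status *st)
{
    return floatx80_scalbn(packFloatx80(0, 0x3FFF,
                                        UINT64_C(0xC000000000000000)),
                           3, st);  /* 12.0: biased exponent 0x4002 */
}
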
7510 static void __attribute__((constructor)) softfloat_init(void)
7511 {
7512     union_float64 ua, ub, uc, ur;
7513 
7514     if (QEMU_NO_HARDFLOAT) {
7515         return;
7516     }
7517     /*
7518      * Test that the host's FMA is not obviously broken. For example,
7519      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
7520      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
7521      */
7522     ua.s = 0x0020000000000001ULL;
7523     ub.s = 0x3ca0000000000000ULL;
7524     uc.s = 0x0020000000000000ULL;
7525     ur.h = fma(ua.h, ub.h, uc.h);
7526     if (ur.s != 0x0020000000000001ULL) {
7527         force_soft_fma = true;
7528     }
7529 }
7530