xref: /openbmc/qemu/fpu/softfloat.c (revision 463e45dc)
1 /*
2  * QEMU float support
3  *
4  * The code in this source file is derived from release 2a of the SoftFloat
5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6  * some later contributions) are provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file after December 1st 2014 will be
14  * taken to be licensed under the Softfloat-2a license unless specifically
15  * indicated otherwise.
16  */
17 
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22 
23 Written by John R. Hauser.  This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704.  Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980.  The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32 
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43 
44 ===============================================================================
45 */
46 
47 /* BSD licensing:
48  * Copyright (c) 2006, Fabrice Bellard
49  * All rights reserved.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions are met:
53  *
54  * 1. Redistributions of source code must retain the above copyright notice,
55  * this list of conditions and the following disclaimer.
56  *
57  * 2. Redistributions in binary form must reproduce the above copyright notice,
58  * this list of conditions and the following disclaimer in the documentation
59  * and/or other materials provided with the distribution.
60  *
61  * 3. Neither the name of the copyright holder nor the names of its contributors
62  * may be used to endorse or promote products derived from this software without
63  * specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75  * THE POSSIBILITY OF SUCH DAMAGE.
76  */
77 
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79  * version 2 or later. See the COPYING file in the top-level directory.
80  */
81 
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83  * target-dependent and needs the TARGET_* macros.
84  */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89 
90 /* We only need stdlib for abort() */
91 
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations.  (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98 
99 /*
100  * Hardfloat
101  *
102  * Fast emulation of guest FP instructions is challenging for two reasons.
103  * First, FP instruction semantics are similar but not identical, particularly
104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
105  * exception flags is not trivial: reading the host's flags register with a
106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107  * and trapping on every FP exception is not fast nor pleasant to work with.
108  *
109  * We address these challenges by leveraging the host FPU for a subset of the
110  * operations. To do this we expand on the idea presented in this paper:
111  *
112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114  *
115  * The idea is thus to leverage the host FPU to (1) compute FP operations
116  * and (2) identify whether FP exceptions occurred while avoiding
117  * expensive exception flag register accesses.
118  *
119  * An important optimization shown in the paper is that given that exception
120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121  * This is particularly useful for the inexact flag, which is very frequently
122  * raised in floating-point workloads.
123  *
124  * We optimize the code further by deferring to soft-fp whenever FP exception
125  * detection might get hairy. Two examples: (1) when at least one operand is
126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127  * and the result is < the minimum normal.
128  */
/*
 * Generate a helper that flushes a denormal input operand to a
 * correctly-signed zero and raises float_flag_input_denormal.
 * The "__nocheck" suffix means the helper does not test
 * s->flush_inputs_to_zero itself; the GEN_INPUT_FLUSH{1,2,3} wrappers
 * below perform that check before calling it.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK
142 
/*
 * Generate a one-operand input-flush helper: a no-op unless the status
 * has flush_inputs_to_zero set, in which case it defers to the
 * corresponding __nocheck helper.
 */
#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1
155 
/*
 * As GEN_INPUT_FLUSH1, but for two operands: the flush_inputs_to_zero
 * test is done once, then both operands are flushed.
 */
#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2
169 
/*
 * As GEN_INPUT_FLUSH1, but for three operands (e.g. for fused
 * multiply-add style operations).
 */
#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
184 
185 /*
186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
187  * hardfloat functions. Each combination of number of inputs and float size
188  * gets its own value.
189  */
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
197 #else
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
204 #endif
205 
206 /*
207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208  * float{32,64}_is_infinity when !USE_FP.
209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211  */
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF   1
214 #else
215 # define QEMU_HARDFLOAT_USE_ISINF   0
216 #endif
217 
218 /*
219  * Some targets clear the FP flags before most FP operations. This prevents
220  * the use of hardfloat, since hardfloat relies on the inexact flag being
221  * already set.
222  */
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
226     IEEE implementation
227 # endif
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
230 #else
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
233 #endif
234 
235 static inline bool can_use_fpu(const float_status *s)
236 {
237     if (QEMU_NO_HARDFLOAT) {
238         return false;
239     }
240     return likely(s->float_exception_flags & float_flag_inexact &&
241                   s->float_rounding_mode == float_round_nearest_even);
242 }
243 
244 /*
245  * Hardfloat generation functions. Each operation can have two flavors:
246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247  * most condition checks, or native ones (e.g. fpclassify).
248  *
249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
250  * compiler to propagate constants and inline everything into the callers.
251  *
252  * We only generate functions for operations with two inputs, since only
253  * these are common enough to justify consolidating them into common code.
254  */
255 
/* Overlay of the softfloat representation (s) and the host type (h). */
typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

/* Predicate over two operands, used for the pre/post checks below. */
typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

/* Softfloat and host-FPU implementations of a 2-input operation. */
typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);
273 
/* 2-input is-zero-or-normal */
/* Returns true iff both operands are zero or normal (no denormal/inf/NaN). */
static inline bool f32_is_zon2(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        /*
         * Not using a temp variable for consecutive fpclassify calls ends up
         * generating faster code.
         */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s);
}
288 
/* Double-precision counterpart of f32_is_zon2. */
static inline bool f64_is_zon2(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        /* As in f32_is_zon2, fpclassify results are deliberately not cached. */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s);
}
298 
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307     }
308     return float32_is_zero_or_normal(a.s) &&
309            float32_is_zero_or_normal(b.s) &&
310            float32_is_zero_or_normal(c.s);
311 }
312 
/* Double-precision counterpart of f32_is_zon3. */
static inline
bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
{
    if (QEMU_HARDFLOAT_3F64_USE_FP) {
        /* As in f32_is_zon2, fpclassify results are deliberately not cached. */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s) &&
           float64_is_zero_or_normal(c.s);
}
325 
326 static inline bool f32_is_inf(union_float32 a)
327 {
328     if (QEMU_HARDFLOAT_USE_ISINF) {
329         return isinf(a.h);
330     }
331     return float32_is_infinity(a.s);
332 }
333 
334 static inline bool f64_is_inf(union_float64 a)
335 {
336     if (QEMU_HARDFLOAT_USE_ISINF) {
337         return isinf(a.h);
338     }
339     return float64_is_infinity(a.s);
340 }
341 
/*
 * Generic hardfloat wrapper for a 2-input float32 operation.
 *
 * Computes the operation with the host FPU ("hard") when it is safe to
 * do so, otherwise falls back to the softfloat implementation ("soft"):
 *  - when can_use_fpu() rejects the current status;
 *  - when the "pre" predicate rejects the (already flushed) inputs,
 *    e.g. because one of them is denormal/inf/NaN;
 *  - when the hard result's magnitude is at or below FLT_MIN and the
 *    "post" predicate cannot rule out an underflow corner case.
 * An infinite hard result is reported as overflow (the pre filter is
 * expected to have excluded infinite inputs — confirm against callers).
 */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    /* Flush denormal inputs first so both paths see the same operands. */
    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        /* Possible underflow: recompute with softfloat for exact flags. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
372 
/*
 * Generic hardfloat wrapper for a 2-input float64 operation.
 * Identical structure to float32_gen2, using DBL_MIN as the
 * possible-underflow threshold.
 */
static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    /* Flush denormal inputs first so both paths see the same operands. */
    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        /* Possible underflow: recompute with softfloat for exact flags. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
403 
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
407 
408 static inline uint32_t extractFloat32Frac(float32 a)
409 {
410     return float32_val(a) & 0x007FFFFF;
411 }
412 
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
416 
417 static inline int extractFloat32Exp(float32 a)
418 {
419     return (float32_val(a) >> 23) & 0xFF;
420 }
421 
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
425 
426 static inline bool extractFloat32Sign(float32 a)
427 {
428     return float32_val(a) >> 31;
429 }
430 
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
434 
435 static inline uint64_t extractFloat64Frac(float64 a)
436 {
437     return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
438 }
439 
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
443 
444 static inline int extractFloat64Exp(float64 a)
445 {
446     return (float64_val(a) >> 52) & 0x7FF;
447 }
448 
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
452 
453 static inline bool extractFloat64Sign(float64 a)
454 {
455     return float64_val(a) >> 63;
456 }
457 
458 /*
459  * Classify a floating point number. Everything above float_class_qnan
460  * is a NaN so cls >= float_class_qnan is any NaN.
461  */
462 
typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified, /* initial state before canonicalization */
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
471 
/* Convert a FloatClass into a single-bit mask for set-membership tests. */
#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    /* Combined masks used e.g. by pick_nan_muladd's ab_mask/abc_mask. */
    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
484 
485 
486 /* Simple helpers for checking if, or what kind of, NaN we have */
/* Simple helpers for checking if, or what kind of, NaN we have */

/* True for both quiet and signaling NaNs (all classes >= qnan). */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}

/* True only for signaling NaNs. */
static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

/* True only for quiet NaNs. */
static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}
501 
502 /*
503  * Structure holding all of the decomposed parts of a float.
504  * The exponent is unbiased and the fraction is normalized.
505  *
506  * The fraction words are stored in big-endian word ordering,
507  * so that truncation from a larger format to a smaller format
508  * can be done simply by ignoring subsequent elements.
509  */
510 
typedef struct {
    FloatClass cls;     /* classification of the value */
    bool sign;          /* true if negative */
    int32_t exp;        /* unbiased exponent */
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

/* As FloatParts64, but with a 128-bit fraction in two words. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

/* As FloatParts64, but with a 256-bit fraction in four words. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;

/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
549 
550 /* Structure holding all of the relevant parameters for a format.
551  *   exp_size: the size of the exponent field
552  *   exp_bias: the offset applied to the exponent field
553  *   exp_max: the maximum normalised exponent
554  *   frac_size: the size of the fraction field
555  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
556  * The following are computed based the size of fraction
557  *   frac_lsb: least significant bit of fraction
558  *   frac_lsbm1: the bit below the least significant bit (for rounding)
559  *   round_mask/roundeven_mask: masks used for rounding
560  * The following optional modifiers are available:
561  *   arm_althp: handle ARM Alternative Half Precision
562  */
typedef struct {
    int exp_size;             /* number of exponent bits */
    int exp_bias;             /* bias applied to the stored exponent */
    int exp_max;              /* maximum stored exponent value */
    int frac_size;            /* number of fraction bits */
    int frac_shift;           /* left shift aligning frac with the
                                 DECOMPOSED_BINARY_POINT */
    uint64_t frac_lsb;        /* least significant fraction bit, decomposed */
    uint64_t frac_lsbm1;      /* bit below frac_lsb (rounding bit) */
    uint64_t round_mask;      /* bits discarded when rounding */
    uint64_t roundeven_mask;  /* round_mask plus frac_lsb, for ties-to-even */
    bool arm_althp;           /* ARM Alternative Half Precision handling */
} FloatFmt;
575 
/* Expand fields based on the size of exponent and fraction */
/*
 * Note: (-F - 1) & 63 equals (63 - F) mod 64, i.e. the left shift that
 * places the top fraction bit just below DECOMPOSED_BINARY_POINT.
 * round_mask is frac_lsb - 1 and roundeven_mask is 2 * frac_lsb - 1.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = E,                                             \
    .exp_bias       = ((1 << E) - 1) >> 1,                           \
    .exp_max        = (1 << E) - 1,                                  \
    .frac_size      = F,                                             \
    .frac_shift     = (-F - 1) & 63,                                 \
    .frac_lsb       = 1ull << ((-F - 1) & 63),                       \
    .frac_lsbm1     = 1ull << ((-F - 2) & 63),                       \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1,                 \
    .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1

/* IEEE binary16 (half precision). */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* binary16 with ARM Alternative Half Precision semantics. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* bfloat16: binary32 exponent range with a 7-bit fraction. */
static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

/* IEEE binary32 (single precision). */
static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

/* IEEE binary64 (double precision). */
static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

/* IEEE binary128 (quadruple precision). */
static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};
612 
613 /* Unpack a float to parts, but do not canonicalize.  */
614 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
615 {
616     const int f_size = fmt->frac_size;
617     const int e_size = fmt->exp_size;
618 
619     *r = (FloatParts64) {
620         .cls = float_class_unclassified,
621         .sign = extract64(raw, f_size + e_size, 1),
622         .exp = extract64(raw, f_size, e_size),
623         .frac = extract64(raw, 0, f_size)
624     };
625 }
626 
/* Per-format wrappers around unpack_raw64. */
static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}
646 
647 static void float128_unpack_raw(FloatParts128 *p, float128 f)
648 {
649     const int f_size = float128_params.frac_size - 64;
650     const int e_size = float128_params.exp_size;
651 
652     *p = (FloatParts128) {
653         .cls = float_class_unclassified,
654         .sign = extract64(f.high, f_size + e_size, 1),
655         .exp = extract64(f.high, f_size, e_size),
656         .frac_hi = extract64(f.high, 0, f_size),
657         .frac_lo = f.low,
658     };
659 }
660 
661 /* Pack a float from parts, but do not canonicalize.  */
662 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
663 {
664     const int f_size = fmt->frac_size;
665     const int e_size = fmt->exp_size;
666     uint64_t ret;
667 
668     ret = (uint64_t)p->sign << (f_size + e_size);
669     ret = deposit64(ret, f_size, e_size, p->exp);
670     ret = deposit64(ret, 0, f_size, p->frac);
671     return ret;
672 }
673 
/* Per-format wrappers around pack_raw64. */
static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}
693 
694 static float128 float128_pack_raw(const FloatParts128 *p)
695 {
696     const int f_size = float128_params.frac_size - 64;
697     const int e_size = float128_params.exp_size;
698     uint64_t hi;
699 
700     hi = (uint64_t)p->sign << (f_size + e_size);
701     hi = deposit64(hi, f_size, e_size, p->exp);
702     hi = deposit64(hi, 0, f_size, p->frac_hi);
703     return make_float128(hi, p->frac_lo);
704 }
705 
706 /*----------------------------------------------------------------------------
707 | Functions and definitions to determine:  (1) whether tininess for underflow
708 | is detected before or after rounding by default, (2) what (if anything)
709 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
710 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
711 | are propagated from function inputs to output.  These details are target-
712 | specific.
713 *----------------------------------------------------------------------------*/
714 #include "softfloat-specialize.c.inc"
715 
/*
 * Dispatch a parts operation to the 64- or 128-bit implementation
 * based on the static type of the P argument, via QEMU_GENERIC.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

/* As above, but also dispatching to a 256-bit implementation. */
#define PARTS_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

/* ab_mask/abc_mask are float_cmask ORs describing the operand classes. */
static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
805 
/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 */

/* Select the frac64_/frac128_(/frac256_) helper by the pointer type of P. */
#define FRAC_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
                 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
816 
/*
 * R = A + B on the fraction words; returns true iff the addition
 * carried out of the most significant word.  The multi-word variants
 * chain the carry from the least to the most significant word, so the
 * statement order is significant.
 */
static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
841 
/*
 * R = A + C, adding a plain 64-bit value to the fraction; returns true
 * on carry-out of the most significant word.  In the 128-bit form the
 * incoming addend C is consumed at the low word and the variable is
 * then reused to hold the carry into the high word.
 */
static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}

static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}

#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
854 
855 static void frac64_allones(FloatParts64 *a)
856 {
857     a->frac = -1;
858 }
859 
860 static void frac128_allones(FloatParts128 *a)
861 {
862     a->frac_hi = a->frac_lo = -1;
863 }
864 
865 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
866 
867 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
868 {
869     return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
870 }
871 
872 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
873 {
874     uint64_t ta = a->frac_hi, tb = b->frac_hi;
875     if (ta == tb) {
876         ta = a->frac_lo, tb = b->frac_lo;
877         if (ta == tb) {
878             return 0;
879         }
880     }
881     return ta < tb ? -1 : 1;
882 }
883 
884 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
885 
886 static void frac64_clear(FloatParts64 *a)
887 {
888     a->frac = 0;
889 }
890 
891 static void frac128_clear(FloatParts128 *a)
892 {
893     a->frac_hi = a->frac_lo = 0;
894 }
895 
896 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
897 
898 static bool frac64_eqz(FloatParts64 *a)
899 {
900     return a->frac == 0;
901 }
902 
903 static bool frac128_eqz(FloatParts128 *a)
904 {
905     return (a->frac_hi | a->frac_lo) == 0;
906 }
907 
908 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
909 
/*
 * Widening multiply of fractions: R (double width) = A * B.
 * Note the dispatch macro selects on A, since R is always the
 * next-wider parts type.
 */
static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
922 
/*
 * Two's-complement negate of the fraction: A = -A.  The multi-word
 * variants subtract each word from zero, propagating the borrow from
 * least to most significant word, so the statement order is
 * significant.
 */
static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}

static void frac128_neg(FloatParts128 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
945 
946 static int frac64_normalize(FloatParts64 *a)
947 {
948     if (a->frac) {
949         int shift = clz64(a->frac);
950         a->frac <<= shift;
951         return shift;
952     }
953     return 64;
954 }
955 
/*
 * Shift the 128-bit fraction left until its most significant bit is
 * set, returning the shift count; an all-zero fraction reports 128.
 * shl_double merges bits from the low word into the high word as it
 * shifts, so frac_hi must be computed before frac_lo is updated.
 */
static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        /* High word empty: move the low word up, plus its own shift. */
        int shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}
971 
/*
 * Shift the 256-bit fraction left until its most significant bit is
 * set, returning the shift count; an all-zero fraction reports 256.
 * First rotate whole 64-bit words down until a0 is the most
 * significant non-zero word, then apply the residual sub-word shift
 * across all four words.
 */
static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            /* Already normalized; nothing to store back. */
            return 0;
        }
        ret = shl;
    } else {
        /* Word-granular rotation; each step accounts for 64 bits. */
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    /* Residual shift of 1..63 bits across the word boundaries. */
    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1020 
/*
 * Logical left shift of the fraction by C bits.  The 128-bit form
 * handles a whole-word shift (bit 6 of C) first, then the residual
 * 0..63-bit shift with shl_double, which moves bits from the low word
 * into the high word.
 */
static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}

static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a0 = a1, a1 = 0;
    }

    c &= 63;
    if (c) {
        a0 = shl_double(a0, a1, c);
        a1 = a1 << c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1045 
/*
 * Logical right shift of the fraction by C bits; shifted-out bits are
 * discarded (contrast frac_shrjam below, which preserves them as a
 * sticky bit).  Mirrors the structure of frac128_shl.
 */
static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}

static void frac128_shr(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a1 = a0, a0 = 0;
    }

    c &= 63;
    if (c) {
        a1 = shr_double(a0, a1, c);
        a0 = a0 >> c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1070 
/*
 * Shift right and "jam": any bit shifted out is ORed into the least
 * significant bit of the result, preserving inexactness for later
 * rounding.  For C >= 64 the whole value collapses to 0 or 1.
 */
static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            /* shr_double(a0, 0, c) yields exactly the bits shifted out. */
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}
1084 
/*
 * 128-bit shift right and jam: shift A right by C bits, ORing every
 * bit shifted out into the lsb of the result (sticky).  Whole-word
 * shifts are handled by moving words and accumulating the displaced
 * word into STICKY; the residual 0..63-bit shift is done with
 * shr_double.
 */
static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Entire fraction shifted out; only the sticky bit survives. */
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}
1116 
/*
 * 256-bit shift right and jam; same scheme as frac128_shrjam, with
 * the whole-word moves decomposed into a 128-bit step (bit 7 of C)
 * and a 64-bit step (bit 6 of C), each folding displaced words into
 * STICKY.
 */
static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Entire fraction shifted out; only the sticky bit survives. */
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1160 
/*
 * R = A - B on the fraction words; returns true iff the subtraction
 * borrowed out of the most significant word (i.e. A < B).  The
 * multi-word variants chain the borrow from least to most significant
 * word, so the statement order is significant.
 */
static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return usub64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1185 
1186 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1187 {
1188     r->frac = a->frac_hi | (a->frac_lo != 0);
1189 }
1190 
1191 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1192 {
1193     r->frac_hi = a->frac_hi;
1194     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1195 }
1196 
1197 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1198 
/*
 * Widen a fraction into the next larger representation, placing the
 * existing words at the most significant end and zero-filling below.
 * Note the dispatch macro selects on B (the narrow source).
 */
static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}

static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}

#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1214 
/*
 * Instantiate the generic softfloat-parts templates.  N is the
 * fraction width of the instantiation and W the double-width used for
 * widening operations; partsN()/FloatPartsN/FloatPartsW expand to the
 * size-specific names inside the included templates.  The 256-bit
 * instantiation only provides the addsub helpers.
 */
#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN    glue(FloatParts,N)
#define FloatPartsW    glue(FloatParts,W)

#define N 64
#define W 128

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef  N
#undef  W
#define N 128
#define W 256

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef  N
#undef  W
#define N            256

#include "softfloat-parts-addsub.c.inc"

#undef  N
#undef  W
#undef  partsN
#undef  FloatPartsN
#undef  FloatPartsW
1244 
/*
 * Pack/unpack routines with a specific FloatFmt.
 *
 * The *_unpack_canonical functions convert a value from its raw
 * storage representation into canonical FloatParts; the
 * *_round_pack_canonical functions round the parts and convert back.
 * The float16a_* variants take an explicit FloatFmt parameter so an
 * alternative half-precision format description can be substituted.
 */

static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}

static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}

static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}

static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}

static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}

static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}

static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}

static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}

static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}

static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}

static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}

static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}
1331 
1332 /*
1333  * Addition and subtraction
1334  */
1335 
1336 static float16 QEMU_FLATTEN
1337 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1338 {
1339     FloatParts64 pa, pb, *pr;
1340 
1341     float16_unpack_canonical(&pa, a, status);
1342     float16_unpack_canonical(&pb, b, status);
1343     pr = parts_addsub(&pa, &pb, status, subtract);
1344 
1345     return float16_round_pack_canonical(pr, status);
1346 }
1347 
1348 float16 float16_add(float16 a, float16 b, float_status *status)
1349 {
1350     return float16_addsub(a, b, status, false);
1351 }
1352 
1353 float16 float16_sub(float16 a, float16 b, float_status *status)
1354 {
1355     return float16_addsub(a, b, status, true);
1356 }
1357 
1358 static float32 QEMU_SOFTFLOAT_ATTR
1359 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1360 {
1361     FloatParts64 pa, pb, *pr;
1362 
1363     float32_unpack_canonical(&pa, a, status);
1364     float32_unpack_canonical(&pb, b, status);
1365     pr = parts_addsub(&pa, &pb, status, subtract);
1366 
1367     return float32_round_pack_canonical(pr, status);
1368 }
1369 
1370 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1371 {
1372     return soft_f32_addsub(a, b, status, false);
1373 }
1374 
1375 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1376 {
1377     return soft_f32_addsub(a, b, status, true);
1378 }
1379 
1380 static float64 QEMU_SOFTFLOAT_ATTR
1381 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1382 {
1383     FloatParts64 pa, pb, *pr;
1384 
1385     float64_unpack_canonical(&pa, a, status);
1386     float64_unpack_canonical(&pb, b, status);
1387     pr = parts_addsub(&pa, &pb, status, subtract);
1388 
1389     return float64_round_pack_canonical(pr, status);
1390 }
1391 
1392 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1393 {
1394     return soft_f64_addsub(a, b, status, false);
1395 }
1396 
1397 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1398 {
1399     return soft_f64_addsub(a, b, status, true);
1400 }
1401 
/* Native-arithmetic kernels used by the hardfloat fast path. */

static float hard_f32_add(float x, float y)
{
    return x + y;
}

static float hard_f32_sub(float x, float y)
{
    return x - y;
}

static double hard_f64_add(double x, double y)
{
    return x + y;
}

static double hard_f64_sub(double x, double y)
{
    return x - y;
}
1421 
1422 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1423 {
1424     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1425         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1426     }
1427     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1428 }
1429 
1430 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1431 {
1432     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1433         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1434     } else {
1435         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1436     }
1437 }
1438 
/*
 * Hardfloat dispatch for add/sub: float32_gen2/float64_gen2 choose
 * between the native HARD kernel and the softfloat SOFT fallback,
 * guarded by the f*_is_zon2 pre-check and f*_addsubmul_post
 * post-check.
 */
static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}

static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsubmul_post);
}

float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}

float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}

float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}

float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}
1476 
1477 static bfloat16 QEMU_FLATTEN
1478 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
1479 {
1480     FloatParts64 pa, pb, *pr;
1481 
1482     bfloat16_unpack_canonical(&pa, a, status);
1483     bfloat16_unpack_canonical(&pb, b, status);
1484     pr = parts_addsub(&pa, &pb, status, subtract);
1485 
1486     return bfloat16_round_pack_canonical(pr, status);
1487 }
1488 
1489 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1490 {
1491     return bfloat16_addsub(a, b, status, false);
1492 }
1493 
1494 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1495 {
1496     return bfloat16_addsub(a, b, status, true);
1497 }
1498 
1499 static float128 QEMU_FLATTEN
1500 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1501 {
1502     FloatParts128 pa, pb, *pr;
1503 
1504     float128_unpack_canonical(&pa, a, status);
1505     float128_unpack_canonical(&pb, b, status);
1506     pr = parts_addsub(&pa, &pb, status, subtract);
1507 
1508     return float128_round_pack_canonical(pr, status);
1509 }
1510 
1511 float128 float128_add(float128 a, float128 b, float_status *status)
1512 {
1513     return float128_addsub(a, b, status, false);
1514 }
1515 
1516 float128 float128_sub(float128 a, float128 b, float_status *status)
1517 {
1518     return float128_addsub(a, b, status, true);
1519 }
1520 
1521 /*
1522  * Multiplication
1523  */
1524 
1525 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
1526 {
1527     FloatParts64 pa, pb, *pr;
1528 
1529     float16_unpack_canonical(&pa, a, status);
1530     float16_unpack_canonical(&pb, b, status);
1531     pr = parts_mul(&pa, &pb, status);
1532 
1533     return float16_round_pack_canonical(pr, status);
1534 }
1535 
1536 static float32 QEMU_SOFTFLOAT_ATTR
1537 soft_f32_mul(float32 a, float32 b, float_status *status)
1538 {
1539     FloatParts64 pa, pb, *pr;
1540 
1541     float32_unpack_canonical(&pa, a, status);
1542     float32_unpack_canonical(&pb, b, status);
1543     pr = parts_mul(&pa, &pb, status);
1544 
1545     return float32_round_pack_canonical(pr, status);
1546 }
1547 
1548 static float64 QEMU_SOFTFLOAT_ATTR
1549 soft_f64_mul(float64 a, float64 b, float_status *status)
1550 {
1551     FloatParts64 pa, pb, *pr;
1552 
1553     float64_unpack_canonical(&pa, a, status);
1554     float64_unpack_canonical(&pb, b, status);
1555     pr = parts_mul(&pa, &pb, status);
1556 
1557     return float64_round_pack_canonical(pr, status);
1558 }
1559 
/* Native-arithmetic multiply kernels for the hardfloat fast path. */

static float hard_f32_mul(float x, float y)
{
    return x * y;
}

static double hard_f64_mul(double x, double y)
{
    return x * y;
}
1569 
/*
 * Hardfloat dispatch for multiply, reusing the add/sub pre/post
 * checks (the "both zero" post-condition applies to mul as well).
 */
float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, f32_addsubmul_post);
}

float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, f64_addsubmul_post);
}
1583 
1584 bfloat16 QEMU_FLATTEN
1585 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1586 {
1587     FloatParts64 pa, pb, *pr;
1588 
1589     bfloat16_unpack_canonical(&pa, a, status);
1590     bfloat16_unpack_canonical(&pb, b, status);
1591     pr = parts_mul(&pa, &pb, status);
1592 
1593     return bfloat16_round_pack_canonical(pr, status);
1594 }
1595 
1596 float128 QEMU_FLATTEN
1597 float128_mul(float128 a, float128 b, float_status *status)
1598 {
1599     FloatParts128 pa, pb, *pr;
1600 
1601     float128_unpack_canonical(&pa, a, status);
1602     float128_unpack_canonical(&pb, b, status);
1603     pr = parts_mul(&pa, &pb, status);
1604 
1605     return float128_round_pack_canonical(pr, status);
1606 }
1607 
1608 /*
1609  * Fused multiply-add
1610  */
1611 
1612 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
1613                                     int flags, float_status *status)
1614 {
1615     FloatParts64 pa, pb, pc, *pr;
1616 
1617     float16_unpack_canonical(&pa, a, status);
1618     float16_unpack_canonical(&pb, b, status);
1619     float16_unpack_canonical(&pc, c, status);
1620     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1621 
1622     return float16_round_pack_canonical(pr, status);
1623 }
1624 
1625 static float32 QEMU_SOFTFLOAT_ATTR
1626 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1627                 float_status *status)
1628 {
1629     FloatParts64 pa, pb, pc, *pr;
1630 
1631     float32_unpack_canonical(&pa, a, status);
1632     float32_unpack_canonical(&pb, b, status);
1633     float32_unpack_canonical(&pc, c, status);
1634     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1635 
1636     return float32_round_pack_canonical(pr, status);
1637 }
1638 
1639 static float64 QEMU_SOFTFLOAT_ATTR
1640 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1641                 float_status *status)
1642 {
1643     FloatParts64 pa, pb, pc, *pr;
1644 
1645     float64_unpack_canonical(&pa, a, status);
1646     float64_unpack_canonical(&pb, b, status);
1647     float64_unpack_canonical(&pc, c, status);
1648     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1649 
1650     return float64_round_pack_canonical(pr, status);
1651 }
1652 
/* Testing knob: when set, every muladd takes the softfloat path. */
static bool force_soft_fma;

/*
 * Hardfloat float32 fused multiply-add: use the host fmaf() when the
 * status/operand preconditions allow, otherwise fall back to the
 * softfloat implementation.
 */
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* The halve-result variant is only handled by the soft path. */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        /* Build the correctly signed zero product by hand. */
        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        /* Save originals: the soft fallback needs unmodified inputs. */
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /* Tiny result: redo in softfloat for exact flags/rounding. */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}
1725 
/*
 * Hardfloat float64 fused multiply-add; mirrors float32_muladd but
 * uses the host fma() on doubles.
 */
float64 QEMU_FLATTEN
float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
{
    union_float64 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* The halve-result variant is only handled by the soft path. */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
        union_float64 up;
        bool prod_sign;

        /* Build the correctly signed zero product by hand. */
        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float64_set_sign(float64_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        /* Save originals: the soft fallback needs unmodified inputs. */
        union_float64 ua_orig = ua;
        union_float64 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fma(ua.h, ub.h, uc.h);

        if (unlikely(f64_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
            /*
             * NOTE(review): the threshold here is FLT_MIN, not DBL_MIN.
             * That is a much wider net than f64 subnormals require, so
             * it is conservative (extra trips to the soft path) rather
             * than incorrect -- confirm this is intentional.
             */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float64_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
}
1796 
1797 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1798                                       int flags, float_status *status)
1799 {
1800     FloatParts64 pa, pb, pc, *pr;
1801 
1802     bfloat16_unpack_canonical(&pa, a, status);
1803     bfloat16_unpack_canonical(&pb, b, status);
1804     bfloat16_unpack_canonical(&pc, c, status);
1805     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1806 
1807     return bfloat16_round_pack_canonical(pr, status);
1808 }
1809 
1810 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
1811                                       int flags, float_status *status)
1812 {
1813     FloatParts128 pa, pb, pc, *pr;
1814 
1815     float128_unpack_canonical(&pa, a, status);
1816     float128_unpack_canonical(&pb, b, status);
1817     float128_unpack_canonical(&pc, c, status);
1818     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1819 
1820     return float128_round_pack_canonical(pr, status);
1821 }
1822 
/*
 * Returns the result of dividing the floating-point value `a' by the
 * corresponding value `b'. The operation is performed according to
 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 *
 * Note: unlike the parts_* helpers above, this older-style routine
 * passes FloatParts64 by value and returns the result by value.
 */

static FloatParts64 div_floats(FloatParts64 a, FloatParts64 b, float_status *s)
{
    bool sign = a.sign ^ b.sign;

    if (a.cls == float_class_normal && b.cls == float_class_normal) {
        uint64_t n0, n1, q, r;
        int exp = a.exp - b.exp;

        /*
         * We want a 2*N / N-bit division to produce exactly an N-bit
         * result, so that we do not lose any precision and so that we
         * do not have to renormalize afterward.  If A.frac < B.frac,
         * then division would produce an (N-1)-bit result; shift A left
         * by one to produce an N-bit result, and decrement the
         * exponent to match.
         *
         * The udiv_qrnnd algorithm that we're using requires normalization,
         * i.e. the msb of the denominator must be set, which is already true.
         */
        if (a.frac < b.frac) {
            exp -= 1;
            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
        } else {
            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT, &n1, &n0);
        }
        q = udiv_qrnnd(&r, n1, n0, b.frac);

        /* Set lsb if there is a remainder, to set inexact. */
        a.frac = q | (r != 0);
        a.sign = sign;
        a.exp = exp;
        return a;
    }
    /* handle all the NaN cases */
    if (is_nan(a.cls) || is_nan(b.cls)) {
        return *parts_pick_nan(&a, &b, s);
    }
    /* 0/0 or Inf/Inf */
    if (a.cls == b.cls
        &&
        (a.cls == float_class_inf || a.cls == float_class_zero)) {
        float_raise(float_flag_invalid, s);
        parts_default_nan(&a, s);
        return a;
    }
    /* Inf / x or 0 / x */
    if (a.cls == float_class_inf || a.cls == float_class_zero) {
        a.sign = sign;
        return a;
    }
    /* Div 0 => Inf */
    if (b.cls == float_class_zero) {
        float_raise(float_flag_divbyzero, s);
        a.cls = float_class_inf;
        a.sign = sign;
        return a;
    }
    /* Div by Inf */
    if (b.cls == float_class_inf) {
        a.cls = float_class_zero;
        a.sign = sign;
        return a;
    }
    /* All class combinations are handled above. */
    g_assert_not_reached();
}
1894 
1895 float16 float16_div(float16 a, float16 b, float_status *status)
1896 {
1897     FloatParts64 pa, pb, pr;
1898 
1899     float16_unpack_canonical(&pa, a, status);
1900     float16_unpack_canonical(&pb, b, status);
1901     pr = div_floats(pa, pb, status);
1902 
1903     return float16_round_pack_canonical(&pr, status);
1904 }
1905 
1906 static float32 QEMU_SOFTFLOAT_ATTR
1907 soft_f32_div(float32 a, float32 b, float_status *status)
1908 {
1909     FloatParts64 pa, pb, pr;
1910 
1911     float32_unpack_canonical(&pa, a, status);
1912     float32_unpack_canonical(&pb, b, status);
1913     pr = div_floats(pa, pb, status);
1914 
1915     return float32_round_pack_canonical(&pr, status);
1916 }
1917 
1918 static float64 QEMU_SOFTFLOAT_ATTR
1919 soft_f64_div(float64 a, float64 b, float_status *status)
1920 {
1921     FloatParts64 pa, pb, pr;
1922 
1923     float64_unpack_canonical(&pa, a, status);
1924     float64_unpack_canonical(&pb, b, status);
1925     pr = div_floats(pa, pb, status);
1926 
1927     return float64_round_pack_canonical(&pr, status);
1928 }
1929 
/* Single-precision divide on the host FPU (hardfloat fast path). */
static float hard_f32_div(float a, float b)
{
    const float quotient = a / b;
    return quotient;
}
1934 
/* Double-precision divide on the host FPU (hardfloat fast path). */
static double hard_f64_div(double a, double b)
{
    const double quotient = a / b;
    return quotient;
}
1939 
1940 static bool f32_div_pre(union_float32 a, union_float32 b)
1941 {
1942     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1943         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1944                fpclassify(b.h) == FP_NORMAL;
1945     }
1946     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1947 }
1948 
1949 static bool f64_div_pre(union_float64 a, union_float64 b)
1950 {
1951     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1952         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1953                fpclassify(b.h) == FP_NORMAL;
1954     }
1955     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1956 }
1957 
1958 static bool f32_div_post(union_float32 a, union_float32 b)
1959 {
1960     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1961         return fpclassify(a.h) != FP_ZERO;
1962     }
1963     return !float32_is_zero(a.s);
1964 }
1965 
1966 static bool f64_div_post(union_float64 a, union_float64 b)
1967 {
1968     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1969         return fpclassify(a.h) != FP_ZERO;
1970     }
1971     return !float64_is_zero(a.s);
1972 }
1973 
/*
 * float32 division entry point.  float32_gen2 presumably dispatches
 * between the host-FPU fast path (hard_f32_div) and the softfloat
 * fallback (soft_f32_div) using the pre/post predicates above --
 * see float32_gen2 for the exact dispatch rules.
 */
float32 QEMU_FLATTEN
float32_div(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
                        f32_div_pre, f32_div_post);
}
1980 
/*
 * float64 division entry point; same hard/soft dispatch structure as
 * float32_div, via float64_gen2.
 */
float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
                        f64_div_pre, f64_div_post);
}
1987 
1988 /*
1989  * Returns the result of dividing the bfloat16
1990  * value `a' by the corresponding value `b'.
1991  */
1992 
1993 bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
1994 {
1995     FloatParts64 pa, pb, pr;
1996 
1997     bfloat16_unpack_canonical(&pa, a, status);
1998     bfloat16_unpack_canonical(&pb, b, status);
1999     pr = div_floats(pa, pb, status);
2000 
2001     return bfloat16_round_pack_canonical(&pr, status);
2002 }
2003 
2004 /*
2005  * Float to Float conversions
2006  *
2007  * Returns the result of converting one float format to another. The
2008  * conversion is performed according to the IEC/IEEE Standard for
2009  * Binary Floating-Point Arithmetic.
2010  *
2011  * The float_to_float helper only needs to take care of raising
2012  * invalid exceptions and handling the conversion on NaNs.
2013  */
2014 
/*
 * Adjust a decomposed value for conversion into format 'dstf'.  Only
 * NaN handling and the ARM alternative-half-precision (AHP) special
 * cases need attention here; range/precision adjustment happens when
 * the result is repacked by the caller.
 */
static FloatParts64 float_to_float(FloatParts64 a, const FloatFmt *dstf,
                                 float_status *s)
{
    if (dstf->arm_althp) {
        switch (a.cls) {
        case float_class_qnan:
        case float_class_snan:
            /* There is no NaN in the destination format.  Raise Invalid
             * and return a zero with the sign of the input NaN.
             */
            float_raise(float_flag_invalid, s);
            a.cls = float_class_zero;
            a.frac = 0;
            a.exp = 0;
            break;

        case float_class_inf:
            /* There is no Inf in the destination format.  Raise Invalid
             * and return the maximum normal with the correct sign.
             */
            float_raise(float_flag_invalid, s);
            a.cls = float_class_normal;
            a.exp = dstf->exp_max;
            /* All-ones fraction, positioned per the destination layout. */
            a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
            break;

        default:
            break;
        }
    } else if (is_nan(a.cls)) {
        /* Quiet an SNaN / apply the target's NaN conventions. */
        parts_return_nan(&a, s);
    }
    return a;
}
2049 
2050 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2051 {
2052     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2053     FloatParts64 pa, pr;
2054 
2055     float16a_unpack_canonical(&pa, a, s, fmt16);
2056     pr = float_to_float(pa, &float32_params, s);
2057     return float32_round_pack_canonical(&pr, s);
2058 }
2059 
2060 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2061 {
2062     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2063     FloatParts64 pa, pr;
2064 
2065     float16a_unpack_canonical(&pa, a, s, fmt16);
2066     pr = float_to_float(pa, &float64_params, s);
2067     return float64_round_pack_canonical(&pr, s);
2068 }
2069 
2070 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2071 {
2072     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2073     FloatParts64 pa, pr;
2074 
2075     float32_unpack_canonical(&pa, a, s);
2076     pr = float_to_float(pa, fmt16, s);
2077     return float16a_round_pack_canonical(&pr, s, fmt16);
2078 }
2079 
2080 static float64 QEMU_SOFTFLOAT_ATTR
2081 soft_float32_to_float64(float32 a, float_status *s)
2082 {
2083     FloatParts64 pa, pr;
2084 
2085     float32_unpack_canonical(&pa, a, s);
2086     pr = float_to_float(pa, &float64_params, s);
2087     return float64_round_pack_canonical(&pr, s);
2088 }
2089 
2090 float64 float32_to_float64(float32 a, float_status *s)
2091 {
2092     if (likely(float32_is_normal(a))) {
2093         /* Widening conversion can never produce inexact results.  */
2094         union_float32 uf;
2095         union_float64 ud;
2096         uf.s = a;
2097         ud.h = uf.h;
2098         return ud.s;
2099     } else if (float32_is_zero(a)) {
2100         return float64_set_sign(float64_zero, float32_is_neg(a));
2101     } else {
2102         return soft_float32_to_float64(a, s);
2103     }
2104 }
2105 
2106 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2107 {
2108     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2109     FloatParts64 pa, pr;
2110 
2111     float64_unpack_canonical(&pa, a, s);
2112     pr = float_to_float(pa, fmt16, s);
2113     return float16a_round_pack_canonical(&pr, s, fmt16);
2114 }
2115 
2116 float32 float64_to_float32(float64 a, float_status *s)
2117 {
2118     FloatParts64 pa, pr;
2119 
2120     float64_unpack_canonical(&pa, a, s);
2121     pr = float_to_float(pa, &float32_params, s);
2122     return float32_round_pack_canonical(&pr, s);
2123 }
2124 
2125 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2126 {
2127     FloatParts64 pa, pr;
2128 
2129     bfloat16_unpack_canonical(&pa, a, s);
2130     pr = float_to_float(pa, &float32_params, s);
2131     return float32_round_pack_canonical(&pr, s);
2132 }
2133 
2134 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2135 {
2136     FloatParts64 pa, pr;
2137 
2138     bfloat16_unpack_canonical(&pa, a, s);
2139     pr = float_to_float(pa, &float64_params, s);
2140     return float64_round_pack_canonical(&pr, s);
2141 }
2142 
2143 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2144 {
2145     FloatParts64 pa, pr;
2146 
2147     float32_unpack_canonical(&pa, a, s);
2148     pr = float_to_float(pa, &bfloat16_params, s);
2149     return bfloat16_round_pack_canonical(&pr, s);
2150 }
2151 
2152 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2153 {
2154     FloatParts64 pa, pr;
2155 
2156     float64_unpack_canonical(&pa, a, s);
2157     pr = float_to_float(pa, &bfloat16_params, s);
2158     return bfloat16_round_pack_canonical(&pr, s);
2159 }
2160 
2161 /*
2162  * Rounds the floating-point value `a' to an integer, and returns the
2163  * result as a floating-point value. The operation is performed
2164  * according to the IEC/IEEE Standard for Binary Floating-Point
2165  * Arithmetic.
2166  */
2167 
/*
 * Round the decomposed value 'a' to an integral value in floating-point
 * form, under rounding mode 'rmode', after scaling the exponent by
 * 'scale' (i.e. effectively rounding a * 2**scale).  Raises inexact on
 * 's' when any fraction bits are discarded.
 */
static FloatParts64 round_to_int(FloatParts64 a, FloatRoundMode rmode,
                               int scale, float_status *s)
{
    switch (a.cls) {
    case float_class_qnan:
    case float_class_snan:
        /* Quiet an SNaN / apply the target's NaN conventions. */
        parts_return_nan(&a, s);
        break;

    case float_class_zero:
    case float_class_inf:
        /* already "integral" */
        break;

    case float_class_normal:
        /* Clamp to avoid int overflow in a.exp; values this large are
         * far outside any fractional range anyway. */
        scale = MIN(MAX(scale, -0x10000), 0x10000);
        a.exp += scale;

        if (a.exp >= DECOMPOSED_BINARY_POINT) {
            /* already integral */
            break;
        }
        if (a.exp < 0) {
            bool one;
            /* all fractional */
            float_raise(float_flag_inexact, s);
            /* |a| < 1: the result is either 0 or 1 (with a's sign). */
            switch (rmode) {
            case float_round_nearest_even:
                /* Round up only when strictly above 1/2 (ties go to 0). */
                one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
                break;
            case float_round_ties_away:
                /* Round up when >= 1/2. */
                one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
                break;
            case float_round_to_zero:
                one = false;
                break;
            case float_round_up:
                one = !a.sign;
                break;
            case float_round_down:
                one = a.sign;
                break;
            case float_round_to_odd:
                /* Inexact result must have its lsb set, hence 1. */
                one = true;
                break;
            default:
                g_assert_not_reached();
            }

            if (one) {
                a.frac = DECOMPOSED_IMPLICIT_BIT;
                a.exp = 0;
            } else {
                a.cls = float_class_zero;
            }
        } else {
            /* Mixed integer/fraction: round away the low a.exp bits
             * below the binary point. */
            uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
            uint64_t frac_lsbm1 = frac_lsb >> 1;
            uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
            uint64_t rnd_mask = rnd_even_mask >> 1;
            uint64_t inc;

            switch (rmode) {
            case float_round_nearest_even:
                /* Add 1/2 lsb, except when exactly halfway with an even
                 * integral part (frac == ...0 1000..0), where we truncate. */
                inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
                break;
            case float_round_ties_away:
                inc = frac_lsbm1;
                break;
            case float_round_to_zero:
                inc = 0;
                break;
            case float_round_up:
                inc = a.sign ? 0 : rnd_mask;
                break;
            case float_round_down:
                inc = a.sign ? rnd_mask : 0;
                break;
            case float_round_to_odd:
                /* Force the integral lsb to 1 if the result is inexact. */
                inc = a.frac & frac_lsb ? 0 : rnd_mask;
                break;
            default:
                g_assert_not_reached();
            }

            if (a.frac & rnd_mask) {
                float_raise(float_flag_inexact, s);
                if (uadd64_overflow(a.frac, inc, &a.frac)) {
                    /* Carry out of the fraction: renormalize. */
                    a.frac >>= 1;
                    a.frac |= DECOMPOSED_IMPLICIT_BIT;
                    a.exp++;
                }
                a.frac &= ~rnd_mask;
            }
        }
        break;
    default:
        g_assert_not_reached();
    }
    return a;
}
2269 
2270 float16 float16_round_to_int(float16 a, float_status *s)
2271 {
2272     FloatParts64 pa, pr;
2273 
2274     float16_unpack_canonical(&pa, a, s);
2275     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2276     return float16_round_pack_canonical(&pr, s);
2277 }
2278 
2279 float32 float32_round_to_int(float32 a, float_status *s)
2280 {
2281     FloatParts64 pa, pr;
2282 
2283     float32_unpack_canonical(&pa, a, s);
2284     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2285     return float32_round_pack_canonical(&pr, s);
2286 }
2287 
2288 float64 float64_round_to_int(float64 a, float_status *s)
2289 {
2290     FloatParts64 pa, pr;
2291 
2292     float64_unpack_canonical(&pa, a, s);
2293     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2294     return float64_round_pack_canonical(&pr, s);
2295 }
2296 
2297 /*
2298  * Rounds the bfloat16 value `a' to an integer, and returns the
2299  * result as a bfloat16 value.
2300  */
2301 
2302 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2303 {
2304     FloatParts64 pa, pr;
2305 
2306     bfloat16_unpack_canonical(&pa, a, s);
2307     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2308     return bfloat16_round_pack_canonical(&pr, s);
2309 }
2310 
2311 /*
2312  * Returns the result of converting the floating-point value `a' to
2313  * the two's complement integer format. The conversion is performed
2314  * according to the IEC/IEEE Standard for Binary Floating-Point
2315  * Arithmetic---which means in particular that the conversion is
2316  * rounded according to the current rounding mode. If `a' is a NaN,
2317  * the largest positive integer is returned. Otherwise, if the
2318  * conversion overflows, the largest integer with the same sign as `a'
2319  * is returned.
2320 */
2321 
/*
 * Round the decomposed value 'in' (scaled by 2**scale) and convert it
 * to a signed integer, saturating to [min, max].  NaN, Inf, and
 * out-of-range inputs raise invalid and return the saturated bound;
 * the exception flags are restored to their pre-rounding state first,
 * so a transient inexact from an invalid conversion is discarded.
 */
static int64_t round_to_int_and_pack(FloatParts64 in, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s)
{
    uint64_t r;
    int orig_flags = get_float_exception_flags(s);
    FloatParts64 p = round_to_int(in, rmode, scale, s);

    switch (p.cls) {
    case float_class_snan:
    case float_class_qnan:
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return max;
    case float_class_inf:
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return p.sign ? min : max;
    case float_class_zero:
        return 0;
    case float_class_normal:
        if (p.exp <= DECOMPOSED_BINARY_POINT) {
            /* Shift the fraction down to an integer magnitude. */
            r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
        } else {
            /* Magnitude exceeds 64 bits: guaranteed out of range. */
            r = UINT64_MAX;
        }
        if (p.sign) {
            /* Compare against |min| in unsigned arithmetic to avoid
             * signed overflow on INT64_MIN. */
            if (r <= -(uint64_t) min) {
                return -r;
            } else {
                s->float_exception_flags = orig_flags | float_flag_invalid;
                return min;
            }
        } else {
            if (r <= max) {
                return r;
            } else {
                s->float_exception_flags = orig_flags | float_flag_invalid;
                return max;
            }
        }
    default:
        g_assert_not_reached();
    }
}
2365 
2366 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2367                               float_status *s)
2368 {
2369     FloatParts64 p;
2370 
2371     float16_unpack_canonical(&p, a, s);
2372     return round_to_int_and_pack(p, rmode, scale, INT8_MIN, INT8_MAX, s);
2373 }
2374 
2375 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2376                                 float_status *s)
2377 {
2378     FloatParts64 p;
2379 
2380     float16_unpack_canonical(&p, a, s);
2381     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2382 }
2383 
2384 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2385                                 float_status *s)
2386 {
2387     FloatParts64 p;
2388 
2389     float16_unpack_canonical(&p, a, s);
2390     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2391 }
2392 
2393 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2394                                 float_status *s)
2395 {
2396     FloatParts64 p;
2397 
2398     float16_unpack_canonical(&p, a, s);
2399     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2400 }
2401 
2402 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2403                                 float_status *s)
2404 {
2405     FloatParts64 p;
2406 
2407     float32_unpack_canonical(&p, a, s);
2408     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2409 }
2410 
2411 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2412                                 float_status *s)
2413 {
2414     FloatParts64 p;
2415 
2416     float32_unpack_canonical(&p, a, s);
2417     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2418 }
2419 
2420 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2421                                 float_status *s)
2422 {
2423     FloatParts64 p;
2424 
2425     float32_unpack_canonical(&p, a, s);
2426     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2427 }
2428 
2429 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2430                                 float_status *s)
2431 {
2432     FloatParts64 p;
2433 
2434     float64_unpack_canonical(&p, a, s);
2435     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2436 }
2437 
2438 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2439                                 float_status *s)
2440 {
2441     FloatParts64 p;
2442 
2443     float64_unpack_canonical(&p, a, s);
2444     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2445 }
2446 
2447 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2448                                 float_status *s)
2449 {
2450     FloatParts64 p;
2451 
2452     float64_unpack_canonical(&p, a, s);
2453     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2454 }
2455 
/*
 * float -> signed integer conversions using the rounding mode currently
 * selected in the float_status; thin wrappers over the *_scalbn workers
 * with scale 0.
 */
int8_t float16_to_int8(float16 a, float_status *s)
{
    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float16_to_int32(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float16_to_int64(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float32_to_int16(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float32_to_int32(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float32_to_int64(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float64_to_int16(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float64_to_int32(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float64_to_int64(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}
2505 
/*
 * float -> signed integer conversions that always truncate toward zero,
 * regardless of the rounding mode in the float_status.
 */
int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
2550 
2551 /*
2552  * Returns the result of converting the floating-point value `a' to
2553  * the two's complement integer format.
2554  */
2555 
2556 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2557                                  float_status *s)
2558 {
2559     FloatParts64 p;
2560 
2561     bfloat16_unpack_canonical(&p, a, s);
2562     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2563 }
2564 
2565 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2566                                  float_status *s)
2567 {
2568     FloatParts64 p;
2569 
2570     bfloat16_unpack_canonical(&p, a, s);
2571     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2572 }
2573 
2574 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2575                                  float_status *s)
2576 {
2577     FloatParts64 p;
2578 
2579     bfloat16_unpack_canonical(&p, a, s);
2580     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2581 }
2582 
/*
 * bfloat16 -> signed integer wrappers: first group uses the current
 * rounding mode, second group always truncates toward zero.
 */
int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
2612 
2613 /*
2614  *  Returns the result of converting the floating-point value `a' to
2615  *  the unsigned integer format. The conversion is performed according
2616  *  to the IEC/IEEE Standard for Binary Floating-Point
2617  *  Arithmetic---which means in particular that the conversion is
2618  *  rounded according to the current rounding mode. If `a' is a NaN,
2619  *  the largest unsigned integer is returned. Otherwise, if the
2620  *  conversion overflows, the largest unsigned integer is returned. If
 *  `a' is negative, the result is rounded and zero is returned;
2622  *  values that do not round to zero will raise the inexact exception
2623  *  flag.
2624  */
2625 
/*
 * Round the decomposed value 'in' (scaled by 2**scale) and convert it
 * to an unsigned integer, saturating to [0, max].  NaN, Inf, negative,
 * and out-of-range inputs raise invalid and return the saturated bound
 * (0 for negatives); the flags are restored to their pre-rounding state
 * first, so a transient inexact on an invalid conversion is discarded.
 */
static uint64_t round_to_uint_and_pack(FloatParts64 in, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s)
{
    int orig_flags = get_float_exception_flags(s);
    FloatParts64 p = round_to_int(in, rmode, scale, s);
    uint64_t r;

    switch (p.cls) {
    case float_class_snan:
    case float_class_qnan:
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return max;
    case float_class_inf:
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return p.sign ? 0 : max;
    case float_class_zero:
        return 0;
    case float_class_normal:
        if (p.sign) {
            /* Nonzero negative after rounding: invalid, clamp to 0. */
            s->float_exception_flags = orig_flags | float_flag_invalid;
            return 0;
        }

        if (p.exp <= DECOMPOSED_BINARY_POINT) {
            /* Shift the fraction down to an integer magnitude. */
            r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
        } else {
            s->float_exception_flags = orig_flags | float_flag_invalid;
            return max;
        }

        /* For uint64 this will never trip, but if p.exp is too large
         * to shift a decomposed fraction we shall have exited via the
         * 3rd leg above.
         */
        if (r > max) {
            s->float_exception_flags = orig_flags | float_flag_invalid;
            return max;
        }
        return r;
    default:
        g_assert_not_reached();
    }
}
2670 
2671 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2672                                 float_status *s)
2673 {
2674     FloatParts64 p;
2675 
2676     float16_unpack_canonical(&p, a, s);
2677     return round_to_uint_and_pack(p, rmode, scale, UINT8_MAX, s);
2678 }
2679 
2680 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2681                                   float_status *s)
2682 {
2683     FloatParts64 p;
2684 
2685     float16_unpack_canonical(&p, a, s);
2686     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2687 }
2688 
2689 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2690                                   float_status *s)
2691 {
2692     FloatParts64 p;
2693 
2694     float16_unpack_canonical(&p, a, s);
2695     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2696 }
2697 
2698 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2699                                   float_status *s)
2700 {
2701     FloatParts64 p;
2702 
2703     float16_unpack_canonical(&p, a, s);
2704     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2705 }
2706 
2707 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2708                                   float_status *s)
2709 {
2710     FloatParts64 p;
2711 
2712     float32_unpack_canonical(&p, a, s);
2713     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2714 }
2715 
2716 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2717                                   float_status *s)
2718 {
2719     FloatParts64 p;
2720 
2721     float32_unpack_canonical(&p, a, s);
2722     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2723 }
2724 
2725 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2726                                   float_status *s)
2727 {
2728     FloatParts64 p;
2729 
2730     float32_unpack_canonical(&p, a, s);
2731     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2732 }
2733 
2734 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2735                                   float_status *s)
2736 {
2737     FloatParts64 p;
2738 
2739     float64_unpack_canonical(&p, a, s);
2740     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2741 }
2742 
2743 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2744                                   float_status *s)
2745 {
2746     FloatParts64 p;
2747 
2748     float64_unpack_canonical(&p, a, s);
2749     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2750 }
2751 
2752 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2753                                   float_status *s)
2754 {
2755     FloatParts64 p;
2756 
2757     float64_unpack_canonical(&p, a, s);
2758     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2759 }
2760 
/*
 * float -> unsigned integer conversions using the rounding mode
 * currently selected in the float_status; thin wrappers over the
 * *_scalbn workers with scale 0.
 */
uint8_t float16_to_uint8(float16 a, float_status *s)
{
    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float16_to_uint32(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float16_to_uint64(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float32_to_uint16(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float32_to_uint32(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float32_to_uint64(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float64_to_uint16(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float64_to_uint32(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float64_to_uint64(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}
2810 
2811 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2812 {
2813     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2814 }
2815 
2816 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2817 {
2818     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2819 }
2820 
2821 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2822 {
2823     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2824 }
2825 
2826 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2827 {
2828     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2829 }
2830 
2831 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2832 {
2833     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2834 }
2835 
2836 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2837 {
2838     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2839 }
2840 
2841 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2842 {
2843     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2844 }
2845 
2846 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2847 {
2848     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2849 }
2850 
2851 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2852 {
2853     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2854 }
2855 
2856 /*
2857  *  Returns the result of converting the bfloat16 value `a' to
2858  *  the unsigned integer format.
2859  */
2860 
/*
 * bfloat16 to unsigned integer, input pre-scaled by 2**-scale and
 * rounded per `rmode'; the UINT*_MAX argument bounds the result for
 * each target width (handled inside round_to_uint_and_pack).
 */
uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
}

uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
}

uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
}
2887 
/*
 * bfloat16 to unsigned integer entry points: scale = 0 shorthands for
 * the *_scalbn helpers above, with either the current rounding mode
 * or explicit round-to-zero.
 */
uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
2917 
2918 /*
2919  * Integer to float conversions
2920  *
2921  * Returns the result of converting the two's complement integer `a'
2922  * to the floating-point format. The conversion is performed according
2923  * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2924  */
2925 
2926 static FloatParts64 int_to_float(int64_t a, int scale, float_status *status)
2927 {
2928     FloatParts64 r = { .sign = false };
2929 
2930     if (a == 0) {
2931         r.cls = float_class_zero;
2932     } else {
2933         uint64_t f = a;
2934         int shift;
2935 
2936         r.cls = float_class_normal;
2937         if (a < 0) {
2938             f = -f;
2939             r.sign = true;
2940         }
2941         shift = clz64(f);
2942         scale = MIN(MAX(scale, -0x10000), 0x10000);
2943 
2944         r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2945         r.frac = f << shift;
2946     }
2947 
2948     return r;
2949 }
2950 
/*
 * Signed integer to float16/float32/float64 entry points.  All sizes
 * funnel through the int64 scalbn variant: narrower arguments are
 * sign-extended to int64_t by the implicit conversion, which is
 * exact.  The non-scalbn names are scale = 0 shorthands.
 */
float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 pa = int_to_float(a, scale, status);
    return float16_round_pack_canonical(&pa, status);
}

float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int64_to_float16(int64_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int32_to_float16(int32_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int16_to_float16(int16_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int8_to_float16(int8_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 pa = int_to_float(a, scale, status);
    return float32_round_pack_canonical(&pa, status);
}

float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int64_to_float32(int64_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int32_to_float32(int32_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int16_to_float32(int16_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 pa = int_to_float(a, scale, status);
    return float64_round_pack_canonical(&pa, status);
}

float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int64_to_float64(int64_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int32_to_float64(int32_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int16_to_float64(int16_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}
3048 
3049 /*
3050  * Returns the result of converting the two's complement integer `a'
3051  * to the bfloat16 format.
3052  */
3053 
/*
 * Signed integer to bfloat16, funnelled through the int64 scalbn
 * variant exactly as the float16/32/64 conversions above.
 */
bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 pa = int_to_float(a, scale, status);
    return bfloat16_round_pack_canonical(&pa, status);
}

bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}
3084 
3085 /*
3086  * Unsigned Integer to float conversions
3087  *
3088  * Returns the result of converting the unsigned integer `a' to the
3089  * floating-point format. The conversion is performed according to the
3090  * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3091  */
3092 
3093 static FloatParts64 uint_to_float(uint64_t a, int scale, float_status *status)
3094 {
3095     FloatParts64 r = { .sign = false };
3096     int shift;
3097 
3098     if (a == 0) {
3099         r.cls = float_class_zero;
3100     } else {
3101         scale = MIN(MAX(scale, -0x10000), 0x10000);
3102         shift = clz64(a);
3103         r.cls = float_class_normal;
3104         r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
3105         r.frac = a << shift;
3106     }
3107 
3108     return r;
3109 }
3110 
/*
 * Unsigned integer to float16/float32/float64 entry points.  All
 * sizes funnel through the uint64 scalbn variant: narrower arguments
 * are zero-extended to uint64_t, which is exact.  The non-scalbn
 * names are scale = 0 shorthands.
 */
float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 pa = uint_to_float(a, scale, status);
    return float16_round_pack_canonical(&pa, status);
}

float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint64_to_float16(uint64_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint32_to_float16(uint32_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint16_to_float16(uint16_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint8_to_float16(uint8_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 pa = uint_to_float(a, scale, status);
    return float32_round_pack_canonical(&pa, status);
}

float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint64_to_float32(uint64_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint32_to_float32(uint32_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint16_to_float32(uint16_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 pa = uint_to_float(a, scale, status);
    return float64_round_pack_canonical(&pa, status);
}

float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint64_to_float64(uint64_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint32_to_float64(uint32_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint16_to_float64(uint16_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}
3208 
3209 /*
3210  * Returns the result of converting the unsigned integer `a' to the
3211  * bfloat16 format.
3212  */
3213 
/*
 * Unsigned integer to bfloat16, funnelled through the uint64 scalbn
 * variant exactly as the float16/32/64 conversions above.
 */
bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 pa = uint_to_float(a, scale, status);
    return bfloat16_round_pack_canonical(&pa, status);
}

bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}
3244 
3245 /* Float Min/Max */
3246 /* min() and max() functions. These can't be implemented as
3247  * 'compare and pick one input' because that would mishandle
3248  * NaNs and +0 vs -0.
3249  *
3250  * minnum() and maxnum() functions. These are similar to the min()
3251  * and max() functions but if one of the arguments is a QNaN and
3252  * the other is numerical then the numerical argument is returned.
3253  * SNaNs will get quietened before being returned.
3254  * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
3255  * and maxNum() operations. min() and max() are the typical min/max
3256  * semantics provided by many CPUs which predate that specification.
3257  *
3258  * minnummag() and maxnummag() functions correspond to minNumMag()
 * and maxNumMag() from the IEEE-754 2008.
3260  */
/*
 * Core of the min/max family on canonicalized parts.  `ismin' selects
 * min vs max; `ieee' selects the IEEE-754-2008 minNum/maxNum NaN
 * handling (a single quiet NaN loses to a number); `ismag' compares
 * magnitudes instead of signed values.
 */
static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
                                bool ieee, bool ismag, float_status *s)
{
    if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
        if (ieee) {
            /* Takes two floating-point values `a' and `b', one of
             * which is a NaN, and returns the appropriate NaN
             * result. If either `a' or `b' is a signaling NaN,
             * the invalid exception is raised.
             */
            if (is_snan(a.cls) || is_snan(b.cls)) {
                return *parts_pick_nan(&a, &b, s);
            } else if (is_nan(a.cls) && !is_nan(b.cls)) {
                /* Quiet NaN vs number: the numerical operand wins. */
                return b;
            } else if (is_nan(b.cls) && !is_nan(a.cls)) {
                return a;
            }
        }
        /* Non-IEEE semantics, or both operands NaN: propagate a NaN. */
        return *parts_pick_nan(&a, &b, s);
    } else {
        int a_exp, b_exp;

        /*
         * Map each class onto an effective exponent so that zero and
         * infinity order correctly against normals in the exponent
         * comparisons below.
         */
        switch (a.cls) {
        case float_class_normal:
            a_exp = a.exp;
            break;
        case float_class_inf:
            a_exp = INT_MAX;
            break;
        case float_class_zero:
            a_exp = INT_MIN;
            break;
        default:
            g_assert_not_reached();
            break;
        }
        switch (b.cls) {
        case float_class_normal:
            b_exp = b.exp;
            break;
        case float_class_inf:
            b_exp = INT_MAX;
            break;
        case float_class_zero:
            b_exp = INT_MIN;
            break;
        default:
            g_assert_not_reached();
            break;
        }

        /*
         * Magnitude compare applies only when the magnitudes differ;
         * equal magnitudes fall through to the signed compare below
         * (which then resolves e.g. +0 vs -0 by sign).
         */
        if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
            bool a_less = a_exp < b_exp;
            if (a_exp == b_exp) {
                a_less = a.frac < b.frac;
            }
            return a_less ^ ismin ? b : a;
        }

        if (a.sign == b.sign) {
            bool a_less = a_exp < b_exp;
            if (a_exp == b_exp) {
                a_less = a.frac < b.frac;
            }
            /* For negative values the magnitude ordering is reversed. */
            return a.sign ^ a_less ^ ismin ? b : a;
        } else {
            /* Differing signs: the negative operand is the smaller. */
            return a.sign ^ ismin ? b : a;
        }
    }
}
3331 
/*
 * Expand the min/max entry points for one float width.  `isieee'
 * selects IEEE-754-2008 minNum/maxNum NaN handling; `ismag' selects
 * magnitude (minNumMag/maxNumMag) comparison.
 */
#define MINMAX(sz, name, ismin, isieee, ismag)                          \
float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b,      \
                                     float_status *s)                   \
{                                                                       \
    FloatParts64 parts_a, parts_b, parts_res;                           \
    float ## sz ## _unpack_canonical(&parts_a, a, s);                   \
    float ## sz ## _unpack_canonical(&parts_b, b, s);                   \
    parts_res = minmax_floats(parts_a, parts_b, ismin, isieee, ismag, s); \
    return float ## sz ## _round_pack_canonical(&parts_res, s);         \
}
3342 
/*
 * Instantiate min/max, minnum/maxnum and minnummag/maxnummag for
 * float16, float32 and float64.
 */
MINMAX(16, min, true, false, false)
MINMAX(16, minnum, true, true, false)
MINMAX(16, minnummag, true, true, true)
MINMAX(16, max, false, false, false)
MINMAX(16, maxnum, false, true, false)
MINMAX(16, maxnummag, false, true, true)

MINMAX(32, min, true, false, false)
MINMAX(32, minnum, true, true, false)
MINMAX(32, minnummag, true, true, true)
MINMAX(32, max, false, false, false)
MINMAX(32, maxnum, false, true, false)
MINMAX(32, maxnummag, false, true, true)

MINMAX(64, min, true, false, false)
MINMAX(64, minnum, true, true, false)
MINMAX(64, minnummag, true, true, true)
MINMAX(64, max, false, false, false)
MINMAX(64, maxnum, false, true, false)
MINMAX(64, maxnummag, false, true, true)

#undef MINMAX
3365 
/* As MINMAX above, but for the bfloat16 type. */
#define BF16_MINMAX(name, ismin, isieee, ismag)                         \
bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s)     \
{                                                                       \
    FloatParts64 parts_a, parts_b, parts_res;                           \
    bfloat16_unpack_canonical(&parts_a, a, s);                          \
    bfloat16_unpack_canonical(&parts_b, b, s);                          \
    parts_res = minmax_floats(parts_a, parts_b, ismin, isieee, ismag, s); \
    return bfloat16_round_pack_canonical(&parts_res, s);                \
}
3375 
/* Instantiate the full min/max family for bfloat16. */
BF16_MINMAX(min, true, false, false)
BF16_MINMAX(minnum, true, true, false)
BF16_MINMAX(minnummag, true, true, true)
BF16_MINMAX(max, false, false, false)
BF16_MINMAX(maxnum, false, true, false)
BF16_MINMAX(maxnummag, false, true, true)

#undef BF16_MINMAX
3384 
3385 /* Floating point compare */
3386 static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
3387                                     float_status *s)
3388 {
3389     if (is_nan(a.cls) || is_nan(b.cls)) {
3390         if (!is_quiet ||
3391             a.cls == float_class_snan ||
3392             b.cls == float_class_snan) {
3393             float_raise(float_flag_invalid, s);
3394         }
3395         return float_relation_unordered;
3396     }
3397 
3398     if (a.cls == float_class_zero) {
3399         if (b.cls == float_class_zero) {
3400             return float_relation_equal;
3401         }
3402         return b.sign ? float_relation_greater : float_relation_less;
3403     } else if (b.cls == float_class_zero) {
3404         return a.sign ? float_relation_less : float_relation_greater;
3405     }
3406 
3407     /* The only really important thing about infinity is its sign. If
3408      * both are infinities the sign marks the smallest of the two.
3409      */
3410     if (a.cls == float_class_inf) {
3411         if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
3412             return float_relation_equal;
3413         }
3414         return a.sign ? float_relation_less : float_relation_greater;
3415     } else if (b.cls == float_class_inf) {
3416         return b.sign ? float_relation_greater : float_relation_less;
3417     }
3418 
3419     if (a.sign != b.sign) {
3420         return a.sign ? float_relation_less : float_relation_greater;
3421     }
3422 
3423     if (a.exp == b.exp) {
3424         if (a.frac == b.frac) {
3425             return float_relation_equal;
3426         }
3427         if (a.sign) {
3428             return a.frac > b.frac ?
3429                 float_relation_less : float_relation_greater;
3430         } else {
3431             return a.frac > b.frac ?
3432                 float_relation_greater : float_relation_less;
3433         }
3434     } else {
3435         if (a.sign) {
3436             return a.exp > b.exp ? float_relation_less : float_relation_greater;
3437         } else {
3438             return a.exp > b.exp ? float_relation_greater : float_relation_less;
3439         }
3440     }
3441 }
3442 
/*
 * Expand a softfloat compare helper for one width.  `attr' lets the
 * f16 variant be flattened while the wider ones keep the softfloat
 * attribute.
 */
#define COMPARE(name, attr, sz)                                         \
static int attr                                                         \
name(float ## sz a, float ## sz b, bool is_quiet, float_status *s)      \
{                                                                       \
    FloatParts64 pa, pb;                                                \
    float ## sz ## _unpack_canonical(&pa, a, s);                        \
    float ## sz ## _unpack_canonical(&pb, b, s);                        \
    return compare_floats(pa, pb, is_quiet, s);                         \
}

COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)

#undef COMPARE
3458 
/* Signaling (compare) and quiet (compare_quiet) float16 entry points. */
FloatRelation float16_compare(float16 a, float16 b, float_status *s)
{
    return soft_f16_compare(a, b, false, s);
}

FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
{
    return soft_f16_compare(a, b, true, s);
}
3468 
/*
 * Hardfloat fast path for float32 compare.  The C99 <math.h>
 * comparison macros (isgreater etc.) are quiet -- they do not raise
 * invalid for quiet NaNs -- so any unordered result falls through to
 * the softfloat implementation, which raises the correct flags.
 */
static FloatRelation QEMU_FLATTEN
f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    /* Flush squashed denormals before using the host FPU. */
    float32_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /* The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return soft_f32_compare(ua.s, ub.s, is_quiet, s);
}
3497 
/* Signaling (compare) and quiet (compare_quiet) float32 entry points. */
FloatRelation float32_compare(float32 a, float32 b, float_status *s)
{
    return f32_compare(a, b, false, s);
}

FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
{
    return f32_compare(a, b, true, s);
}
3507 
/*
 * Hardfloat fast path for float64 compare; mirrors f32_compare.
 * Unordered results fall through to softfloat so the flags are set
 * correctly.
 */
static FloatRelation QEMU_FLATTEN
f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    /* Flush squashed denormals before using the host FPU. */
    float64_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /* The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return soft_f64_compare(ua.s, ub.s, is_quiet, s);
}
3536 
/* Signaling (compare) and quiet (compare_quiet) float64 entry points. */
FloatRelation float64_compare(float64 a, float64 b, float_status *s)
{
    return f64_compare(a, b, false, s);
}

FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
{
    return f64_compare(a, b, true, s);
}
3546 
/* bfloat16 compare; no hardfloat fast path, always via softfloat. */
static FloatRelation QEMU_FLATTEN
soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
{
    FloatParts64 pa, pb;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    return compare_floats(pa, pb, is_quiet, s);
}
3556 
/* Signaling (compare) and quiet (compare_quiet) bfloat16 entry points. */
FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
{
    return soft_bf16_compare(a, b, false, s);
}

FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
{
    return soft_bf16_compare(a, b, true, s);
}
3566 
3567 /* Multiply A by 2 raised to the power N.  */
3568 static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
3569 {
3570     if (unlikely(is_nan(a.cls))) {
3571         parts_return_nan(&a, s);
3572     }
3573     if (a.cls == float_class_normal) {
3574         /* The largest float type (even though not supported by FloatParts64)
3575          * is float128, which has a 15 bit exponent.  Bounding N to 16 bits
3576          * still allows rounding to infinity, without allowing overflow
3577          * within the int32_t that backs FloatParts64.exp.
3578          */
3579         n = MIN(MAX(n, -0x10000), 0x10000);
3580         a.exp += n;
3581     }
3582     return a;
3583 }
3584 
/*
 * Per-format scalbn entry points: unpack, adjust the exponent via
 * scalbn_decomposed, repack with rounding.
 */
float16 float16_scalbn(float16 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float16_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float16_round_pack_canonical(&pr, status);
}

float32 float32_scalbn(float32 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float32_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float32_round_pack_canonical(&pr, status);
}

float64 float64_scalbn(float64 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float64_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float64_round_pack_canonical(&pr, status);
}

bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    bfloat16_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return bfloat16_round_pack_canonical(&pr, status);
}
3620 
3621 /*
3622  * Square Root
3623  *
3624  * The old softfloat code did an approximation step before zeroing in
3625  * on the final result. However for simpleness we just compute the
3626  * square root by iterating down from the implicit bit to enough extra
3627  * bits to ensure we get a correctly rounded result.
3628  *
3629  * This does mean however the calculation is slower than before,
3630  * especially for 64 bit floats.
3631  */
3632 
/*
 * Bit-by-bit square root on canonicalized parts.  Special cases
 * (NaN, +/-0, negative, +inf) are resolved up front; normals iterate
 * from the implicit bit down to enough extra bits for correct
 * rounding by the caller's round-and-pack step.
 */
static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
{
    uint64_t a_frac, r_frac, s_frac;
    int bit, last_bit;

    if (is_nan(a.cls)) {
        parts_return_nan(&a, s);
        return a;
    }
    if (a.cls == float_class_zero) {
        return a;  /* sqrt(+-0) = +-0 */
    }
    if (a.sign) {
        /* sqrt of a negative non-zero value: invalid, default NaN. */
        float_raise(float_flag_invalid, s);
        parts_default_nan(&a, s);
        return a;
    }
    if (a.cls == float_class_inf) {
        return a;  /* sqrt(+inf) = +inf */
    }

    assert(a.cls == float_class_normal);

    /* We need two overflow bits at the top. Adding room for that is a
     * right shift. If the exponent is odd, we can discard the low bit
     * by multiplying the fraction by 2; that's a left shift. Combine
     * those and we shift right by 1 if the exponent is odd, otherwise 2.
     */
    a_frac = a.frac >> (2 - (a.exp & 1));
    a.exp >>= 1;

    /* Bit-by-bit computation of sqrt.  */
    r_frac = 0;
    s_frac = 0;

    /* Iterate from implicit bit down to the 3 extra bits to compute a
     * properly rounded result. Remember we've inserted two more bits
     * at the top, so these positions are two less.
     */
    bit = DECOMPOSED_BINARY_POINT - 2;
    last_bit = MAX(p->frac_shift - 4, 0);
    do {
        uint64_t q = 1ULL << bit;
        uint64_t t_frac = s_frac + q;
        if (t_frac <= a_frac) {
            s_frac = t_frac + q;
            a_frac -= t_frac;
            r_frac += q;
        }
        a_frac <<= 1;
    } while (--bit >= last_bit);

    /* Undo the right shift done above. If there is any remaining
     * fraction, the result is inexact. Set the sticky bit.
     */
    a.frac = (r_frac << 2) + (a_frac != 0);

    return a;
}
3692 
/*
 * Softfloat sqrt entry points: unpack, run the bit-by-bit sqrt with
 * the format parameters, repack with rounding.  The f32/f64 variants
 * are the slow paths behind the hardfloat wrappers below.
 */
float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
{
    FloatParts64 pa, pr;

    float16_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float16_params);
    return float16_round_pack_canonical(&pr, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_sqrt(float32 a, float_status *status)
{
    FloatParts64 pa, pr;

    float32_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float32_params);
    return float32_round_pack_canonical(&pr, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_sqrt(float64 a, float_status *status)
{
    FloatParts64 pa, pr;

    float64_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float64_params);
    return float64_round_pack_canonical(&pr, status);
}
3721 
/*
 * Hardfloat fast path for float32 sqrt: only non-negative zero or
 * normal inputs can use the host sqrtf(); everything else (NaN, inf,
 * denormal, negative -- including -0) goes through softfloat so flags
 * and NaN handling stay correct.
 */
float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        /* Classify via the host FPU when that is the faster option. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}
3748 
/*
 * Hardfloat fast path for float64 sqrt; mirrors float32_sqrt, using
 * the host sqrt() for non-negative zero-or-normal inputs only.
 */
float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        /* Classify via the host FPU when that is the faster option. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}
3775 
3776 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3777 {
3778     FloatParts64 pa, pr;
3779 
3780     bfloat16_unpack_canonical(&pa, a, status);
3781     pr = sqrt_float(pa, status, &bfloat16_params);
3782     return bfloat16_round_pack_canonical(&pr, status);
3783 }
3784 
3785 /*----------------------------------------------------------------------------
3786 | The pattern for a default generated NaN.
3787 *----------------------------------------------------------------------------*/
3788 
float16 float16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    /* Move the canonical fraction down into float16's fraction field. */
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}
3797 
float32 float32_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    /* Move the canonical fraction down into float32's fraction field. */
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}
3806 
float64 float64_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    /* Move the canonical fraction down into float64's fraction field. */
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}
3815 
float128 float128_default_nan(float_status *status)
{
    FloatParts128 p;

    parts_default_nan(&p, status);
    /* 128-bit fraction lives in two words; use the double-word shift. */
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}
3824 
bfloat16 bfloat16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    /* Move the canonical fraction down into bfloat16's fraction field. */
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}
3833 
3834 /*----------------------------------------------------------------------------
3835 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3836 *----------------------------------------------------------------------------*/
3837 
float16 float16_silence_nan(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_raw(&p, a);
    /* Shift into the canonical fraction position expected by parts_*. */
    p.frac <<= float16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}
3848 
float32 float32_silence_nan(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_raw(&p, a);
    /* Shift into the canonical fraction position expected by parts_*. */
    p.frac <<= float32_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}
3859 
float64 float64_silence_nan(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_raw(&p, a);
    /* Shift into the canonical fraction position expected by parts_*. */
    p.frac <<= float64_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}
3870 
bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_raw(&p, a);
    /* Shift into the canonical fraction position expected by parts_*. */
    p.frac <<= bfloat16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}
3881 
float128 float128_silence_nan(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_raw(&p, a);
    /* 128-bit fraction: use the double-word shift helpers. */
    frac_shl(&p, float128_params.frac_shift);
    parts_silence_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}
3892 
3893 /*----------------------------------------------------------------------------
3894 | If `a' is denormal and we are in flush-to-zero mode then set the
3895 | input-denormal exception and return zero. Otherwise just return the value.
3896 *----------------------------------------------------------------------------*/
3897 
3898 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
3899 {
3900     if (p.exp == 0 && p.frac != 0) {
3901         float_raise(float_flag_input_denormal, status);
3902         return true;
3903     }
3904 
3905     return false;
3906 }
3907 
3908 float16 float16_squash_input_denormal(float16 a, float_status *status)
3909 {
3910     if (status->flush_inputs_to_zero) {
3911         FloatParts64 p;
3912 
3913         float16_unpack_raw(&p, a);
3914         if (parts_squash_denormal(p, status)) {
3915             return float16_set_sign(float16_zero, p.sign);
3916         }
3917     }
3918     return a;
3919 }
3920 
3921 float32 float32_squash_input_denormal(float32 a, float_status *status)
3922 {
3923     if (status->flush_inputs_to_zero) {
3924         FloatParts64 p;
3925 
3926         float32_unpack_raw(&p, a);
3927         if (parts_squash_denormal(p, status)) {
3928             return float32_set_sign(float32_zero, p.sign);
3929         }
3930     }
3931     return a;
3932 }
3933 
3934 float64 float64_squash_input_denormal(float64 a, float_status *status)
3935 {
3936     if (status->flush_inputs_to_zero) {
3937         FloatParts64 p;
3938 
3939         float64_unpack_raw(&p, a);
3940         if (parts_squash_denormal(p, status)) {
3941             return float64_set_sign(float64_zero, p.sign);
3942         }
3943     }
3944     return a;
3945 }
3946 
3947 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3948 {
3949     if (status->flush_inputs_to_zero) {
3950         FloatParts64 p;
3951 
3952         bfloat16_unpack_raw(&p, a);
3953         if (parts_squash_denormal(p, status)) {
3954             return bfloat16_set_sign(bfloat16_zero, p.sign);
3955         }
3956     }
3957     return a;
3958 }
3959 
3960 /*----------------------------------------------------------------------------
3961 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3962 | and 7, and returns the properly rounded 32-bit integer corresponding to the
3963 | input.  If `zSign' is 1, the input is negated before being converted to an
3964 | integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
3965 | is simply rounded to an integer, with the inexact exception raised if the
3966 | input cannot be represented exactly as an integer.  However, if the fixed-
3967 | point input is too large, the invalid exception is raised and the largest
3968 | positive or negative integer is returned.
3969 *----------------------------------------------------------------------------*/
3970 
static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    int32_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Choose the increment added before the 7 fraction bits are
     * discarded: 0x40 is half an ulp, 0x7f just under a full ulp.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round away only when the integer part is currently even. */
        roundIncrement = absZ & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
    }
    roundBits = absZ & 0x7F;
    absZ = ( absZ + roundIncrement )>>7;
    /* On an exact half-way tie, nearest-even clears the low result bit. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        absZ &= ~1;
    }
    z = absZ;
    if ( zSign ) z = - z;
    /*
     * Out of int32 range: either the rounded magnitude exceeds 32 bits,
     * or negation flipped the sign unexpectedly.  Saturate and raise
     * the invalid exception.
     */
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
        float_raise(float_flag_invalid, status);
        return zSign ? INT32_MIN : INT32_MAX;
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
4018 
4019 /*----------------------------------------------------------------------------
4020 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4021 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4022 | and returns the properly rounded 64-bit integer corresponding to the input.
4023 | If `zSign' is 1, the input is negated before being converted to an integer.
4024 | Ordinarily, the fixed-point input is simply rounded to an integer, with
4025 | the inexact exception raised if the input cannot be represented exactly as
4026 | an integer.  However, if the fixed-point input is too large, the invalid
4027 | exception is raised and the largest positive or negative integer is
4028 | returned.
4029 *----------------------------------------------------------------------------*/
4030 
static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
                               float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;
    int64_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * absZ1 holds the discarded fraction; decide whether the integer
     * part absZ0 must be incremented.  For nearest modes, increment if
     * the top fraction bit (the half) is set.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t) absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if ( increment ) {
        ++absZ0;
        /* Carry out of 64 bits means the magnitude cannot fit. */
        if ( absZ0 == 0 ) goto overflow;
        /* Exact tie (only the half bit was set): round to even. */
        if (!(absZ1 << 1) && roundNearestEven) {
            absZ0 &= ~1;
        }
    }
    z = absZ0;
    if ( zSign ) z = - z;
    /* Sign of the result disagrees with zSign: out of int64 range. */
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
 overflow:
        float_raise(float_flag_invalid, status);
        return zSign ? INT64_MIN : INT64_MAX;
    }
    if (absZ1) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
4080 
4081 /*----------------------------------------------------------------------------
4082 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4083 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4084 | and returns the properly rounded 64-bit unsigned integer corresponding to the
4085 | input.  Ordinarily, the fixed-point input is simply rounded to an integer,
4086 | with the inexact exception raised if the input cannot be represented exactly
4087 | as an integer.  However, if the fixed-point input is too large, the invalid
4088 | exception is raised and the largest unsigned integer is returned.
4089 *----------------------------------------------------------------------------*/
4090 
/*
 * NOTE(review): the result is the unsigned value absZ0 carried through an
 * int64_t return type; callers presumably reinterpret the bit pattern as
 * uint64_t -- confirm against call sites before changing the signature.
 */
static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
                                uint64_t absZ1, float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = (roundingMode == float_round_nearest_even);
    /* absZ1 is the discarded fraction; see roundAndPackInt64. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if (increment) {
        ++absZ0;
        /* Carry out of 64 bits: saturate to UINT64_MAX with invalid. */
        if (absZ0 == 0) {
            float_raise(float_flag_invalid, status);
            return UINT64_MAX;
        }
        /* Exact tie (only the half bit was set): round to even. */
        if (!(absZ1 << 1) && roundNearestEven) {
            absZ0 &= ~1;
        }
    }

    /* Any negative non-zero value is out of unsigned range. */
    if (zSign && absZ0) {
        float_raise(float_flag_invalid, status);
        return 0;
    }

    if (absZ1) {
        float_raise(float_flag_inexact, status);
    }
    return absZ0;
}
4140 
4141 /*----------------------------------------------------------------------------
4142 | Normalizes the subnormal single-precision floating-point value represented
4143 | by the denormalized significand `aSig'.  The normalized exponent and
4144 | significand are stored at the locations pointed to by `zExpPtr' and
4145 | `zSigPtr', respectively.
4146 *----------------------------------------------------------------------------*/
4147 
static void normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr,
                                      uint32_t *zSigPtr)
{
    /* Shift the most significant set bit up to bit 23 (8 below the top). */
    int8_t shiftCount = clz32(aSig) - 8;

    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4158 
4159 /*----------------------------------------------------------------------------
4160 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4161 | and significand `zSig', and returns the proper single-precision floating-
4162 | point value corresponding to the abstract input.  Ordinarily, the abstract
4163 | value is simply rounded and packed into the single-precision format, with
4164 | the inexact exception raised if the abstract input cannot be represented
4165 | exactly.  However, if the abstract value is too large, the overflow and
4166 | inexact exceptions are raised and an infinity or maximal finite value is
4167 | returned.  If the abstract value is too small, the input value is rounded to
4168 | a subnormal number, and the underflow and inexact exceptions are raised if
4169 | the abstract input cannot be represented exactly as a subnormal single-
4170 | precision floating-point number.
4171 |     The input significand `zSig' has its binary point between bits 30
4172 | and 29, which is 7 bits to the left of the usual location.  This shifted
4173 | significand must be normalized or smaller.  If `zSig' is not normalized,
4174 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4175 | and it must not require rounding.  In the usual case that `zSig' is
4176 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4177 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4178 | Binary Floating-Point Arithmetic.
4179 *----------------------------------------------------------------------------*/
4180 
static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Increment added before the 7 extra fraction bits are discarded:
     * 0x40 is half an ulp, 0x7f just under a full ulp.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round away only when the kept low bit is currently clear. */
        roundIncrement = zSig & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
        break;
    }
    roundBits = zSig & 0x7F;
    /* Unsigned compare folds both zExp >= 0xFD and zExp < 0 cases. */
    if ( 0xFD <= (uint16_t) zExp ) {
        if (    ( 0xFD < zExp )
             || (    ( zExp == 0xFD )
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /*
             * Overflow.  Rounding toward zero (for this sign) or
             * round-to-odd produces the maximal finite value instead
             * of infinity; -!overflow_to_inf subtracts 1 from the
             * packed infinity encoding in that case.
             */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat32(zSign, 0xFF, -!overflow_to_inf);
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat32(zSign, 0, 0);
            }
            /*
             * Tininess: either detected before rounding, or the value
             * is still below the normal range after rounding.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < 0x80000000);
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x7F;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = zSig & 0x80 ? 0 : 0x7f;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>7;
    /* On an exact tie, nearest-even clears the low significand bit. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat32( zSign, zExp, zSig );

}
4257 
4258 /*----------------------------------------------------------------------------
4259 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4260 | and significand `zSig', and returns the proper single-precision floating-
4261 | point value corresponding to the abstract input.  This routine is just like
4262 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4263 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4264 | floating-point exponent.
4265 *----------------------------------------------------------------------------*/
4266 
4267 static float32
4268  normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4269                               float_status *status)
4270 {
4271     int8_t shiftCount;
4272 
4273     shiftCount = clz32(zSig) - 1;
4274     return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4275                                status);
4276 
4277 }
4278 
4279 /*----------------------------------------------------------------------------
4280 | Normalizes the subnormal double-precision floating-point value represented
4281 | by the denormalized significand `aSig'.  The normalized exponent and
4282 | significand are stored at the locations pointed to by `zExpPtr' and
4283 | `zSigPtr', respectively.
4284 *----------------------------------------------------------------------------*/
4285 
static void normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr,
                                      uint64_t *zSigPtr)
{
    /* Shift the most significant set bit up to bit 52 (11 below the top). */
    int8_t shiftCount = clz64(aSig) - 11;

    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4296 
4297 /*----------------------------------------------------------------------------
4298 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4299 | double-precision floating-point value, returning the result.  After being
4300 | shifted into the proper positions, the three fields are simply added
4301 | together to form the result.  This means that any integer portion of `zSig'
4302 | will be added into the exponent.  Since a properly normalized significand
4303 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4304 | than the desired result exponent whenever `zSig' is a complete, normalized
4305 | significand.
4306 *----------------------------------------------------------------------------*/
4307 
4308 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4309 {
4310 
4311     return make_float64(
4312         ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4313 
4314 }
4315 
4316 /*----------------------------------------------------------------------------
4317 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4318 | and significand `zSig', and returns the proper double-precision floating-
4319 | point value corresponding to the abstract input.  Ordinarily, the abstract
4320 | value is simply rounded and packed into the double-precision format, with
4321 | the inexact exception raised if the abstract input cannot be represented
4322 | exactly.  However, if the abstract value is too large, the overflow and
4323 | inexact exceptions are raised and an infinity or maximal finite value is
4324 | returned.  If the abstract value is too small, the input value is rounded to
4325 | a subnormal number, and the underflow and inexact exceptions are raised if
4326 | the abstract input cannot be represented exactly as a subnormal double-
4327 | precision floating-point number.
4328 |     The input significand `zSig' has its binary point between bits 62
4329 | and 61, which is 10 bits to the left of the usual location.  This shifted
4330 | significand must be normalized or smaller.  If `zSig' is not normalized,
4331 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4332 | and it must not require rounding.  In the usual case that `zSig' is
4333 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4334 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4335 | Binary Floating-Point Arithmetic.
4336 *----------------------------------------------------------------------------*/
4337 
static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Increment added before the 10 extra fraction bits are discarded:
     * 0x200 is half an ulp, 0x3ff just under a full ulp.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x200;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x3ff;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x3ff : 0;
        break;
    case float_round_to_odd:
        /* Round away only when the kept low bit is currently clear. */
        roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
        break;
    default:
        abort();
    }
    roundBits = zSig & 0x3FF;
    /* Unsigned compare folds both zExp >= 0x7FD and zExp < 0 cases. */
    if ( 0x7FD <= (uint16_t) zExp ) {
        if (    ( 0x7FD < zExp )
             || (    ( zExp == 0x7FD )
                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /*
             * Overflow.  Rounding toward zero (for this sign) or
             * round-to-odd yields the maximal finite value instead of
             * infinity; -(!overflow_to_inf) subtracts 1 from the
             * packed infinity encoding in that case.
             */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat64(zSign, 0, 0);
            }
            /*
             * Tininess: either detected before rounding, or the value
             * is still below the normal range after rounding.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x3FF;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>10;
    /* On an exact tie, nearest-even clears the low significand bit. */
    if (!(roundBits ^ 0x200) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat64( zSign, zExp, zSig );

}
4413 
4414 /*----------------------------------------------------------------------------
4415 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4416 | and significand `zSig', and returns the proper double-precision floating-
4417 | point value corresponding to the abstract input.  This routine is just like
4418 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4419 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4420 | floating-point exponent.
4421 *----------------------------------------------------------------------------*/
4422 
4423 static float64
4424  normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4425                               float_status *status)
4426 {
4427     int8_t shiftCount;
4428 
4429     shiftCount = clz64(zSig) - 1;
4430     return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4431                                status);
4432 
4433 }
4434 
4435 /*----------------------------------------------------------------------------
4436 | Normalizes the subnormal extended double-precision floating-point value
4437 | represented by the denormalized significand `aSig'.  The normalized exponent
4438 | and significand are stored at the locations pointed to by `zExpPtr' and
4439 | `zSigPtr', respectively.
4440 *----------------------------------------------------------------------------*/
4441 
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* floatx80 has an explicit integer bit: shift the leading one to bit 63. */
    int8_t shiftCount = clz64(aSig);

    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4451 
4452 /*----------------------------------------------------------------------------
4453 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4454 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4455 | and returns the proper extended double-precision floating-point value
4456 | corresponding to the abstract input.  Ordinarily, the abstract value is
4457 | rounded and packed into the extended double-precision format, with the
4458 | inexact exception raised if the abstract input cannot be represented
4459 | exactly.  However, if the abstract value is too large, the overflow and
4460 | inexact exceptions are raised and an infinity or maximal finite value is
4461 | returned.  If the abstract value is too small, the input value is rounded to
4462 | a subnormal number, and the underflow and inexact exceptions are raised if
4463 | the abstract input cannot be represented exactly as a subnormal extended
4464 | double-precision floating-point number.
4465 |     If `roundingPrecision' is 32 or 64, the result is rounded to the same
4466 | number of bits as single or double precision, respectively.  Otherwise, the
4467 | result is rounded to the full precision of the extended double-precision
4468 | format.
4469 |     The input significand must be normalized or smaller.  If the input
4470 | significand is not normalized, `zExp' must be 0; in that case, the result
4471 | returned is a subnormal number, and it must not require rounding.  The
4472 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4473 | Floating-Point Arithmetic.
4474 *----------------------------------------------------------------------------*/
4475 
4476 floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
4477                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4478                               float_status *status)
4479 {
4480     int8_t roundingMode;
4481     bool roundNearestEven, increment, isTiny;
4482     int64_t roundIncrement, roundMask, roundBits;
4483 
4484     roundingMode = status->float_rounding_mode;
4485     roundNearestEven = ( roundingMode == float_round_nearest_even );
4486     if ( roundingPrecision == 80 ) goto precision80;
4487     if ( roundingPrecision == 64 ) {
4488         roundIncrement = UINT64_C(0x0000000000000400);
4489         roundMask = UINT64_C(0x00000000000007FF);
4490     }
4491     else if ( roundingPrecision == 32 ) {
4492         roundIncrement = UINT64_C(0x0000008000000000);
4493         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4494     }
4495     else {
4496         goto precision80;
4497     }
4498     zSig0 |= ( zSig1 != 0 );
4499     switch (roundingMode) {
4500     case float_round_nearest_even:
4501     case float_round_ties_away:
4502         break;
4503     case float_round_to_zero:
4504         roundIncrement = 0;
4505         break;
4506     case float_round_up:
4507         roundIncrement = zSign ? 0 : roundMask;
4508         break;
4509     case float_round_down:
4510         roundIncrement = zSign ? roundMask : 0;
4511         break;
4512     default:
4513         abort();
4514     }
4515     roundBits = zSig0 & roundMask;
4516     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4517         if (    ( 0x7FFE < zExp )
4518              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4519            ) {
4520             goto overflow;
4521         }
4522         if ( zExp <= 0 ) {
4523             if (status->flush_to_zero) {
4524                 float_raise(float_flag_output_denormal, status);
4525                 return packFloatx80(zSign, 0, 0);
4526             }
4527             isTiny = status->tininess_before_rounding
4528                   || (zExp < 0 )
4529                   || (zSig0 <= zSig0 + roundIncrement);
4530             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4531             zExp = 0;
4532             roundBits = zSig0 & roundMask;
4533             if (isTiny && roundBits) {
4534                 float_raise(float_flag_underflow, status);
4535             }
4536             if (roundBits) {
4537                 float_raise(float_flag_inexact, status);
4538             }
4539             zSig0 += roundIncrement;
4540             if ( (int64_t) zSig0 < 0 ) zExp = 1;
4541             roundIncrement = roundMask + 1;
4542             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4543                 roundMask |= roundIncrement;
4544             }
4545             zSig0 &= ~ roundMask;
4546             return packFloatx80( zSign, zExp, zSig0 );
4547         }
4548     }
4549     if (roundBits) {
4550         float_raise(float_flag_inexact, status);
4551     }
4552     zSig0 += roundIncrement;
4553     if ( zSig0 < roundIncrement ) {
4554         ++zExp;
4555         zSig0 = UINT64_C(0x8000000000000000);
4556     }
4557     roundIncrement = roundMask + 1;
4558     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4559         roundMask |= roundIncrement;
4560     }
4561     zSig0 &= ~ roundMask;
4562     if ( zSig0 == 0 ) zExp = 0;
4563     return packFloatx80( zSign, zExp, zSig0 );
4564  precision80:
4565     switch (roundingMode) {
4566     case float_round_nearest_even:
4567     case float_round_ties_away:
4568         increment = ((int64_t)zSig1 < 0);
4569         break;
4570     case float_round_to_zero:
4571         increment = 0;
4572         break;
4573     case float_round_up:
4574         increment = !zSign && zSig1;
4575         break;
4576     case float_round_down:
4577         increment = zSign && zSig1;
4578         break;
4579     default:
4580         abort();
4581     }
4582     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4583         if (    ( 0x7FFE < zExp )
4584              || (    ( zExp == 0x7FFE )
4585                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4586                   && increment
4587                 )
4588            ) {
4589             roundMask = 0;
4590  overflow:
4591             float_raise(float_flag_overflow | float_flag_inexact, status);
4592             if (    ( roundingMode == float_round_to_zero )
4593                  || ( zSign && ( roundingMode == float_round_up ) )
4594                  || ( ! zSign && ( roundingMode == float_round_down ) )
4595                ) {
4596                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4597             }
4598             return packFloatx80(zSign,
4599                                 floatx80_infinity_high,
4600                                 floatx80_infinity_low);
4601         }
4602         if ( zExp <= 0 ) {
4603             isTiny = status->tininess_before_rounding
4604                   || (zExp < 0)
4605                   || !increment
4606                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4607             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4608             zExp = 0;
4609             if (isTiny && zSig1) {
4610                 float_raise(float_flag_underflow, status);
4611             }
4612             if (zSig1) {
4613                 float_raise(float_flag_inexact, status);
4614             }
4615             switch (roundingMode) {
4616             case float_round_nearest_even:
4617             case float_round_ties_away:
4618                 increment = ((int64_t)zSig1 < 0);
4619                 break;
4620             case float_round_to_zero:
4621                 increment = 0;
4622                 break;
4623             case float_round_up:
4624                 increment = !zSign && zSig1;
4625                 break;
4626             case float_round_down:
4627                 increment = zSign && zSig1;
4628                 break;
4629             default:
4630                 abort();
4631             }
4632             if ( increment ) {
4633                 ++zSig0;
4634                 if (!(zSig1 << 1) && roundNearestEven) {
4635                     zSig0 &= ~1;
4636                 }
4637                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4638             }
4639             return packFloatx80( zSign, zExp, zSig0 );
4640         }
4641     }
4642     if (zSig1) {
4643         float_raise(float_flag_inexact, status);
4644     }
4645     if ( increment ) {
4646         ++zSig0;
4647         if ( zSig0 == 0 ) {
4648             ++zExp;
4649             zSig0 = UINT64_C(0x8000000000000000);
4650         }
4651         else {
4652             if (!(zSig1 << 1) && roundNearestEven) {
4653                 zSig0 &= ~1;
4654             }
4655         }
4656     }
4657     else {
4658         if ( zSig0 == 0 ) zExp = 0;
4659     }
4660     return packFloatx80( zSign, zExp, zSig0 );
4661 
4662 }
4663 
4664 /*----------------------------------------------------------------------------
4665 | Takes an abstract floating-point value having sign `zSign', exponent
4666 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4667 | and returns the proper extended double-precision floating-point value
4668 | corresponding to the abstract input.  This routine is just like
4669 | `roundAndPackFloatx80' except that the input significand does not have to be
4670 | normalized.
4671 *----------------------------------------------------------------------------*/
4672 
4673 floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4674                                        bool zSign, int32_t zExp,
4675                                        uint64_t zSig0, uint64_t zSig1,
4676                                        float_status *status)
4677 {
4678     int8_t shiftCount;
4679 
4680     if ( zSig0 == 0 ) {
4681         zSig0 = zSig1;
4682         zSig1 = 0;
4683         zExp -= 64;
4684     }
4685     shiftCount = clz64(zSig0);
4686     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4687     zExp -= shiftCount;
4688     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4689                                 zSig0, zSig1, status);
4690 
4691 }
4692 
4693 /*----------------------------------------------------------------------------
4694 | Returns the least-significant 64 fraction bits of the quadruple-precision
4695 | floating-point value `a'.
4696 *----------------------------------------------------------------------------*/
4697 
4698 static inline uint64_t extractFloat128Frac1( float128 a )
4699 {
4700 
4701     return a.low;
4702 
4703 }
4704 
4705 /*----------------------------------------------------------------------------
4706 | Returns the most-significant 48 fraction bits of the quadruple-precision
4707 | floating-point value `a'.
4708 *----------------------------------------------------------------------------*/
4709 
4710 static inline uint64_t extractFloat128Frac0( float128 a )
4711 {
4712 
4713     return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4714 
4715 }
4716 
4717 /*----------------------------------------------------------------------------
4718 | Returns the exponent bits of the quadruple-precision floating-point value
4719 | `a'.
4720 *----------------------------------------------------------------------------*/
4721 
4722 static inline int32_t extractFloat128Exp( float128 a )
4723 {
4724 
4725     return ( a.high>>48 ) & 0x7FFF;
4726 
4727 }
4728 
4729 /*----------------------------------------------------------------------------
4730 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4731 *----------------------------------------------------------------------------*/
4732 
4733 static inline bool extractFloat128Sign(float128 a)
4734 {
4735     return a.high >> 63;
4736 }
4737 
4738 /*----------------------------------------------------------------------------
4739 | Normalizes the subnormal quadruple-precision floating-point value
4740 | represented by the denormalized significand formed by the concatenation of
4741 | `aSig0' and `aSig1'.  The normalized exponent is stored at the location
4742 | pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
4743 | significand are stored at the location pointed to by `zSig0Ptr', and the
4744 | least significant 64 bits of the normalized significand are stored at the
4745 | location pointed to by `zSig1Ptr'.
4746 *----------------------------------------------------------------------------*/
4747 
static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shiftCount;

    if ( aSig0 == 0 ) {
        /* All set bits are in the low word.  Shift so the most
         * significant set bit lands in bit 48 of the high word
         * (the quad-format integer-bit position). */
        shiftCount = clz64(aSig1) - 15;
        if ( shiftCount < 0 ) {
            /* The top set bit of aSig1 is above position 48: it must be
             * split across both output words.  `shiftCount & 63` turns
             * the negative count into the complementary left shift. */
            *zSig0Ptr = aSig1>>( - shiftCount );
            *zSig1Ptr = aSig1<<( shiftCount & 63 );
        }
        else {
            *zSig0Ptr = aSig1<<shiftCount;
            *zSig1Ptr = 0;
        }
        /* Extra 64 accounts for the value having started in the low word
         * (a net shift of shiftCount + 64 relative to aSig0). */
        *zExpPtr = - shiftCount - 63;
    }
    else {
        /* High word non-zero: one 128-bit left shift normalizes. */
        shiftCount = clz64(aSig0) - 15;
        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
        *zExpPtr = 1 - shiftCount;
    }

}
4778 
4779 /*----------------------------------------------------------------------------
4780 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4781 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4782 | floating-point value, returning the result.  After being shifted into the
4783 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4784 | added together to form the most significant 32 bits of the result.  This
4785 | means that any integer portion of `zSig0' will be added into the exponent.
4786 | Since a properly normalized significand will have an integer portion equal
4787 | to 1, the `zExp' input should be 1 less than the desired result exponent
4788 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4789 | significand.
4790 *----------------------------------------------------------------------------*/
4791 
4792 static inline float128
4793 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4794 {
4795     float128 z;
4796 
4797     z.low = zSig1;
4798     z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4799     return z;
4800 }
4801 
4802 /*----------------------------------------------------------------------------
4803 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4804 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4805 | and `zSig2', and returns the proper quadruple-precision floating-point value
4806 | corresponding to the abstract input.  Ordinarily, the abstract value is
4807 | simply rounded and packed into the quadruple-precision format, with the
4808 | inexact exception raised if the abstract input cannot be represented
4809 | exactly.  However, if the abstract value is too large, the overflow and
4810 | inexact exceptions are raised and an infinity or maximal finite value is
4811 | returned.  If the abstract value is too small, the input value is rounded to
4812 | a subnormal number, and the underflow and inexact exceptions are raised if
4813 | the abstract input cannot be represented exactly as a subnormal quadruple-
4814 | precision floating-point number.
4815 |     The input significand must be normalized or smaller.  If the input
4816 | significand is not normalized, `zExp' must be 0; in that case, the result
4817 | returned is a subnormal number, and it must not require rounding.  In the
4818 | usual case that the input significand is normalized, `zExp' must be 1 less
4819 | than the ``true'' floating-point exponent.  The handling of underflow and
4820 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4821 *----------------------------------------------------------------------------*/
4822 
static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
                                     uint64_t zSig0, uint64_t zSig1,
                                     uint64_t zSig2, float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Decide from the extra (sticky/round) word zSig2 whether the
     * 128-bit significand must be incremented. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Round bit is the top bit of zSig2. */
        increment = ((int64_t)zSig2 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig2;
        break;
    case float_round_down:
        increment = zSign && zSig2;
        break;
    case float_round_to_odd:
        /* Increment only if the result would otherwise be even. */
        increment = !(zSig1 & 0x1) && zSig2;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) zExp ) {
        /* Exponent at or beyond the representable range (negative zExp
         * also lands here via the unsigned compare). */
        if (    ( 0x7FFD < zExp )
             || (    ( zExp == 0x7FFD )
                  && eq128(
                         UINT64_C(0x0001FFFFFFFFFFFF),
                         UINT64_C(0xFFFFFFFFFFFFFFFF),
                         zSig0,
                         zSig1
                     )
                  && increment
                )
           ) {
            /* Overflow: produce either the maximal finite value or
             * infinity, depending on rounding direction. */
            float_raise(float_flag_overflow | float_flag_inexact, status);
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
                 || (roundingMode == float_round_to_odd)
               ) {
                return
                    packFloat128(
                        zSign,
                        0x7FFE,
                        UINT64_C(0x0000FFFFFFFFFFFF),
                        UINT64_C(0xFFFFFFFFFFFFFFFF)
                    );
            }
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        if ( zExp < 0 ) {
            /* Result is subnormal. */
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat128(zSign, 0, 0, 0);
            }
            /* Tininess detection per the configured mode: before
             * rounding, or after rounding when the increment cannot
             * carry up to the smallest normal significand. */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || !increment
                  || lt128(zSig0, zSig1,
                           UINT64_C(0x0001FFFFFFFFFFFF),
                           UINT64_C(0xFFFFFFFFFFFFFFFF));
            /* Denormalize, folding shifted-out bits into sticky zSig2. */
            shift128ExtraRightJamming(
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
            zExp = 0;
            if (isTiny && zSig2) {
                float_raise(float_flag_underflow, status);
            }
            /* Recompute the increment decision for the shifted value. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig2 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig2;
                break;
            case float_round_down:
                increment = zSign && zSig2;
                break;
            case float_round_to_odd:
                increment = !(zSig1 & 0x1) && zSig2;
                break;
            default:
                abort();
            }
        }
    }
    if (zSig2) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
        /* Exact tie under nearest-even: clear the low bit to round to
         * the even significand. */
        if ((zSig2 + zSig2 == 0) && roundNearestEven) {
            zSig1 &= ~1;
        }
    }
    else {
        /* Canonicalize a zero significand to a true zero. */
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
    }
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}
4934 
4935 /*----------------------------------------------------------------------------
4936 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4937 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4938 | returns the proper quadruple-precision floating-point value corresponding
4939 | to the abstract input.  This routine is just like `roundAndPackFloat128'
4940 | except that the input significand has fewer bits and does not have to be
4941 | normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
4942 | point exponent.
4943 *----------------------------------------------------------------------------*/
4944 
static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
                                              uint64_t zSig0, uint64_t zSig1,
                                              float_status *status)
{
    int8_t shiftCount;
    uint64_t zSig2;

    /* Promote the low word when the high word is empty, charging a
     * full 64-bit shift to the exponent. */
    if ( zSig0 == 0 ) {
        zSig0 = zSig1;
        zSig1 = 0;
        zExp -= 64;
    }
    /* Move the most significant set bit to bit 48 of zSig0 (the
     * quad-format integer-bit position). */
    shiftCount = clz64(zSig0) - 15;
    if ( 0 <= shiftCount ) {
        zSig2 = 0;
        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    }
    else {
        /* Too many significant bits: shift right, collecting the bits
         * shifted out into the sticky word zSig2 for rounding. */
        shift128ExtraRightJamming(
            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
    }
    zExp -= shiftCount;
    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);

}
4970 
4971 
4972 /*----------------------------------------------------------------------------
4973 | Returns the result of converting the 32-bit two's complement integer `a'
4974 | to the extended double-precision floating-point format.  The conversion
4975 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4976 | Arithmetic.
4977 *----------------------------------------------------------------------------*/
4978 
4979 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4980 {
4981     bool zSign;
4982     uint32_t absA;
4983     int8_t shiftCount;
4984     uint64_t zSig;
4985 
4986     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4987     zSign = ( a < 0 );
4988     absA = zSign ? - a : a;
4989     shiftCount = clz32(absA) + 32;
4990     zSig = absA;
4991     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4992 
4993 }
4994 
4995 /*----------------------------------------------------------------------------
4996 | Returns the result of converting the 32-bit two's complement integer `a' to
4997 | the quadruple-precision floating-point format.  The conversion is performed
4998 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4999 *----------------------------------------------------------------------------*/
5000 
5001 float128 int32_to_float128(int32_t a, float_status *status)
5002 {
5003     bool zSign;
5004     uint32_t absA;
5005     int8_t shiftCount;
5006     uint64_t zSig0;
5007 
5008     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
5009     zSign = ( a < 0 );
5010     absA = zSign ? - a : a;
5011     shiftCount = clz32(absA) + 17;
5012     zSig0 = absA;
5013     return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
5014 
5015 }
5016 
5017 /*----------------------------------------------------------------------------
5018 | Returns the result of converting the 64-bit two's complement integer `a'
5019 | to the extended double-precision floating-point format.  The conversion
5020 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5021 | Arithmetic.
5022 *----------------------------------------------------------------------------*/
5023 
5024 floatx80 int64_to_floatx80(int64_t a, float_status *status)
5025 {
5026     bool zSign;
5027     uint64_t absA;
5028     int8_t shiftCount;
5029 
5030     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5031     zSign = ( a < 0 );
5032     absA = zSign ? - a : a;
5033     shiftCount = clz64(absA);
5034     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
5035 
5036 }
5037 
5038 /*----------------------------------------------------------------------------
5039 | Returns the result of converting the 64-bit two's complement integer `a' to
5040 | the quadruple-precision floating-point format.  The conversion is performed
5041 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5042 *----------------------------------------------------------------------------*/
5043 
5044 float128 int64_to_float128(int64_t a, float_status *status)
5045 {
5046     bool zSign;
5047     uint64_t absA;
5048     int8_t shiftCount;
5049     int32_t zExp;
5050     uint64_t zSig0, zSig1;
5051 
5052     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
5053     zSign = ( a < 0 );
5054     absA = zSign ? - a : a;
5055     shiftCount = clz64(absA) + 49;
5056     zExp = 0x406E - shiftCount;
5057     if ( 64 <= shiftCount ) {
5058         zSig1 = 0;
5059         zSig0 = absA;
5060         shiftCount -= 64;
5061     }
5062     else {
5063         zSig1 = absA;
5064         zSig0 = 0;
5065     }
5066     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5067     return packFloat128( zSign, zExp, zSig0, zSig1 );
5068 
5069 }
5070 
5071 /*----------------------------------------------------------------------------
5072 | Returns the result of converting the 64-bit unsigned integer `a'
5073 | to the quadruple-precision floating-point format.  The conversion is performed
5074 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5075 *----------------------------------------------------------------------------*/
5076 
5077 float128 uint64_to_float128(uint64_t a, float_status *status)
5078 {
5079     if (a == 0) {
5080         return float128_zero;
5081     }
5082     return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
5083 }
5084 
5085 /*----------------------------------------------------------------------------
5086 | Returns the result of converting the single-precision floating-point value
5087 | `a' to the extended double-precision floating-point format.  The conversion
5088 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5089 | Arithmetic.
5090 *----------------------------------------------------------------------------*/
5091 
floatx80 float32_to_floatx80(float32 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint32_t aSig;

    a = float32_squash_input_denormal(a, status);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    if ( aExp == 0xFF ) {
        if (aSig) {
            /* NaN: convert the payload and return it silenced. */
            floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        /* Infinity. */
        return packFloatx80(aSign,
                            floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        /* Zero passes through; subnormals are normalized first. */
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    /* Make the integer bit explicit, rebias the exponent
     * (0x3FFF - 0x7F = 0x3F80), and left-justify the 24-bit
     * significand in the 64-bit extended-precision fraction.
     * The conversion is exact, so no rounding is required. */
    aSig |= 0x00800000;
    return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );

}
5120 
5121 /*----------------------------------------------------------------------------
5122 | Returns the result of converting the single-precision floating-point value
| `a' to the quadruple-precision floating-point format.  The conversion is
5124 | performed according to the IEC/IEEE Standard for Binary Floating-Point
5125 | Arithmetic.
5126 *----------------------------------------------------------------------------*/
5127 
5128 float128 float32_to_float128(float32 a, float_status *status)
5129 {
5130     bool aSign;
5131     int aExp;
5132     uint32_t aSig;
5133 
5134     a = float32_squash_input_denormal(a, status);
5135     aSig = extractFloat32Frac( a );
5136     aExp = extractFloat32Exp( a );
5137     aSign = extractFloat32Sign( a );
5138     if ( aExp == 0xFF ) {
5139         if (aSig) {
5140             return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
5141         }
5142         return packFloat128( aSign, 0x7FFF, 0, 0 );
5143     }
5144     if ( aExp == 0 ) {
5145         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5146         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5147         --aExp;
5148     }
5149     return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
5150 
5151 }
5152 
5153 /*----------------------------------------------------------------------------
5154 | Returns the remainder of the single-precision floating-point value `a'
5155 | with respect to the corresponding value `b'.  The operation is performed
5156 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5157 *----------------------------------------------------------------------------*/
5158 
float32 float32_rem(float32 a, float32 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint32_t aSig, bSig;
    uint32_t q;
    uint64_t aSig64, bSig64, q64;
    uint32_t alternateASig;
    int32_t sigMean;
    a = float32_squash_input_denormal(a, status);
    b = float32_squash_input_denormal(b, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    if ( aExp == 0xFF ) {
        /* NaN operands propagate; rem(inf, x) is an invalid operation. */
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
            return propagateFloat32NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    if ( bExp == 0xFF ) {
        if (bSig) {
            return propagateFloat32NaN(a, b, status);
        }
        /* rem(x, inf) = x. */
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* rem(x, 0) is an invalid operation. */
            float_raise(float_flag_invalid, status);
            return float32_default_nan(status);
        }
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        /* rem(+/-0, y) = +/-0. */
        if ( aSig == 0 ) return a;
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the integer bits explicit. */
    aSig |= 0x00800000;
    bSig |= 0x00800000;
    if ( expDiff < 32 ) {
        /* Small exponent difference: a single 64/32-bit division
         * produces the whole quotient. */
        aSig <<= 8;
        bSig <<= 8;
        if ( expDiff < 0 ) {
            /* aExp more than one below bExp: a is already the remainder. */
            if ( expDiff < -1 ) return a;
            aSig >>= 1;
        }
        q = ( bSig <= aSig );
        if ( q ) aSig -= bSig;
        if ( 0 < expDiff ) {
            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
            q >>= 32 - expDiff;
            bSig >>= 2;
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
        }
        else {
            aSig >>= 2;
            bSig >>= 2;
        }
    }
    else {
        /* Large exponent difference: peel off up to 62 quotient bits
         * per iteration with the estimated 128/64 division; the
         * estimate is lowered by 2 so it never exceeds the true
         * quotient. */
        if ( bSig <= aSig ) aSig -= bSig;
        aSig64 = ( (uint64_t) aSig )<<40;
        bSig64 = ( (uint64_t) bSig )<<40;
        expDiff -= 64;
        while ( 0 < expDiff ) {
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
            aSig64 = - ( ( bSig * q64 )<<38 );
            expDiff -= 62;
        }
        expDiff += 64;
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
        q = q64>>( 64 - expDiff );
        bSig <<= 6;
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
    }
    /* Step past the true quotient, then pick whichever of the two
     * neighboring remainders is closer to zero, breaking an exact tie
     * toward an even quotient (round-to-nearest-even of the quotient). */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int32_t) aSig );
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    /* A negative remainder flips the result sign relative to a. */
    zSign = ( (int32_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
}
5254 
5255 
5256 
5257 /*----------------------------------------------------------------------------
5258 | Returns the binary exponential of the single-precision floating-point value
5259 | `a'. The operation is performed according to the IEC/IEEE Standard for
5260 | Binary Floating-Point Arithmetic.
5261 |
5262 | Uses the following identities:
5263 |
5264 | 1. -------------------------------------------------------------------------
5265 |      x    x*ln(2)
5266 |     2  = e
5267 |
5268 | 2. -------------------------------------------------------------------------
5269 |                      2     3     4     5           n
5270 |      x        x     x     x     x     x           x
5271 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5272 |               1!    2!    3!    4!    5!          n!
5273 *----------------------------------------------------------------------------*/
5274 
/* Maclaurin-series coefficients 1/n! for n = 1..15, as IEEE
 * double-precision bit patterns; consumed by float32_exp2(). */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /*  1 */
    const_float64( 0x3fe0000000000000ll ), /*  2 */
    const_float64( 0x3fc5555555555555ll ), /*  3 */
    const_float64( 0x3fa5555555555555ll ), /*  4 */
    const_float64( 0x3f81111111111111ll ), /*  5 */
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};
5293 
5294 float32 float32_exp2(float32 a, float_status *status)
5295 {
5296     bool aSign;
5297     int aExp;
5298     uint32_t aSig;
5299     float64 r, x, xn;
5300     int i;
5301     a = float32_squash_input_denormal(a, status);
5302 
5303     aSig = extractFloat32Frac( a );
5304     aExp = extractFloat32Exp( a );
5305     aSign = extractFloat32Sign( a );
5306 
5307     if ( aExp == 0xFF) {
5308         if (aSig) {
5309             return propagateFloat32NaN(a, float32_zero, status);
5310         }
5311         return (aSign) ? float32_zero : a;
5312     }
5313     if (aExp == 0) {
5314         if (aSig == 0) return float32_one;
5315     }
5316 
5317     float_raise(float_flag_inexact, status);
5318 
5319     /* ******************************* */
5320     /* using float64 for approximation */
5321     /* ******************************* */
5322     x = float32_to_float64(a, status);
5323     x = float64_mul(x, float64_ln2, status);
5324 
5325     xn = x;
5326     r = float64_one;
5327     for (i = 0 ; i < 15 ; i++) {
5328         float64 f;
5329 
5330         f = float64_mul(xn, float32_exp2_coefficients[i], status);
5331         r = float64_add(r, f, status);
5332 
5333         xn = float64_mul(xn, x, status);
5334     }
5335 
5336     return float64_to_float32(r, status);
5337 }
5338 
5339 /*----------------------------------------------------------------------------
5340 | Returns the binary log of the single-precision floating-point value `a'.
5341 | The operation is performed according to the IEC/IEEE Standard for Binary
5342 | Floating-Point Arithmetic.
5343 *----------------------------------------------------------------------------*/
float32 float32_log2(float32 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint32_t aSig, zSig, i;

    a = float32_squash_input_denormal(a, status);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    if ( aExp == 0 ) {
        /* log2(+/-0) = -infinity. */
        if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    if ( aSign ) {
        /* log2 of a negative value is an invalid operation. */
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    if ( aExp == 0xFF ) {
        if (aSig) {
            return propagateFloat32NaN(a, float32_zero, status);
        }
        /* log2(+inf) = +inf. */
        return a;
    }

    /* Integer part of the result is the unbiased exponent; the
     * significand (with explicit integer bit) provides the fraction. */
    aExp -= 0x7F;
    aSig |= 0x00800000;
    zSign = aExp < 0;
    zSig = aExp << 23;

    /* Extract fraction bits one at a time: squaring the 1.23
     * fixed-point significand doubles its log2, so a carry into
     * bit 24 means the next result bit is 1. */
    for (i = 1 << 22; i > 0; i >>= 1) {
        aSig = ( (uint64_t)aSig * aSig ) >> 23;
        if ( aSig & 0x01000000 ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    if ( zSign )
        zSig = -zSig;

    /* zSig is a 9.23 fixed-point magnitude; 0x85 rescales it. */
    return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
}
5388 
5389 /*----------------------------------------------------------------------------
5390 | Returns the result of converting the double-precision floating-point value
5391 | `a' to the extended double-precision floating-point format.  The conversion
5392 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5393 | Arithmetic.
5394 *----------------------------------------------------------------------------*/
5395 
5396 floatx80 float64_to_floatx80(float64 a, float_status *status)
5397 {
5398     bool aSign;
5399     int aExp;
5400     uint64_t aSig;
5401 
5402     a = float64_squash_input_denormal(a, status);
5403     aSig = extractFloat64Frac( a );
5404     aExp = extractFloat64Exp( a );
5405     aSign = extractFloat64Sign( a );
5406     if ( aExp == 0x7FF ) {
5407         if (aSig) {
5408             floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5409                                                status);
5410             return floatx80_silence_nan(res, status);
5411         }
5412         return packFloatx80(aSign,
5413                             floatx80_infinity_high,
5414                             floatx80_infinity_low);
5415     }
5416     if ( aExp == 0 ) {
5417         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5418         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5419     }
5420     return
5421         packFloatx80(
5422             aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
5423 
5424 }
5425 
5426 /*----------------------------------------------------------------------------
5427 | Returns the result of converting the double-precision floating-point value
5428 | `a' to the quadruple-precision floating-point format.  The conversion is
5429 | performed according to the IEC/IEEE Standard for Binary Floating-Point
5430 | Arithmetic.
5431 *----------------------------------------------------------------------------*/
5432 
5433 float128 float64_to_float128(float64 a, float_status *status)
5434 {
5435     bool aSign;
5436     int aExp;
5437     uint64_t aSig, zSig0, zSig1;
5438 
5439     a = float64_squash_input_denormal(a, status);
5440     aSig = extractFloat64Frac( a );
5441     aExp = extractFloat64Exp( a );
5442     aSign = extractFloat64Sign( a );
5443     if ( aExp == 0x7FF ) {
5444         if (aSig) {
5445             return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
5446         }
5447         return packFloat128( aSign, 0x7FFF, 0, 0 );
5448     }
5449     if ( aExp == 0 ) {
5450         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5451         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5452         --aExp;
5453     }
5454     shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
5455     return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
5456 
5457 }
5458 
5459 
5460 /*----------------------------------------------------------------------------
5461 | Returns the remainder of the double-precision floating-point value `a'
5462 | with respect to the corresponding value `b'.  The operation is performed
5463 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5464 *----------------------------------------------------------------------------*/
5465 
float64 float64_rem(float64 a, float64 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint64_t aSig, bSig;
    uint64_t q, alternateASig;
    int64_t sigMean;

    a = float64_squash_input_denormal(a, status);
    b = float64_squash_input_denormal(b, status);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    /* NaN operands propagate; rem(inf, x) is invalid. */
    if ( aExp == 0x7FF ) {
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            return propagateFloat64NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    if ( bExp == 0x7FF ) {
        if (bSig) {
            return propagateFloat64NaN(a, b, status);
        }
        /* rem(finite, inf) = a, exactly */
        return a;
    }
    if ( bExp == 0 ) {
        /* rem(x, 0) is invalid */
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float64_default_nan(status);
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        /* rem(+/-0, y) = +/-0 */
        if ( aSig == 0 ) return a;
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the implicit integer bit explicit and align both significands
     * at the top of the 64-bit word.  */
    aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
    bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
    if ( expDiff < 0 ) {
        /* |a| is already smaller than half of |b|: a is the remainder. */
        if ( expDiff < -1 ) return a;
        aSig >>= 1;
    }
    q = ( bSig <= aSig );
    if ( q ) aSig -= bSig;
    /* Long-division phase: peel off up to 62 quotient bits per iteration,
     * using a deliberate under-estimate of q (q - 2) so the partial
     * remainder never goes negative here.  */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        aSig = - ( ( bSig>>2 ) * q );
        expDiff -= 62;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        bSig >>= 2;
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
    }
    else {
        aSig >>= 2;
        bSig >>= 2;
    }
    /* Subtract b until the remainder goes negative; alternateASig keeps
     * the last non-negative partial remainder.  */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int64_t) aSig );
    /* Choose whichever remainder is nearer to zero, breaking exact ties
     * toward even q — this yields the IEEE (round-to-nearest) remainder. */
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int64_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);

}
5547 
5548 /*----------------------------------------------------------------------------
5549 | Returns the binary log of the double-precision floating-point value `a'.
5550 | The operation is performed according to the IEC/IEEE Standard for Binary
5551 | Floating-Point Arithmetic.
5552 *----------------------------------------------------------------------------*/
float64 float64_log2(float64 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint64_t aSig, aSig0, aSig1, zSig, i;
    a = float64_squash_input_denormal(a, status);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    if ( aExp == 0 ) {
        /* log2(+/-0) = -infinity */
        if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    if ( aSign ) {
        /* log2 of a negative number is invalid */
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    if ( aExp == 0x7FF ) {
        if (aSig) {
            return propagateFloat64NaN(a, float64_zero, status);
        }
        /* log2(+inf) = +inf */
        return a;
    }

    aExp -= 0x3FF;
    aSig |= UINT64_C(0x0010000000000000);
    zSign = aExp < 0;
    /* Integer part of the log goes in the top bits of zSig; the cast
     * avoids the UB of left-shifting a negative int.  */
    zSig = (uint64_t)aExp << 52;
    /* Generate the fraction bits one at a time by repeatedly squaring
     * the significand (1 <= aSig < 2, binary point below bit 52): a
     * square that reaches 2 yields a set result bit and is renormalized. */
    for (i = 1LL << 51; i > 0; i >>= 1) {
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
        if ( aSig & UINT64_C(0x0020000000000000) ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    if ( zSign )
        zSig = -zSig;
    return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
}
5596 
5597 /*----------------------------------------------------------------------------
5598 | Returns the result of converting the extended double-precision floating-
5599 | point value `a' to the 32-bit two's complement integer format.  The
5600 | conversion is performed according to the IEC/IEEE Standard for Binary
5601 | Floating-Point Arithmetic---which means in particular that the conversion
5602 | is rounded according to the current rounding mode.  If `a' is a NaN, the
5603 | largest positive integer is returned.  Otherwise, if the conversion
5604 | overflows, the largest integer with the same sign as `a' is returned.
5605 *----------------------------------------------------------------------------*/
5606 
5607 int32_t floatx80_to_int32(floatx80 a, float_status *status)
5608 {
5609     bool aSign;
5610     int32_t aExp, shiftCount;
5611     uint64_t aSig;
5612 
5613     if (floatx80_invalid_encoding(a)) {
5614         float_raise(float_flag_invalid, status);
5615         return 1 << 31;
5616     }
5617     aSig = extractFloatx80Frac( a );
5618     aExp = extractFloatx80Exp( a );
5619     aSign = extractFloatx80Sign( a );
5620     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5621     shiftCount = 0x4037 - aExp;
5622     if ( shiftCount <= 0 ) shiftCount = 1;
5623     shift64RightJamming( aSig, shiftCount, &aSig );
5624     return roundAndPackInt32(aSign, aSig, status);
5625 
5626 }
5627 
5628 /*----------------------------------------------------------------------------
5629 | Returns the result of converting the extended double-precision floating-
5630 | point value `a' to the 32-bit two's complement integer format.  The
5631 | conversion is performed according to the IEC/IEEE Standard for Binary
5632 | Floating-Point Arithmetic, except that the conversion is always rounded
5633 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5634 | Otherwise, if the conversion overflows, the largest integer with the same
5635 | sign as `a' is returned.
5636 *----------------------------------------------------------------------------*/
5637 
5638 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5639 {
5640     bool aSign;
5641     int32_t aExp, shiftCount;
5642     uint64_t aSig, savedASig;
5643     int32_t z;
5644 
5645     if (floatx80_invalid_encoding(a)) {
5646         float_raise(float_flag_invalid, status);
5647         return 1 << 31;
5648     }
5649     aSig = extractFloatx80Frac( a );
5650     aExp = extractFloatx80Exp( a );
5651     aSign = extractFloatx80Sign( a );
5652     if ( 0x401E < aExp ) {
5653         if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5654         goto invalid;
5655     }
5656     else if ( aExp < 0x3FFF ) {
5657         if (aExp || aSig) {
5658             float_raise(float_flag_inexact, status);
5659         }
5660         return 0;
5661     }
5662     shiftCount = 0x403E - aExp;
5663     savedASig = aSig;
5664     aSig >>= shiftCount;
5665     z = aSig;
5666     if ( aSign ) z = - z;
5667     if ( ( z < 0 ) ^ aSign ) {
5668  invalid:
5669         float_raise(float_flag_invalid, status);
5670         return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5671     }
5672     if ( ( aSig<<shiftCount ) != savedASig ) {
5673         float_raise(float_flag_inexact, status);
5674     }
5675     return z;
5676 
5677 }
5678 
5679 /*----------------------------------------------------------------------------
5680 | Returns the result of converting the extended double-precision floating-
5681 | point value `a' to the 64-bit two's complement integer format.  The
5682 | conversion is performed according to the IEC/IEEE Standard for Binary
5683 | Floating-Point Arithmetic---which means in particular that the conversion
5684 | is rounded according to the current rounding mode.  If `a' is a NaN,
5685 | the largest positive integer is returned.  Otherwise, if the conversion
5686 | overflows, the largest integer with the same sign as `a' is returned.
5687 *----------------------------------------------------------------------------*/
5688 
5689 int64_t floatx80_to_int64(floatx80 a, float_status *status)
5690 {
5691     bool aSign;
5692     int32_t aExp, shiftCount;
5693     uint64_t aSig, aSigExtra;
5694 
5695     if (floatx80_invalid_encoding(a)) {
5696         float_raise(float_flag_invalid, status);
5697         return 1ULL << 63;
5698     }
5699     aSig = extractFloatx80Frac( a );
5700     aExp = extractFloatx80Exp( a );
5701     aSign = extractFloatx80Sign( a );
5702     shiftCount = 0x403E - aExp;
5703     if ( shiftCount <= 0 ) {
5704         if ( shiftCount ) {
5705             float_raise(float_flag_invalid, status);
5706             if (!aSign || floatx80_is_any_nan(a)) {
5707                 return INT64_MAX;
5708             }
5709             return INT64_MIN;
5710         }
5711         aSigExtra = 0;
5712     }
5713     else {
5714         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5715     }
5716     return roundAndPackInt64(aSign, aSig, aSigExtra, status);
5717 
5718 }
5719 
5720 /*----------------------------------------------------------------------------
5721 | Returns the result of converting the extended double-precision floating-
5722 | point value `a' to the 64-bit two's complement integer format.  The
5723 | conversion is performed according to the IEC/IEEE Standard for Binary
5724 | Floating-Point Arithmetic, except that the conversion is always rounded
5725 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5726 | Otherwise, if the conversion overflows, the largest integer with the same
5727 | sign as `a' is returned.
5728 *----------------------------------------------------------------------------*/
5729 
5730 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
5731 {
5732     bool aSign;
5733     int32_t aExp, shiftCount;
5734     uint64_t aSig;
5735     int64_t z;
5736 
5737     if (floatx80_invalid_encoding(a)) {
5738         float_raise(float_flag_invalid, status);
5739         return 1ULL << 63;
5740     }
5741     aSig = extractFloatx80Frac( a );
5742     aExp = extractFloatx80Exp( a );
5743     aSign = extractFloatx80Sign( a );
5744     shiftCount = aExp - 0x403E;
5745     if ( 0 <= shiftCount ) {
5746         aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
5747         if ( ( a.high != 0xC03E ) || aSig ) {
5748             float_raise(float_flag_invalid, status);
5749             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5750                 return INT64_MAX;
5751             }
5752         }
5753         return INT64_MIN;
5754     }
5755     else if ( aExp < 0x3FFF ) {
5756         if (aExp | aSig) {
5757             float_raise(float_flag_inexact, status);
5758         }
5759         return 0;
5760     }
5761     z = aSig>>( - shiftCount );
5762     if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
5763         float_raise(float_flag_inexact, status);
5764     }
5765     if ( aSign ) z = - z;
5766     return z;
5767 
5768 }
5769 
5770 /*----------------------------------------------------------------------------
5771 | Returns the result of converting the extended double-precision floating-
5772 | point value `a' to the single-precision floating-point format.  The
5773 | conversion is performed according to the IEC/IEEE Standard for Binary
5774 | Floating-Point Arithmetic.
5775 *----------------------------------------------------------------------------*/
5776 
5777 float32 floatx80_to_float32(floatx80 a, float_status *status)
5778 {
5779     bool aSign;
5780     int32_t aExp;
5781     uint64_t aSig;
5782 
5783     if (floatx80_invalid_encoding(a)) {
5784         float_raise(float_flag_invalid, status);
5785         return float32_default_nan(status);
5786     }
5787     aSig = extractFloatx80Frac( a );
5788     aExp = extractFloatx80Exp( a );
5789     aSign = extractFloatx80Sign( a );
5790     if ( aExp == 0x7FFF ) {
5791         if ( (uint64_t) ( aSig<<1 ) ) {
5792             float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5793                                              status);
5794             return float32_silence_nan(res, status);
5795         }
5796         return packFloat32( aSign, 0xFF, 0 );
5797     }
5798     shift64RightJamming( aSig, 33, &aSig );
5799     if ( aExp || aSig ) aExp -= 0x3F81;
5800     return roundAndPackFloat32(aSign, aExp, aSig, status);
5801 
5802 }
5803 
5804 /*----------------------------------------------------------------------------
5805 | Returns the result of converting the extended double-precision floating-
5806 | point value `a' to the double-precision floating-point format.  The
5807 | conversion is performed according to the IEC/IEEE Standard for Binary
5808 | Floating-Point Arithmetic.
5809 *----------------------------------------------------------------------------*/
5810 
5811 float64 floatx80_to_float64(floatx80 a, float_status *status)
5812 {
5813     bool aSign;
5814     int32_t aExp;
5815     uint64_t aSig, zSig;
5816 
5817     if (floatx80_invalid_encoding(a)) {
5818         float_raise(float_flag_invalid, status);
5819         return float64_default_nan(status);
5820     }
5821     aSig = extractFloatx80Frac( a );
5822     aExp = extractFloatx80Exp( a );
5823     aSign = extractFloatx80Sign( a );
5824     if ( aExp == 0x7FFF ) {
5825         if ( (uint64_t) ( aSig<<1 ) ) {
5826             float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5827                                              status);
5828             return float64_silence_nan(res, status);
5829         }
5830         return packFloat64( aSign, 0x7FF, 0 );
5831     }
5832     shift64RightJamming( aSig, 1, &zSig );
5833     if ( aExp || aSig ) aExp -= 0x3C01;
5834     return roundAndPackFloat64(aSign, aExp, zSig, status);
5835 
5836 }
5837 
5838 /*----------------------------------------------------------------------------
5839 | Returns the result of converting the extended double-precision floating-
5840 | point value `a' to the quadruple-precision floating-point format.  The
5841 | conversion is performed according to the IEC/IEEE Standard for Binary
5842 | Floating-Point Arithmetic.
5843 *----------------------------------------------------------------------------*/
5844 
5845 float128 floatx80_to_float128(floatx80 a, float_status *status)
5846 {
5847     bool aSign;
5848     int aExp;
5849     uint64_t aSig, zSig0, zSig1;
5850 
5851     if (floatx80_invalid_encoding(a)) {
5852         float_raise(float_flag_invalid, status);
5853         return float128_default_nan(status);
5854     }
5855     aSig = extractFloatx80Frac( a );
5856     aExp = extractFloatx80Exp( a );
5857     aSign = extractFloatx80Sign( a );
5858     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5859         float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5860                                            status);
5861         return float128_silence_nan(res, status);
5862     }
5863     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5864     return packFloat128( aSign, aExp, zSig0, zSig1 );
5865 
5866 }
5867 
5868 /*----------------------------------------------------------------------------
5869 | Rounds the extended double-precision floating-point value `a'
5870 | to the precision provided by floatx80_rounding_precision and returns the
5871 | result as an extended double-precision floating-point value.
5872 | The operation is performed according to the IEC/IEEE Standard for Binary
5873 | Floating-Point Arithmetic.
5874 *----------------------------------------------------------------------------*/
5875 
5876 floatx80 floatx80_round(floatx80 a, float_status *status)
5877 {
5878     return roundAndPackFloatx80(status->floatx80_rounding_precision,
5879                                 extractFloatx80Sign(a),
5880                                 extractFloatx80Exp(a),
5881                                 extractFloatx80Frac(a), 0, status);
5882 }
5883 
5884 /*----------------------------------------------------------------------------
5885 | Rounds the extended double-precision floating-point value `a' to an integer,
5886 | and returns the result as an extended quadruple-precision floating-point
5887 | value.  The operation is performed according to the IEC/IEEE Standard for
5888 | Binary Floating-Point Arithmetic.
5889 *----------------------------------------------------------------------------*/
5890 
floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t lastBitMask, roundBitsMask;
    floatx80 z;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aExp = extractFloatx80Exp( a );
    if ( 0x403E <= aExp ) {
        /* Magnitude so large the value is necessarily already an integer
         * (or NaN/infinity when aExp == 0x7FFF).  */
        if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
            return propagateFloatx80NaN(a, a, status);
        }
        return a;
    }
    if ( aExp < 0x3FFF ) {
        /* |a| < 1: result is -1, -0, +0 or +1 depending on the mode. */
        if (    ( aExp == 0 )
             && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
            return a;
        }
        float_raise(float_flag_inexact, status);
        aSign = extractFloatx80Sign( a );
        switch (status->float_rounding_mode) {
         case float_round_nearest_even:
            /* Round to +/-1 only when |a| > 1/2; exactly 1/2 ties to 0. */
            if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
               ) {
                return
                    packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
            }
            break;
        case float_round_ties_away:
            /* |a| >= 1/2 rounds away from zero to +/-1. */
            if (aExp == 0x3FFE) {
                return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
            }
            break;
         case float_round_down:
            return
                  aSign ?
                      packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
                : packFloatx80( 0, 0, 0 );
         case float_round_up:
            return
                  aSign ? packFloatx80( 1, 0, 0 )
                : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));

        case float_round_to_zero:
            break;
        default:
            g_assert_not_reached();
        }
        return packFloatx80( aSign, 0, 0 );
    }
    /* 1 <= |a| < 2^63: round in place on the fraction bits below the
     * units position (lastBitMask marks the units bit).  */
    lastBitMask = 1;
    lastBitMask <<= 0x403E - aExp;
    roundBitsMask = lastBitMask - 1;
    z = a;
    switch (status->float_rounding_mode) {
    case float_round_nearest_even:
        z.low += lastBitMask>>1;
        /* Only the half-way bit was set: tie, round to even (clear LSB). */
        if ((z.low & roundBitsMask) == 0) {
            z.low &= ~lastBitMask;
        }
        break;
    case float_round_ties_away:
        z.low += lastBitMask >> 1;
        break;
    case float_round_to_zero:
        break;
    case float_round_up:
        if (!extractFloatx80Sign(z)) {
            z.low += roundBitsMask;
        }
        break;
    case float_round_down:
        if (extractFloatx80Sign(z)) {
            z.low += roundBitsMask;
        }
        break;
    default:
        abort();
    }
    z.low &= ~ roundBitsMask;
    /* Rounding carried out of the significand: bump the exponent and
     * restore the explicit integer bit.  */
    if ( z.low == 0 ) {
        ++z.high;
        z.low = UINT64_C(0x8000000000000000);
    }
    if (z.low != a.low) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
5986 
5987 /*----------------------------------------------------------------------------
5988 | Returns the result of adding the absolute values of the extended double-
5989 | precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
5990 | negated before being returned.  `zSign' is ignored if the result is a NaN.
5991 | The addition is performed according to the IEC/IEEE Standard for Binary
5992 | Floating-Point Arithmetic.
5993 *----------------------------------------------------------------------------*/
5994 
static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* a has the larger exponent: align b's significand, jamming
         * shifted-out bits into zSig1.  */
        if ( aExp == 0x7FFF ) {
            if ((uint64_t)(aSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        /* denormals have an effective exponent of 1, not 0 */
        if ( bExp == 0 ) --expDiff;
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* b has the larger exponent: mirror image of the case above. */
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        /* Equal exponents: no alignment shift required. */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
                /* At least one of the values is a pseudo-denormal,
                 * and there is a carry out of the result.  */
                zExp = 1;
                goto shiftRight1;
            }
            if (zSig0 == 0) {
                return packFloatx80(zSign, 0, 0);
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        goto shiftRight1;
    }
    zSig0 = aSig + bSig;
    /* Top bit set means the explicit integer bit survived the add,
     * i.e. there was no carry out: already normalized.  */
    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    /* Carry out of the significand sum: shift right one place, bump the
     * exponent, and restore the explicit integer bit.  */
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= UINT64_C(0x8000000000000000);
    ++zExp;
 roundAndPack:
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6066 
6067 /*----------------------------------------------------------------------------
6068 | Returns the result of subtracting the absolute values of the extended
6069 | double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
6070 | difference is negated before being returned.  `zSign' is ignored if the
6071 | result is a NaN.  The subtraction is performed according to the IEC/IEEE
6072 | Standard for Binary Floating-Point Arithmetic.
6073 *----------------------------------------------------------------------------*/
6074 
static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    /* Equal exponents from here down. */
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* inf - inf is invalid */
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        /* denormals have an effective exponent of 1, not 0 */
        aExp = 1;
        bExp = 1;
    }
    zSig1 = 0;
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    /* Exact cancellation: the zero's sign depends on the rounding mode
     * (negative zero only when rounding toward minus infinity).  */
    return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
 bExpBigger:
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* finite - inf: infinity with the opposite sign */
        return packFloatx80(zSign ^ 1, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) ++expDiff;
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
 bBigger:
    /* b dominates: result has b's exponent and the flipped sign. */
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
    zExp = bExp;
    zSign ^= 1;
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) --expDiff;
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
 aBigger:
    /* a dominates: result keeps a's exponent and zSign. */
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
    zExp = aExp;
 normalizeRoundAndPack:
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         zSign, zExp, zSig0, zSig1, status);
}
6135 
6136 /*----------------------------------------------------------------------------
6137 | Returns the result of adding the extended double-precision floating-point
6138 | values `a' and `b'.  The operation is performed according to the IEC/IEEE
6139 | Standard for Binary Floating-Point Arithmetic.
6140 *----------------------------------------------------------------------------*/
6141 
6142 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
6143 {
6144     bool aSign, bSign;
6145 
6146     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6147         float_raise(float_flag_invalid, status);
6148         return floatx80_default_nan(status);
6149     }
6150     aSign = extractFloatx80Sign( a );
6151     bSign = extractFloatx80Sign( b );
6152     if ( aSign == bSign ) {
6153         return addFloatx80Sigs(a, b, aSign, status);
6154     }
6155     else {
6156         return subFloatx80Sigs(a, b, aSign, status);
6157     }
6158 
6159 }
6160 
6161 /*----------------------------------------------------------------------------
6162 | Returns the result of subtracting the extended double-precision floating-
6163 | point values `a' and `b'.  The operation is performed according to the
6164 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6165 *----------------------------------------------------------------------------*/
6166 
6167 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
6168 {
6169     bool aSign, bSign;
6170 
6171     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6172         float_raise(float_flag_invalid, status);
6173         return floatx80_default_nan(status);
6174     }
6175     aSign = extractFloatx80Sign( a );
6176     bSign = extractFloatx80Sign( b );
6177     if ( aSign == bSign ) {
6178         return subFloatx80Sigs(a, b, aSign, status);
6179     }
6180     else {
6181         return addFloatx80Sigs(a, b, aSign, status);
6182     }
6183 
6184 }
6185 
6186 /*----------------------------------------------------------------------------
6187 | Returns the result of multiplying the extended double-precision floating-
6188 | point values `a' and `b'.  The operation is performed according to the
6189 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6190 *----------------------------------------------------------------------------*/
6191 
floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;

    /* Invalid (unnormal/pseudo) encodings are an invalid operation. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    /* Result sign is the XOR of the operand signs. */
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        /* 'a' is NaN or infinity.  NaNs propagate; Inf * 0 is invalid;
         * otherwise the product is a signed infinity. */
        if (    (uint64_t) ( aSig<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( ( bExp | bSig ) == 0 ) goto invalid;
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* 0 * Inf: invalid operation, default NaN. */
        if ( ( aExp | aSig ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    /* Exact zeros yield a signed zero; subnormals are normalized. */
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    zExp = aExp + bExp - 0x3FFE;
    /* 64x64 -> 128-bit product of the significands; if the top bit came
     * out clear the product is in [1,2) rather than [2,4), so shift left
     * once and drop the exponent to renormalize. */
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
    if ( 0 < (int64_t) zSig0 ) {
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
        --zExp;
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6247 
6248 /*----------------------------------------------------------------------------
6249 | Returns the result of dividing the extended double-precision floating-point
6250 | value `a' by the corresponding value `b'.  The operation is performed
6251 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6252 *----------------------------------------------------------------------------*/
6253 
floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    uint64_t rem0, rem1, rem2, term0, term1, term2;

    /* Invalid (unnormal/pseudo) encodings are an invalid operation. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    /* Result sign is the XOR of the operand signs. */
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        /* 'a' is NaN or infinity.  Inf / Inf is invalid; Inf / finite
         * is a signed infinity. */
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            goto invalid;
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        /* finite / Inf is a signed zero (unless 'b' is a NaN). */
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return packFloatx80( zSign, 0, 0 );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* 0 / 0 is invalid; nonzero / 0 raises divide-by-zero and
             * returns a signed infinity. */
            if ( ( aExp | aSig ) == 0 ) {
 invalid:
                float_raise(float_flag_invalid, status);
                return floatx80_default_nan(status);
            }
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(zSign, floatx80_infinity_high,
                                       floatx80_infinity_low);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    zExp = aExp - bExp + 0x3FFE;
    rem1 = 0;
    /* Pre-shift the dividend so the significand quotient lands in [1,2). */
    if ( bSig <= aSig ) {
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
        ++zExp;
    }
    /* High 64 quotient bits: estimate, then correct downward until the
     * partial remainder is non-negative. */
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
    mul64To128( bSig, zSig0, &term0, &term1 );
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
    }
    /* Low 64 quotient bits; the exact correction is only needed when the
     * estimate is close to a rounding boundary.  Any leftover remainder
     * is folded into the sticky bit for correct rounding. */
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
        mul64To128( bSig, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
        }
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6334 
6335 /*----------------------------------------------------------------------------
6336 | Returns the remainder of the extended double-precision floating-point value
6337 | `a' with respect to the corresponding value `b'.  The operation is performed
6338 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6339 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
6340 | the quotient toward zero instead.  '*quotient' is set to the low 64 bits of
6341 | the absolute value of the integer quotient.
6342 *----------------------------------------------------------------------------*/
6343 
floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
                         float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff, aExpOrig;
    uint64_t aSig0, aSig1, bSig;
    uint64_t q, term0, term1, alternateASig0, alternateASig1;

    *quotient = 0;
    /* Invalid (unnormal/pseudo) encodings are an invalid operation. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExpOrig = aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    if ( aExp == 0x7FFF ) {
        /* NaN operands propagate; the remainder of an infinity is an
         * invalid operation. */
        if (    (uint64_t) ( aSig0<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if (aExp == 0 && aSig0 >> 63) {
            /*
             * Pseudo-denormal argument must be returned in normalized
             * form.
             */
            return packFloatx80(aSign, 1, aSig0);
        }
        /* Remainder by infinity leaves the dividend unchanged. */
        return a;
    }
    if ( bExp == 0 ) {
        /* Remainder by zero is invalid; subnormal divisors normalize. */
        if ( bSig == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return a;
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if ( expDiff < 0 ) {
        /* |a| is small enough that the (truncated) quotient is zero and
         * 'a' is returned as-is; for the round-to-nearest remainder,
         * expDiff == -1 may still require one subtraction below. */
        if ( mod || expDiff < -1 ) {
            if (aExp == 1 && aExpOrig == 0) {
                /*
                 * Pseudo-denormal argument must be returned in
                 * normalized form.
                 */
                return packFloatx80(aSign, aExp, aSig0);
            }
            return a;
        }
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
        expDiff = 0;
    }
    *quotient = q = ( bSig <= aSig0 );
    if ( q ) aSig0 -= bSig;
    expDiff -= 64;
    /* Long division, retiring up to 62 quotient bits per iteration while
     * accumulating the low 64 bits of the integer quotient. */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        /* Bias the estimate low so the remainder stays non-negative. */
        q = ( 2 < q ) ? q - 2 : 0;
        mul64To128( bSig, q, &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
        expDiff -= 62;
        *quotient <<= 62;
        *quotient += q;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        /* Final partial step for the remaining expDiff quotient bits;
         * the low-biased estimate is corrected upward afterwards. */
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
            ++q;
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        }
        if (expDiff < 64) {
            *quotient <<= expDiff;
        } else {
            *quotient = 0;
        }
        *quotient += q;
    }
    else {
        term1 = 0;
        term0 = bSig;
    }
    if (!mod) {
        /* IEEE remainder: choose the remainder nearest to zero, ties to
         * an even quotient; taking the alternate flips the sign. */
        sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
        if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
                || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
                        && ( q & 1 ) )
            ) {
            aSig0 = alternateASig0;
            aSig1 = alternateASig1;
            zSign = ! zSign;
            ++*quotient;
        }
    }
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1, status);

}
6464 
6465 /*----------------------------------------------------------------------------
6466 | Returns the remainder of the extended double-precision floating-point value
6467 | `a' with respect to the corresponding value `b'.  The operation is performed
6468 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6469 *----------------------------------------------------------------------------*/
6470 
6471 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6472 {
6473     uint64_t quotient;
6474     return floatx80_modrem(a, b, false, &quotient, status);
6475 }
6476 
6477 /*----------------------------------------------------------------------------
6478 | Returns the remainder of the extended double-precision floating-point value
6479 | `a' with respect to the corresponding value `b', with the quotient truncated
6480 | toward zero.
6481 *----------------------------------------------------------------------------*/
6482 
6483 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6484 {
6485     uint64_t quotient;
6486     return floatx80_modrem(a, b, true, &quotient, status);
6487 }
6488 
6489 /*----------------------------------------------------------------------------
6490 | Returns the square root of the extended double-precision floating-point
6491 | value `a'.  The operation is performed according to the IEC/IEEE Standard
6492 | for Binary Floating-Point Arithmetic.
6493 *----------------------------------------------------------------------------*/
6494 
floatx80 floatx80_sqrt(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    /* Invalid (unnormal/pseudo) encodings are an invalid operation. */
    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN propagates; sqrt(+Inf) is +Inf; sqrt(-Inf) is invalid. */
        if ((uint64_t)(aSig0 << 1)) {
            return propagateFloatx80NaN(a, a, status);
        }
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt of any negative number other than -0 is invalid. */
        if ( ( aExp | aSig0 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    /* Halve the unbiased exponent, then build the 64 high root bits from
     * an initial estimate refined against the shifted significand. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        /* Estimate too high: step the root down and fix the remainder. */
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Low 64 root bits; the exact correction is only needed near a
     * rounding boundary, and any leftover remainder is folded into the
     * sticky bit for correct rounding. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    /* Assemble the full 128-bit significand of the root. */
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= doubleZSig0;
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                0, zExp, zSig0, zSig1, status);
}
6559 
6560 /*----------------------------------------------------------------------------
6561 | Returns the result of converting the quadruple-precision floating-point
6562 | value `a' to the 32-bit two's complement integer format.  The conversion
6563 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6564 | Arithmetic---which means in particular that the conversion is rounded
6565 | according to the current rounding mode.  If `a' is a NaN, the largest
6566 | positive integer is returned.  Otherwise, if the conversion overflows, the
6567 | largest integer with the same sign as `a' is returned.
6568 *----------------------------------------------------------------------------*/
6569 
int32_t float128_to_int32(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    /* NaNs convert as if positive so the largest positive int results. */
    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
    /* Make the implicit integer bit explicit for normal numbers. */
    if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
    /* Fold the low 64 fraction bits into a sticky bit. */
    aSig0 |= ( aSig1 != 0 );
    shiftCount = 0x4028 - aExp;
    /* Shift right with jamming so rounding sees all discarded bits. */
    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
    return roundAndPackInt32(aSign, aSig0, status);

}
6588 
6589 /*----------------------------------------------------------------------------
6590 | Returns the result of converting the quadruple-precision floating-point
6591 | value `a' to the 32-bit two's complement integer format.  The conversion
6592 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6593 | Arithmetic, except that the conversion is always rounded toward zero.  If
6594 | `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
6595 | conversion overflows, the largest integer with the same sign as `a' is
6596 | returned.
6597 *----------------------------------------------------------------------------*/
6598 
int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig0, aSig1, savedASig;
    int32_t z;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    /* Fold the low 64 fraction bits into a sticky bit. */
    aSig0 |= ( aSig1 != 0 );
    if ( 0x401E < aExp ) {
        /* Magnitude too large for int32 (NaNs count as positive). */
        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
        goto invalid;
    }
    else if ( aExp < 0x3FFF ) {
        /* |a| < 1 truncates to 0; inexact unless exactly zero. */
        if (aExp || aSig0) {
            float_raise(float_flag_inexact, status);
        }
        return 0;
    }
    /* Make the implicit integer bit explicit, then truncate. */
    aSig0 |= UINT64_C(0x0001000000000000);
    shiftCount = 0x402F - aExp;
    savedASig = aSig0;
    aSig0 >>= shiftCount;
    z = aSig0;
    if ( aSign ) z = - z;
    /* A sign mismatch after negation means the value overflowed int32. */
    if ( ( z < 0 ) ^ aSign ) {
 invalid:
        float_raise(float_flag_invalid, status);
        return aSign ? INT32_MIN : INT32_MAX;
    }
    /* Any bits shifted out make the conversion inexact. */
    if ( ( aSig0<<shiftCount ) != savedASig ) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
6638 
6639 /*----------------------------------------------------------------------------
6640 | Returns the result of converting the quadruple-precision floating-point
6641 | value `a' to the 64-bit two's complement integer format.  The conversion
6642 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6643 | Arithmetic---which means in particular that the conversion is rounded
6644 | according to the current rounding mode.  If `a' is a NaN, the largest
6645 | positive integer is returned.  Otherwise, if the conversion overflows, the
6646 | largest integer with the same sign as `a' is returned.
6647 *----------------------------------------------------------------------------*/
6648 
int64_t float128_to_int64(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    /* Make the implicit integer bit explicit for normal numbers. */
    if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
    shiftCount = 0x402F - aExp;
    if ( shiftCount <= 0 ) {
        if ( 0x403E < aExp ) {
            /* Out of int64 range (or NaN): saturate.  NaNs and positive
             * overflows yield INT64_MAX; negative overflows INT64_MIN. */
            float_raise(float_flag_invalid, status);
            if (    ! aSign
                 || (    ( aExp == 0x7FFF )
                      && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
                    )
               ) {
                return INT64_MAX;
            }
            return INT64_MIN;
        }
        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
    }
    else {
        /* Shift right, keeping round/sticky bits for the final round. */
        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
    }
    return roundAndPackInt64(aSign, aSig0, aSig1, status);

}
6681 
6682 /*----------------------------------------------------------------------------
6683 | Returns the result of converting the quadruple-precision floating-point
6684 | value `a' to the 64-bit two's complement integer format.  The conversion
6685 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6686 | Arithmetic, except that the conversion is always rounded toward zero.
6687 | If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
6688 | the conversion overflows, the largest integer with the same sign as `a' is
6689 | returned.
6690 *----------------------------------------------------------------------------*/
6691 
int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig0, aSig1;
    int64_t z;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    /* Make the implicit integer bit explicit for normal numbers. */
    if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
    shiftCount = aExp - 0x402F;
    if ( 0 < shiftCount ) {
        if ( 0x403E <= aExp ) {
            aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
            /* Values that truncate to exactly INT64_MIN are still in
             * range; only the discarded low bits make them inexact. */
            if (    ( a.high == UINT64_C(0xC03E000000000000) )
                 && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
                if (aSig1) {
                    float_raise(float_flag_inexact, status);
                }
            }
            else {
                /* Genuine overflow (or NaN): saturate. */
                float_raise(float_flag_invalid, status);
                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
                    return INT64_MAX;
                }
            }
            return INT64_MIN;
        }
        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
        if ( (uint64_t) ( aSig1<<shiftCount ) ) {
            float_raise(float_flag_inexact, status);
        }
    }
    else {
        if ( aExp < 0x3FFF ) {
            /* |a| < 1 truncates to 0; inexact unless exactly zero. */
            if ( aExp | aSig0 | aSig1 ) {
                float_raise(float_flag_inexact, status);
            }
            return 0;
        }
        z = aSig0>>( - shiftCount );
        /* Any bits shifted out make the conversion inexact. */
        if (    aSig1
             || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
            float_raise(float_flag_inexact, status);
        }
    }
    if ( aSign ) z = - z;
    return z;

}
6744 
6745 /*----------------------------------------------------------------------------
6746 | Returns the result of converting the quadruple-precision floating-point value
6747 | `a' to the 64-bit unsigned integer format.  The conversion is
6748 | performed according to the IEC/IEEE Standard for Binary Floating-Point
6749 | Arithmetic---which means in particular that the conversion is rounded
6750 | according to the current rounding mode.  If `a' is a NaN, the largest
6751 | positive integer is returned.  If the conversion overflows, the
6752 | largest unsigned integer is returned.  If 'a' is negative, the value is
6753 | rounded and zero is returned; negative values that do not round to zero
6754 | will raise the inexact exception.
6755 *----------------------------------------------------------------------------*/
6756 
uint64_t float128_to_uint64(float128 a, float_status *status)
{
    bool aSign;
    int aExp;
    int shiftCount;
    uint64_t aSig0, aSig1;

    aSig0 = extractFloat128Frac0(a);
    aSig1 = extractFloat128Frac1(a);
    aExp = extractFloat128Exp(a);
    aSign = extractFloat128Sign(a);
    /* Negative values with |a| >= 1 (and negative NaNs) cannot produce
     * an unsigned result: invalid, saturating to 0 (UINT64_MAX for
     * NaN).  Negative values in (-1, 0) fall through to the rounding
     * code below. */
    if (aSign && (aExp > 0x3FFE)) {
        float_raise(float_flag_invalid, status);
        if (float128_is_any_nan(a)) {
            return UINT64_MAX;
        } else {
            return 0;
        }
    }
    /* Make the implicit integer bit explicit for normal numbers. */
    if (aExp) {
        aSig0 |= UINT64_C(0x0001000000000000);
    }
    shiftCount = 0x402F - aExp;
    if (shiftCount <= 0) {
        /* Exponent too large: overflow (also catches +Inf and +NaN). */
        if (0x403E < aExp) {
            float_raise(float_flag_invalid, status);
            return UINT64_MAX;
        }
        shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
    } else {
        /* Shift right, keeping round/sticky bits for the final round. */
        shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
    }
    return roundAndPackUint64(aSign, aSig0, aSig1, status);
}
6791 
6792 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6793 {
6794     uint64_t v;
6795     signed char current_rounding_mode = status->float_rounding_mode;
6796 
6797     set_float_rounding_mode(float_round_to_zero, status);
6798     v = float128_to_uint64(a, status);
6799     set_float_rounding_mode(current_rounding_mode, status);
6800 
6801     return v;
6802 }
6803 
6804 /*----------------------------------------------------------------------------
6805 | Returns the result of converting the quadruple-precision floating-point
6806 | value `a' to the 32-bit unsigned integer format.  The conversion
6807 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6808 | Arithmetic except that the conversion is always rounded toward zero.
6809 | If `a' is a NaN, the largest positive integer is returned.  Otherwise,
6810 | if the conversion overflows, the largest unsigned integer is returned.
6811 | If 'a' is negative, the value is rounded and zero is returned; negative
6812 | values that do not round to zero will raise the inexact exception.
6813 *----------------------------------------------------------------------------*/
6814 
6815 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6816 {
6817     uint64_t v;
6818     uint32_t res;
6819     int old_exc_flags = get_float_exception_flags(status);
6820 
6821     v = float128_to_uint64_round_to_zero(a, status);
6822     if (v > 0xffffffff) {
6823         res = 0xffffffff;
6824     } else {
6825         return v;
6826     }
6827     set_float_exception_flags(old_exc_flags, status);
6828     float_raise(float_flag_invalid, status);
6829     return res;
6830 }
6831 
6832 /*----------------------------------------------------------------------------
6833 | Returns the result of converting the quadruple-precision floating-point value
6834 | `a' to the 32-bit unsigned integer format.  The conversion is
6835 | performed according to the IEC/IEEE Standard for Binary Floating-Point
6836 | Arithmetic---which means in particular that the conversion is rounded
6837 | according to the current rounding mode.  If `a' is a NaN, the largest
6838 | positive integer is returned.  If the conversion overflows, the
6839 | largest unsigned integer is returned.  If 'a' is negative, the value is
6840 | rounded and zero is returned; negative values that do not round to zero
6841 | will raise the inexact exception.
6842 *----------------------------------------------------------------------------*/
6843 
6844 uint32_t float128_to_uint32(float128 a, float_status *status)
6845 {
6846     uint64_t v;
6847     uint32_t res;
6848     int old_exc_flags = get_float_exception_flags(status);
6849 
6850     v = float128_to_uint64(a, status);
6851     if (v > 0xffffffff) {
6852         res = 0xffffffff;
6853     } else {
6854         return v;
6855     }
6856     set_float_exception_flags(old_exc_flags, status);
6857     float_raise(float_flag_invalid, status);
6858     return res;
6859 }
6860 
6861 /*----------------------------------------------------------------------------
6862 | Returns the result of converting the quadruple-precision floating-point
6863 | value `a' to the single-precision floating-point format.  The conversion
6864 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6865 | Arithmetic.
6866 *----------------------------------------------------------------------------*/
6867 
float32 float128_to_float32(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;
    uint32_t zSig;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN goes through the canonical-NaN conversion path; infinity
         * maps straight to a float32 infinity. */
        if ( aSig0 | aSig1 ) {
            return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
        }
        return packFloat32( aSign, 0xFF, 0 );
    }
    /* Fold the low fraction word into a sticky bit, then narrow the
     * significand with jamming so rounding remains correct. */
    aSig0 |= ( aSig1 != 0 );
    shift64RightJamming( aSig0, 18, &aSig0 );
    zSig = aSig0;
    if ( aExp || zSig ) {
        /* Set the explicit integer bit and rebias the exponent. */
        zSig |= 0x40000000;
        aExp -= 0x3F81;
    }
    return roundAndPackFloat32(aSign, aExp, zSig, status);

}
6895 
6896 /*----------------------------------------------------------------------------
6897 | Returns the result of converting the quadruple-precision floating-point
6898 | value `a' to the double-precision floating-point format.  The conversion
6899 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6900 | Arithmetic.
6901 *----------------------------------------------------------------------------*/
6902 
float64 float128_to_float64(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN goes through the canonical-NaN conversion path; infinity
         * maps straight to a float64 infinity. */
        if ( aSig0 | aSig1 ) {
            return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
        }
        return packFloat64( aSign, 0x7FF, 0 );
    }
    /* Left-align the significand, folding the remaining low bits into a
     * sticky bit. */
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
    aSig0 |= ( aSig1 != 0 );
    if ( aExp || aSig0 ) {
        /* Set the explicit integer bit and rebias the exponent. */
        aSig0 |= UINT64_C(0x4000000000000000);
        aExp -= 0x3C01;
    }
    return roundAndPackFloat64(aSign, aExp, aSig0, status);

}
6928 
6929 /*----------------------------------------------------------------------------
6930 | Returns the result of converting the quadruple-precision floating-point
6931 | value `a' to the extended double-precision floating-point format.  The
6932 | conversion is performed according to the IEC/IEEE Standard for Binary
6933 | Floating-Point Arithmetic.
6934 *----------------------------------------------------------------------------*/
6935 
floatx80 float128_to_floatx80(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        if ( aSig0 | aSig1 ) {
            /* Convert via the canonical NaN and return it quieted. */
            floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        return packFloatx80(aSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        /* Zeros pass straight through; subnormals are normalized. */
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    else {
        /* Make the implicit integer bit explicit. */
        aSig0 |= UINT64_C(0x0001000000000000);
    }
    /* Left-align to the explicit 64-bit floatx80 significand and round
     * at full 80-bit precision. */
    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
    return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);

}
6966 
6967 /*----------------------------------------------------------------------------
6968 | Rounds the quadruple-precision floating-point value `a' to an integer, and
6969 | returns the result as a quadruple-precision floating-point value.  The
6970 | operation is performed according to the IEC/IEEE Standard for Binary
6971 | Floating-Point Arithmetic.
6972 *----------------------------------------------------------------------------*/
6973 
float128 float128_round_to_int(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t lastBitMask, roundBitsMask;
    float128 z;

    aExp = extractFloat128Exp( a );
    if ( 0x402F <= aExp ) {
        /* Integer boundary lies in the low 64-bit word (or beyond it). */
        if ( 0x406F <= aExp ) {
            /* All fraction bits are integral already; only NaN needs work. */
            if (    ( aExp == 0x7FFF )
                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
               ) {
                return propagateFloat128NaN(a, a, status);
            }
            return a;
        }
        /*
         * lastBitMask selects the lowest integral bit of z.low;
         * roundBitsMask covers the fraction bits below it.  The extra
         * "<<1" keeps the shift count in range when aExp == 0x406E
         * (lastBitMask becomes 0, meaning the last bit is bit 0 of high).
         */
        lastBitMask = 1;
        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
        roundBitsMask = lastBitMask - 1;
        z = a;
        switch (status->float_rounding_mode) {
        case float_round_nearest_even:
            if ( lastBitMask ) {
                /* Add half an ulp; on a tie, clear the last bit (even). */
                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
            }
            else {
                /* Half-ulp is bit 63 of z.low; carry into z.high on round. */
                if ( (int64_t) z.low < 0 ) {
                    ++z.high;
                    if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
                }
            }
            break;
        case float_round_ties_away:
            if (lastBitMask) {
                add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
            } else {
                if ((int64_t) z.low < 0) {
                    ++z.high;
                }
            }
            break;
        case float_round_to_zero:
            /* Truncation: just mask off the fraction bits below. */
            break;
        case float_round_up:
            if (!extractFloat128Sign(z)) {
                add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
            }
            break;
        case float_round_down:
            if (extractFloat128Sign(z)) {
                add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
            }
            break;
        case float_round_to_odd:
            /*
             * Note that if lastBitMask == 0, the last bit is the lsb
             * of high, and roundBitsMask == -1.
             */
            if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
                add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
            }
            break;
        default:
            abort();
        }
        z.low &= ~ roundBitsMask;
    }
    else {
        if ( aExp < 0x3FFF ) {
            /* |a| < 1: result is 0, +/-1, or a itself (if a is zero). */
            if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
            float_raise(float_flag_inexact, status);
            aSign = extractFloat128Sign( a );
            switch (status->float_rounding_mode) {
            case float_round_nearest_even:
                /* Round to 1 only when strictly above one half. */
                if (    ( aExp == 0x3FFE )
                     && (   extractFloat128Frac0( a )
                          | extractFloat128Frac1( a ) )
                   ) {
                    return packFloat128( aSign, 0x3FFF, 0, 0 );
                }
                break;
            case float_round_ties_away:
                /* One half or more rounds away from zero. */
                if (aExp == 0x3FFE) {
                    return packFloat128(aSign, 0x3FFF, 0, 0);
                }
                break;
            case float_round_down:
                return
                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
                    : packFloat128( 0, 0, 0, 0 );
            case float_round_up:
                return
                      aSign ? packFloat128( 1, 0, 0, 0 )
                    : packFloat128( 0, 0x3FFF, 0, 0 );

            case float_round_to_odd:
                return packFloat128(aSign, 0x3FFF, 0, 0);

            case float_round_to_zero:
                break;
            }
            return packFloat128( aSign, 0, 0, 0 );
        }
        /* Integer boundary lies in the high word; a.low is all fraction. */
        lastBitMask = 1;
        lastBitMask <<= 0x402F - aExp;
        roundBitsMask = lastBitMask - 1;
        z.low = 0;
        z.high = a.high;
        switch (status->float_rounding_mode) {
        case float_round_nearest_even:
            z.high += lastBitMask>>1;
            /* On an exact tie (low word empty too), force the even result. */
            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
                z.high &= ~ lastBitMask;
            }
            break;
        case float_round_ties_away:
            z.high += lastBitMask>>1;
            break;
        case float_round_to_zero:
            break;
        case float_round_up:
            if (!extractFloat128Sign(z)) {
                /* Fold a.low into a sticky bit before adding all-ones. */
                z.high |= ( a.low != 0 );
                z.high += roundBitsMask;
            }
            break;
        case float_round_down:
            if (extractFloat128Sign(z)) {
                z.high |= (a.low != 0);
                z.high += roundBitsMask;
            }
            break;
        case float_round_to_odd:
            if ((z.high & lastBitMask) == 0) {
                z.high |= (a.low != 0);
                z.high += roundBitsMask;
            }
            break;
        default:
            abort();
        }
        z.high &= ~ roundBitsMask;
    }
    /* Any change to the value means the result was inexact. */
    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
7125 
7126 /*----------------------------------------------------------------------------
7127 | Returns the result of dividing the quadruple-precision floating-point value
7128 | `a' by the corresponding value `b'.  The operation is performed according to
7129 | the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7130 *----------------------------------------------------------------------------*/
7131 
float128 float128_div(float128 a, float128 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    bSign = extractFloat128Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        if (aSig0 | aSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        if ( bExp == 0x7FFF ) {
            if (bSig0 | bSig1) {
                return propagateFloat128NaN(a, b, status);
            }
            /* Inf / Inf is invalid. */
            goto invalid;
        }
        /* Inf / finite = Inf. */
        return packFloat128( zSign, 0x7FFF, 0, 0 );
    }
    if ( bExp == 0x7FFF ) {
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        /* finite / Inf = 0. */
        return packFloat128( zSign, 0, 0, 0 );
    }
    if ( bExp == 0 ) {
        if ( ( bSig0 | bSig1 ) == 0 ) {
            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
                /* 0 / 0 is invalid. */
 invalid:
                float_raise(float_flag_invalid, status);
                return float128_default_nan(status);
            }
            /* Nonzero / 0 signals division by zero and returns Inf. */
            float_raise(float_flag_divbyzero, status);
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    zExp = aExp - bExp + 0x3FFD;
    /* Left-align both significands with the implicit bit restored. */
    shortShift128Left(
        aSig0 | UINT64_C(0x0001000000000000), aSig1, 15, &aSig0, &aSig1 );
    shortShift128Left(
        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
    /* Ensure the dividend is below the divisor so the quotient is < 1. */
    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
        ++zExp;
    }
    /* First 64 quotient bits: estimate, then correct by adding back b. */
    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
    }
    /* Next 64 quotient bits from the remainder. */
    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
        /* Estimate may be off near a rounding boundary: refine and set
         * a sticky bit from any nonzero final remainder. */
        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
        }
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    /* Re-align and keep the shifted-out bits for rounding. */
    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);

}
7212 
7213 /*----------------------------------------------------------------------------
7214 | Returns the remainder of the quadruple-precision floating-point value `a'
7215 | with respect to the corresponding value `b'.  The operation is performed
7216 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7217 *----------------------------------------------------------------------------*/
7218 
float128 float128_rem(float128 a, float128 b, float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
    int64_t sigMean0;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    if ( aExp == 0x7FFF ) {
        /* NaN operands propagate; Inf rem anything is invalid. */
        if (    ( aSig0 | aSig1 )
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
            return propagateFloat128NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        /* finite rem Inf = the finite operand unchanged. */
        return a;
    }
    if ( bExp == 0 ) {
        if ( ( bSig0 | bSig1 ) == 0 ) {
            /* a rem 0 is invalid. */
 invalid:
            float_raise(float_flag_invalid, status);
            return float128_default_nan(status);
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    expDiff = aExp - bExp;
    /* |a| < |b|/2: a is already the remainder. */
    if ( expDiff < -1 ) return a;
    /* Left-align significands; shift one bit less when expDiff == -1. */
    shortShift128Left(
        aSig0 | UINT64_C(0x0001000000000000),
        aSig1,
        15 - ( expDiff < 0 ),
        &aSig0,
        &aSig1
    );
    shortShift128Left(
        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
    q = le128( bSig0, bSig1, aSig0, aSig1 );
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    expDiff -= 64;
    /* Consume 61 quotient bits per iteration until expDiff is small.
     * The estimate is deliberately lowered by 4 so it never exceeds
     * the true quotient. */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
        expDiff -= 61;
    }
    if ( -64 < expDiff ) {
        /* Final partial quotient, truncated to the remaining bit count. */
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        q >>= - expDiff;
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
        expDiff += 52;
        if ( expDiff < 0 ) {
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
        }
        else {
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
        }
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
    }
    else {
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
    }
    /* Subtract b until the remainder goes negative, keeping the last
     * non-negative value; this brackets the true remainder. */
    do {
        alternateASig0 = aSig0;
        alternateASig1 = aSig1;
        ++q;
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    } while ( 0 <= (int64_t) aSig0 );
    /* Pick the candidate nearest to zero (ties go to even quotient). */
    add128(
        aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
    if (    ( sigMean0 < 0 )
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
        aSig0 = alternateASig0;
        aSig1 = alternateASig1;
    }
    zSign = ( (int64_t) aSig0 < 0 );
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
    return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
                                         status);
}
7319 
7320 /*----------------------------------------------------------------------------
7321 | Returns the square root of the quadruple-precision floating-point value `a'.
7322 | The operation is performed according to the IEC/IEEE Standard for Binary
7323 | Floating-Point Arithmetic.
7324 *----------------------------------------------------------------------------*/
7325 
float128 float128_sqrt(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        if (aSig0 | aSig1) {
            return propagateFloat128NaN(a, a, status);
        }
        /* sqrt(+Inf) = +Inf; sqrt(-Inf) is invalid. */
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt(-0) = -0; sqrt of any other negative is invalid. */
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return float128_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    /* Result exponent is half the unbiased input exponent. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
    aSig0 |= UINT64_C(0x0001000000000000);
    /* Initial 32-bit root estimate, refined to 64 bits below. */
    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    /* Correct zSig0 downward until the remainder a - zSig0^2 >= 0. */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Next 64 root bits from the remainder. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
        /* Near a rounding boundary: refine fully and set a sticky bit
         * from any nonzero final remainder. */
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    /* Re-align and keep shifted-out bits for rounding. */
    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
    return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);

}
7387 
/*
 * Compare two extended-double values.  When is_quiet is false any NaN
 * operand raises the invalid flag; when true only signaling NaNs do.
 * Returns a FloatRelation (less/equal/greater/unordered).
 */
static inline FloatRelation
floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    /* Unnormal/pseudo encodings are invalid and compare unordered. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return float_relation_unordered;
    }
    /* NaN operands: unordered, with invalid raised per is_quiet. */
    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
          ( extractFloatx80Frac( a )<<1 ) ) ||
        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
          ( extractFloatx80Frac( b )<<1 ) )) {
        if (!is_quiet ||
            floatx80_is_signaling_nan(a, status) ||
            floatx80_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloatx80Sign( a );
    bSign = extractFloatx80Sign( b );
    if ( aSign != bSign ) {

        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
             ( ( a.low | b.low ) == 0 ) ) {
            /* zero case */
            return float_relation_equal;
        } else {
            /* Opposite signs: positive operand is the greater one. */
            return 1 - (2 * aSign);
        }
    } else {
        /* Normalize pseudo-denormals before comparison.  */
        if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
            ++a.high;
        }
        if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
            ++b.high;
        }
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            /* Same sign: magnitude order, inverted for negatives. */
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}
7435 
7436 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
7437 {
7438     return floatx80_compare_internal(a, b, 0, status);
7439 }
7440 
7441 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
7442                                      float_status *status)
7443 {
7444     return floatx80_compare_internal(a, b, 1, status);
7445 }
7446 
/*
 * Compare two quadruple-precision values.  When is_quiet is false any
 * NaN operand raises the invalid flag; when true only signaling NaNs do.
 * Returns a FloatRelation (less/equal/greater/unordered).
 */
static inline FloatRelation
float128_compare_internal(float128 a, float128 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    /* NaN operands: unordered, with invalid raised per is_quiet. */
    if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
          ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
        ( ( extractFloat128Exp( b ) == 0x7fff ) &&
          ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
        if (!is_quiet ||
            float128_is_signaling_nan(a, status) ||
            float128_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloat128Sign( a );
    bSign = extractFloat128Sign( b );
    if ( aSign != bSign ) {
        if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
            /* zero case */
            return float_relation_equal;
        } else {
            /* Opposite signs: positive operand is the greater one. */
            return 1 - (2 * aSign);
        }
    } else {
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            /* Same sign: magnitude order, inverted for negatives. */
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}
7481 
7482 FloatRelation float128_compare(float128 a, float128 b, float_status *status)
7483 {
7484     return float128_compare_internal(a, b, 0, status);
7485 }
7486 
7487 FloatRelation float128_compare_quiet(float128 a, float128 b,
7488                                      float_status *status)
7489 {
7490     return float128_compare_internal(a, b, 1, status);
7491 }
7492 
7493 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
7494 {
7495     bool aSign;
7496     int32_t aExp;
7497     uint64_t aSig;
7498 
7499     if (floatx80_invalid_encoding(a)) {
7500         float_raise(float_flag_invalid, status);
7501         return floatx80_default_nan(status);
7502     }
7503     aSig = extractFloatx80Frac( a );
7504     aExp = extractFloatx80Exp( a );
7505     aSign = extractFloatx80Sign( a );
7506 
7507     if ( aExp == 0x7FFF ) {
7508         if ( aSig<<1 ) {
7509             return propagateFloatx80NaN(a, a, status);
7510         }
7511         return a;
7512     }
7513 
7514     if (aExp == 0) {
7515         if (aSig == 0) {
7516             return a;
7517         }
7518         aExp++;
7519     }
7520 
7521     if (n > 0x10000) {
7522         n = 0x10000;
7523     } else if (n < -0x10000) {
7524         n = -0x10000;
7525     }
7526 
7527     aExp += n;
7528     return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7529                                          aSign, aExp, aSig, 0, status);
7530 }
7531 
7532 float128 float128_scalbn(float128 a, int n, float_status *status)
7533 {
7534     bool aSign;
7535     int32_t aExp;
7536     uint64_t aSig0, aSig1;
7537 
7538     aSig1 = extractFloat128Frac1( a );
7539     aSig0 = extractFloat128Frac0( a );
7540     aExp = extractFloat128Exp( a );
7541     aSign = extractFloat128Sign( a );
7542     if ( aExp == 0x7FFF ) {
7543         if ( aSig0 | aSig1 ) {
7544             return propagateFloat128NaN(a, a, status);
7545         }
7546         return a;
7547     }
7548     if (aExp != 0) {
7549         aSig0 |= UINT64_C(0x0001000000000000);
7550     } else if (aSig0 == 0 && aSig1 == 0) {
7551         return a;
7552     } else {
7553         aExp++;
7554     }
7555 
7556     if (n > 0x10000) {
7557         n = 0x10000;
7558     } else if (n < -0x10000) {
7559         n = -0x10000;
7560     }
7561 
7562     aExp += n - 1;
7563     return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
7564                                          , status);
7565 
7566 }
7567 
7568 static void __attribute__((constructor)) softfloat_init(void)
7569 {
7570     union_float64 ua, ub, uc, ur;
7571 
7572     if (QEMU_NO_HARDFLOAT) {
7573         return;
7574     }
7575     /*
7576      * Test that the host's FMA is not obviously broken. For example,
7577      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
7578      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
7579      */
7580     ua.s = 0x0020000000000001ULL;
7581     ub.s = 0x3ca0000000000000ULL;
7582     uc.s = 0x0020000000000000ULL;
7583     ur.h = fma(ua.h, ub.h, uc.h);
7584     if (ur.s != 0x0020000000000001ULL) {
7585         force_soft_fma = true;
7586     }
7587 }
7588