xref: /openbmc/qemu/fpu/softfloat.c (revision ee6959f2)
1 /*
2  * QEMU float support
3  *
4  * The code in this source file is derived from release 2a of the SoftFloat
5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6  * some later contributions) are provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file after December 1st 2014 will be
14  * taken to be licensed under the Softfloat-2a license unless specifically
15  * indicated otherwise.
16  */
17 
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22 
23 Written by John R. Hauser.  This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704.  Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980.  The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32 
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43 
44 ===============================================================================
45 */
46 
47 /* BSD licensing:
48  * Copyright (c) 2006, Fabrice Bellard
49  * All rights reserved.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions are met:
53  *
54  * 1. Redistributions of source code must retain the above copyright notice,
55  * this list of conditions and the following disclaimer.
56  *
57  * 2. Redistributions in binary form must reproduce the above copyright notice,
58  * this list of conditions and the following disclaimer in the documentation
59  * and/or other materials provided with the distribution.
60  *
61  * 3. Neither the name of the copyright holder nor the names of its contributors
62  * may be used to endorse or promote products derived from this software without
63  * specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75  * THE POSSIBILITY OF SUCH DAMAGE.
76  */
77 
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79  * version 2 or later. See the COPYING file in the top-level directory.
80  */
81 
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83  * target-dependent and needs the TARGET_* macros.
84  */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89 
90 /* We only need stdlib for abort() */
91 
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations.  (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98 
99 /*
100  * Hardfloat
101  *
102  * Fast emulation of guest FP instructions is challenging for two reasons.
103  * First, FP instruction semantics are similar but not identical, particularly
104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
105  * exception flags is not trivial: reading the host's flags register with a
106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107  * and trapping on every FP exception is not fast nor pleasant to work with.
108  *
109  * We address these challenges by leveraging the host FPU for a subset of the
110  * operations. To do this we expand on the idea presented in this paper:
111  *
112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114  *
115  * The idea is thus to leverage the host FPU to (1) compute FP operations
116  * and (2) identify whether FP exceptions occurred while avoiding
117  * expensive exception flag register accesses.
118  *
119  * An important optimization shown in the paper is that given that exception
120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121  * This is particularly useful for the inexact flag, which is very frequently
122  * raised in floating-point workloads.
123  *
124  * We optimize the code further by deferring to soft-fp whenever FP exception
125  * detection might get hairy. Two examples: (1) when at least one operand is
126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127  * and the result is < the minimum normal.
128  */
/*
 * GEN_INPUT_FLUSH__NOCHECK(name, soft_t):
 * Define "name", which flushes a denormal *a to zero (keeping its sign)
 * in place and raises float_flag_input_denormal.  The caller is expected
 * to have already checked s->flush_inputs_to_zero.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK

/*
 * GEN_INPUT_FLUSH1/2/3(name, soft_t):
 * Define "name", flushing 1/2/3 denormal inputs to zero in place.
 * These check s->flush_inputs_to_zero first, so they are cheap no-ops
 * in the common case where input flushing is disabled.
 */
#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1

#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2

#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
184 
185 /*
186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
187  * hardfloat functions. Each combination of number of inputs and float size
188  * gets its own value.
189  */
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
197 #else
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
204 #endif
205 
206 /*
207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208  * float{32,64}_is_infinity when !USE_FP.
209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211  */
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF   1
214 #else
215 # define QEMU_HARDFLOAT_USE_ISINF   0
216 #endif
217 
218 /*
219  * Some targets clear the FP flags before most FP operations. This prevents
220  * the use of hardfloat, since hardfloat relies on the inexact flag being
221  * already set.
222  */
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
226     IEEE implementation
227 # endif
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
230 #else
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
233 #endif
234 
235 static inline bool can_use_fpu(const float_status *s)
236 {
237     if (QEMU_NO_HARDFLOAT) {
238         return false;
239     }
240     return likely(s->float_exception_flags & float_flag_inexact &&
241                   s->float_rounding_mode == float_round_nearest_even);
242 }
243 
244 /*
245  * Hardfloat generation functions. Each operation can have two flavors:
246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247  * most condition checks, or native ones (e.g. fpclassify).
248  *
249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
250  * compiler to propagate constants and inline everything into the callers.
251  *
252  * We only generate functions for operations with two inputs, since only
253  * these are common enough to justify consolidating them into common code.
254  */
255 
256 typedef union {
257     float32 s;
258     float h;
259 } union_float32;
260 
261 typedef union {
262     float64 s;
263     double h;
264 } union_float64;
265 
266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268 
269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271 typedef float   (*hard_f32_op2_fn)(float a, float b);
272 typedef double  (*hard_f64_op2_fn)(double a, double b);
273 
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
278         /*
279          * Not using a temp variable for consecutive fpclassify calls ends up
280          * generating faster code.
281          */
282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284     }
285     return float32_is_zero_or_normal(a.s) &&
286            float32_is_zero_or_normal(b.s);
287 }
288 
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290 {
291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294     }
295     return float64_is_zero_or_normal(a.s) &&
296            float64_is_zero_or_normal(b.s);
297 }
298 
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307     }
308     return float32_is_zero_or_normal(a.s) &&
309            float32_is_zero_or_normal(b.s) &&
310            float32_is_zero_or_normal(c.s);
311 }
312 
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320     }
321     return float64_is_zero_or_normal(a.s) &&
322            float64_is_zero_or_normal(b.s) &&
323            float64_is_zero_or_normal(c.s);
324 }
325 
326 static inline bool f32_is_inf(union_float32 a)
327 {
328     if (QEMU_HARDFLOAT_USE_ISINF) {
329         return isinf(a.h);
330     }
331     return float32_is_infinity(a.s);
332 }
333 
334 static inline bool f64_is_inf(union_float64 a)
335 {
336     if (QEMU_HARDFLOAT_USE_ISINF) {
337         return isinf(a.h);
338     }
339     return float64_is_infinity(a.s);
340 }
341 
/*
 * Generic 2-input single-precision op: try the host FPU, fall back to
 * softfloat when the fast path cannot faithfully reproduce flags/results.
 *   hard/soft: host-FPU and softfloat implementations of the operation
 *   pre:  must hold before running the host op (e.g. operands zero/normal)
 *   post: together with a tiny result, indicates a possible underflow that
 *         must be recomputed in softfloat
 */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    /* Flush denormal inputs (if enabled) before the pre() check. */
    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        /* An infinite result from finite operands means overflow. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        /* Result at or below the minimum normal: redo in softfloat so
         * underflow/denormal flags come out right. */
        goto soft;
    }
    return ur.s;

 soft:
    /* Note: uses the possibly-flushed ua/ub, not the raw xa/xb. */
    return soft(ua.s, ub.s, s);
}
372 
/*
 * Generic 2-input double-precision op; double-precision counterpart of
 * float32_gen2, with identical fast-path/fallback structure.
 */
static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    /* Flush denormal inputs (if enabled) before the pre() check. */
    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        /* An infinite result from finite operands means overflow. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        /* Possible underflow: recompute in softfloat for correct flags. */
        goto soft;
    }
    return ur.s;

 soft:
    /* Note: uses the possibly-flushed ua/ub, not the raw xa/xb. */
    return soft(ua.s, ub.s, s);
}
403 
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
407 
408 static inline uint32_t extractFloat32Frac(float32 a)
409 {
410     return float32_val(a) & 0x007FFFFF;
411 }
412 
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
416 
417 static inline int extractFloat32Exp(float32 a)
418 {
419     return (float32_val(a) >> 23) & 0xFF;
420 }
421 
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
425 
426 static inline bool extractFloat32Sign(float32 a)
427 {
428     return float32_val(a) >> 31;
429 }
430 
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
434 
435 static inline uint64_t extractFloat64Frac(float64 a)
436 {
437     return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
438 }
439 
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
443 
444 static inline int extractFloat64Exp(float64 a)
445 {
446     return (float64_val(a) >> 52) & 0x7FF;
447 }
448 
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
452 
453 static inline bool extractFloat64Sign(float64 a)
454 {
455     return float64_val(a) >> 63;
456 }
457 
458 /*
459  * Classify a floating point number. Everything above float_class_qnan
460  * is a NaN so cls >= float_class_qnan is any NaN.
461  */
462 
463 typedef enum __attribute__ ((__packed__)) {
464     float_class_unclassified,
465     float_class_zero,
466     float_class_normal,
467     float_class_inf,
468     float_class_qnan,  /* all NaNs from here */
469     float_class_snan,
470 } FloatClass;
471 
472 #define float_cmask(bit)  (1u << (bit))
473 
474 enum {
475     float_cmask_zero    = float_cmask(float_class_zero),
476     float_cmask_normal  = float_cmask(float_class_normal),
477     float_cmask_inf     = float_cmask(float_class_inf),
478     float_cmask_qnan    = float_cmask(float_class_qnan),
479     float_cmask_snan    = float_cmask(float_class_snan),
480 
481     float_cmask_infzero = float_cmask_zero | float_cmask_inf,
482     float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
483 };
484 
485 
486 /* Simple helpers for checking if, or what kind of, NaN we have */
487 static inline __attribute__((unused)) bool is_nan(FloatClass c)
488 {
489     return unlikely(c >= float_class_qnan);
490 }
491 
492 static inline __attribute__((unused)) bool is_snan(FloatClass c)
493 {
494     return c == float_class_snan;
495 }
496 
497 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
498 {
499     return c == float_class_qnan;
500 }
501 
502 /*
503  * Structure holding all of the decomposed parts of a float.
504  * The exponent is unbiased and the fraction is normalized.
505  *
506  * The fraction words are stored in big-endian word ordering,
507  * so that truncation from a larger format to a smaller format
508  * can be done simply by ignoring subsequent elements.
509  */
510 
511 typedef struct {
512     FloatClass cls;
513     bool sign;
514     int32_t exp;
515     union {
516         /* Routines that know the structure may reference the singular name. */
517         uint64_t frac;
518         /*
519          * Routines expanded with multiple structures reference "hi" and "lo"
520          * depending on the operation.  In FloatParts64, "hi" and "lo" are
521          * both the same word and aliased here.
522          */
523         uint64_t frac_hi;
524         uint64_t frac_lo;
525     };
526 } FloatParts64;
527 
528 typedef struct {
529     FloatClass cls;
530     bool sign;
531     int32_t exp;
532     uint64_t frac_hi;
533     uint64_t frac_lo;
534 } FloatParts128;
535 
536 /* These apply to the most significant word of each FloatPartsN. */
537 #define DECOMPOSED_BINARY_POINT    63
538 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
539 
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based the size of fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;

/*
 * Expand fields based on the size of exponent and fraction.
 * Note (-F - 1) & 63 == 63 - F for 0 < F < 64: the shift that lines the
 * F-bit fraction up just below DECOMPOSED_BINARY_POINT, written this way
 * so it also works for F >= 64 (float128) modulo 64.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = E,                                             \
    .exp_bias       = ((1 << E) - 1) >> 1,                           \
    .exp_max        = (1 << E) - 1,                                  \
    .frac_size      = F,                                             \
    .frac_shift     = (-F - 1) & 63,                                 \
    .frac_lsb       = 1ull << ((-F - 1) & 63),                       \
    .frac_lsbm1     = 1ull << ((-F - 2) & 63),                       \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1,                 \
    .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1

/* Per-format parameter tables. */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};
602 
603 /* Unpack a float to parts, but do not canonicalize.  */
604 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
605 {
606     const int f_size = fmt->frac_size;
607     const int e_size = fmt->exp_size;
608 
609     *r = (FloatParts64) {
610         .cls = float_class_unclassified,
611         .sign = extract64(raw, f_size + e_size, 1),
612         .exp = extract64(raw, f_size, e_size),
613         .frac = extract64(raw, 0, f_size)
614     };
615 }
616 
/* Per-format wrappers binding unpack_raw64 to its parameter table. */
static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}
636 
637 static void float128_unpack_raw(FloatParts128 *p, float128 f)
638 {
639     const int f_size = float128_params.frac_size - 64;
640     const int e_size = float128_params.exp_size;
641 
642     *p = (FloatParts128) {
643         .cls = float_class_unclassified,
644         .sign = extract64(f.high, f_size + e_size, 1),
645         .exp = extract64(f.high, f_size, e_size),
646         .frac_hi = extract64(f.high, 0, f_size),
647         .frac_lo = f.low,
648     };
649 }
650 
651 /* Pack a float from parts, but do not canonicalize.  */
652 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
653 {
654     const int f_size = fmt->frac_size;
655     const int e_size = fmt->exp_size;
656     uint64_t ret;
657 
658     ret = (uint64_t)p->sign << (f_size + e_size);
659     ret = deposit64(ret, f_size, e_size, p->exp);
660     ret = deposit64(ret, 0, f_size, p->frac);
661     return ret;
662 }
663 
/* Per-format wrappers binding pack_raw64 to its parameter table. */
static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}
683 
684 static float128 float128_pack_raw(const FloatParts128 *p)
685 {
686     const int f_size = float128_params.frac_size - 64;
687     const int e_size = float128_params.exp_size;
688     uint64_t hi;
689 
690     hi = (uint64_t)p->sign << (f_size + e_size);
691     hi = deposit64(hi, f_size, e_size, p->exp);
692     hi = deposit64(hi, 0, f_size, p->frac_hi);
693     return make_float128(hi, p->frac_lo);
694 }
695 
696 /*----------------------------------------------------------------------------
697 | Functions and definitions to determine:  (1) whether tininess for underflow
698 | is detected before or after rounding by default, (2) what (if anything)
699 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
700 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
701 | are propagated from function inputs to output.  These details are target-
702 | specific.
703 *----------------------------------------------------------------------------*/
704 #include "softfloat-specialize.c.inc"
705 
/*
 * Select parts128_NAME when P is a FloatParts128 *, otherwise the
 * parts64_NAME default, via QEMU_GENERIC type-based dispatch.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

/* Forward declarations; definitions come from softfloat-parts.c.inc. */
static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
751 
752 /*
753  * Helper functions for softfloat-parts.c.inc, per-size operations.
754  */
755 
756 #define FRAC_GENERIC_64_128(NAME, P) \
757     QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
758 
759 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
760 {
761     return uadd64_overflow(a->frac, c, &r->frac);
762 }
763 
764 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
765 {
766     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
767     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
768 }
769 
770 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
771 
772 static void frac64_allones(FloatParts64 *a)
773 {
774     a->frac = -1;
775 }
776 
777 static void frac128_allones(FloatParts128 *a)
778 {
779     a->frac_hi = a->frac_lo = -1;
780 }
781 
782 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
783 
784 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
785 {
786     return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
787 }
788 
789 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
790 {
791     uint64_t ta = a->frac_hi, tb = b->frac_hi;
792     if (ta == tb) {
793         ta = a->frac_lo, tb = b->frac_lo;
794         if (ta == tb) {
795             return 0;
796         }
797     }
798     return ta < tb ? -1 : 1;
799 }
800 
801 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
802 
/* Zero the entire fraction. */
static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)

/* True iff the entire fraction is zero. */
static bool frac64_eqz(FloatParts64 *a)
{
    return a->frac == 0;
}

static bool frac128_eqz(FloatParts128 *a)
{
    return (a->frac_hi | a->frac_lo) == 0;
}

#define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
826 
/*
 * Shift the fraction left until its most significant set bit is in the
 * top position; return the shift count (the full width if the fraction
 * is entirely zero).
 */
static int frac64_normalize(FloatParts64 *a)
{
    if (a->frac) {
        int shift = clz64(a->frac);
        a->frac <<= shift;
        return shift;
    }
    return 64;
}

static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        /* Guard shl == 0: "frac_lo >> 64" would be undefined behavior. */
        if (shl) {
            int shr = 64 - shl;
            a->frac_hi = (a->frac_hi << shl) | (a->frac_lo >> shr);
            a->frac_lo = (a->frac_lo << shl);
        }
        return shl;
    } else if (a->frac_lo) {
        /* High word empty: promote the low word, shifted into place. */
        int shl = clz64(a->frac_lo);
        a->frac_hi = (a->frac_lo << shl);
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128(normalize, A)(A)
857 
858 static void frac64_shl(FloatParts64 *a, int c)
859 {
860     a->frac <<= c;
861 }
862 
863 static void frac128_shl(FloatParts128 *a, int c)
864 {
865     shift128Left(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
866 }
867 
868 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
869 
870 static void frac64_shr(FloatParts64 *a, int c)
871 {
872     a->frac >>= c;
873 }
874 
875 static void frac128_shr(FloatParts128 *a, int c)
876 {
877     shift128Right(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
878 }
879 
880 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
881 
882 static void frac64_shrjam(FloatParts64 *a, int c)
883 {
884     shift64RightJamming(a->frac, c, &a->frac);
885 }
886 
887 static void frac128_shrjam(FloatParts128 *a, int c)
888 {
889     shift128RightJamming(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
890 }
891 
892 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128(shrjam, A)(A, C)
893 
/*
 * Instantiate the size-generic algorithms in softfloat-parts.c.inc
 * twice: once over FloatParts64 (producing parts64_* functions) and
 * once over FloatParts128 (producing parts128_*).  The partsN and
 * FloatPartsN macros are the template parameters.
 */
#define partsN(NAME)   parts64_##NAME
#define FloatPartsN    FloatParts64

#include "softfloat-parts.c.inc"

#undef  partsN
#undef  FloatPartsN
#define partsN(NAME)   parts128_##NAME
#define FloatPartsN    FloatParts128

#include "softfloat-parts.c.inc"

#undef  partsN
#undef  FloatPartsN
908 
909 /*
910  * Pack/unpack routines with a specific FloatFmt.
911  */
912 
913 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
914                                       float_status *s, const FloatFmt *params)
915 {
916     float16_unpack_raw(p, f);
917     parts_canonicalize(p, s, params);
918 }
919 
920 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
921                                      float_status *s)
922 {
923     float16a_unpack_canonical(p, f, s, &float16_params);
924 }
925 
926 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
927                                       float_status *s)
928 {
929     bfloat16_unpack_raw(p, f);
930     parts_canonicalize(p, s, &bfloat16_params);
931 }
932 
933 static float16 float16a_round_pack_canonical(FloatParts64 *p,
934                                              float_status *s,
935                                              const FloatFmt *params)
936 {
937     parts_uncanon(p, s, params);
938     return float16_pack_raw(p);
939 }
940 
941 static float16 float16_round_pack_canonical(FloatParts64 *p,
942                                             float_status *s)
943 {
944     return float16a_round_pack_canonical(p, s, &float16_params);
945 }
946 
947 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
948                                               float_status *s)
949 {
950     parts_uncanon(p, s, &bfloat16_params);
951     return bfloat16_pack_raw(p);
952 }
953 
954 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
955                                      float_status *s)
956 {
957     float32_unpack_raw(p, f);
958     parts_canonicalize(p, s, &float32_params);
959 }
960 
961 static float32 float32_round_pack_canonical(FloatParts64 *p,
962                                             float_status *s)
963 {
964     parts_uncanon(p, s, &float32_params);
965     return float32_pack_raw(p);
966 }
967 
968 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
969                                      float_status *s)
970 {
971     float64_unpack_raw(p, f);
972     parts_canonicalize(p, s, &float64_params);
973 }
974 
975 static float64 float64_round_pack_canonical(FloatParts64 *p,
976                                             float_status *s)
977 {
978     parts_uncanon(p, s, &float64_params);
979     return float64_pack_raw(p);
980 }
981 
982 /*
983  * Returns the result of adding or subtracting the values of the
984  * floating-point values `a' and `b'. The operation is performed
985  * according to the IEC/IEEE Standard for Binary Floating-Point
986  * Arithmetic.
987  */
988 
/*
 * Core add/subtract on canonical parts.  When SUBTRACT is set, the
 * sign of `b' is flipped first, reducing subtraction to addition of
 * values with (possibly) opposite signs.  The effective operation is
 * then chosen by comparing the two operand signs.
 */
static FloatParts64 addsub_floats(FloatParts64 a, FloatParts64 b, bool subtract,
                                float_status *s)
{
    bool a_sign = a.sign;
    bool b_sign = b.sign ^ subtract;

    if (a_sign != b_sign) {
        /* Subtraction */

        if (a.cls == float_class_normal && b.cls == float_class_normal) {
            /* Subtract the smaller magnitude from the larger, aligning
             * the smaller operand's fraction with a jamming shift so
             * that discarded bits still mark the result inexact.
             */
            if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
                shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
                a.frac = a.frac - b.frac;
            } else {
                /* |b| > |a|: compute b - a and flip the result sign. */
                shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
                a.frac = b.frac - a.frac;
                a.exp = b.exp;
                a_sign ^= 1;
            }

            if (a.frac == 0) {
                /* Exact cancellation: IEEE 754 gives -0 only when
                 * rounding toward minus infinity.
                 */
                a.cls = float_class_zero;
                a.sign = s->float_rounding_mode == float_round_down;
            } else {
                /* Renormalize after possible leading-bit cancellation. */
                int shift = clz64(a.frac);
                a.frac = a.frac << shift;
                a.exp = a.exp - shift;
                a.sign = a_sign;
            }
            return a;
        }
        if (is_nan(a.cls) || is_nan(b.cls)) {
            return *parts_pick_nan(&a, &b, s);
        }
        if (a.cls == float_class_inf) {
            /* inf - inf is invalid; inf - finite keeps a's infinity. */
            if (b.cls == float_class_inf) {
                float_raise(float_flag_invalid, s);
                parts_default_nan(&a, s);
            }
            return a;
        }
        if (a.cls == float_class_zero && b.cls == float_class_zero) {
            /* (+0) - (+0): sign depends on the rounding mode. */
            a.sign = s->float_rounding_mode == float_round_down;
            return a;
        }
        if (a.cls == float_class_zero || b.cls == float_class_inf) {
            /* Result is -b; since the signs differ, !a_sign == b_sign. */
            b.sign = a_sign ^ 1;
            return b;
        }
        if (b.cls == float_class_zero) {
            return a;
        }
    } else {
        /* Addition */
        if (a.cls == float_class_normal && b.cls == float_class_normal) {
            /* Align exponents with a jamming shift of the smaller operand. */
            if (a.exp > b.exp) {
                shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
            } else if (a.exp < b.exp) {
                shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
                a.exp = b.exp;
            }

            if (uadd64_overflow(a.frac, b.frac, &a.frac)) {
                /* Carry out: renormalize by one bit and restore the
                 * implicit integer bit lost to the overflow.
                 */
                shift64RightJamming(a.frac, 1, &a.frac);
                a.frac |= DECOMPOSED_IMPLICIT_BIT;
                a.exp += 1;
            }
            return a;
        }
        if (is_nan(a.cls) || is_nan(b.cls)) {
            return *parts_pick_nan(&a, &b, s);
        }
        if (a.cls == float_class_inf || b.cls == float_class_zero) {
            return a;
        }
        if (b.cls == float_class_inf || a.cls == float_class_zero) {
            b.sign = b_sign;
            return b;
        }
    }
    /* All class combinations are handled above. */
    g_assert_not_reached();
}
1071 
1072 /*
1073  * Returns the result of adding or subtracting the floating-point
1074  * values `a' and `b'. The operation is performed according to the
1075  * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1076  */
1077 
1078 float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
1079 {
1080     FloatParts64 pa, pb, pr;
1081 
1082     float16_unpack_canonical(&pa, a, status);
1083     float16_unpack_canonical(&pb, b, status);
1084     pr = addsub_floats(pa, pb, false, status);
1085 
1086     return float16_round_pack_canonical(&pr, status);
1087 }
1088 
1089 float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
1090 {
1091     FloatParts64 pa, pb, pr;
1092 
1093     float16_unpack_canonical(&pa, a, status);
1094     float16_unpack_canonical(&pb, b, status);
1095     pr = addsub_floats(pa, pb, true, status);
1096 
1097     return float16_round_pack_canonical(&pr, status);
1098 }
1099 
1100 static float32 QEMU_SOFTFLOAT_ATTR
1101 soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
1102 {
1103     FloatParts64 pa, pb, pr;
1104 
1105     float32_unpack_canonical(&pa, a, status);
1106     float32_unpack_canonical(&pb, b, status);
1107     pr = addsub_floats(pa, pb, subtract, status);
1108 
1109     return float32_round_pack_canonical(&pr, status);
1110 }
1111 
1112 static inline float32 soft_f32_add(float32 a, float32 b, float_status *status)
1113 {
1114     return soft_f32_addsub(a, b, false, status);
1115 }
1116 
1117 static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1118 {
1119     return soft_f32_addsub(a, b, true, status);
1120 }
1121 
1122 static float64 QEMU_SOFTFLOAT_ATTR
1123 soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
1124 {
1125     FloatParts64 pa, pb, pr;
1126 
1127     float64_unpack_canonical(&pa, a, status);
1128     float64_unpack_canonical(&pb, b, status);
1129     pr = addsub_floats(pa, pb, subtract, status);
1130 
1131     return float64_round_pack_canonical(&pr, status);
1132 }
1133 
1134 static inline float64 soft_f64_add(float64 a, float64 b, float_status *status)
1135 {
1136     return soft_f64_addsub(a, b, false, status);
1137 }
1138 
1139 static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1140 {
1141     return soft_f64_addsub(a, b, true, status);
1142 }
1143 
/*
 * Host-FPU implementations of the two-operand operations, used by the
 * hardfloat fast path (see float32_addsub/float64_addsub below).
 */
static float hard_f32_add(float a, float b)
{
    return a + b;
}

static float hard_f32_sub(float a, float b)
{
    return a - b;
}

static double hard_f64_add(double a, double b)
{
    return a + b;
}

static double hard_f64_sub(double a, double b)
{
    return a - b;
}
1163 
1164 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1165 {
1166     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1167         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1168     }
1169     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1170 }
1171 
1172 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1173 {
1174     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1175         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1176     } else {
1177         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1178     }
1179 }
1180 
/*
 * Dispatch a float32 add/sub through the hardfloat framework:
 * float32_gen2 uses the host FPU when the inputs are zero-or-normal
 * (f32_is_zon2) and the result passes f32_addsubmul_post, otherwise
 * it falls back to the softfloat implementation.
 */
static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}

/* As float32_addsub, for float64. */
static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsubmul_post);
}

/* Public entry points: hardfloat-accelerated add/sub. */
float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}

float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}

float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}

float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}
1218 
1219 /*
1220  * Returns the result of adding or subtracting the bfloat16
1221  * values `a' and `b'.
1222  */
1223 bfloat16 QEMU_FLATTEN bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1224 {
1225     FloatParts64 pa, pb, pr;
1226 
1227     bfloat16_unpack_canonical(&pa, a, status);
1228     bfloat16_unpack_canonical(&pb, b, status);
1229     pr = addsub_floats(pa, pb, false, status);
1230 
1231     return bfloat16_round_pack_canonical(&pr, status);
1232 }
1233 
1234 bfloat16 QEMU_FLATTEN bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1235 {
1236     FloatParts64 pa, pb, pr;
1237 
1238     bfloat16_unpack_canonical(&pa, a, status);
1239     bfloat16_unpack_canonical(&pb, b, status);
1240     pr = addsub_floats(pa, pb, true, status);
1241 
1242     return bfloat16_round_pack_canonical(&pr, status);
1243 }
1244 
1245 /*
1246  * Returns the result of multiplying the floating-point values `a' and
1247  * `b'. The operation is performed according to the IEC/IEEE Standard
1248  * for Binary Floating-Point Arithmetic.
1249  */
1250 
/*
 * Core multiplication on canonical parts.  For normal operands the
 * 64x64->128-bit product is renormalized to put the implicit bit at
 * the top of the high word, with the low word folded into the sticky
 * (lsb) bit so later rounding sees any inexactness.
 */
static FloatParts64 mul_floats(FloatParts64 a, FloatParts64 b, float_status *s)
{
    bool sign = a.sign ^ b.sign;

    if (a.cls == float_class_normal && b.cls == float_class_normal) {
        uint64_t hi, lo;
        int exp = a.exp + b.exp;

        mul64To128(a.frac, b.frac, &hi, &lo);
        /* The product of two fractions in [1, 2) lies in [1, 4); the
         * implicit bit of the high word tells us which half and thus
         * whether the exponent must be bumped or the product shifted.
         */
        if (hi & DECOMPOSED_IMPLICIT_BIT) {
            exp += 1;
        } else {
            hi <<= 1;
        }
        /* Fold the discarded low 64 bits into the sticky bit. */
        hi |= (lo != 0);

        /* Re-use a */
        a.exp = exp;
        a.sign = sign;
        a.frac = hi;
        return a;
    }
    /* handle all the NaN cases */
    if (is_nan(a.cls) || is_nan(b.cls)) {
        return *parts_pick_nan(&a, &b, s);
    }
    /* Inf * Zero == NaN */
    if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
        (a.cls == float_class_zero && b.cls == float_class_inf)) {
        float_raise(float_flag_invalid, s);
        parts_default_nan(&a, s);
        return a;
    }
    /* Multiply by 0 or Inf */
    if (a.cls == float_class_inf || a.cls == float_class_zero) {
        a.sign = sign;
        return a;
    }
    if (b.cls == float_class_inf || b.cls == float_class_zero) {
        b.sign = sign;
        return b;
    }
    g_assert_not_reached();
}
1295 
1296 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
1297 {
1298     FloatParts64 pa, pb, pr;
1299 
1300     float16_unpack_canonical(&pa, a, status);
1301     float16_unpack_canonical(&pb, b, status);
1302     pr = mul_floats(pa, pb, status);
1303 
1304     return float16_round_pack_canonical(&pr, status);
1305 }
1306 
1307 static float32 QEMU_SOFTFLOAT_ATTR
1308 soft_f32_mul(float32 a, float32 b, float_status *status)
1309 {
1310     FloatParts64 pa, pb, pr;
1311 
1312     float32_unpack_canonical(&pa, a, status);
1313     float32_unpack_canonical(&pb, b, status);
1314     pr = mul_floats(pa, pb, status);
1315 
1316     return float32_round_pack_canonical(&pr, status);
1317 }
1318 
1319 static float64 QEMU_SOFTFLOAT_ATTR
1320 soft_f64_mul(float64 a, float64 b, float_status *status)
1321 {
1322     FloatParts64 pa, pb, pr;
1323 
1324     float64_unpack_canonical(&pa, a, status);
1325     float64_unpack_canonical(&pb, b, status);
1326     pr = mul_floats(pa, pb, status);
1327 
1328     return float64_round_pack_canonical(&pr, status);
1329 }
1330 
/* Host-FPU multiplication, used by the hardfloat fast path. */
static float hard_f32_mul(float a, float b)
{
    return a * b;
}

static double hard_f64_mul(double a, double b)
{
    return a * b;
}

/*
 * Public multiply entry points, dispatched through the hardfloat
 * framework with the same pre/post checks as add/sub.
 */
float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, f32_addsubmul_post);
}

float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, f64_addsubmul_post);
}
1354 
1355 /*
1356  * Returns the result of multiplying the bfloat16
1357  * values `a' and `b'.
1358  */
1359 
1360 bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1361 {
1362     FloatParts64 pa, pb, pr;
1363 
1364     bfloat16_unpack_canonical(&pa, a, status);
1365     bfloat16_unpack_canonical(&pb, b, status);
1366     pr = mul_floats(pa, pb, status);
1367 
1368     return bfloat16_round_pack_canonical(&pr, status);
1369 }
1370 
1371 /*
1372  * Returns the result of multiplying the floating-point values `a' and
1373  * `b' then adding 'c', with no intermediate rounding step after the
1374  * multiplication. The operation is performed according to the
1375  * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
1376  * The flags argument allows the caller to select negation of the
1377  * addend, the intermediate product, or the final result. (The
1378  * difference between this and having the caller do a separate
1379  * negation is that negating externally will flip the sign bit on
1380  * NaNs.)
1381  */
1382 
/*
 * Core fused multiply-add on canonical parts: computes (a * b) + c
 * with a single rounding, keeping the full 128-bit product until the
 * addend has been folded in.  FLAGS selects optional negation of the
 * addend, the product, or the final result, and halving of the result.
 */
static FloatParts64 muladd_floats(FloatParts64 a, FloatParts64 b, FloatParts64 c,
                                int flags, float_status *s)
{
    bool inf_zero, p_sign;
    bool sign_flip = flags & float_muladd_negate_result;
    FloatClass p_class;
    uint64_t hi, lo;
    int p_exp;
    int ab_mask, abc_mask;

    /* Class masks let the special cases be tested in one comparison. */
    ab_mask = float_cmask(a.cls) | float_cmask(b.cls);
    abc_mask = float_cmask(c.cls) | ab_mask;
    inf_zero = ab_mask == float_cmask_infzero;

    /* It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (unlikely(abc_mask & float_cmask_anynan)) {
        return *parts_pick_nan_muladd(&a, &b, &c, s, ab_mask, abc_mask);
    }

    /* inf * 0 (with no NaN input) is invalid. */
    if (inf_zero) {
        float_raise(float_flag_invalid, s);
        parts_default_nan(&a, s);
        return a;
    }

    if (flags & float_muladd_negate_c) {
        c.sign ^= 1;
    }

    p_sign = a.sign ^ b.sign;

    if (flags & float_muladd_negate_product) {
        p_sign ^= 1;
    }

    /* Classify the product a * b. */
    if (ab_mask & float_cmask_inf) {
        p_class = float_class_inf;
    } else if (ab_mask & float_cmask_zero) {
        p_class = float_class_zero;
    } else {
        p_class = float_class_normal;
    }

    if (c.cls == float_class_inf) {
        /* inf - inf is invalid; otherwise the infinite addend wins. */
        if (p_class == float_class_inf && p_sign != c.sign) {
            float_raise(float_flag_invalid, s);
            parts_default_nan(&c, s);
        } else {
            c.sign ^= sign_flip;
        }
        return c;
    }

    if (p_class == float_class_inf) {
        a.cls = float_class_inf;
        a.sign = p_sign ^ sign_flip;
        return a;
    }

    if (p_class == float_class_zero) {
        if (c.cls == float_class_zero) {
            /* 0 + 0 with opposite signs: sign follows the rounding mode. */
            if (p_sign != c.sign) {
                p_sign = s->float_rounding_mode == float_round_down;
            }
            c.sign = p_sign;
        } else if (flags & float_muladd_halve_result) {
            c.exp -= 1;
        }
        c.sign ^= sign_flip;
        return c;
    }

    /* a & b should be normals now... */
    assert(a.cls == float_class_normal &&
           b.cls == float_class_normal);

    p_exp = a.exp + b.exp;

    mul64To128(a.frac, b.frac, &hi, &lo);

    /* Renormalize to the msb. */
    if (hi & DECOMPOSED_IMPLICIT_BIT) {
        p_exp += 1;
    } else {
        shortShift128Left(hi, lo, 1, &hi, &lo);
    }

    /* + add/sub */
    if (c.cls != float_class_zero) {
        int exp_diff = p_exp - c.exp;
        if (p_sign == c.sign) {
            /* Addition */
            if (exp_diff <= 0) {
                /* Addend dominates: align the product to c's exponent. */
                shift64RightJamming(hi, -exp_diff, &hi);
                p_exp = c.exp;
                if (uadd64_overflow(hi, c.frac, &hi)) {
                    shift64RightJamming(hi, 1, &hi);
                    hi |= DECOMPOSED_IMPLICIT_BIT;
                    p_exp += 1;
                }
            } else {
                /* Product dominates: align c and add in 128 bits. */
                uint64_t c_hi, c_lo, over;
                shift128RightJamming(c.frac, 0, exp_diff, &c_hi, &c_lo);
                add192(0, hi, lo, 0, c_hi, c_lo, &over, &hi, &lo);
                if (over) {
                    shift64RightJamming(hi, 1, &hi);
                    hi |= DECOMPOSED_IMPLICIT_BIT;
                    p_exp += 1;
                }
            }
        } else {
            /* Subtraction */
            uint64_t c_hi = c.frac, c_lo = 0;

            if (exp_diff <= 0) {
                shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
                /* Subtract the smaller magnitude from the larger;
                 * flipping the sign when c dominates.
                 */
                if (exp_diff == 0
                    &&
                    (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
                    sub128(hi, lo, c_hi, c_lo, &hi, &lo);
                } else {
                    sub128(c_hi, c_lo, hi, lo, &hi, &lo);
                    p_sign ^= 1;
                    p_exp = c.exp;
                }
            } else {
                shift128RightJamming(c_hi, c_lo,
                                     exp_diff,
                                     &c_hi, &c_lo);
                sub128(hi, lo, c_hi, c_lo, &hi, &lo);
            }

            if (hi == 0 && lo == 0) {
                /* Exact cancellation: sign per the rounding mode. */
                a.cls = float_class_zero;
                a.sign = s->float_rounding_mode == float_round_down;
                a.sign ^= sign_flip;
                return a;
            } else {
                int shift;
                if (hi != 0) {
                    shift = clz64(hi);
                } else {
                    shift = clz64(lo) + 64;
                }
                /* Normalizing to a binary point of 124 is the
                   correct adjust for the exponent.  However since we're
                   shifting, we might as well put the binary point back
                   at 63 where we really want it.  Therefore shift as
                   if we're leaving 1 bit at the top of the word, but
                   adjust the exponent as if we're leaving 3 bits.  */
                shift128Left(hi, lo, shift, &hi, &lo);
                p_exp -= shift;
            }
        }
    }
    /* Fold the remaining low bits into the sticky bit. */
    hi |= (lo != 0);

    if (flags & float_muladd_halve_result) {
        p_exp -= 1;
    }

    /* finally prepare our result */
    a.cls = float_class_normal;
    a.sign = p_sign ^ sign_flip;
    a.exp = p_exp;
    a.frac = hi;

    return a;
}
1556 
1557 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
1558                                                 int flags, float_status *status)
1559 {
1560     FloatParts64 pa, pb, pc, pr;
1561 
1562     float16_unpack_canonical(&pa, a, status);
1563     float16_unpack_canonical(&pb, b, status);
1564     float16_unpack_canonical(&pc, c, status);
1565     pr = muladd_floats(pa, pb, pc, flags, status);
1566 
1567     return float16_round_pack_canonical(&pr, status);
1568 }
1569 
1570 static float32 QEMU_SOFTFLOAT_ATTR
1571 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1572                 float_status *status)
1573 {
1574     FloatParts64 pa, pb, pc, pr;
1575 
1576     float32_unpack_canonical(&pa, a, status);
1577     float32_unpack_canonical(&pb, b, status);
1578     float32_unpack_canonical(&pc, c, status);
1579     pr = muladd_floats(pa, pb, pc, flags, status);
1580 
1581     return float32_round_pack_canonical(&pr, status);
1582 }
1583 
1584 static float64 QEMU_SOFTFLOAT_ATTR
1585 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1586                 float_status *status)
1587 {
1588     FloatParts64 pa, pb, pc, pr;
1589 
1590     float64_unpack_canonical(&pa, a, status);
1591     float64_unpack_canonical(&pb, b, status);
1592     float64_unpack_canonical(&pc, c, status);
1593     pr = muladd_floats(pa, pb, pc, flags, status);
1594 
1595     return float64_round_pack_canonical(&pr, status);
1596 }
1597 
1598 static bool force_soft_fma;
1599 
/*
 * float32 fused multiply-add with a hardfloat fast path: use the host
 * fmaf() when the status flags allow it and all inputs are zero or
 * normal, falling back to softfloat otherwise.
 */
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* The host FPU cannot halve the result; take the soft path. */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        /* The product is a signed zero; compute its sign explicitly so
         * that 0 + c obeys the zero-sign rules.
         */
        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /* Possible underflow: redo with softfloat so that the
             * underflow/inexact flags are raised correctly.
             */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}
1670 
/*
 * float64 fused multiply-add with a hardfloat fast path, mirroring
 * float32_muladd above but using the host fma().
 */
float64 QEMU_FLATTEN
float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
{
    union_float64 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* The host FPU cannot halve the result; take the soft path. */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
        union_float64 up;
        bool prod_sign;

        /* The product is a signed zero; compute its sign explicitly so
         * that 0 + c obeys the zero-sign rules.
         */
        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float64_set_sign(float64_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float64 ua_orig = ua;
        union_float64 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fma(ua.h, ub.h, uc.h);

        if (unlikely(f64_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
            /* NOTE(review): this compares against FLT_MIN, not DBL_MIN.
             * That is conservative (any result <= DBL_MIN is also
             * <= FLT_MIN, so every potential underflow still reaches
             * the soft path), but it also sends perfectly-normal tiny
             * doubles to softfloat — confirm whether DBL_MIN was
             * intended before changing it.
             */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float64_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
}
1741 
1742 /*
1743  * Returns the result of multiplying the bfloat16 values `a'
1744  * and `b' then adding 'c', with no intermediate rounding step after the
1745  * multiplication.
1746  */
1747 
1748 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1749                                       int flags, float_status *status)
1750 {
1751     FloatParts64 pa, pb, pc, pr;
1752 
1753     bfloat16_unpack_canonical(&pa, a, status);
1754     bfloat16_unpack_canonical(&pb, b, status);
1755     bfloat16_unpack_canonical(&pc, c, status);
1756     pr = muladd_floats(pa, pb, pc, flags, status);
1757 
1758     return bfloat16_round_pack_canonical(&pr, status);
1759 }
1760 
1761 /*
1762  * Returns the result of dividing the floating-point value `a' by the
1763  * corresponding value `b'. The operation is performed according to
1764  * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1765  */
1766 
/*
 * Core division on canonical parts.  For normal operands the dividend
 * is pre-shifted so that a single 128/64-bit division yields exactly
 * the 64 fraction bits, with the remainder folded into the sticky bit.
 */
static FloatParts64 div_floats(FloatParts64 a, FloatParts64 b, float_status *s)
{
    bool sign = a.sign ^ b.sign;

    if (a.cls == float_class_normal && b.cls == float_class_normal) {
        uint64_t n0, n1, q, r;
        int exp = a.exp - b.exp;

        /*
         * We want a 2*N / N-bit division to produce exactly an N-bit
         * result, so that we do not lose any precision and so that we
         * do not have to renormalize afterward.  If A.frac < B.frac,
         * then division would produce an (N-1)-bit result; shift A left
         * by one to produce the an N-bit result, and decrement the
         * exponent to match.
         *
         * The udiv_qrnnd algorithm that we're using requires normalization,
         * i.e. the msb of the denominator must be set, which is already true.
         */
        if (a.frac < b.frac) {
            exp -= 1;
            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
        } else {
            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT, &n1, &n0);
        }
        q = udiv_qrnnd(&r, n1, n0, b.frac);

        /* Set lsb if there is a remainder, to set inexact. */
        a.frac = q | (r != 0);
        a.sign = sign;
        a.exp = exp;
        return a;
    }
    /* handle all the NaN cases */
    if (is_nan(a.cls) || is_nan(b.cls)) {
        return *parts_pick_nan(&a, &b, s);
    }
    /* 0/0 or Inf/Inf */
    if (a.cls == b.cls
        &&
        (a.cls == float_class_inf || a.cls == float_class_zero)) {
        float_raise(float_flag_invalid, s);
        parts_default_nan(&a, s);
        return a;
    }
    /* Inf / x or 0 / x */
    if (a.cls == float_class_inf || a.cls == float_class_zero) {
        a.sign = sign;
        return a;
    }
    /* Div 0 => Inf */
    if (b.cls == float_class_zero) {
        float_raise(float_flag_divbyzero, s);
        a.cls = float_class_inf;
        a.sign = sign;
        return a;
    }
    /* Div by Inf */
    if (b.cls == float_class_inf) {
        a.cls = float_class_zero;
        a.sign = sign;
        return a;
    }
    g_assert_not_reached();
}
1832 
1833 float16 float16_div(float16 a, float16 b, float_status *status)
1834 {
1835     FloatParts64 pa, pb, pr;
1836 
1837     float16_unpack_canonical(&pa, a, status);
1838     float16_unpack_canonical(&pb, b, status);
1839     pr = div_floats(pa, pb, status);
1840 
1841     return float16_round_pack_canonical(&pr, status);
1842 }
1843 
1844 static float32 QEMU_SOFTFLOAT_ATTR
1845 soft_f32_div(float32 a, float32 b, float_status *status)
1846 {
1847     FloatParts64 pa, pb, pr;
1848 
1849     float32_unpack_canonical(&pa, a, status);
1850     float32_unpack_canonical(&pb, b, status);
1851     pr = div_floats(pa, pb, status);
1852 
1853     return float32_round_pack_canonical(&pr, status);
1854 }
1855 
1856 static float64 QEMU_SOFTFLOAT_ATTR
1857 soft_f64_div(float64 a, float64 b, float_status *status)
1858 {
1859     FloatParts64 pa, pb, pr;
1860 
1861     float64_unpack_canonical(&pa, a, status);
1862     float64_unpack_canonical(&pb, b, status);
1863     pr = div_floats(pa, pb, status);
1864 
1865     return float64_round_pack_canonical(&pr, status);
1866 }
1867 
/* Host-FPU fast path: divide two host floats directly. */
static float hard_f32_div(float a, float b)
{
    float quotient = a / b;
    return quotient;
}
1872 
/* Host-FPU fast path: divide two host doubles directly. */
static double hard_f64_div(double a, double b)
{
    double quotient = a / b;
    return quotient;
}
1877 
/*
 * Hardfloat guards for division.  The *_pre predicates admit the host-FPU
 * fast path only when the dividend is zero or normal and the divisor is
 * normal (so the host cannot hit Inf/NaN/subnormal inputs or divide by
 * zero).  The *_post predicates reject the case where operand `a' is
 * zero, forcing the soft-float path.
 * NOTE(review): whether `a' in *_post is the original dividend or the
 * fast-path result depends on the float32_gen2/float64_gen2 contract --
 * confirm against those helpers.
 */
static bool f32_div_pre(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               fpclassify(b.h) == FP_NORMAL;
    }
    return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
}

static bool f64_div_pre(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               fpclassify(b.h) == FP_NORMAL;
    }
    return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
}

static bool f32_div_post(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        return fpclassify(a.h) != FP_ZERO;
    }
    return !float32_is_zero(a.s);
}

static bool f64_div_post(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return fpclassify(a.h) != FP_ZERO;
    }
    return !float64_is_zero(a.s);
}
1911 
/*
 * Public division entry points: float32_gen2/float64_gen2 dispatch
 * between the host-FPU fast path and the soft-float implementation,
 * guarded by the pre/post predicates.
 */
float32 QEMU_FLATTEN
float32_div(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
                        f32_div_pre, f32_div_post);
}

float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
                        f64_div_pre, f64_div_post);
}
1925 
1926 /*
1927  * Returns the result of dividing the bfloat16
1928  * value `a' by the corresponding value `b'.
1929  */
1930 
1931 bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
1932 {
1933     FloatParts64 pa, pb, pr;
1934 
1935     bfloat16_unpack_canonical(&pa, a, status);
1936     bfloat16_unpack_canonical(&pb, b, status);
1937     pr = div_floats(pa, pb, status);
1938 
1939     return bfloat16_round_pack_canonical(&pr, status);
1940 }
1941 
1942 /*
1943  * Float to Float conversions
1944  *
1945  * Returns the result of converting one float format to another. The
1946  * conversion is performed according to the IEC/IEEE Standard for
1947  * Binary Floating-Point Arithmetic.
1948  *
1949  * The float_to_float helper only needs to take care of raising
1950  * invalid exceptions and handling the conversion on NaNs.
1951  */
1952 
static FloatParts64 float_to_float(FloatParts64 a, const FloatFmt *dstf,
                                 float_status *s)
{
    /*
     * Arm alternative half-precision (AHP) destinations have no
     * encodings for NaN or Inf, so those inputs must be replaced.
     */
    if (dstf->arm_althp) {
        switch (a.cls) {
        case float_class_qnan:
        case float_class_snan:
            /* There is no NaN in the destination format.  Raise Invalid
             * and return a zero with the sign of the input NaN.
             */
            float_raise(float_flag_invalid, s);
            a.cls = float_class_zero;
            a.frac = 0;
            a.exp = 0;
            break;

        case float_class_inf:
            /* There is no Inf in the destination format.  Raise Invalid
             * and return the maximum normal with the correct sign.
             */
            float_raise(float_flag_invalid, s);
            a.cls = float_class_normal;
            a.exp = dstf->exp_max;
            a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
            break;

        default:
            /* Zero/normal values convert without special handling. */
            break;
        }
    } else if (is_nan(a.cls)) {
        /* IEEE destination: propagate the NaN via the common helper. */
        parts_return_nan(&a, s);
    }
    return a;
}
1987 
/*
 * Half-precision conversions: `ieee' selects the IEEE float16 layout,
 * otherwise the Arm alternative half-precision (AHP) layout.
 */
float32 float16_to_float32(float16 a, bool ieee, float_status *s)
{
    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
    FloatParts64 pa, pr;

    float16a_unpack_canonical(&pa, a, s, fmt16);
    pr = float_to_float(pa, &float32_params, s);
    return float32_round_pack_canonical(&pr, s);
}

float64 float16_to_float64(float16 a, bool ieee, float_status *s)
{
    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
    FloatParts64 pa, pr;

    float16a_unpack_canonical(&pa, a, s, fmt16);
    pr = float_to_float(pa, &float64_params, s);
    return float64_round_pack_canonical(&pr, s);
}

float16 float32_to_float16(float32 a, bool ieee, float_status *s)
{
    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
    FloatParts64 pa, pr;

    float32_unpack_canonical(&pa, a, s);
    pr = float_to_float(pa, fmt16, s);
    return float16a_round_pack_canonical(&pr, s, fmt16);
}
2017 
2018 static float64 QEMU_SOFTFLOAT_ATTR
2019 soft_float32_to_float64(float32 a, float_status *s)
2020 {
2021     FloatParts64 pa, pr;
2022 
2023     float32_unpack_canonical(&pa, a, s);
2024     pr = float_to_float(pa, &float64_params, s);
2025     return float64_round_pack_canonical(&pr, s);
2026 }
2027 
float64 float32_to_float64(float32 a, float_status *s)
{
    if (likely(float32_is_normal(a))) {
        /* Widening conversion can never produce inexact results.  */
        union_float32 uf;
        union_float64 ud;
        uf.s = a;
        ud.h = uf.h;    /* exact host float -> double widening */
        return ud.s;
    } else if (float32_is_zero(a)) {
        /* Preserve the sign of zero; no flags need to be raised. */
        return float64_set_sign(float64_zero, float32_is_neg(a));
    } else {
        /* Subnormal, Inf or NaN: the soft path handles flags and NaNs. */
        return soft_float32_to_float64(a, s);
    }
}
2043 
/*
 * Remaining float <-> float / bfloat16 conversions, all via the
 * common unpack / float_to_float / round-pack sequence.
 */
float16 float64_to_float16(float64 a, bool ieee, float_status *s)
{
    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
    FloatParts64 pa, pr;

    float64_unpack_canonical(&pa, a, s);
    pr = float_to_float(pa, fmt16, s);
    return float16a_round_pack_canonical(&pr, s, fmt16);
}

float32 float64_to_float32(float64 a, float_status *s)
{
    FloatParts64 pa, pr;

    float64_unpack_canonical(&pa, a, s);
    pr = float_to_float(pa, &float32_params, s);
    return float32_round_pack_canonical(&pr, s);
}

float32 bfloat16_to_float32(bfloat16 a, float_status *s)
{
    FloatParts64 pa, pr;

    bfloat16_unpack_canonical(&pa, a, s);
    pr = float_to_float(pa, &float32_params, s);
    return float32_round_pack_canonical(&pr, s);
}

float64 bfloat16_to_float64(bfloat16 a, float_status *s)
{
    FloatParts64 pa, pr;

    bfloat16_unpack_canonical(&pa, a, s);
    pr = float_to_float(pa, &float64_params, s);
    return float64_round_pack_canonical(&pr, s);
}

bfloat16 float32_to_bfloat16(float32 a, float_status *s)
{
    FloatParts64 pa, pr;

    float32_unpack_canonical(&pa, a, s);
    pr = float_to_float(pa, &bfloat16_params, s);
    return bfloat16_round_pack_canonical(&pr, s);
}

bfloat16 float64_to_bfloat16(float64 a, float_status *s)
{
    FloatParts64 pa, pr;

    float64_unpack_canonical(&pa, a, s);
    pr = float_to_float(pa, &bfloat16_params, s);
    return bfloat16_round_pack_canonical(&pr, s);
}
2098 
2099 /*
2100  * Rounds the floating-point value `a' to an integer, and returns the
2101  * result as a floating-point value. The operation is performed
2102  * according to the IEC/IEEE Standard for Binary Floating-Point
2103  * Arithmetic.
2104  */
2105 
static FloatParts64 round_to_int(FloatParts64 a, FloatRoundMode rmode,
                               int scale, float_status *s)
{
    switch (a.cls) {
    case float_class_qnan:
    case float_class_snan:
        /* NaN: defer to the common NaN return helper. */
        parts_return_nan(&a, s);
        break;

    case float_class_zero:
    case float_class_inf:
        /* already "integral" */
        break;

    case float_class_normal:
        /* Clamp so the exponent adjustment cannot overflow an int. */
        scale = MIN(MAX(scale, -0x10000), 0x10000);
        a.exp += scale;

        if (a.exp >= DECOMPOSED_BINARY_POINT) {
            /* already integral */
            break;
        }
        if (a.exp < 0) {
            bool one;
            /* all fractional: the result is either 0 or +/-1 */
            float_raise(float_flag_inexact, s);
            switch (rmode) {
            case float_round_nearest_even:
                /* > 0.5 rounds to 1; exactly 0.5 ties to even, i.e. 0 */
                one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
                break;
            case float_round_ties_away:
                one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
                break;
            case float_round_to_zero:
                one = false;
                break;
            case float_round_up:
                one = !a.sign;
                break;
            case float_round_down:
                one = a.sign;
                break;
            case float_round_to_odd:
                one = true;
                break;
            default:
                g_assert_not_reached();
            }

            if (one) {
                a.frac = DECOMPOSED_IMPLICIT_BIT;
                a.exp = 0;
            } else {
                a.cls = float_class_zero;
            }
        } else {
            /* frac_lsb is the unit bit of the result; everything below
             * rnd_mask is the fractional part to be rounded away. */
            uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
            uint64_t frac_lsbm1 = frac_lsb >> 1;  /* the "0.5" bit */
            uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
            uint64_t rnd_mask = rnd_even_mask >> 1;
            uint64_t inc;

            switch (rmode) {
            case float_round_nearest_even:
                /* exact tie (only the 0.5 bit set) rounds to even */
                inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
                break;
            case float_round_ties_away:
                inc = frac_lsbm1;
                break;
            case float_round_to_zero:
                inc = 0;
                break;
            case float_round_up:
                inc = a.sign ? 0 : rnd_mask;
                break;
            case float_round_down:
                inc = a.sign ? rnd_mask : 0;
                break;
            case float_round_to_odd:
                /* bump only if the unit bit is currently even */
                inc = a.frac & frac_lsb ? 0 : rnd_mask;
                break;
            default:
                g_assert_not_reached();
            }

            if (a.frac & rnd_mask) {
                float_raise(float_flag_inexact, s);
                if (uadd64_overflow(a.frac, inc, &a.frac)) {
                    /* carry out of the fraction: renormalize */
                    a.frac >>= 1;
                    a.frac |= DECOMPOSED_IMPLICIT_BIT;
                    a.exp++;
                }
                a.frac &= ~rnd_mask;
            }
        }
        break;
    default:
        g_assert_not_reached();
    }
    return a;
}
2207 
/*
 * Round-to-integral entry points, using the rounding mode currently
 * set in the float_status.
 */
float16 float16_round_to_int(float16 a, float_status *s)
{
    FloatParts64 pa, pr;

    float16_unpack_canonical(&pa, a, s);
    pr = round_to_int(pa, s->float_rounding_mode, 0, s);
    return float16_round_pack_canonical(&pr, s);
}

float32 float32_round_to_int(float32 a, float_status *s)
{
    FloatParts64 pa, pr;

    float32_unpack_canonical(&pa, a, s);
    pr = round_to_int(pa, s->float_rounding_mode, 0, s);
    return float32_round_pack_canonical(&pr, s);
}

float64 float64_round_to_int(float64 a, float_status *s)
{
    FloatParts64 pa, pr;

    float64_unpack_canonical(&pa, a, s);
    pr = round_to_int(pa, s->float_rounding_mode, 0, s);
    return float64_round_pack_canonical(&pr, s);
}

/*
 * Rounds the bfloat16 value `a' to an integer, and returns the
 * result as a bfloat16 value.
 */

bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
{
    FloatParts64 pa, pr;

    bfloat16_unpack_canonical(&pa, a, s);
    pr = round_to_int(pa, s->float_rounding_mode, 0, s);
    return bfloat16_round_pack_canonical(&pr, s);
}
2248 
2249 /*
2250  * Returns the result of converting the floating-point value `a' to
2251  * the two's complement integer format. The conversion is performed
2252  * according to the IEC/IEEE Standard for Binary Floating-Point
2253  * Arithmetic---which means in particular that the conversion is
2254  * rounded according to the current rounding mode. If `a' is a NaN,
2255  * the largest positive integer is returned. Otherwise, if the
2256  * conversion overflows, the largest integer with the same sign as `a'
2257  * is returned.
2258 */
2259 
static int64_t round_to_int_and_pack(FloatParts64 in, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s)
{
    uint64_t r;
    /* Saved so that a rounding `inexact' can be discarded whenever we
     * raise `invalid' instead. */
    int orig_flags = get_float_exception_flags(s);
    FloatParts64 p = round_to_int(in, rmode, scale, s);

    switch (p.cls) {
    case float_class_snan:
    case float_class_qnan:
        /* NaN converts to the maximum positive integer. */
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return max;
    case float_class_inf:
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return p.sign ? min : max;
    case float_class_zero:
        return 0;
    case float_class_normal:
        if (p.exp <= DECOMPOSED_BINARY_POINT) {
            r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
        } else {
            /* Too large to represent: force saturation below. */
            r = UINT64_MAX;
        }
        if (p.sign) {
            /* -(uint64_t)min yields |min| without signed overflow. */
            if (r <= -(uint64_t) min) {
                return -r;
            } else {
                s->float_exception_flags = orig_flags | float_flag_invalid;
                return min;
            }
        } else {
            if (r <= max) {
                return r;
            } else {
                s->float_exception_flags = orig_flags | float_flag_invalid;
                return max;
            }
        }
    default:
        g_assert_not_reached();
    }
}
2303 
/*
 * Signed conversions with explicit rounding mode and scale: the value
 * is effectively multiplied by 2**scale before rounding, then
 * saturated to the destination range (raising `invalid' on overflow).
 */
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                              float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return round_to_int_and_pack(p, rmode, scale, INT8_MIN, INT8_MAX, s);
}

int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
2393 
/*
 * Convenience wrappers: convert using the status rounding mode (scale
 * 0), or with an explicit round-to-zero (truncating) mode.
 */
int8_t float16_to_int8(float16 a, float_status *s)
{
    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float16_to_int32(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float16_to_int64(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float32_to_int16(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float32_to_int32(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float32_to_int64(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float64_to_int16(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float64_to_int32(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float64_to_int64(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
2488 
2489 /*
2490  * Returns the result of converting the floating-point value `a' to
2491  * the two's complement integer format.
2492  */
2493 
/*
 * bfloat16 -> signed integer conversions; same saturation and flag
 * behaviour as the float16/32/64 variants above.
 */
int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
2550 
2551 /*
2552  *  Returns the result of converting the floating-point value `a' to
2553  *  the unsigned integer format. The conversion is performed according
2554  *  to the IEC/IEEE Standard for Binary Floating-Point
2555  *  Arithmetic---which means in particular that the conversion is
2556  *  rounded according to the current rounding mode. If `a' is a NaN,
2557  *  the largest unsigned integer is returned. Otherwise, if the
2558  *  conversion overflows, the largest unsigned integer is returned. If
 *  `a' is negative, the result is rounded and zero is returned;
2560  *  values that do not round to zero will raise the inexact exception
2561  *  flag.
2562  */
2563 
static uint64_t round_to_uint_and_pack(FloatParts64 in, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s)
{
    /* Saved so that a rounding `inexact' can be discarded whenever we
     * raise `invalid' instead. */
    int orig_flags = get_float_exception_flags(s);
    FloatParts64 p = round_to_int(in, rmode, scale, s);
    uint64_t r;

    switch (p.cls) {
    case float_class_snan:
    case float_class_qnan:
        /* NaN converts to the maximum unsigned integer. */
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return max;
    case float_class_inf:
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return p.sign ? 0 : max;
    case float_class_zero:
        return 0;
    case float_class_normal:
        if (p.sign) {
            /* Negative values that survived rounding saturate to 0. */
            s->float_exception_flags = orig_flags | float_flag_invalid;
            return 0;
        }

        if (p.exp <= DECOMPOSED_BINARY_POINT) {
            r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
        } else {
            s->float_exception_flags = orig_flags | float_flag_invalid;
            return max;
        }

        /* For uint64 this will never trip, but if p.exp is too large
         * to shift a decomposed fraction we shall have exited via the
         * 3rd leg above.
         */
        if (r > max) {
            s->float_exception_flags = orig_flags | float_flag_invalid;
            return max;
        }
        return r;
    default:
        g_assert_not_reached();
    }
}
2608 
/*
 * Unsigned conversions with explicit rounding mode and scale: the
 * value is effectively multiplied by 2**scale before rounding, then
 * saturated to [0, MAX] (raising `invalid' on overflow or for
 * negative inputs that do not round to zero away).
 */
uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT8_MAX, s);
}

uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
}

uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
}

uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
}

uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
}

uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
}

uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
}

uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
}

uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
}

uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
}
2698 
/*
 * Float -> unsigned integer conversions using the rounding mode currently
 * configured in the float_status, with no exponent scaling (scale == 0).
 * Each is a thin wrapper around the corresponding *_scalbn helper.
 */
uint8_t float16_to_uint8(float16 a, float_status *s)
{
    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float16_to_uint32(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float16_to_uint64(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float32_to_uint16(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float32_to_uint32(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float32_to_uint64(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float64_to_uint16(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float64_to_uint32(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float64_to_uint64(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}
2748 
/*
 * Float -> unsigned integer conversions that always truncate toward zero,
 * regardless of the rounding mode in the float_status.
 */
uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
2793 
2794 /*
2795  *  Returns the result of converting the bfloat16 value `a' to
2796  *  the unsigned integer format.
2797  */
2798 
/*
 * Convert bfloat16 to an unsigned integer, scaling the input by 2**scale.
 * The UINT*_MAX argument is the largest value round_to_uint_and_pack may
 * return for the destination width.
 */
uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
}

uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
}

uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
}
2825 
/*
 * bfloat16 -> unsigned integer wrappers: the first group uses the rounding
 * mode from the float_status, the second always truncates toward zero.
 */
uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
2855 
2856 /*
2857  * Integer to float conversions
2858  *
2859  * Returns the result of converting the two's complement integer `a'
2860  * to the floating-point format. The conversion is performed according
2861  * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2862  */
2863 
/*
 * Decompose the two's complement integer `a', scaled by 2**scale, into
 * canonical FloatParts64.  The result is exact: all 64 bits of the
 * magnitude fit in .frac after normalization.  `status' is unused here;
 * any rounding happens later in the *_round_pack_canonical step.
 */
static FloatParts64 int_to_float(int64_t a, int scale, float_status *status)
{
    FloatParts64 r = { .sign = false };

    if (a == 0) {
        r.cls = float_class_zero;
    } else {
        uint64_t f = a;
        int shift;

        r.cls = float_class_normal;
        if (a < 0) {
            /* Negate the unsigned copy: well-defined even for INT64_MIN,
             * where -a itself would overflow. */
            f = -f;
            r.sign = true;
        }
        /* Normalize so the most significant set bit becomes the implicit
         * bit at DECOMPOSED_BINARY_POINT. */
        shift = clz64(f);
        /* Clamp scale to 16 bits so r.exp cannot overflow int32_t; see
         * the matching comment in scalbn_decomposed(). */
        scale = MIN(MAX(scale, -0x10000), 0x10000);

        r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
        r.frac = f << shift;
    }

    return r;
}
2888 
/*
 * Signed integer -> float16 conversions.  The narrower input widths
 * sign-extend into int64_t and share the 64-bit implementation; the
 * non-scalbn entry points use scale == 0.
 */
float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 pa = int_to_float(a, scale, status);
    return float16_round_pack_canonical(&pa, status);
}

float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int64_to_float16(int64_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int32_to_float16(int32_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int16_to_float16(int16_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int8_to_float16(int8_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}
2924 
/*
 * Signed integer -> float32 conversions, all funnelled through the
 * 64-bit scalbn implementation.
 */
float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 pa = int_to_float(a, scale, status);
    return float32_round_pack_canonical(&pa, status);
}

float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int64_to_float32(int64_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int32_to_float32(int32_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int16_to_float32(int16_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}
2955 
/*
 * Signed integer -> float64 conversions, all funnelled through the
 * 64-bit scalbn implementation.
 */
float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 pa = int_to_float(a, scale, status);
    return float64_round_pack_canonical(&pa, status);
}

float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int64_to_float64(int64_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int32_to_float64(int32_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int16_to_float64(int16_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}
2986 
2987 /*
2988  * Returns the result of converting the two's complement integer `a'
2989  * to the bfloat16 format.
2990  */
2991 
/*
 * Signed integer -> bfloat16 conversions, all funnelled through the
 * 64-bit scalbn implementation.
 */
bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 pa = int_to_float(a, scale, status);
    return bfloat16_round_pack_canonical(&pa, status);
}

bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}
3022 
3023 /*
3024  * Unsigned Integer to float conversions
3025  *
3026  * Returns the result of converting the unsigned integer `a' to the
3027  * floating-point format. The conversion is performed according to the
3028  * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3029  */
3030 
3031 static FloatParts64 uint_to_float(uint64_t a, int scale, float_status *status)
3032 {
3033     FloatParts64 r = { .sign = false };
3034     int shift;
3035 
3036     if (a == 0) {
3037         r.cls = float_class_zero;
3038     } else {
3039         scale = MIN(MAX(scale, -0x10000), 0x10000);
3040         shift = clz64(a);
3041         r.cls = float_class_normal;
3042         r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
3043         r.frac = a << shift;
3044     }
3045 
3046     return r;
3047 }
3048 
/*
 * Unsigned integer -> float16 conversions; narrower widths zero-extend
 * into uint64_t and share the 64-bit implementation.
 */
float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 pa = uint_to_float(a, scale, status);
    return float16_round_pack_canonical(&pa, status);
}

float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint64_to_float16(uint64_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint32_to_float16(uint32_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint16_to_float16(uint16_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint8_to_float16(uint8_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}
3084 
/*
 * Unsigned integer -> float32 conversions, all funnelled through the
 * 64-bit scalbn implementation.
 */
float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 pa = uint_to_float(a, scale, status);
    return float32_round_pack_canonical(&pa, status);
}

float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint64_to_float32(uint64_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint32_to_float32(uint32_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint16_to_float32(uint16_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}
3115 
/*
 * Unsigned integer -> float64 conversions, all funnelled through the
 * 64-bit scalbn implementation.
 */
float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 pa = uint_to_float(a, scale, status);
    return float64_round_pack_canonical(&pa, status);
}

float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint64_to_float64(uint64_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint32_to_float64(uint32_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint16_to_float64(uint16_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}
3146 
3147 /*
3148  * Returns the result of converting the unsigned integer `a' to the
3149  * bfloat16 format.
3150  */
3151 
/*
 * Unsigned integer -> bfloat16 conversions, all funnelled through the
 * 64-bit scalbn implementation.
 */
bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 pa = uint_to_float(a, scale, status);
    return bfloat16_round_pack_canonical(&pa, status);
}

bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}
3182 
3183 /* Float Min/Max */
3184 /* min() and max() functions. These can't be implemented as
3185  * 'compare and pick one input' because that would mishandle
3186  * NaNs and +0 vs -0.
3187  *
3188  * minnum() and maxnum() functions. These are similar to the min()
3189  * and max() functions but if one of the arguments is a QNaN and
3190  * the other is numerical then the numerical argument is returned.
3191  * SNaNs will get quietened before being returned.
3192  * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
3193  * and maxNum() operations. min() and max() are the typical min/max
3194  * semantics provided by many CPUs which predate that specification.
3195  *
3196  * minnummag() and maxnummag() functions correspond to minNumMag()
3197  * and minNumMag() from the IEEE-754 2008.
3198  */
/*
 * Common implementation of min/max and the IEEE minNum/maxNum variants.
 * ismin selects min vs max; ieee selects the minNum/maxNum NaN handling
 * (a single quiet NaN loses to a number); ismag compares magnitudes
 * first (minNumMag/maxNumMag).  Only zero/normal/inf operands reach the
 * numeric comparison below.
 */
static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
                                bool ieee, bool ismag, float_status *s)
{
    if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
        if (ieee) {
            /* Takes two floating-point values `a' and `b', one of
             * which is a NaN, and returns the appropriate NaN
             * result. If either `a' or `b' is a signaling NaN,
             * the invalid exception is raised.
             */
            if (is_snan(a.cls) || is_snan(b.cls)) {
                return *parts_pick_nan(&a, &b, s);
            } else if (is_nan(a.cls) && !is_nan(b.cls)) {
                return b;
            } else if (is_nan(b.cls) && !is_nan(a.cls)) {
                return a;
            }
            /* Both quiet NaNs: fall through to the common NaN pick.  */
        }
        return *parts_pick_nan(&a, &b, s);
    } else {
        int a_exp, b_exp;

        /* Map the class to an exponent so zero < any normal < inf when
         * compared by (exp, frac).  */
        switch (a.cls) {
        case float_class_normal:
            a_exp = a.exp;
            break;
        case float_class_inf:
            a_exp = INT_MAX;
            break;
        case float_class_zero:
            a_exp = INT_MIN;
            break;
        default:
            g_assert_not_reached();
            break;
        }
        switch (b.cls) {
        case float_class_normal:
            b_exp = b.exp;
            break;
        case float_class_inf:
            b_exp = INT_MAX;
            break;
        case float_class_zero:
            b_exp = INT_MIN;
            break;
        default:
            g_assert_not_reached();
            break;
        }

        /* Magnitude compare ignores sign unless the magnitudes are
         * equal, in which case we fall through to the signed compare
         * below (so e.g. minnummag(-x, +x) picks by sign).  */
        if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
            bool a_less = a_exp < b_exp;
            if (a_exp == b_exp) {
                a_less = a.frac < b.frac;
            }
            return a_less ^ ismin ? b : a;
        }

        if (a.sign == b.sign) {
            /* Same sign: compare magnitudes; for negatives the larger
             * magnitude is the smaller value, hence the a.sign XOR.  */
            bool a_less = a_exp < b_exp;
            if (a_exp == b_exp) {
                a_less = a.frac < b.frac;
            }
            return a.sign ^ a_less ^ ismin ? b : a;
        } else {
            /* Different signs: the negative operand is the minimum.
             * This also orders -0 below +0.  */
            return a.sign ^ ismin ? b : a;
        }
    }
}
3269 
/*
 * Instantiate a min/max operation for a given float size.
 * ismin selects min vs max, isieee selects IEEE minNum/maxNum NaN
 * handling, ismag selects magnitude comparison (minNumMag/maxNumMag);
 * all three are forwarded to minmax_floats().
 * (Parameter renamed from the misspelled "isiee".)
 */
#define MINMAX(sz, name, ismin, isieee, ismag)                          \
float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b,      \
                                     float_status *s)                   \
{                                                                       \
    FloatParts64 pa, pb, pr;                                            \
    float ## sz ## _unpack_canonical(&pa, a, s);                        \
    float ## sz ## _unpack_canonical(&pb, b, s);                        \
    pr = minmax_floats(pa, pb, ismin, isieee, ismag, s);                \
    return float ## sz ## _round_pack_canonical(&pr, s);                \
}
3280 
/* Instantiate (min, minnum, minnummag, max, maxnum, maxnummag) for each
 * of float16, float32 and float64.  Arguments: (size, name, ismin,
 * ieee-NaN-handling, magnitude-compare). */
MINMAX(16, min, true, false, false)
MINMAX(16, minnum, true, true, false)
MINMAX(16, minnummag, true, true, true)
MINMAX(16, max, false, false, false)
MINMAX(16, maxnum, false, true, false)
MINMAX(16, maxnummag, false, true, true)

MINMAX(32, min, true, false, false)
MINMAX(32, minnum, true, true, false)
MINMAX(32, minnummag, true, true, true)
MINMAX(32, max, false, false, false)
MINMAX(32, maxnum, false, true, false)
MINMAX(32, maxnummag, false, true, true)

MINMAX(64, min, true, false, false)
MINMAX(64, minnum, true, true, false)
MINMAX(64, minnummag, true, true, true)
MINMAX(64, max, false, false, false)
MINMAX(64, maxnum, false, true, false)
MINMAX(64, maxnummag, false, true, true)

#undef MINMAX
3303 
/*
 * Instantiate a min/max operation for bfloat16; the flag arguments are
 * forwarded to minmax_floats() as for MINMAX above.
 * (Parameter renamed from the misspelled "isiee".)
 */
#define BF16_MINMAX(name, ismin, isieee, ismag)                         \
bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s)     \
{                                                                       \
    FloatParts64 pa, pb, pr;                                            \
    bfloat16_unpack_canonical(&pa, a, s);                               \
    bfloat16_unpack_canonical(&pb, b, s);                               \
    pr = minmax_floats(pa, pb, ismin, isieee, ismag, s);                \
    return bfloat16_round_pack_canonical(&pr, s);                       \
}
3313 
/* Instantiate the six bfloat16 min/max variants; arguments are
 * (name, ismin, ieee-NaN-handling, magnitude-compare). */
BF16_MINMAX(min, true, false, false)
BF16_MINMAX(minnum, true, true, false)
BF16_MINMAX(minnummag, true, true, true)
BF16_MINMAX(max, false, false, false)
BF16_MINMAX(maxnum, false, true, false)
BF16_MINMAX(maxnummag, false, true, true)

#undef BF16_MINMAX
3322 
/* Floating point compare */
/*
 * Compare two decomposed values.  Returns a FloatRelation; unordered if
 * either operand is a NaN.  When is_quiet, only signaling NaNs raise
 * the invalid exception; otherwise any NaN does.
 */
static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
                                    float_status *s)
{
    if (is_nan(a.cls) || is_nan(b.cls)) {
        if (!is_quiet ||
            a.cls == float_class_snan ||
            b.cls == float_class_snan) {
            float_raise(float_flag_invalid, s);
        }
        return float_relation_unordered;
    }

    /* Zeros compare equal regardless of sign; a zero against a nonzero
     * is ordered purely by the nonzero operand's sign.  */
    if (a.cls == float_class_zero) {
        if (b.cls == float_class_zero) {
            return float_relation_equal;
        }
        return b.sign ? float_relation_greater : float_relation_less;
    } else if (b.cls == float_class_zero) {
        return a.sign ? float_relation_less : float_relation_greater;
    }

    /* The only really important thing about infinity is its sign. If
     * both are infinities the sign marks the smallest of the two.
     */
    if (a.cls == float_class_inf) {
        if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
            return float_relation_equal;
        }
        return a.sign ? float_relation_less : float_relation_greater;
    } else if (b.cls == float_class_inf) {
        return b.sign ? float_relation_greater : float_relation_less;
    }

    /* Both normal: differing signs decide immediately ...  */
    if (a.sign != b.sign) {
        return a.sign ? float_relation_less : float_relation_greater;
    }

    /* ... otherwise compare (exp, frac), inverting the result for
     * negative values since larger magnitude means smaller value.  */
    if (a.exp == b.exp) {
        if (a.frac == b.frac) {
            return float_relation_equal;
        }
        if (a.sign) {
            return a.frac > b.frac ?
                float_relation_less : float_relation_greater;
        } else {
            return a.frac > b.frac ?
                float_relation_greater : float_relation_less;
        }
    } else {
        if (a.sign) {
            return a.exp > b.exp ? float_relation_less : float_relation_greater;
        } else {
            return a.exp > b.exp ? float_relation_greater : float_relation_less;
        }
    }
}
3380 
/*
 * Instantiate a softfloat comparison helper for a given float size.
 * Return type changed from int to FloatRelation for consistency with
 * compare_floats() and the public float*_compare wrappers; callers in
 * this file already treat the result as a FloatRelation.
 */
#define COMPARE(name, attr, sz)                                         \
static FloatRelation attr                                               \
name(float ## sz a, float ## sz b, bool is_quiet, float_status *s)      \
{                                                                       \
    FloatParts64 pa, pb;                                                \
    float ## sz ## _unpack_canonical(&pa, a, s);                        \
    float ## sz ## _unpack_canonical(&pb, b, s);                        \
    return compare_floats(pa, pb, is_quiet, s);                         \
}
3390 
/* Instantiate the softfloat comparators.  f16 is flattened because it
 * has no hardfloat fast path; f32/f64 get the softfloat attribute since
 * they are only the slow path of f32_compare/f64_compare below. */
COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)

#undef COMPARE
3396 
/* Public float16 comparisons: the signaling variant raises invalid on
 * any NaN, the quiet variant only on signaling NaNs. */
FloatRelation float16_compare(float16 a, float16 b, float_status *s)
{
    return soft_f16_compare(a, b, false, s);
}

FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
{
    return soft_f16_compare(a, b, true, s);
}
3406 
/*
 * float32 compare with a hardfloat fast path.  The host <math.h>
 * comparison macros (isgreater etc.) are specified not to raise on
 * quiet NaNs, so the ordered cases can be answered directly; any
 * unordered result falls through to softfloat to set the flags
 * correctly.
 */
static FloatRelation QEMU_FLATTEN
f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    /* Flush denormal inputs first; the soft path below reuses the
     * (possibly flushed) ua.s/ub.s values.  */
    float32_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /* The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return soft_f32_compare(ua.s, ub.s, is_quiet, s);
}
3435 
/* Public float32 comparisons: signaling vs quiet NaN handling as for
 * float16_compare above. */
FloatRelation float32_compare(float32 a, float32 b, float_status *s)
{
    return f32_compare(a, b, false, s);
}

FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
{
    return f32_compare(a, b, true, s);
}
3445 
/*
 * float64 compare with a hardfloat fast path; mirrors f32_compare.
 * Ordered results come from the host <math.h> comparison macros (which
 * do not raise on quiet NaNs); unordered falls through to softfloat so
 * the exception flags are set correctly.
 */
static FloatRelation QEMU_FLATTEN
f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    /* Flush denormal inputs first; the soft path below reuses the
     * (possibly flushed) ua.s/ub.s values.  */
    float64_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /* The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return soft_f64_compare(ua.s, ub.s, is_quiet, s);
}
3474 
/* Public float64 comparisons: signaling vs quiet NaN handling as for
 * float16_compare above. */
FloatRelation float64_compare(float64 a, float64 b, float_status *s)
{
    return f64_compare(a, b, false, s);
}

FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
{
    return f64_compare(a, b, true, s);
}
3484 
/* bfloat16 comparison: softfloat only, no hardfloat fast path. */
static FloatRelation QEMU_FLATTEN
soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
{
    FloatParts64 pa, pb;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    return compare_floats(pa, pb, is_quiet, s);
}

FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
{
    return soft_bf16_compare(a, b, false, s);
}

FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
{
    return soft_bf16_compare(a, b, true, s);
}
3504 
/* Multiply A by 2 raised to the power N.  */
/* NaNs are quietened (raising invalid for sNaNs via parts_return_nan);
 * zeros and infinities pass through unchanged since only the exponent
 * of a normal number is adjusted. */
static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
{
    if (unlikely(is_nan(a.cls))) {
        parts_return_nan(&a, s);
    }
    if (a.cls == float_class_normal) {
        /* The largest float type (even though not supported by FloatParts64)
         * is float128, which has a 15 bit exponent.  Bounding N to 16 bits
         * still allows rounding to infinity, without allowing overflow
         * within the int32_t that backs FloatParts64.exp.
         */
        n = MIN(MAX(n, -0x10000), 0x10000);
        a.exp += n;
    }
    return a;
}
3522 
/*
 * Per-format scalbn entry points: unpack, adjust the exponent via
 * scalbn_decomposed, then round and repack (which handles any
 * overflow/underflow caused by the adjustment).
 */
float16 float16_scalbn(float16 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float16_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float16_round_pack_canonical(&pr, status);
}

float32 float32_scalbn(float32 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float32_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float32_round_pack_canonical(&pr, status);
}

float64 float64_scalbn(float64 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float64_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float64_round_pack_canonical(&pr, status);
}

bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    bfloat16_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return bfloat16_round_pack_canonical(&pr, status);
}
3558 
3559 /*
3560  * Square Root
3561  *
3562  * The old softfloat code did an approximation step before zeroing in
3563  * on the final result. However for simpleness we just compute the
3564  * square root by iterating down from the implicit bit to enough extra
3565  * bits to ensure we get a correctly rounded result.
3566  *
3567  * This does mean however the calculation is slower than before,
3568  * especially for 64 bit floats.
3569  */
3570 
/*
 * Bit-by-bit square root of a decomposed value.  Special cases follow
 * IEEE 754: NaN propagates (sNaN raises invalid), sqrt(+-0) = +-0,
 * sqrt of a negative raises invalid and returns the default NaN,
 * sqrt(+inf) = +inf.  `p' supplies the target format's frac_shift so we
 * only iterate for as many result bits as rounding needs.
 */
static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
{
    uint64_t a_frac, r_frac, s_frac;
    int bit, last_bit;

    if (is_nan(a.cls)) {
        parts_return_nan(&a, s);
        return a;
    }
    if (a.cls == float_class_zero) {
        return a;  /* sqrt(+-0) = +-0 */
    }
    if (a.sign) {
        float_raise(float_flag_invalid, s);
        parts_default_nan(&a, s);
        return a;
    }
    if (a.cls == float_class_inf) {
        return a;  /* sqrt(+inf) = +inf */
    }

    assert(a.cls == float_class_normal);

    /* We need two overflow bits at the top. Adding room for that is a
     * right shift. If the exponent is odd, we can discard the low bit
     * by multiplying the fraction by 2; that's a left shift. Combine
     * those and we shift right by 1 if the exponent is odd, otherwise 2.
     */
    a_frac = a.frac >> (2 - (a.exp & 1));
    a.exp >>= 1;

    /* Bit-by-bit computation of sqrt.  */
    r_frac = 0;    /* partial root */
    s_frac = 0;    /* running 2*root, shifted: the subtrahend accumulator */

    /* Iterate from implicit bit down to the 3 extra bits to compute a
     * properly rounded result. Remember we've inserted two more bits
     * at the top, so these positions are two less.
     */
    bit = DECOMPOSED_BINARY_POINT - 2;
    last_bit = MAX(p->frac_shift - 4, 0);
    do {
        uint64_t q = 1ULL << bit;
        uint64_t t_frac = s_frac + q;
        if (t_frac <= a_frac) {
            /* This root bit is set: commit it and reduce the remainder. */
            s_frac = t_frac + q;
            a_frac -= t_frac;
            r_frac += q;
        }
        a_frac <<= 1;
    } while (--bit >= last_bit);

    /* Undo the right shift done above. If there is any remaining
     * fraction, the result is inexact. Set the sticky bit.
     */
    a.frac = (r_frac << 2) + (a_frac != 0);

    return a;
}
3630 
/* float16 sqrt: softfloat only, no hardfloat fast path. */
float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
{
    FloatParts64 pa, pr;

    float16_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float16_params);
    return float16_round_pack_canonical(&pr, status);
}

/* Softfloat slow path for float32_sqrt below. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_sqrt(float32 a, float_status *status)
{
    FloatParts64 pa, pr;

    float32_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float32_params);
    return float32_round_pack_canonical(&pr, status);
}

/* Softfloat slow path for float64_sqrt below. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_sqrt(float64 a, float_status *status)
{
    FloatParts64 pa, pr;

    float64_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float64_params);
    return float64_round_pack_canonical(&pr, status);
}
3659 
/*
 * float32 sqrt with a hardfloat fast path: use the host sqrtf() only
 * for non-negative zero-or-normal inputs (where it cannot set flags we
 * would miss); everything else goes to softfloat.
 */
float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        /* Classify via the host FPU ... */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        /* ... or via integer inspection of the bit pattern. */
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}
3686 
/*
 * float64 sqrt with a hardfloat fast path; mirrors float32_sqrt using
 * the host sqrt() for non-negative zero-or-normal inputs.
 */
float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        /* Classify via the host FPU ... */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        /* ... or via integer inspection of the bit pattern. */
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}
3713 
3714 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3715 {
3716     FloatParts64 pa, pr;
3717 
3718     bfloat16_unpack_canonical(&pa, a, status);
3719     pr = sqrt_float(pa, status, &bfloat16_params);
3720     return bfloat16_round_pack_canonical(&pr, status);
3721 }
3722 
3723 /*----------------------------------------------------------------------------
3724 | The pattern for a default generated NaN.
3725 *----------------------------------------------------------------------------*/
3726 
3727 float16 float16_default_nan(float_status *status)
3728 {
3729     FloatParts64 p;
3730 
3731     parts_default_nan(&p, status);
3732     p.frac >>= float16_params.frac_shift;
3733     return float16_pack_raw(&p);
3734 }
3735 
3736 float32 float32_default_nan(float_status *status)
3737 {
3738     FloatParts64 p;
3739 
3740     parts_default_nan(&p, status);
3741     p.frac >>= float32_params.frac_shift;
3742     return float32_pack_raw(&p);
3743 }
3744 
3745 float64 float64_default_nan(float_status *status)
3746 {
3747     FloatParts64 p;
3748 
3749     parts_default_nan(&p, status);
3750     p.frac >>= float64_params.frac_shift;
3751     return float64_pack_raw(&p);
3752 }
3753 
3754 float128 float128_default_nan(float_status *status)
3755 {
3756     FloatParts128 p;
3757 
3758     parts_default_nan(&p, status);
3759     frac_shr(&p, float128_params.frac_shift);
3760     return float128_pack_raw(&p);
3761 }
3762 
3763 bfloat16 bfloat16_default_nan(float_status *status)
3764 {
3765     FloatParts64 p;
3766 
3767     parts_default_nan(&p, status);
3768     p.frac >>= bfloat16_params.frac_shift;
3769     return bfloat16_pack_raw(&p);
3770 }
3771 
3772 /*----------------------------------------------------------------------------
3773 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3774 *----------------------------------------------------------------------------*/
3775 
3776 float16 float16_silence_nan(float16 a, float_status *status)
3777 {
3778     FloatParts64 p;
3779 
3780     float16_unpack_raw(&p, a);
3781     p.frac <<= float16_params.frac_shift;
3782     parts_silence_nan(&p, status);
3783     p.frac >>= float16_params.frac_shift;
3784     return float16_pack_raw(&p);
3785 }
3786 
3787 float32 float32_silence_nan(float32 a, float_status *status)
3788 {
3789     FloatParts64 p;
3790 
3791     float32_unpack_raw(&p, a);
3792     p.frac <<= float32_params.frac_shift;
3793     parts_silence_nan(&p, status);
3794     p.frac >>= float32_params.frac_shift;
3795     return float32_pack_raw(&p);
3796 }
3797 
3798 float64 float64_silence_nan(float64 a, float_status *status)
3799 {
3800     FloatParts64 p;
3801 
3802     float64_unpack_raw(&p, a);
3803     p.frac <<= float64_params.frac_shift;
3804     parts_silence_nan(&p, status);
3805     p.frac >>= float64_params.frac_shift;
3806     return float64_pack_raw(&p);
3807 }
3808 
3809 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3810 {
3811     FloatParts64 p;
3812 
3813     bfloat16_unpack_raw(&p, a);
3814     p.frac <<= bfloat16_params.frac_shift;
3815     parts_silence_nan(&p, status);
3816     p.frac >>= bfloat16_params.frac_shift;
3817     return bfloat16_pack_raw(&p);
3818 }
3819 
3820 float128 float128_silence_nan(float128 a, float_status *status)
3821 {
3822     FloatParts128 p;
3823 
3824     float128_unpack_raw(&p, a);
3825     frac_shl(&p, float128_params.frac_shift);
3826     parts_silence_nan(&p, status);
3827     frac_shr(&p, float128_params.frac_shift);
3828     return float128_pack_raw(&p);
3829 }
3830 
3831 /*----------------------------------------------------------------------------
3832 | If `a' is denormal and we are in flush-to-zero mode then set the
3833 | input-denormal exception and return zero. Otherwise just return the value.
3834 *----------------------------------------------------------------------------*/
3835 
3836 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
3837 {
3838     if (p.exp == 0 && p.frac != 0) {
3839         float_raise(float_flag_input_denormal, status);
3840         return true;
3841     }
3842 
3843     return false;
3844 }
3845 
3846 float16 float16_squash_input_denormal(float16 a, float_status *status)
3847 {
3848     if (status->flush_inputs_to_zero) {
3849         FloatParts64 p;
3850 
3851         float16_unpack_raw(&p, a);
3852         if (parts_squash_denormal(p, status)) {
3853             return float16_set_sign(float16_zero, p.sign);
3854         }
3855     }
3856     return a;
3857 }
3858 
3859 float32 float32_squash_input_denormal(float32 a, float_status *status)
3860 {
3861     if (status->flush_inputs_to_zero) {
3862         FloatParts64 p;
3863 
3864         float32_unpack_raw(&p, a);
3865         if (parts_squash_denormal(p, status)) {
3866             return float32_set_sign(float32_zero, p.sign);
3867         }
3868     }
3869     return a;
3870 }
3871 
3872 float64 float64_squash_input_denormal(float64 a, float_status *status)
3873 {
3874     if (status->flush_inputs_to_zero) {
3875         FloatParts64 p;
3876 
3877         float64_unpack_raw(&p, a);
3878         if (parts_squash_denormal(p, status)) {
3879             return float64_set_sign(float64_zero, p.sign);
3880         }
3881     }
3882     return a;
3883 }
3884 
3885 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3886 {
3887     if (status->flush_inputs_to_zero) {
3888         FloatParts64 p;
3889 
3890         bfloat16_unpack_raw(&p, a);
3891         if (parts_squash_denormal(p, status)) {
3892             return bfloat16_set_sign(bfloat16_zero, p.sign);
3893         }
3894     }
3895     return a;
3896 }
3897 
3898 /*----------------------------------------------------------------------------
3899 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3900 | and 7, and returns the properly rounded 32-bit integer corresponding to the
3901 | input.  If `zSign' is 1, the input is negated before being converted to an
3902 | integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
3903 | is simply rounded to an integer, with the inexact exception raised if the
3904 | input cannot be represented exactly as an integer.  However, if the fixed-
3905 | point input is too large, the invalid exception is raised and the largest
3906 | positive or negative integer is returned.
3907 *----------------------------------------------------------------------------*/
3908 
static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    int32_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Select the increment to add below the binary point (bits 6..0). */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Add half an ulp; halfway tie-break handled separately below. */
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round up only when the integer part would otherwise be even. */
        roundIncrement = absZ & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
    }
    /* Fraction bits discarded by the shift; nonzero => inexact. */
    roundBits = absZ & 0x7F;
    absZ = ( absZ + roundIncrement )>>7;
    /* Ties-to-even: on an exact halfway case, clear the new LSB. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        absZ &= ~1;
    }
    z = absZ;
    if ( zSign ) z = - z;
    /* Overflow: either the value needs more than 32 bits, or the
     * negation produced a result whose sign disagrees with zSign. */
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
        float_raise(float_flag_invalid, status);
        return zSign ? INT32_MIN : INT32_MAX;
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
3956 
3957 /*----------------------------------------------------------------------------
3958 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3959 | `absZ1', with binary point between bits 63 and 64 (between the input words),
3960 | and returns the properly rounded 64-bit integer corresponding to the input.
3961 | If `zSign' is 1, the input is negated before being converted to an integer.
3962 | Ordinarily, the fixed-point input is simply rounded to an integer, with
3963 | the inexact exception raised if the input cannot be represented exactly as
3964 | an integer.  However, if the fixed-point input is too large, the invalid
3965 | exception is raised and the largest positive or negative integer is
3966 | returned.
3967 *----------------------------------------------------------------------------*/
3968 
static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
                               float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;
    int64_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Decide whether the fraction word absZ1 rounds absZ0 upward. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Round up when the fraction is >= 1/2 (top bit of absZ1 set). */
        increment = ((int64_t) absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        /* Round up only when absZ0 would otherwise be even. */
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if ( increment ) {
        ++absZ0;
        /* Wrap-around of the integer word means the value is too large. */
        if ( absZ0 == 0 ) goto overflow;
        /* Ties-to-even: on an exact halfway case, clear the new LSB. */
        if (!(absZ1 << 1) && roundNearestEven) {
            absZ0 &= ~1;
        }
    }
    z = absZ0;
    if ( zSign ) z = - z;
    /* A nonzero result whose sign disagrees with zSign overflowed. */
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
 overflow:
        float_raise(float_flag_invalid, status);
        return zSign ? INT64_MIN : INT64_MAX;
    }
    if (absZ1) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
4018 
4019 /*----------------------------------------------------------------------------
4020 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4021 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4022 | and returns the properly rounded 64-bit unsigned integer corresponding to the
4023 | input.  Ordinarily, the fixed-point input is simply rounded to an integer,
4024 | with the inexact exception raised if the input cannot be represented exactly
4025 | as an integer.  However, if the fixed-point input is too large, the invalid
4026 | exception is raised and the largest unsigned integer is returned.
4027 *----------------------------------------------------------------------------*/
4028 
/*
 * NOTE(review): the result is logically unsigned but the declared return
 * type is int64_t; callers appear to rely on two's-complement conversion
 * back to uint64_t — confirm before changing the return type.
 */
static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
                                uint64_t absZ1, float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = (roundingMode == float_round_nearest_even);
    /* Decide whether the fraction word absZ1 rounds absZ0 upward. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Round up when the fraction is >= 1/2 (top bit of absZ1 set). */
        increment = ((int64_t)absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        /* Round up only when absZ0 would otherwise be even. */
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if (increment) {
        ++absZ0;
        /* Wrap-around of the integer word means the value is too large. */
        if (absZ0 == 0) {
            float_raise(float_flag_invalid, status);
            return UINT64_MAX;
        }
        /* Ties-to-even: on an exact halfway case, clear the new LSB. */
        if (!(absZ1 << 1) && roundNearestEven) {
            absZ0 &= ~1;
        }
    }

    /* A negative value that rounds to a nonzero magnitude cannot be
     * represented as an unsigned integer. */
    if (zSign && absZ0) {
        float_raise(float_flag_invalid, status);
        return 0;
    }

    if (absZ1) {
        float_raise(float_flag_inexact, status);
    }
    return absZ0;
}
4078 
4079 /*----------------------------------------------------------------------------
4080 | Normalizes the subnormal single-precision floating-point value represented
4081 | by the denormalized significand `aSig'.  The normalized exponent and
4082 | significand are stored at the locations pointed to by `zExpPtr' and
4083 | `zSigPtr', respectively.
4084 *----------------------------------------------------------------------------*/
4085 
/*
 * Normalize the subnormal single-precision significand `aSig',
 * writing the normalized exponent and significand through the
 * out-parameters `zExpPtr' and `zSigPtr'.
 */
static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    int8_t shift = clz32(aSig) - 8;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4096 
4097 /*----------------------------------------------------------------------------
4098 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4099 | and significand `zSig', and returns the proper single-precision floating-
4100 | point value corresponding to the abstract input.  Ordinarily, the abstract
4101 | value is simply rounded and packed into the single-precision format, with
4102 | the inexact exception raised if the abstract input cannot be represented
4103 | exactly.  However, if the abstract value is too large, the overflow and
4104 | inexact exceptions are raised and an infinity or maximal finite value is
4105 | returned.  If the abstract value is too small, the input value is rounded to
4106 | a subnormal number, and the underflow and inexact exceptions are raised if
4107 | the abstract input cannot be represented exactly as a subnormal single-
4108 | precision floating-point number.
4109 |     The input significand `zSig' has its binary point between bits 30
4110 | and 29, which is 7 bits to the left of the usual location.  This shifted
4111 | significand must be normalized or smaller.  If `zSig' is not normalized,
4112 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4113 | and it must not require rounding.  In the usual case that `zSig' is
4114 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4115 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4116 | Binary Floating-Point Arithmetic.
4117 *----------------------------------------------------------------------------*/
4118 
static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Select the increment to add below the 7 guard bits. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round up only when the result would otherwise be even. */
        roundIncrement = zSig & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
        break;
    }
    roundBits = zSig & 0x7F;
    /* The unsigned cast makes negative zExp compare large, so this one
     * test catches both possible overflow and possible underflow. */
    if ( 0xFD <= (uint16_t) zExp ) {
        if (    ( 0xFD < zExp )
             || (    ( zExp == 0xFD )
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /*
             * Overflow. Round-to-odd never overflows to infinity, nor
             * does a directed rounding that truncates on this sign
             * (roundIncrement == 0).
             */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* frac 0 gives infinity; frac -1 borrows from the exponent
             * field (pack adds the fields) to give the largest finite. */
            return packFloat32(zSign, 0xFF, -!overflow_to_inf);
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat32(zSign, 0, 0);
            }
            /* Tininess may be detected before or after rounding,
             * depending on the status configuration. */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < 0x80000000);
            /* Denormalize, jamming shifted-out bits into the LSB. */
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x7F;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = zSig & 0x80 ? 0 : 0x7f;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>7;
    /* Ties-to-even: on an exact halfway case, clear the new LSB. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat32( zSign, zExp, zSig );

}
4195 
4196 /*----------------------------------------------------------------------------
4197 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4198 | and significand `zSig', and returns the proper single-precision floating-
4199 | point value corresponding to the abstract input.  This routine is just like
4200 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4201 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4202 | floating-point exponent.
4203 *----------------------------------------------------------------------------*/
4204 
4205 static float32
4206  normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4207                               float_status *status)
4208 {
4209     int8_t shiftCount;
4210 
4211     shiftCount = clz32(zSig) - 1;
4212     return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4213                                status);
4214 
4215 }
4216 
4217 /*----------------------------------------------------------------------------
4218 | Normalizes the subnormal double-precision floating-point value represented
4219 | by the denormalized significand `aSig'.  The normalized exponent and
4220 | significand are stored at the locations pointed to by `zExpPtr' and
4221 | `zSigPtr', respectively.
4222 *----------------------------------------------------------------------------*/
4223 
/*
 * Normalize the subnormal double-precision significand `aSig',
 * writing the normalized exponent and significand through the
 * out-parameters `zExpPtr' and `zSigPtr'.
 */
static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    int8_t shift = clz64(aSig) - 11;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4234 
4235 /*----------------------------------------------------------------------------
4236 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4237 | double-precision floating-point value, returning the result.  After being
4238 | shifted into the proper positions, the three fields are simply added
4239 | together to form the result.  This means that any integer portion of `zSig'
4240 | will be added into the exponent.  Since a properly normalized significand
4241 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4242 | than the desired result exponent whenever `zSig' is a complete, normalized
4243 | significand.
4244 *----------------------------------------------------------------------------*/
4245 
4246 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4247 {
4248 
4249     return make_float64(
4250         ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4251 
4252 }
4253 
4254 /*----------------------------------------------------------------------------
4255 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4256 | and significand `zSig', and returns the proper double-precision floating-
4257 | point value corresponding to the abstract input.  Ordinarily, the abstract
4258 | value is simply rounded and packed into the double-precision format, with
4259 | the inexact exception raised if the abstract input cannot be represented
4260 | exactly.  However, if the abstract value is too large, the overflow and
4261 | inexact exceptions are raised and an infinity or maximal finite value is
4262 | returned.  If the abstract value is too small, the input value is rounded to
4263 | a subnormal number, and the underflow and inexact exceptions are raised if
4264 | the abstract input cannot be represented exactly as a subnormal double-
4265 | precision floating-point number.
4266 |     The input significand `zSig' has its binary point between bits 62
4267 | and 61, which is 10 bits to the left of the usual location.  This shifted
4268 | significand must be normalized or smaller.  If `zSig' is not normalized,
4269 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4270 | and it must not require rounding.  In the usual case that `zSig' is
4271 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4272 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4273 | Binary Floating-Point Arithmetic.
4274 *----------------------------------------------------------------------------*/
4275 
static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Select the increment to add below the 10 guard bits. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x200;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x3ff;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x3ff : 0;
        break;
    case float_round_to_odd:
        /* Round up only when the result would otherwise be even. */
        roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
        break;
    default:
        abort();
    }
    roundBits = zSig & 0x3FF;
    /* The unsigned cast makes negative zExp compare large, so this one
     * test catches both possible overflow and possible underflow. */
    if ( 0x7FD <= (uint16_t) zExp ) {
        if (    ( 0x7FD < zExp )
             || (    ( zExp == 0x7FD )
                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /*
             * Overflow. Round-to-odd never overflows to infinity, nor
             * does a directed rounding that truncates on this sign
             * (roundIncrement == 0).
             */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* frac 0 gives infinity; frac -1 borrows from the exponent
             * field (packFloat64 adds the fields) for the largest finite. */
            return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat64(zSign, 0, 0);
            }
            /* Tininess may be detected before or after rounding,
             * depending on the status configuration. */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
            /* Denormalize, jamming shifted-out bits into the LSB. */
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x3FF;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>10;
    /* Ties-to-even: on an exact halfway case, clear the new LSB. */
    if (!(roundBits ^ 0x200) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat64( zSign, zExp, zSig );

}
4351 
4352 /*----------------------------------------------------------------------------
4353 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4354 | and significand `zSig', and returns the proper double-precision floating-
4355 | point value corresponding to the abstract input.  This routine is just like
4356 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4357 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4358 | floating-point exponent.
4359 *----------------------------------------------------------------------------*/
4360 
4361 static float64
4362  normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4363                               float_status *status)
4364 {
4365     int8_t shiftCount;
4366 
4367     shiftCount = clz64(zSig) - 1;
4368     return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4369                                status);
4370 
4371 }
4372 
4373 /*----------------------------------------------------------------------------
4374 | Normalizes the subnormal extended double-precision floating-point value
4375 | represented by the denormalized significand `aSig'.  The normalized exponent
4376 | and significand are stored at the locations pointed to by `zExpPtr' and
4377 | `zSigPtr', respectively.
4378 *----------------------------------------------------------------------------*/
4379 
/*
 * Normalize the subnormal extended double-precision significand `aSig',
 * writing the normalized exponent and significand through the
 * out-parameters `zExpPtr' and `zSigPtr'.
 */
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t shift = clz64(aSig);

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4389 
4390 /*----------------------------------------------------------------------------
4391 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4392 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4393 | and returns the proper extended double-precision floating-point value
4394 | corresponding to the abstract input.  Ordinarily, the abstract value is
4395 | rounded and packed into the extended double-precision format, with the
4396 | inexact exception raised if the abstract input cannot be represented
4397 | exactly.  However, if the abstract value is too large, the overflow and
4398 | inexact exceptions are raised and an infinity or maximal finite value is
4399 | returned.  If the abstract value is too small, the input value is rounded to
4400 | a subnormal number, and the underflow and inexact exceptions are raised if
4401 | the abstract input cannot be represented exactly as a subnormal extended
4402 | double-precision floating-point number.
4403 |     If `roundingPrecision' is 32 or 64, the result is rounded to the same
4404 | number of bits as single or double precision, respectively.  Otherwise, the
4405 | result is rounded to the full precision of the extended double-precision
4406 | format.
4407 |     The input significand must be normalized or smaller.  If the input
4408 | significand is not normalized, `zExp' must be 0; in that case, the result
4409 | returned is a subnormal number, and it must not require rounding.  The
4410 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4411 | Floating-Point Arithmetic.
4412 *----------------------------------------------------------------------------*/
4413 
4414 floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
4415                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4416                               float_status *status)
4417 {
4418     int8_t roundingMode;
4419     bool roundNearestEven, increment, isTiny;
4420     int64_t roundIncrement, roundMask, roundBits;
4421 
4422     roundingMode = status->float_rounding_mode;
4423     roundNearestEven = ( roundingMode == float_round_nearest_even );
4424     if ( roundingPrecision == 80 ) goto precision80;
4425     if ( roundingPrecision == 64 ) {
4426         roundIncrement = UINT64_C(0x0000000000000400);
4427         roundMask = UINT64_C(0x00000000000007FF);
4428     }
4429     else if ( roundingPrecision == 32 ) {
4430         roundIncrement = UINT64_C(0x0000008000000000);
4431         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4432     }
4433     else {
4434         goto precision80;
4435     }
4436     zSig0 |= ( zSig1 != 0 );
4437     switch (roundingMode) {
4438     case float_round_nearest_even:
4439     case float_round_ties_away:
4440         break;
4441     case float_round_to_zero:
4442         roundIncrement = 0;
4443         break;
4444     case float_round_up:
4445         roundIncrement = zSign ? 0 : roundMask;
4446         break;
4447     case float_round_down:
4448         roundIncrement = zSign ? roundMask : 0;
4449         break;
4450     default:
4451         abort();
4452     }
4453     roundBits = zSig0 & roundMask;
4454     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4455         if (    ( 0x7FFE < zExp )
4456              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4457            ) {
4458             goto overflow;
4459         }
4460         if ( zExp <= 0 ) {
4461             if (status->flush_to_zero) {
4462                 float_raise(float_flag_output_denormal, status);
4463                 return packFloatx80(zSign, 0, 0);
4464             }
4465             isTiny = status->tininess_before_rounding
4466                   || (zExp < 0 )
4467                   || (zSig0 <= zSig0 + roundIncrement);
4468             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4469             zExp = 0;
4470             roundBits = zSig0 & roundMask;
4471             if (isTiny && roundBits) {
4472                 float_raise(float_flag_underflow, status);
4473             }
4474             if (roundBits) {
4475                 float_raise(float_flag_inexact, status);
4476             }
4477             zSig0 += roundIncrement;
4478             if ( (int64_t) zSig0 < 0 ) zExp = 1;
4479             roundIncrement = roundMask + 1;
4480             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4481                 roundMask |= roundIncrement;
4482             }
4483             zSig0 &= ~ roundMask;
4484             return packFloatx80( zSign, zExp, zSig0 );
4485         }
4486     }
4487     if (roundBits) {
4488         float_raise(float_flag_inexact, status);
4489     }
4490     zSig0 += roundIncrement;
4491     if ( zSig0 < roundIncrement ) {
4492         ++zExp;
4493         zSig0 = UINT64_C(0x8000000000000000);
4494     }
4495     roundIncrement = roundMask + 1;
4496     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4497         roundMask |= roundIncrement;
4498     }
4499     zSig0 &= ~ roundMask;
4500     if ( zSig0 == 0 ) zExp = 0;
4501     return packFloatx80( zSign, zExp, zSig0 );
4502  precision80:
4503     switch (roundingMode) {
4504     case float_round_nearest_even:
4505     case float_round_ties_away:
4506         increment = ((int64_t)zSig1 < 0);
4507         break;
4508     case float_round_to_zero:
4509         increment = 0;
4510         break;
4511     case float_round_up:
4512         increment = !zSign && zSig1;
4513         break;
4514     case float_round_down:
4515         increment = zSign && zSig1;
4516         break;
4517     default:
4518         abort();
4519     }
4520     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4521         if (    ( 0x7FFE < zExp )
4522              || (    ( zExp == 0x7FFE )
4523                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4524                   && increment
4525                 )
4526            ) {
4527             roundMask = 0;
4528  overflow:
4529             float_raise(float_flag_overflow | float_flag_inexact, status);
4530             if (    ( roundingMode == float_round_to_zero )
4531                  || ( zSign && ( roundingMode == float_round_up ) )
4532                  || ( ! zSign && ( roundingMode == float_round_down ) )
4533                ) {
4534                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4535             }
4536             return packFloatx80(zSign,
4537                                 floatx80_infinity_high,
4538                                 floatx80_infinity_low);
4539         }
4540         if ( zExp <= 0 ) {
4541             isTiny = status->tininess_before_rounding
4542                   || (zExp < 0)
4543                   || !increment
4544                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4545             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4546             zExp = 0;
4547             if (isTiny && zSig1) {
4548                 float_raise(float_flag_underflow, status);
4549             }
4550             if (zSig1) {
4551                 float_raise(float_flag_inexact, status);
4552             }
4553             switch (roundingMode) {
4554             case float_round_nearest_even:
4555             case float_round_ties_away:
4556                 increment = ((int64_t)zSig1 < 0);
4557                 break;
4558             case float_round_to_zero:
4559                 increment = 0;
4560                 break;
4561             case float_round_up:
4562                 increment = !zSign && zSig1;
4563                 break;
4564             case float_round_down:
4565                 increment = zSign && zSig1;
4566                 break;
4567             default:
4568                 abort();
4569             }
4570             if ( increment ) {
4571                 ++zSig0;
4572                 if (!(zSig1 << 1) && roundNearestEven) {
4573                     zSig0 &= ~1;
4574                 }
4575                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4576             }
4577             return packFloatx80( zSign, zExp, zSig0 );
4578         }
4579     }
4580     if (zSig1) {
4581         float_raise(float_flag_inexact, status);
4582     }
4583     if ( increment ) {
4584         ++zSig0;
4585         if ( zSig0 == 0 ) {
4586             ++zExp;
4587             zSig0 = UINT64_C(0x8000000000000000);
4588         }
4589         else {
4590             if (!(zSig1 << 1) && roundNearestEven) {
4591                 zSig0 &= ~1;
4592             }
4593         }
4594     }
4595     else {
4596         if ( zSig0 == 0 ) zExp = 0;
4597     }
4598     return packFloatx80( zSign, zExp, zSig0 );
4599 
4600 }
4601 
4602 /*----------------------------------------------------------------------------
4603 | Takes an abstract floating-point value having sign `zSign', exponent
4604 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4605 | and returns the proper extended double-precision floating-point value
4606 | corresponding to the abstract input.  This routine is just like
4607 | `roundAndPackFloatx80' except that the input significand does not have to be
4608 | normalized.
4609 *----------------------------------------------------------------------------*/
4610 
4611 floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4612                                        bool zSign, int32_t zExp,
4613                                        uint64_t zSig0, uint64_t zSig1,
4614                                        float_status *status)
4615 {
4616     int8_t shiftCount;
4617 
4618     if ( zSig0 == 0 ) {
4619         zSig0 = zSig1;
4620         zSig1 = 0;
4621         zExp -= 64;
4622     }
4623     shiftCount = clz64(zSig0);
4624     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4625     zExp -= shiftCount;
4626     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4627                                 zSig0, zSig1, status);
4628 
4629 }
4630 
4631 /*----------------------------------------------------------------------------
4632 | Returns the least-significant 64 fraction bits of the quadruple-precision
4633 | floating-point value `a'.
4634 *----------------------------------------------------------------------------*/
4635 
4636 static inline uint64_t extractFloat128Frac1( float128 a )
4637 {
4638 
4639     return a.low;
4640 
4641 }
4642 
4643 /*----------------------------------------------------------------------------
4644 | Returns the most-significant 48 fraction bits of the quadruple-precision
4645 | floating-point value `a'.
4646 *----------------------------------------------------------------------------*/
4647 
4648 static inline uint64_t extractFloat128Frac0( float128 a )
4649 {
4650 
4651     return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4652 
4653 }
4654 
4655 /*----------------------------------------------------------------------------
4656 | Returns the exponent bits of the quadruple-precision floating-point value
4657 | `a'.
4658 *----------------------------------------------------------------------------*/
4659 
4660 static inline int32_t extractFloat128Exp( float128 a )
4661 {
4662 
4663     return ( a.high>>48 ) & 0x7FFF;
4664 
4665 }
4666 
4667 /*----------------------------------------------------------------------------
4668 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4669 *----------------------------------------------------------------------------*/
4670 
4671 static inline bool extractFloat128Sign(float128 a)
4672 {
4673     return a.high >> 63;
4674 }
4675 
4676 /*----------------------------------------------------------------------------
4677 | Normalizes the subnormal quadruple-precision floating-point value
4678 | represented by the denormalized significand formed by the concatenation of
4679 | `aSig0' and `aSig1'.  The normalized exponent is stored at the location
4680 | pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
4681 | significand are stored at the location pointed to by `zSig0Ptr', and the
4682 | least significant 64 bits of the normalized significand are stored at the
4683 | location pointed to by `zSig1Ptr'.
4684 *----------------------------------------------------------------------------*/
4685 
static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shift;

    if (aSig0 != 0) {
        /* Leading one is already in the high word: shift it up to the
         * float128 integer-bit position (bit 48 of the high word). */
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /* High word is empty: the whole significand lives in aSig1. */
    shift = clz64(aSig1) - 15;
    if (shift < 0) {
        /* aSig1 has more than 49 significant bits: split it across
         * both output words. */
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    } else {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    }
    *zExpPtr = -shift - 63;
}
4716 
4717 /*----------------------------------------------------------------------------
4718 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4719 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4720 | floating-point value, returning the result.  After being shifted into the
4721 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4722 | added together to form the most significant 32 bits of the result.  This
4723 | means that any integer portion of `zSig0' will be added into the exponent.
4724 | Since a properly normalized significand will have an integer portion equal
4725 | to 1, the `zExp' input should be 1 less than the desired result exponent
4726 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4727 | significand.
4728 *----------------------------------------------------------------------------*/
4729 
4730 static inline float128
4731 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4732 {
4733     float128 z;
4734 
4735     z.low = zSig1;
4736     z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4737     return z;
4738 }
4739 
4740 /*----------------------------------------------------------------------------
4741 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4742 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4743 | and `zSig2', and returns the proper quadruple-precision floating-point value
4744 | corresponding to the abstract input.  Ordinarily, the abstract value is
4745 | simply rounded and packed into the quadruple-precision format, with the
4746 | inexact exception raised if the abstract input cannot be represented
4747 | exactly.  However, if the abstract value is too large, the overflow and
4748 | inexact exceptions are raised and an infinity or maximal finite value is
4749 | returned.  If the abstract value is too small, the input value is rounded to
4750 | a subnormal number, and the underflow and inexact exceptions are raised if
4751 | the abstract input cannot be represented exactly as a subnormal quadruple-
4752 | precision floating-point number.
4753 |     The input significand must be normalized or smaller.  If the input
4754 | significand is not normalized, `zExp' must be 0; in that case, the result
4755 | returned is a subnormal number, and it must not require rounding.  In the
4756 | usual case that the input significand is normalized, `zExp' must be 1 less
4757 | than the ``true'' floating-point exponent.  The handling of underflow and
4758 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4759 *----------------------------------------------------------------------------*/
4760 
static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
                                     uint64_t zSig0, uint64_t zSig1,
                                     uint64_t zSig2, float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* zSig2 holds the guard/round/sticky bits below the 113-bit
     * significand <zSig0,zSig1>; decide whether to round up. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Increment when the discarded bits are >= half an ulp. */
        increment = ((int64_t)zSig2 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig2;
        break;
    case float_round_down:
        increment = zSign && zSig2;
        break;
    case float_round_to_odd:
        /* Round-to-odd: bump only when the LSB would otherwise be even. */
        increment = !(zSig1 & 0x1) && zSig2;
        break;
    default:
        abort();
    }
    /* Exponent out of the normal range: overflow or subnormal handling. */
    if ( 0x7FFD <= (uint32_t) zExp ) {
        if (    ( 0x7FFD < zExp )
             || (    ( zExp == 0x7FFD )
                  && eq128(
                         UINT64_C(0x0001FFFFFFFFFFFF),
                         UINT64_C(0xFFFFFFFFFFFFFFFF),
                         zSig0,
                         zSig1
                     )
                  && increment
                )
           ) {
            /* Overflow: return infinity, or the largest finite number
             * when the rounding direction points toward zero. */
            float_raise(float_flag_overflow | float_flag_inexact, status);
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
                 || (roundingMode == float_round_to_odd)
               ) {
                return
                    packFloat128(
                        zSign,
                        0x7FFE,
                        UINT64_C(0x0000FFFFFFFFFFFF),
                        UINT64_C(0xFFFFFFFFFFFFFFFF)
                    );
            }
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        if ( zExp < 0 ) {
            /* Result is subnormal (or flushed to zero). */
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat128(zSign, 0, 0, 0);
            }
            /* Tiny if detected before rounding, or if rounding cannot
             * lift the value to the smallest normal number. */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || !increment
                  || lt128(zSig0, zSig1,
                           UINT64_C(0x0001FFFFFFFFFFFF),
                           UINT64_C(0xFFFFFFFFFFFFFFFF));
            /* Denormalize; shifted-out bits are jammed into zSig2. */
            shift128ExtraRightJamming(
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
            zExp = 0;
            if (isTiny && zSig2) {
                float_raise(float_flag_underflow, status);
            }
            /* The shift changed the sticky bits, so the increment
             * decision must be re-derived. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig2 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig2;
                break;
            case float_round_down:
                increment = zSign && zSig2;
                break;
            case float_round_to_odd:
                increment = !(zSig1 & 0x1) && zSig2;
                break;
            default:
                abort();
            }
        }
    }
    if (zSig2) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
        /* Ties-to-even: clear the LSB on an exact halfway case. */
        if ((zSig2 + zSig2 == 0) && roundNearestEven) {
            zSig1 &= ~1;
        }
    }
    else {
        /* An exact zero result gets a zero exponent. */
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
    }
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}
4872 
4873 /*----------------------------------------------------------------------------
4874 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4875 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4876 | returns the proper quadruple-precision floating-point value corresponding
4877 | to the abstract input.  This routine is just like `roundAndPackFloat128'
4878 | except that the input significand has fewer bits and does not have to be
4879 | normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
4880 | point exponent.
4881 *----------------------------------------------------------------------------*/
4882 
4883 static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
4884                                               uint64_t zSig0, uint64_t zSig1,
4885                                               float_status *status)
4886 {
4887     int8_t shiftCount;
4888     uint64_t zSig2;
4889 
4890     if ( zSig0 == 0 ) {
4891         zSig0 = zSig1;
4892         zSig1 = 0;
4893         zExp -= 64;
4894     }
4895     shiftCount = clz64(zSig0) - 15;
4896     if ( 0 <= shiftCount ) {
4897         zSig2 = 0;
4898         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4899     }
4900     else {
4901         shift128ExtraRightJamming(
4902             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4903     }
4904     zExp -= shiftCount;
4905     return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
4906 
4907 }
4908 
4909 
4910 /*----------------------------------------------------------------------------
4911 | Returns the result of converting the 32-bit two's complement integer `a'
4912 | to the extended double-precision floating-point format.  The conversion
4913 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4914 | Arithmetic.
4915 *----------------------------------------------------------------------------*/
4916 
4917 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4918 {
4919     bool zSign;
4920     uint32_t absA;
4921     int8_t shiftCount;
4922     uint64_t zSig;
4923 
4924     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4925     zSign = ( a < 0 );
4926     absA = zSign ? - a : a;
4927     shiftCount = clz32(absA) + 32;
4928     zSig = absA;
4929     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4930 
4931 }
4932 
4933 /*----------------------------------------------------------------------------
4934 | Returns the result of converting the 32-bit two's complement integer `a' to
4935 | the quadruple-precision floating-point format.  The conversion is performed
4936 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4937 *----------------------------------------------------------------------------*/
4938 
4939 float128 int32_to_float128(int32_t a, float_status *status)
4940 {
4941     bool zSign;
4942     uint32_t absA;
4943     int8_t shiftCount;
4944     uint64_t zSig0;
4945 
4946     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4947     zSign = ( a < 0 );
4948     absA = zSign ? - a : a;
4949     shiftCount = clz32(absA) + 17;
4950     zSig0 = absA;
4951     return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
4952 
4953 }
4954 
4955 /*----------------------------------------------------------------------------
4956 | Returns the result of converting the 64-bit two's complement integer `a'
4957 | to the extended double-precision floating-point format.  The conversion
4958 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4959 | Arithmetic.
4960 *----------------------------------------------------------------------------*/
4961 
4962 floatx80 int64_to_floatx80(int64_t a, float_status *status)
4963 {
4964     bool zSign;
4965     uint64_t absA;
4966     int8_t shiftCount;
4967 
4968     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4969     zSign = ( a < 0 );
4970     absA = zSign ? - a : a;
4971     shiftCount = clz64(absA);
4972     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4973 
4974 }
4975 
4976 /*----------------------------------------------------------------------------
4977 | Returns the result of converting the 64-bit two's complement integer `a' to
4978 | the quadruple-precision floating-point format.  The conversion is performed
4979 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4980 *----------------------------------------------------------------------------*/
4981 
4982 float128 int64_to_float128(int64_t a, float_status *status)
4983 {
4984     bool zSign;
4985     uint64_t absA;
4986     int8_t shiftCount;
4987     int32_t zExp;
4988     uint64_t zSig0, zSig1;
4989 
4990     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4991     zSign = ( a < 0 );
4992     absA = zSign ? - a : a;
4993     shiftCount = clz64(absA) + 49;
4994     zExp = 0x406E - shiftCount;
4995     if ( 64 <= shiftCount ) {
4996         zSig1 = 0;
4997         zSig0 = absA;
4998         shiftCount -= 64;
4999     }
5000     else {
5001         zSig1 = absA;
5002         zSig0 = 0;
5003     }
5004     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5005     return packFloat128( zSign, zExp, zSig0, zSig1 );
5006 
5007 }
5008 
5009 /*----------------------------------------------------------------------------
5010 | Returns the result of converting the 64-bit unsigned integer `a'
5011 | to the quadruple-precision floating-point format.  The conversion is performed
5012 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5013 *----------------------------------------------------------------------------*/
5014 
5015 float128 uint64_to_float128(uint64_t a, float_status *status)
5016 {
5017     if (a == 0) {
5018         return float128_zero;
5019     }
5020     return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
5021 }
5022 
5023 /*----------------------------------------------------------------------------
5024 | Returns the result of converting the single-precision floating-point value
5025 | `a' to the extended double-precision floating-point format.  The conversion
5026 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5027 | Arithmetic.
5028 *----------------------------------------------------------------------------*/
5029 
5030 floatx80 float32_to_floatx80(float32 a, float_status *status)
5031 {
5032     bool aSign;
5033     int aExp;
5034     uint32_t aSig;
5035 
5036     a = float32_squash_input_denormal(a, status);
5037     aSig = extractFloat32Frac( a );
5038     aExp = extractFloat32Exp( a );
5039     aSign = extractFloat32Sign( a );
5040     if ( aExp == 0xFF ) {
5041         if (aSig) {
5042             floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
5043                                                status);
5044             return floatx80_silence_nan(res, status);
5045         }
5046         return packFloatx80(aSign,
5047                             floatx80_infinity_high,
5048                             floatx80_infinity_low);
5049     }
5050     if ( aExp == 0 ) {
5051         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5052         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5053     }
5054     aSig |= 0x00800000;
5055     return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
5056 
5057 }
5058 
5059 /*----------------------------------------------------------------------------
5060 | Returns the result of converting the single-precision floating-point value
| `a' to the quadruple-precision floating-point format.  The conversion is
5062 | performed according to the IEC/IEEE Standard for Binary Floating-Point
5063 | Arithmetic.
5064 *----------------------------------------------------------------------------*/
5065 
5066 float128 float32_to_float128(float32 a, float_status *status)
5067 {
5068     bool aSign;
5069     int aExp;
5070     uint32_t aSig;
5071 
5072     a = float32_squash_input_denormal(a, status);
5073     aSig = extractFloat32Frac( a );
5074     aExp = extractFloat32Exp( a );
5075     aSign = extractFloat32Sign( a );
5076     if ( aExp == 0xFF ) {
5077         if (aSig) {
5078             return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
5079         }
5080         return packFloat128( aSign, 0x7FFF, 0, 0 );
5081     }
5082     if ( aExp == 0 ) {
5083         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5084         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5085         --aExp;
5086     }
5087     return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
5088 
5089 }
5090 
5091 /*----------------------------------------------------------------------------
5092 | Returns the remainder of the single-precision floating-point value `a'
5093 | with respect to the corresponding value `b'.  The operation is performed
5094 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5095 *----------------------------------------------------------------------------*/
5096 
float32 float32_rem(float32 a, float32 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint32_t aSig, bSig;
    uint32_t q;
    uint64_t aSig64, bSig64, q64;
    uint32_t alternateASig;
    int32_t sigMean;
    a = float32_squash_input_denormal(a, status);
    b = float32_squash_input_denormal(b, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    /* Special operands: NaN in, or inf % x, or x % 0 -> NaN + invalid. */
    if ( aExp == 0xFF ) {
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
            return propagateFloat32NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    if ( bExp == 0xFF ) {
        if (bSig) {
            return propagateFloat32NaN(a, b, status);
        }
        /* x % inf == x. */
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float32_default_nan(status);
        }
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        /* 0 % b == 0 (with a's sign preserved). */
        if ( aSig == 0 ) return a;
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the integer bits explicit. */
    aSig |= 0x00800000;
    bSig |= 0x00800000;
    if ( expDiff < 32 ) {
        /* Small exponent difference: one 32-bit division step suffices. */
        aSig <<= 8;
        bSig <<= 8;
        if ( expDiff < 0 ) {
            /* |a| < |b|/2: a is already the remainder. */
            if ( expDiff < -1 ) return a;
            aSig >>= 1;
        }
        q = ( bSig <= aSig );
        if ( q ) aSig -= bSig;
        if ( 0 < expDiff ) {
            /* Compute the quotient and subtract q*b in fixed point. */
            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
            q >>= 32 - expDiff;
            bSig >>= 2;
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
        }
        else {
            aSig >>= 2;
            bSig >>= 2;
        }
    }
    else {
        /* Large exponent difference: reduce iteratively, 62 quotient
         * bits per pass, using a 64-bit estimated division. */
        if ( bSig <= aSig ) aSig -= bSig;
        aSig64 = ( (uint64_t) aSig )<<40;
        bSig64 = ( (uint64_t) bSig )<<40;
        expDiff -= 64;
        while ( 0 < expDiff ) {
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
            /* Under-estimate the quotient so the remainder stays
             * non-negative. */
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
            aSig64 = - ( ( bSig * q64 )<<38 );
            expDiff -= 62;
        }
        /* Final partial division for the remaining exponent gap. */
        expDiff += 64;
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
        q = q64>>( 64 - expDiff );
        bSig <<= 6;
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
    }
    /* Step past the true quotient, then pick whichever of the two
     * candidate remainders is nearer (ties go to even quotient). */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int32_t) aSig );
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int32_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
}
5192 
5193 
5194 
5195 /*----------------------------------------------------------------------------
5196 | Returns the binary exponential of the single-precision floating-point value
5197 | `a'. The operation is performed according to the IEC/IEEE Standard for
5198 | Binary Floating-Point Arithmetic.
5199 |
5200 | Uses the following identities:
5201 |
5202 | 1. -------------------------------------------------------------------------
5203 |      x    x*ln(2)
5204 |     2  = e
5205 |
5206 | 2. -------------------------------------------------------------------------
5207 |                      2     3     4     5           n
5208 |      x        x     x     x     x     x           x
5209 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5210 |               1!    2!    3!    4!    5!          n!
5211 *----------------------------------------------------------------------------*/
5212 
/* Taylor-series coefficients 1/n! as IEEE double bit patterns; the
 * trailing comment on each entry gives n (see identity 2 above). */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /*  1 */
    const_float64( 0x3fe0000000000000ll ), /*  2 */
    const_float64( 0x3fc5555555555555ll ), /*  3 */
    const_float64( 0x3fa5555555555555ll ), /*  4 */
    const_float64( 0x3f81111111111111ll ), /*  5 */
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};
5231 
5232 float32 float32_exp2(float32 a, float_status *status)
5233 {
5234     bool aSign;
5235     int aExp;
5236     uint32_t aSig;
5237     float64 r, x, xn;
5238     int i;
5239     a = float32_squash_input_denormal(a, status);
5240 
5241     aSig = extractFloat32Frac( a );
5242     aExp = extractFloat32Exp( a );
5243     aSign = extractFloat32Sign( a );
5244 
5245     if ( aExp == 0xFF) {
5246         if (aSig) {
5247             return propagateFloat32NaN(a, float32_zero, status);
5248         }
5249         return (aSign) ? float32_zero : a;
5250     }
5251     if (aExp == 0) {
5252         if (aSig == 0) return float32_one;
5253     }
5254 
5255     float_raise(float_flag_inexact, status);
5256 
5257     /* ******************************* */
5258     /* using float64 for approximation */
5259     /* ******************************* */
5260     x = float32_to_float64(a, status);
5261     x = float64_mul(x, float64_ln2, status);
5262 
5263     xn = x;
5264     r = float64_one;
5265     for (i = 0 ; i < 15 ; i++) {
5266         float64 f;
5267 
5268         f = float64_mul(xn, float32_exp2_coefficients[i], status);
5269         r = float64_add(r, f, status);
5270 
5271         xn = float64_mul(xn, x, status);
5272     }
5273 
5274     return float64_to_float32(r, status);
5275 }
5276 
5277 /*----------------------------------------------------------------------------
5278 | Returns the binary log of the single-precision floating-point value `a'.
5279 | The operation is performed according to the IEC/IEEE Standard for Binary
5280 | Floating-Point Arithmetic.
5281 *----------------------------------------------------------------------------*/
5282 float32 float32_log2(float32 a, float_status *status)
5283 {
5284     bool aSign, zSign;
5285     int aExp;
5286     uint32_t aSig, zSig, i;
5287 
5288     a = float32_squash_input_denormal(a, status);
5289     aSig = extractFloat32Frac( a );
5290     aExp = extractFloat32Exp( a );
5291     aSign = extractFloat32Sign( a );
5292 
5293     if ( aExp == 0 ) {
5294         if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5295         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5296     }
5297     if ( aSign ) {
5298         float_raise(float_flag_invalid, status);
5299         return float32_default_nan(status);
5300     }
5301     if ( aExp == 0xFF ) {
5302         if (aSig) {
5303             return propagateFloat32NaN(a, float32_zero, status);
5304         }
5305         return a;
5306     }
5307 
5308     aExp -= 0x7F;
5309     aSig |= 0x00800000;
5310     zSign = aExp < 0;
5311     zSig = aExp << 23;
5312 
5313     for (i = 1 << 22; i > 0; i >>= 1) {
5314         aSig = ( (uint64_t)aSig * aSig ) >> 23;
5315         if ( aSig & 0x01000000 ) {
5316             aSig >>= 1;
5317             zSig |= i;
5318         }
5319     }
5320 
5321     if ( zSign )
5322         zSig = -zSig;
5323 
5324     return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
5325 }
5326 
5327 /*----------------------------------------------------------------------------
5328 | Returns the result of converting the double-precision floating-point value
5329 | `a' to the extended double-precision floating-point format.  The conversion
5330 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5331 | Arithmetic.
5332 *----------------------------------------------------------------------------*/
5333 
5334 floatx80 float64_to_floatx80(float64 a, float_status *status)
5335 {
5336     bool aSign;
5337     int aExp;
5338     uint64_t aSig;
5339 
5340     a = float64_squash_input_denormal(a, status);
5341     aSig = extractFloat64Frac( a );
5342     aExp = extractFloat64Exp( a );
5343     aSign = extractFloat64Sign( a );
5344     if ( aExp == 0x7FF ) {
5345         if (aSig) {
5346             floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5347                                                status);
5348             return floatx80_silence_nan(res, status);
5349         }
5350         return packFloatx80(aSign,
5351                             floatx80_infinity_high,
5352                             floatx80_infinity_low);
5353     }
5354     if ( aExp == 0 ) {
5355         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5356         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5357     }
5358     return
5359         packFloatx80(
5360             aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
5361 
5362 }
5363 
5364 /*----------------------------------------------------------------------------
5365 | Returns the result of converting the double-precision floating-point value
5366 | `a' to the quadruple-precision floating-point format.  The conversion is
5367 | performed according to the IEC/IEEE Standard for Binary Floating-Point
5368 | Arithmetic.
5369 *----------------------------------------------------------------------------*/
5370 
5371 float128 float64_to_float128(float64 a, float_status *status)
5372 {
5373     bool aSign;
5374     int aExp;
5375     uint64_t aSig, zSig0, zSig1;
5376 
5377     a = float64_squash_input_denormal(a, status);
5378     aSig = extractFloat64Frac( a );
5379     aExp = extractFloat64Exp( a );
5380     aSign = extractFloat64Sign( a );
5381     if ( aExp == 0x7FF ) {
5382         if (aSig) {
5383             return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
5384         }
5385         return packFloat128( aSign, 0x7FFF, 0, 0 );
5386     }
5387     if ( aExp == 0 ) {
5388         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5389         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5390         --aExp;
5391     }
5392     shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
5393     return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
5394 
5395 }
5396 
5397 
5398 /*----------------------------------------------------------------------------
5399 | Returns the remainder of the double-precision floating-point value `a'
5400 | with respect to the corresponding value `b'.  The operation is performed
5401 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5402 *----------------------------------------------------------------------------*/
5403 
float64 float64_rem(float64 a, float64 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint64_t aSig, bSig;
    uint64_t q, alternateASig;
    int64_t sigMean;

    a = float64_squash_input_denormal(a, status);
    b = float64_squash_input_denormal(b, status);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    /* Special cases: NaN operands propagate; rem(Inf, x) and rem(x, 0)
     * are invalid operations; rem(x, Inf) = x; rem(0, x) = 0.  */
    if ( aExp == 0x7FF ) {
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            return propagateFloat64NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    if ( bExp == 0x7FF ) {
        if (bSig) {
            return propagateFloat64NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float64_default_nan(status);
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the implicit integer bit explicit and left-align both
     * significands in 64 bits.  */
    aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
    bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
    if ( expDiff < 0 ) {
        /* |a| < |b|/2: the remainder is a itself.  */
        if ( expDiff < -1 ) return a;
        aSig >>= 1;
    }
    q = ( bSig <= aSig );
    if ( q ) aSig -= bSig;
    /* Reduce the exponent difference 62 bits per iteration using an
     * estimated quotient; the estimate is lowered by 2 so it never
     * exceeds the true quotient.  */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        aSig = - ( ( bSig>>2 ) * q );
        expDiff -= 62;
    }
    /* Final partial quotient for the remaining 0..64 bits.  */
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        bSig >>= 2;
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
    }
    else {
        aSig >>= 2;
        bSig >>= 2;
    }
    /* Step one past the true quotient, then choose between the two
     * candidate remainders so the result is rounded to nearest, ties
     * to the even quotient (IEEE remainder semantics).  */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int64_t) aSig );
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int64_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);

}
5485 
5486 /*----------------------------------------------------------------------------
5487 | Returns the binary log of the double-precision floating-point value `a'.
5488 | The operation is performed according to the IEC/IEEE Standard for Binary
5489 | Floating-Point Arithmetic.
5490 *----------------------------------------------------------------------------*/
float64 float64_log2(float64 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint64_t aSig, aSig0, aSig1, zSig, i;
    a = float64_squash_input_denormal(a, status);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    if ( aExp == 0 ) {
        /* log2(+0) = -infinity; subnormals are normalized first.  */
        if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    if ( aSign ) {
        /* log2 of a negative number is invalid.  */
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    if ( aExp == 0x7FF ) {
        if (aSig) {
            return propagateFloat64NaN(a, float64_zero, status);
        }
        /* log2(+infinity) = +infinity.  */
        return a;
    }

    /* The integer part of the result is the unbiased exponent; fraction
     * bits are generated one at a time by repeatedly squaring the
     * significand and noting when it crosses 2.  The cast prevents a
     * left shift of a negative int (undefined behavior).  */
    aExp -= 0x3FF;
    aSig |= UINT64_C(0x0010000000000000);
    zSign = aExp < 0;
    zSig = (uint64_t)aExp << 52;
    for (i = 1LL << 51; i > 0; i >>= 1) {
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
        if ( aSig & UINT64_C(0x0020000000000000) ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    if ( zSign )
        zSig = -zSig;
    return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
}
5534 
5535 /*----------------------------------------------------------------------------
5536 | Returns the result of converting the extended double-precision floating-
5537 | point value `a' to the 32-bit two's complement integer format.  The
5538 | conversion is performed according to the IEC/IEEE Standard for Binary
5539 | Floating-Point Arithmetic---which means in particular that the conversion
5540 | is rounded according to the current rounding mode.  If `a' is a NaN, the
5541 | largest positive integer is returned.  Otherwise, if the conversion
5542 | overflows, the largest integer with the same sign as `a' is returned.
5543 *----------------------------------------------------------------------------*/
5544 
5545 int32_t floatx80_to_int32(floatx80 a, float_status *status)
5546 {
5547     bool aSign;
5548     int32_t aExp, shiftCount;
5549     uint64_t aSig;
5550 
5551     if (floatx80_invalid_encoding(a)) {
5552         float_raise(float_flag_invalid, status);
5553         return 1 << 31;
5554     }
5555     aSig = extractFloatx80Frac( a );
5556     aExp = extractFloatx80Exp( a );
5557     aSign = extractFloatx80Sign( a );
5558     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5559     shiftCount = 0x4037 - aExp;
5560     if ( shiftCount <= 0 ) shiftCount = 1;
5561     shift64RightJamming( aSig, shiftCount, &aSig );
5562     return roundAndPackInt32(aSign, aSig, status);
5563 
5564 }
5565 
5566 /*----------------------------------------------------------------------------
5567 | Returns the result of converting the extended double-precision floating-
5568 | point value `a' to the 32-bit two's complement integer format.  The
5569 | conversion is performed according to the IEC/IEEE Standard for Binary
5570 | Floating-Point Arithmetic, except that the conversion is always rounded
5571 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5572 | Otherwise, if the conversion overflows, the largest integer with the same
5573 | sign as `a' is returned.
5574 *----------------------------------------------------------------------------*/
5575 
5576 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5577 {
5578     bool aSign;
5579     int32_t aExp, shiftCount;
5580     uint64_t aSig, savedASig;
5581     int32_t z;
5582 
5583     if (floatx80_invalid_encoding(a)) {
5584         float_raise(float_flag_invalid, status);
5585         return 1 << 31;
5586     }
5587     aSig = extractFloatx80Frac( a );
5588     aExp = extractFloatx80Exp( a );
5589     aSign = extractFloatx80Sign( a );
5590     if ( 0x401E < aExp ) {
5591         if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5592         goto invalid;
5593     }
5594     else if ( aExp < 0x3FFF ) {
5595         if (aExp || aSig) {
5596             float_raise(float_flag_inexact, status);
5597         }
5598         return 0;
5599     }
5600     shiftCount = 0x403E - aExp;
5601     savedASig = aSig;
5602     aSig >>= shiftCount;
5603     z = aSig;
5604     if ( aSign ) z = - z;
5605     if ( ( z < 0 ) ^ aSign ) {
5606  invalid:
5607         float_raise(float_flag_invalid, status);
5608         return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5609     }
5610     if ( ( aSig<<shiftCount ) != savedASig ) {
5611         float_raise(float_flag_inexact, status);
5612     }
5613     return z;
5614 
5615 }
5616 
5617 /*----------------------------------------------------------------------------
5618 | Returns the result of converting the extended double-precision floating-
5619 | point value `a' to the 64-bit two's complement integer format.  The
5620 | conversion is performed according to the IEC/IEEE Standard for Binary
5621 | Floating-Point Arithmetic---which means in particular that the conversion
5622 | is rounded according to the current rounding mode.  If `a' is a NaN,
5623 | the largest positive integer is returned.  Otherwise, if the conversion
5624 | overflows, the largest integer with the same sign as `a' is returned.
5625 *----------------------------------------------------------------------------*/
5626 
5627 int64_t floatx80_to_int64(floatx80 a, float_status *status)
5628 {
5629     bool aSign;
5630     int32_t aExp, shiftCount;
5631     uint64_t aSig, aSigExtra;
5632 
5633     if (floatx80_invalid_encoding(a)) {
5634         float_raise(float_flag_invalid, status);
5635         return 1ULL << 63;
5636     }
5637     aSig = extractFloatx80Frac( a );
5638     aExp = extractFloatx80Exp( a );
5639     aSign = extractFloatx80Sign( a );
5640     shiftCount = 0x403E - aExp;
5641     if ( shiftCount <= 0 ) {
5642         if ( shiftCount ) {
5643             float_raise(float_flag_invalid, status);
5644             if (!aSign || floatx80_is_any_nan(a)) {
5645                 return INT64_MAX;
5646             }
5647             return INT64_MIN;
5648         }
5649         aSigExtra = 0;
5650     }
5651     else {
5652         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5653     }
5654     return roundAndPackInt64(aSign, aSig, aSigExtra, status);
5655 
5656 }
5657 
5658 /*----------------------------------------------------------------------------
5659 | Returns the result of converting the extended double-precision floating-
5660 | point value `a' to the 64-bit two's complement integer format.  The
5661 | conversion is performed according to the IEC/IEEE Standard for Binary
5662 | Floating-Point Arithmetic, except that the conversion is always rounded
5663 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5664 | Otherwise, if the conversion overflows, the largest integer with the same
5665 | sign as `a' is returned.
5666 *----------------------------------------------------------------------------*/
5667 
5668 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
5669 {
5670     bool aSign;
5671     int32_t aExp, shiftCount;
5672     uint64_t aSig;
5673     int64_t z;
5674 
5675     if (floatx80_invalid_encoding(a)) {
5676         float_raise(float_flag_invalid, status);
5677         return 1ULL << 63;
5678     }
5679     aSig = extractFloatx80Frac( a );
5680     aExp = extractFloatx80Exp( a );
5681     aSign = extractFloatx80Sign( a );
5682     shiftCount = aExp - 0x403E;
5683     if ( 0 <= shiftCount ) {
5684         aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
5685         if ( ( a.high != 0xC03E ) || aSig ) {
5686             float_raise(float_flag_invalid, status);
5687             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5688                 return INT64_MAX;
5689             }
5690         }
5691         return INT64_MIN;
5692     }
5693     else if ( aExp < 0x3FFF ) {
5694         if (aExp | aSig) {
5695             float_raise(float_flag_inexact, status);
5696         }
5697         return 0;
5698     }
5699     z = aSig>>( - shiftCount );
5700     if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
5701         float_raise(float_flag_inexact, status);
5702     }
5703     if ( aSign ) z = - z;
5704     return z;
5705 
5706 }
5707 
5708 /*----------------------------------------------------------------------------
5709 | Returns the result of converting the extended double-precision floating-
5710 | point value `a' to the single-precision floating-point format.  The
5711 | conversion is performed according to the IEC/IEEE Standard for Binary
5712 | Floating-Point Arithmetic.
5713 *----------------------------------------------------------------------------*/
5714 
5715 float32 floatx80_to_float32(floatx80 a, float_status *status)
5716 {
5717     bool aSign;
5718     int32_t aExp;
5719     uint64_t aSig;
5720 
5721     if (floatx80_invalid_encoding(a)) {
5722         float_raise(float_flag_invalid, status);
5723         return float32_default_nan(status);
5724     }
5725     aSig = extractFloatx80Frac( a );
5726     aExp = extractFloatx80Exp( a );
5727     aSign = extractFloatx80Sign( a );
5728     if ( aExp == 0x7FFF ) {
5729         if ( (uint64_t) ( aSig<<1 ) ) {
5730             float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5731                                              status);
5732             return float32_silence_nan(res, status);
5733         }
5734         return packFloat32( aSign, 0xFF, 0 );
5735     }
5736     shift64RightJamming( aSig, 33, &aSig );
5737     if ( aExp || aSig ) aExp -= 0x3F81;
5738     return roundAndPackFloat32(aSign, aExp, aSig, status);
5739 
5740 }
5741 
5742 /*----------------------------------------------------------------------------
5743 | Returns the result of converting the extended double-precision floating-
5744 | point value `a' to the double-precision floating-point format.  The
5745 | conversion is performed according to the IEC/IEEE Standard for Binary
5746 | Floating-Point Arithmetic.
5747 *----------------------------------------------------------------------------*/
5748 
5749 float64 floatx80_to_float64(floatx80 a, float_status *status)
5750 {
5751     bool aSign;
5752     int32_t aExp;
5753     uint64_t aSig, zSig;
5754 
5755     if (floatx80_invalid_encoding(a)) {
5756         float_raise(float_flag_invalid, status);
5757         return float64_default_nan(status);
5758     }
5759     aSig = extractFloatx80Frac( a );
5760     aExp = extractFloatx80Exp( a );
5761     aSign = extractFloatx80Sign( a );
5762     if ( aExp == 0x7FFF ) {
5763         if ( (uint64_t) ( aSig<<1 ) ) {
5764             float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5765                                              status);
5766             return float64_silence_nan(res, status);
5767         }
5768         return packFloat64( aSign, 0x7FF, 0 );
5769     }
5770     shift64RightJamming( aSig, 1, &zSig );
5771     if ( aExp || aSig ) aExp -= 0x3C01;
5772     return roundAndPackFloat64(aSign, aExp, zSig, status);
5773 
5774 }
5775 
5776 /*----------------------------------------------------------------------------
5777 | Returns the result of converting the extended double-precision floating-
5778 | point value `a' to the quadruple-precision floating-point format.  The
5779 | conversion is performed according to the IEC/IEEE Standard for Binary
5780 | Floating-Point Arithmetic.
5781 *----------------------------------------------------------------------------*/
5782 
5783 float128 floatx80_to_float128(floatx80 a, float_status *status)
5784 {
5785     bool aSign;
5786     int aExp;
5787     uint64_t aSig, zSig0, zSig1;
5788 
5789     if (floatx80_invalid_encoding(a)) {
5790         float_raise(float_flag_invalid, status);
5791         return float128_default_nan(status);
5792     }
5793     aSig = extractFloatx80Frac( a );
5794     aExp = extractFloatx80Exp( a );
5795     aSign = extractFloatx80Sign( a );
5796     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5797         float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5798                                            status);
5799         return float128_silence_nan(res, status);
5800     }
5801     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5802     return packFloat128( aSign, aExp, zSig0, zSig1 );
5803 
5804 }
5805 
5806 /*----------------------------------------------------------------------------
5807 | Rounds the extended double-precision floating-point value `a'
5808 | to the precision provided by floatx80_rounding_precision and returns the
5809 | result as an extended double-precision floating-point value.
5810 | The operation is performed according to the IEC/IEEE Standard for Binary
5811 | Floating-Point Arithmetic.
5812 *----------------------------------------------------------------------------*/
5813 
5814 floatx80 floatx80_round(floatx80 a, float_status *status)
5815 {
5816     return roundAndPackFloatx80(status->floatx80_rounding_precision,
5817                                 extractFloatx80Sign(a),
5818                                 extractFloatx80Exp(a),
5819                                 extractFloatx80Frac(a), 0, status);
5820 }
5821 
5822 /*----------------------------------------------------------------------------
5823 | Rounds the extended double-precision floating-point value `a' to an integer,
5824 | and returns the result as an extended quadruple-precision floating-point
5825 | value.  The operation is performed according to the IEC/IEEE Standard for
5826 | Binary Floating-Point Arithmetic.
5827 *----------------------------------------------------------------------------*/
5828 
5829 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5830 {
5831     bool aSign;
5832     int32_t aExp;
5833     uint64_t lastBitMask, roundBitsMask;
5834     floatx80 z;
5835 
5836     if (floatx80_invalid_encoding(a)) {
5837         float_raise(float_flag_invalid, status);
5838         return floatx80_default_nan(status);
5839     }
5840     aExp = extractFloatx80Exp( a );
5841     if ( 0x403E <= aExp ) {
5842         if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5843             return propagateFloatx80NaN(a, a, status);
5844         }
5845         return a;
5846     }
5847     if ( aExp < 0x3FFF ) {
5848         if (    ( aExp == 0 )
5849              && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
5850             return a;
5851         }
5852         float_raise(float_flag_inexact, status);
5853         aSign = extractFloatx80Sign( a );
5854         switch (status->float_rounding_mode) {
5855          case float_round_nearest_even:
5856             if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5857                ) {
5858                 return
5859                     packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5860             }
5861             break;
5862         case float_round_ties_away:
5863             if (aExp == 0x3FFE) {
5864                 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5865             }
5866             break;
5867          case float_round_down:
5868             return
5869                   aSign ?
5870                       packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5871                 : packFloatx80( 0, 0, 0 );
5872          case float_round_up:
5873             return
5874                   aSign ? packFloatx80( 1, 0, 0 )
5875                 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5876 
5877         case float_round_to_zero:
5878             break;
5879         default:
5880             g_assert_not_reached();
5881         }
5882         return packFloatx80( aSign, 0, 0 );
5883     }
5884     lastBitMask = 1;
5885     lastBitMask <<= 0x403E - aExp;
5886     roundBitsMask = lastBitMask - 1;
5887     z = a;
5888     switch (status->float_rounding_mode) {
5889     case float_round_nearest_even:
5890         z.low += lastBitMask>>1;
5891         if ((z.low & roundBitsMask) == 0) {
5892             z.low &= ~lastBitMask;
5893         }
5894         break;
5895     case float_round_ties_away:
5896         z.low += lastBitMask >> 1;
5897         break;
5898     case float_round_to_zero:
5899         break;
5900     case float_round_up:
5901         if (!extractFloatx80Sign(z)) {
5902             z.low += roundBitsMask;
5903         }
5904         break;
5905     case float_round_down:
5906         if (extractFloatx80Sign(z)) {
5907             z.low += roundBitsMask;
5908         }
5909         break;
5910     default:
5911         abort();
5912     }
5913     z.low &= ~ roundBitsMask;
5914     if ( z.low == 0 ) {
5915         ++z.high;
5916         z.low = UINT64_C(0x8000000000000000);
5917     }
5918     if (z.low != a.low) {
5919         float_raise(float_flag_inexact, status);
5920     }
5921     return z;
5922 
5923 }
5924 
5925 /*----------------------------------------------------------------------------
5926 | Returns the result of adding the absolute values of the extended double-
5927 | precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
5928 | negated before being returned.  `zSign' is ignored if the result is a NaN.
5929 | The addition is performed according to the IEC/IEEE Standard for Binary
5930 | Floating-Point Arithmetic.
5931 *----------------------------------------------------------------------------*/
5932 
static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* a has the larger exponent: align b's significand to it,
         * keeping shifted-out bits in zSig1 for rounding.  */
        if ( aExp == 0x7FFF ) {
            if ((uint64_t)(aSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        if ( bExp == 0 ) --expDiff;
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* b has the larger exponent: align a's significand to it.  */
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        /* Equal exponents: the sum always carries out unless both
         * operands are (pseudo-)denormal.  */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
                /* At least one of the values is a pseudo-denormal,
                 * and there is a carry out of the result.  */
                zExp = 1;
                goto shiftRight1;
            }
            if (zSig0 == 0) {
                return packFloatx80(zSign, 0, 0);
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        goto shiftRight1;
    }
    /* Unequal-exponent path: if the top bit is still set, no carry
     * occurred and the sum is already normalized.  */
    zSig0 = aSig + bSig;
    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    /* Carry out: shift right one (jamming into zSig1) and restore the
     * explicit integer bit.  */
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= UINT64_C(0x8000000000000000);
    ++zExp;
 roundAndPack:
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6004 
6005 /*----------------------------------------------------------------------------
6006 | Returns the result of subtracting the absolute values of the extended
6007 | double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
6008 | difference is negated before being returned.  `zSign' is ignored if the
6009 | result is a NaN.  The subtraction is performed according to the IEC/IEEE
6010 | Standard for Binary Floating-Point Arithmetic.
6011 *----------------------------------------------------------------------------*/
6012 
static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    /* Equal exponents: compare significands to decide which operand is
     * larger in magnitude.  Inf - Inf is invalid.  */
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        aExp = 1;
        bExp = 1;
    }
    zSig1 = 0;
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    /* Exact cancellation: zero is negative only in round-down mode.  */
    return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
 bExpBigger:
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* a - Inf = -Inf (relative to the a-side sign zSign).  */
        return packFloatx80(zSign ^ 1, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) ++expDiff;
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
 bBigger:
    /* |b| > |a|: subtract a from b and flip the result sign.  */
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
    zExp = bExp;
    zSign ^= 1;
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) --expDiff;
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
 aBigger:
    /* |a| > |b|: subtract b (aligned, with jammed low bits) from a.  */
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
    zExp = aExp;
 normalizeRoundAndPack:
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         zSign, zExp, zSig0, zSig1, status);
}
6073 
6074 /*----------------------------------------------------------------------------
6075 | Returns the result of adding the extended double-precision floating-point
6076 | values `a' and `b'.  The operation is performed according to the IEC/IEEE
6077 | Standard for Binary Floating-Point Arithmetic.
6078 *----------------------------------------------------------------------------*/
6079 
6080 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
6081 {
6082     bool aSign, bSign;
6083 
6084     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6085         float_raise(float_flag_invalid, status);
6086         return floatx80_default_nan(status);
6087     }
6088     aSign = extractFloatx80Sign( a );
6089     bSign = extractFloatx80Sign( b );
6090     if ( aSign == bSign ) {
6091         return addFloatx80Sigs(a, b, aSign, status);
6092     }
6093     else {
6094         return subFloatx80Sigs(a, b, aSign, status);
6095     }
6096 
6097 }
6098 
6099 /*----------------------------------------------------------------------------
6100 | Returns the result of subtracting the extended double-precision floating-
6101 | point values `a' and `b'.  The operation is performed according to the
6102 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6103 *----------------------------------------------------------------------------*/
6104 
6105 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
6106 {
6107     bool aSign, bSign;
6108 
6109     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6110         float_raise(float_flag_invalid, status);
6111         return floatx80_default_nan(status);
6112     }
6113     aSign = extractFloatx80Sign( a );
6114     bSign = extractFloatx80Sign( b );
6115     if ( aSign == bSign ) {
6116         return subFloatx80Sigs(a, b, aSign, status);
6117     }
6118     else {
6119         return addFloatx80Sigs(a, b, aSign, status);
6120     }
6121 
6122 }
6123 
6124 /*----------------------------------------------------------------------------
6125 | Returns the result of multiplying the extended double-precision floating-
6126 | point values `a' and `b'.  The operation is performed according to the
6127 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6128 *----------------------------------------------------------------------------*/
6129 
floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;

    /* Invalid x87 encodings are rejected up front with the default NaN. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        /* 'a' is NaN or Inf: NaN operands propagate; Inf * 0 is invalid. */
        if (    (uint64_t) ( aSig<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( ( bExp | bSig ) == 0 ) goto invalid;
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        /* 'b' is NaN or Inf; 0 * Inf is invalid, finite * Inf is Inf. */
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( ( aExp | aSig ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    /* Normalize subnormal operands so both significands have bit 63 set. */
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    zExp = aExp + bExp - 0x3FFE;
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
    /* Product of two significands in [1,2) lies in [1,4); at most one
       left shift is needed to set bit 63 of zSig0 for rounding. */
    if ( 0 < (int64_t) zSig0 ) {
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
        --zExp;
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6185 
6186 /*----------------------------------------------------------------------------
6187 | Returns the result of dividing the extended double-precision floating-point
6188 | value `a' by the corresponding value `b'.  The operation is performed
6189 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6190 *----------------------------------------------------------------------------*/
6191 
floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    uint64_t rem0, rem1, rem2, term0, term1, term2;

    /* Invalid x87 encodings are rejected up front with the default NaN. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        /* 'a' is NaN or Inf.  Inf / Inf is invalid. */
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            goto invalid;
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        /* finite / Inf yields a signed zero (unless 'b' is a NaN). */
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return packFloatx80( zSign, 0, 0 );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* Division by zero: 0/0 is invalid, otherwise signed Inf. */
            if ( ( aExp | aSig ) == 0 ) {
 invalid:
                float_raise(float_flag_invalid, status);
                return floatx80_default_nan(status);
            }
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(zSign, floatx80_infinity_high,
                                       floatx80_infinity_low);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    zExp = aExp - bExp + 0x3FFE;
    rem1 = 0;
    /* Scale the dividend so aSig < bSig, keeping the per-word quotient
       estimate within 64 bits; compensate in the exponent. */
    if ( bSig <= aSig ) {
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
        ++zExp;
    }
    /* High 64 quotient bits: estimate, then step the estimate down until
       the partial remainder is non-negative. */
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
    mul64To128( bSig, zSig0, &term0, &term1 );
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
    }
    /* Low 64 quotient bits; the exact correction is only performed when
       the estimate is close enough to a rounding boundary to matter. */
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
        mul64To128( bSig, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
        }
        /* Fold any leftover remainder into the sticky bit. */
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6272 
6273 /*----------------------------------------------------------------------------
6274 | Returns the remainder of the extended double-precision floating-point value
6275 | `a' with respect to the corresponding value `b'.  The operation is performed
6276 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6277 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
6278 | the quotient toward zero instead.  '*quotient' is set to the low 64 bits of
6279 | the absolute value of the integer quotient.
6280 *----------------------------------------------------------------------------*/
6281 
floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
                         float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff, aExpOrig;
    uint64_t aSig0, aSig1, bSig;
    uint64_t q, term0, term1, alternateASig0, alternateASig1;

    *quotient = 0;
    /* Invalid x87 encodings are rejected up front with the default NaN. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExpOrig = aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    if ( aExp == 0x7FFF ) {
        /* NaN operands propagate; Inf rem anything is invalid. */
        if (    (uint64_t) ( aSig0<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        /* finite rem Inf is 'a' itself (unless 'b' is a NaN). */
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if (aExp == 0 && aSig0 >> 63) {
            /*
             * Pseudo-denormal argument must be returned in normalized
             * form.
             */
            return packFloatx80(aSign, 1, aSig0);
        }
        return a;
    }
    if ( bExp == 0 ) {
        /* x rem 0 is invalid. */
        if ( bSig == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return a;
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if ( expDiff < 0 ) {
        /* |a| < |b|: the truncated quotient is 0, so mod returns 'a'.
           For IEEE rem, expDiff == -1 can still round the quotient to 1,
           so fall through with the significand halved. */
        if ( mod || expDiff < -1 ) {
            if (aExp == 1 && aExpOrig == 0) {
                /*
                 * Pseudo-denormal argument must be returned in
                 * normalized form.
                 */
                return packFloatx80(aSign, aExp, aSig0);
            }
            return a;
        }
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
        expDiff = 0;
    }
    /* First quotient bit, then long division producing 62 quotient bits
       per iteration.  The estimate is lowered by 2 so it never exceeds
       the true quotient digit. */
    *quotient = q = ( bSig <= aSig0 );
    if ( q ) aSig0 -= bSig;
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        mul64To128( bSig, q, &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
        expDiff -= 62;
        *quotient <<= 62;
        *quotient += q;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        /* Final partial chunk of fewer than 64 quotient bits; the
           underestimate is fixed by incrementing q while another whole
           (shifted) bSig still fits into the remainder. */
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
            ++q;
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        }
        if (expDiff < 64) {
            *quotient <<= expDiff;
        } else {
            *quotient = 0;
        }
        *quotient += q;
    }
    else {
        term1 = 0;
        term0 = bSig;
    }
    if (!mod) {
        /* IEEE remainder: choose the smaller in magnitude of r and r-b
           (ties go to the even quotient); choosing r-b flips the sign
           and bumps the quotient. */
        sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
        if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
                || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
                        && ( q & 1 ) )
            ) {
            aSig0 = alternateASig0;
            aSig1 = alternateASig1;
            zSign = ! zSign;
            ++*quotient;
        }
    }
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1, status);

}
6402 
6403 /*----------------------------------------------------------------------------
6404 | Returns the remainder of the extended double-precision floating-point value
6405 | `a' with respect to the corresponding value `b'.  The operation is performed
6406 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6407 *----------------------------------------------------------------------------*/
6408 
6409 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6410 {
6411     uint64_t quotient;
6412     return floatx80_modrem(a, b, false, &quotient, status);
6413 }
6414 
6415 /*----------------------------------------------------------------------------
6416 | Returns the remainder of the extended double-precision floating-point value
6417 | `a' with respect to the corresponding value `b', with the quotient truncated
6418 | toward zero.
6419 *----------------------------------------------------------------------------*/
6420 
6421 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6422 {
6423     uint64_t quotient;
6424     return floatx80_modrem(a, b, true, &quotient, status);
6425 }
6426 
6427 /*----------------------------------------------------------------------------
6428 | Returns the square root of the extended double-precision floating-point
6429 | value `a'.  The operation is performed according to the IEC/IEEE Standard
6430 | for Binary Floating-Point Arithmetic.
6431 *----------------------------------------------------------------------------*/
6432 
floatx80 floatx80_sqrt(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    /* Invalid x87 encodings are rejected up front with the default NaN. */
    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN propagates; sqrt(+Inf) is +Inf; sqrt(-Inf) is invalid. */
        if ((uint64_t)(aSig0 << 1)) {
            return propagateFloatx80NaN(a, a, status);
        }
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt of negative zero is negative zero; any other negative
           value is invalid. */
        if ( ( aExp | aSig0 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    /* Result exponent is half the unbiased input exponent. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
    /* 32-bit initial root estimate, refined to ~64 bits via a division
       step; the remainder loops below make the last bits exact. */
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    /* Step the root down until the remainder a - z*z is non-negative. */
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Low 64 result bits; only corrected exactly when the estimate is
       close enough to a rounding boundary to matter. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        /* Fold any leftover remainder into the sticky bit. */
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    /* Reassemble the full 128-bit significand for rounding. */
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= doubleZSig0;
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                0, zExp, zSig0, zSig1, status);
}
6497 
6498 /*----------------------------------------------------------------------------
6499 | Returns the result of converting the quadruple-precision floating-point
6500 | value `a' to the 32-bit two's complement integer format.  The conversion
6501 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6502 | Arithmetic---which means in particular that the conversion is rounded
6503 | according to the current rounding mode.  If `a' is a NaN, the largest
6504 | positive integer is returned.  Otherwise, if the conversion overflows, the
6505 | largest integer with the same sign as `a' is returned.
6506 *----------------------------------------------------------------------------*/
6507 
6508 int32_t float128_to_int32(float128 a, float_status *status)
6509 {
6510     bool aSign;
6511     int32_t aExp, shiftCount;
6512     uint64_t aSig0, aSig1;
6513 
6514     aSig1 = extractFloat128Frac1( a );
6515     aSig0 = extractFloat128Frac0( a );
6516     aExp = extractFloat128Exp( a );
6517     aSign = extractFloat128Sign( a );
6518     if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
6519     if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
6520     aSig0 |= ( aSig1 != 0 );
6521     shiftCount = 0x4028 - aExp;
6522     if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
6523     return roundAndPackInt32(aSign, aSig0, status);
6524 
6525 }
6526 
6527 /*----------------------------------------------------------------------------
6528 | Returns the result of converting the quadruple-precision floating-point
6529 | value `a' to the 32-bit two's complement integer format.  The conversion
6530 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6531 | Arithmetic, except that the conversion is always rounded toward zero.  If
6532 | `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
6533 | conversion overflows, the largest integer with the same sign as `a' is
6534 | returned.
6535 *----------------------------------------------------------------------------*/
6536 
int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig0, aSig1, savedASig;
    int32_t z;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    /* Fold the low fraction word into a sticky bit of the high word. */
    aSig0 |= ( aSig1 != 0 );
    if ( 0x401E < aExp ) {
        /* Magnitude too large for int32 (or NaN/Inf).  NaNs convert as
           positive so INT32_MAX is returned. */
        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
        goto invalid;
    }
    else if ( aExp < 0x3FFF ) {
        /* |a| < 1 truncates to zero; inexact unless 'a' is exactly 0. */
        if (aExp || aSig0) {
            float_raise(float_flag_inexact, status);
        }
        return 0;
    }
    aSig0 |= UINT64_C(0x0001000000000000);   /* implicit integer bit */
    shiftCount = 0x402F - aExp;
    savedASig = aSig0;
    aSig0 >>= shiftCount;
    z = aSig0;
    if ( aSign ) z = - z;
    /* A sign mismatch after negation means the value overflowed int32. */
    if ( ( z < 0 ) ^ aSign ) {
 invalid:
        float_raise(float_flag_invalid, status);
        return aSign ? INT32_MIN : INT32_MAX;
    }
    /* Shifting back exposes whether any fraction bits were discarded. */
    if ( ( aSig0<<shiftCount ) != savedASig ) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
6576 
6577 /*----------------------------------------------------------------------------
6578 | Returns the result of converting the quadruple-precision floating-point
6579 | value `a' to the 64-bit two's complement integer format.  The conversion
6580 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6581 | Arithmetic---which means in particular that the conversion is rounded
6582 | according to the current rounding mode.  If `a' is a NaN, the largest
6583 | positive integer is returned.  Otherwise, if the conversion overflows, the
6584 | largest integer with the same sign as `a' is returned.
6585 *----------------------------------------------------------------------------*/
6586 
int64_t float128_to_int64(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);   /* integer bit */
    shiftCount = 0x402F - aExp;
    if ( shiftCount <= 0 ) {
        if ( 0x403E < aExp ) {
            /* Magnitude at least 2^63: overflow.  Positive values and
               NaNs (nonzero fraction beyond the integer bit when
               aExp == 0x7FFF) saturate to INT64_MAX; negative values
               and -Inf to INT64_MIN. */
            float_raise(float_flag_invalid, status);
            if (    ! aSign
                 || (    ( aExp == 0x7FFF )
                      && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
                    )
               ) {
                return INT64_MAX;
            }
            return INT64_MIN;
        }
        /* Integer part spills into the low word: shift it up. */
        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
    }
    else {
        /* Shift down, jamming discarded bits into the sticky position. */
        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
    }
    return roundAndPackInt64(aSign, aSig0, aSig1, status);

}
6619 
6620 /*----------------------------------------------------------------------------
6621 | Returns the result of converting the quadruple-precision floating-point
6622 | value `a' to the 64-bit two's complement integer format.  The conversion
6623 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6624 | Arithmetic, except that the conversion is always rounded toward zero.
6625 | If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
6626 | the conversion overflows, the largest integer with the same sign as `a' is
6627 | returned.
6628 *----------------------------------------------------------------------------*/
6629 
int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig0, aSig1;
    int64_t z;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);   /* integer bit */
    shiftCount = aExp - 0x402F;
    if ( 0 < shiftCount ) {
        /* Integer part extends into the low fraction word. */
        if ( 0x403E <= aExp ) {
            aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
            /* Exactly -2^63 (possibly with fraction bits that truncate
               away) is the one magnitude-2^63 value representable in
               int64; anything else here overflows. */
            if (    ( a.high == UINT64_C(0xC03E000000000000) )
                 && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
                if (aSig1) {
                    float_raise(float_flag_inexact, status);
                }
            }
            else {
                float_raise(float_flag_invalid, status);
                /* Positive values and NaNs saturate high. */
                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
                    return INT64_MAX;
                }
            }
            return INT64_MIN;
        }
        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
        if ( (uint64_t) ( aSig1<<shiftCount ) ) {
            float_raise(float_flag_inexact, status);
        }
    }
    else {
        if ( aExp < 0x3FFF ) {
            /* |a| < 1 truncates to zero; inexact unless exactly zero. */
            if ( aExp | aSig0 | aSig1 ) {
                float_raise(float_flag_inexact, status);
            }
            return 0;
        }
        z = aSig0>>( - shiftCount );
        /* Any bits shifted out of either word make the result inexact. */
        if (    aSig1
             || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
            float_raise(float_flag_inexact, status);
        }
    }
    if ( aSign ) z = - z;
    return z;

}
6682 
6683 /*----------------------------------------------------------------------------
6684 | Returns the result of converting the quadruple-precision floating-point value
6685 | `a' to the 64-bit unsigned integer format.  The conversion is
6686 | performed according to the IEC/IEEE Standard for Binary Floating-Point
6687 | Arithmetic---which means in particular that the conversion is rounded
6688 | according to the current rounding mode.  If `a' is a NaN, the largest
6689 | positive integer is returned.  If the conversion overflows, the
6690 | largest unsigned integer is returned.  If 'a' is negative, the value is
6691 | rounded and zero is returned; negative values that do not round to zero
6692 | will raise the inexact exception.
6693 *----------------------------------------------------------------------------*/
6694 
6695 uint64_t float128_to_uint64(float128 a, float_status *status)
6696 {
6697     bool aSign;
6698     int aExp;
6699     int shiftCount;
6700     uint64_t aSig0, aSig1;
6701 
6702     aSig0 = extractFloat128Frac0(a);
6703     aSig1 = extractFloat128Frac1(a);
6704     aExp = extractFloat128Exp(a);
6705     aSign = extractFloat128Sign(a);
6706     if (aSign && (aExp > 0x3FFE)) {
6707         float_raise(float_flag_invalid, status);
6708         if (float128_is_any_nan(a)) {
6709             return UINT64_MAX;
6710         } else {
6711             return 0;
6712         }
6713     }
6714     if (aExp) {
6715         aSig0 |= UINT64_C(0x0001000000000000);
6716     }
6717     shiftCount = 0x402F - aExp;
6718     if (shiftCount <= 0) {
6719         if (0x403E < aExp) {
6720             float_raise(float_flag_invalid, status);
6721             return UINT64_MAX;
6722         }
6723         shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6724     } else {
6725         shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6726     }
6727     return roundAndPackUint64(aSign, aSig0, aSig1, status);
6728 }
6729 
6730 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6731 {
6732     uint64_t v;
6733     signed char current_rounding_mode = status->float_rounding_mode;
6734 
6735     set_float_rounding_mode(float_round_to_zero, status);
6736     v = float128_to_uint64(a, status);
6737     set_float_rounding_mode(current_rounding_mode, status);
6738 
6739     return v;
6740 }
6741 
6742 /*----------------------------------------------------------------------------
6743 | Returns the result of converting the quadruple-precision floating-point
6744 | value `a' to the 32-bit unsigned integer format.  The conversion
6745 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6746 | Arithmetic except that the conversion is always rounded toward zero.
6747 | If `a' is a NaN, the largest positive integer is returned.  Otherwise,
6748 | if the conversion overflows, the largest unsigned integer is returned.
6749 | If 'a' is negative, the value is rounded and zero is returned; negative
6750 | values that do not round to zero will raise the inexact exception.
6751 *----------------------------------------------------------------------------*/
6752 
6753 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6754 {
6755     uint64_t v;
6756     uint32_t res;
6757     int old_exc_flags = get_float_exception_flags(status);
6758 
6759     v = float128_to_uint64_round_to_zero(a, status);
6760     if (v > 0xffffffff) {
6761         res = 0xffffffff;
6762     } else {
6763         return v;
6764     }
6765     set_float_exception_flags(old_exc_flags, status);
6766     float_raise(float_flag_invalid, status);
6767     return res;
6768 }
6769 
6770 /*----------------------------------------------------------------------------
6771 | Returns the result of converting the quadruple-precision floating-point value
6772 | `a' to the 32-bit unsigned integer format.  The conversion is
6773 | performed according to the IEC/IEEE Standard for Binary Floating-Point
6774 | Arithmetic---which means in particular that the conversion is rounded
6775 | according to the current rounding mode.  If `a' is a NaN, the largest
6776 | positive integer is returned.  If the conversion overflows, the
6777 | largest unsigned integer is returned.  If 'a' is negative, the value is
6778 | rounded and zero is returned; negative values that do not round to zero
6779 | will raise the inexact exception.
6780 *----------------------------------------------------------------------------*/
6781 
6782 uint32_t float128_to_uint32(float128 a, float_status *status)
6783 {
6784     uint64_t v;
6785     uint32_t res;
6786     int old_exc_flags = get_float_exception_flags(status);
6787 
6788     v = float128_to_uint64(a, status);
6789     if (v > 0xffffffff) {
6790         res = 0xffffffff;
6791     } else {
6792         return v;
6793     }
6794     set_float_exception_flags(old_exc_flags, status);
6795     float_raise(float_flag_invalid, status);
6796     return res;
6797 }
6798 
6799 /*----------------------------------------------------------------------------
6800 | Returns the result of converting the quadruple-precision floating-point
6801 | value `a' to the single-precision floating-point format.  The conversion
6802 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6803 | Arithmetic.
6804 *----------------------------------------------------------------------------*/
6805 
float32 float128_to_float32(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;
    uint32_t zSig;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN goes through the canonical commonNaN form; Inf maps
           straight across. */
        if ( aSig0 | aSig1 ) {
            return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
        }
        return packFloat32( aSign, 0xFF, 0 );
    }
    /* Collapse the low word into a sticky bit, then shift the
       significand into the position roundAndPackFloat32 expects. */
    aSig0 |= ( aSig1 != 0 );
    shift64RightJamming( aSig0, 18, &aSig0 );
    zSig = aSig0;
    if ( aExp || zSig ) {
        zSig |= 0x40000000;   /* make the integer bit explicit */
        aExp -= 0x3F81;       /* rebias for single precision */
    }
    return roundAndPackFloat32(aSign, aExp, zSig, status);

}
6833 
6834 /*----------------------------------------------------------------------------
6835 | Returns the result of converting the quadruple-precision floating-point
6836 | value `a' to the double-precision floating-point format.  The conversion
6837 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6838 | Arithmetic.
6839 *----------------------------------------------------------------------------*/
6840 
float64 float128_to_float64(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN goes through the canonical commonNaN form; Inf maps
           straight across. */
        if ( aSig0 | aSig1 ) {
            return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
        }
        return packFloat64( aSign, 0x7FF, 0 );
    }
    /* Left-align the significand and fold the remaining low bits into
       a sticky bit for rounding. */
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
    aSig0 |= ( aSig1 != 0 );
    if ( aExp || aSig0 ) {
        aSig0 |= UINT64_C(0x4000000000000000);   /* explicit integer bit */
        aExp -= 0x3C01;                          /* rebias for double */
    }
    return roundAndPackFloat64(aSign, aExp, aSig0, status);

}
6866 
6867 /*----------------------------------------------------------------------------
6868 | Returns the result of converting the quadruple-precision floating-point
6869 | value `a' to the extended double-precision floating-point format.  The
6870 | conversion is performed according to the IEC/IEEE Standard for Binary
6871 | Floating-Point Arithmetic.
6872 *----------------------------------------------------------------------------*/
6873 
floatx80 float128_to_floatx80(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        if ( aSig0 | aSig1 ) {
            /* NaN converts through commonNaN and is quieted on return. */
            floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        return packFloatx80(aSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    else {
        aSig0 |= UINT64_C(0x0001000000000000);   /* implicit integer bit */
    }
    /* Shift the integer bit up to bit 63, where floatx80 stores it
       explicitly; round at full 80-bit precision. */
    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
    return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);

}
6904 
6905 /*----------------------------------------------------------------------------
6906 | Rounds the quadruple-precision floating-point value `a' to an integer, and
6907 | returns the result as a quadruple-precision floating-point value.  The
6908 | operation is performed according to the IEC/IEEE Standard for Binary
6909 | Floating-Point Arithmetic.
6910 *----------------------------------------------------------------------------*/
6911 
float128 float128_round_to_int(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t lastBitMask, roundBitsMask;
    float128 z;

    aExp = extractFloat128Exp( a );
    if ( 0x402F <= aExp ) {
        /* |a| >= 2^48: any fraction bits to be discarded lie in the low
           64-bit word. */
        if ( 0x406F <= aExp ) {
            /* |a| >= 2^112: already integral, except that a NaN must
               still be propagated. */
            if (    ( aExp == 0x7FFF )
                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
               ) {
                return propagateFloat128NaN(a, a, status);
            }
            return a;
        }
        /* lastBitMask marks the integer's lsb within z.low.  The split
           shift avoids an undefined 64-place shift when aExp == 0x402F;
           in that case lastBitMask becomes 0 and the lsb is actually
           bit 0 of z.high. */
        lastBitMask = 1;
        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
        roundBitsMask = lastBitMask - 1;
        z = a;
        switch (status->float_rounding_mode) {
        case float_round_nearest_even:
            if ( lastBitMask ) {
                /* Add half an ulp; on an exact tie clear the lsb to make
                   the result even. */
                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
            }
            else {
                /* lsb is bit 0 of z.high; the half bit is bit 63 of
                   z.low. */
                if ( (int64_t) z.low < 0 ) {
                    ++z.high;
                    if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
                }
            }
            break;
        case float_round_ties_away:
            if (lastBitMask) {
                add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
            } else {
                if ((int64_t) z.low < 0) {
                    ++z.high;
                }
            }
            break;
        case float_round_to_zero:
            break;
        case float_round_up:
            if (!extractFloat128Sign(z)) {
                add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
            }
            break;
        case float_round_down:
            if (extractFloat128Sign(z)) {
                add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
            }
            break;
        case float_round_to_odd:
            /*
             * Note that if lastBitMask == 0, the last bit is the lsb
             * of high, and roundBitsMask == -1.
             */
            if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
                add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
            }
            break;
        default:
            abort();
        }
        /* Truncate the now-rounded value to an integer. */
        z.low &= ~ roundBitsMask;
    }
    else {
        if ( aExp < 0x3FFF ) {
            /* |a| < 1: the result is zero or +/-1, chosen by rounding
               mode; an exact +/-0 passes through unchanged. */
            if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
            float_raise(float_flag_inexact, status);
            aSign = extractFloat128Sign( a );
            switch (status->float_rounding_mode) {
            case float_round_nearest_even:
                /* Round to 1 only when |a| > 1/2 (exponent 0x3FFE with a
                   non-zero fraction); exactly 1/2 ties to even, i.e. 0. */
                if (    ( aExp == 0x3FFE )
                     && (   extractFloat128Frac0( a )
                          | extractFloat128Frac1( a ) )
                   ) {
                    return packFloat128( aSign, 0x3FFF, 0, 0 );
                }
                break;
            case float_round_ties_away:
                if (aExp == 0x3FFE) {
                    return packFloat128(aSign, 0x3FFF, 0, 0);
                }
                break;
            case float_round_down:
                return
                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
                    : packFloat128( 0, 0, 0, 0 );
            case float_round_up:
                return
                      aSign ? packFloat128( 1, 0, 0, 0 )
                    : packFloat128( 0, 0x3FFF, 0, 0 );

            case float_round_to_odd:
                return packFloat128(aSign, 0x3FFF, 0, 0);

            case float_round_to_zero:
                break;
            }
            return packFloat128( aSign, 0, 0, 0 );
        }
        /* 1 <= |a| < 2^48: only a.high carries fraction bits; all of
           a.low lies below the rounding point. */
        lastBitMask = 1;
        lastBitMask <<= 0x402F - aExp;
        roundBitsMask = lastBitMask - 1;
        z.low = 0;
        z.high = a.high;
        switch (status->float_rounding_mode) {
        case float_round_nearest_even:
            z.high += lastBitMask>>1;
            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
                /* Exact tie: clear the lsb to make the result even. */
                z.high &= ~ lastBitMask;
            }
            break;
        case float_round_ties_away:
            z.high += lastBitMask>>1;
            break;
        case float_round_to_zero:
            break;
        case float_round_up:
            if (!extractFloat128Sign(z)) {
                z.high |= ( a.low != 0 );
                z.high += roundBitsMask;
            }
            break;
        case float_round_down:
            if (extractFloat128Sign(z)) {
                z.high |= (a.low != 0);
                z.high += roundBitsMask;
            }
            break;
        case float_round_to_odd:
            if ((z.high & lastBitMask) == 0) {
                z.high |= (a.low != 0);
                z.high += roundBitsMask;
            }
            break;
        default:
            abort();
        }
        z.high &= ~ roundBitsMask;
    }
    /* Any change in the bit pattern means fraction bits were discarded. */
    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
7063 
7064 /*----------------------------------------------------------------------------
7065 | Returns the result of adding the absolute values of the quadruple-precision
7066 | floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
7067 | before being returned.  `zSign' is ignored if the result is a NaN.
7068 | The addition is performed according to the IEC/IEEE Standard for Binary
7069 | Floating-Point Arithmetic.
7070 *----------------------------------------------------------------------------*/
7071 
static float128 addFloat128Sigs(float128 a, float128 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
    int32_t expDiff;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* a has the larger exponent: align b's significand to a's scale,
           collecting the bits shifted out in the sticky word zSig2. */
        if ( aExp == 0x7FFF ) {
            if (aSig0 | aSig1) {
                return propagateFloat128NaN(a, b, status);
            }
            return a;
        }
        if ( bExp == 0 ) {
            /* Subnormal b: exponent 0 encodes the same scale as exponent 1
               without the implicit integer bit, so shift one place less. */
            --expDiff;
        }
        else {
            bSig0 |= UINT64_C(0x0001000000000000);
        }
        shift128ExtraRightJamming(
            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* Mirror image: b has the larger exponent, align a instead. */
        if ( bExp == 0x7FFF ) {
            if (bSig0 | bSig1) {
                return propagateFloat128NaN(a, b, status);
            }
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        if ( aExp == 0 ) {
            ++expDiff;
        }
        else {
            aSig0 |= UINT64_C(0x0001000000000000);
        }
        shift128ExtraRightJamming(
            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
        zExp = bExp;
    }
    else {
        /* Equal exponents. */
        if ( aExp == 0x7FFF ) {
            /* Both infinite (or NaN): Inf + Inf of like sign is Inf. */
            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
                return propagateFloat128NaN(a, b, status);
            }
            return a;
        }
        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
        if ( aExp == 0 ) {
            /* Two subnormals (or zeros): the exact sum cannot overflow the
               fraction field, so pack it directly (or flush to zero). */
            if (status->flush_to_zero) {
                if (zSig0 | zSig1) {
                    float_raise(float_flag_output_denormal, status);
                }
                return packFloat128(zSign, 0, 0, 0);
            }
            return packFloat128( zSign, 0, zSig0, zSig1 );
        }
        zSig2 = 0;
        /* The two implicit integer bits sum to 2^49: OR it in (bit 49 of
           the raw fraction sum is always clear) and shift right below. */
        zSig0 |= UINT64_C(0x0002000000000000);
        zExp = aExp;
        goto shiftRight1;
    }
    aSig0 |= UINT64_C(0x0001000000000000);
    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
    /* Speculatively decrement zExp; restore it if the sum carried past
       the implicit-bit position and needs the one-place right shift. */
    --zExp;
    if ( zSig0 < UINT64_C(0x0002000000000000) ) goto roundAndPack;
    ++zExp;
 shiftRight1:
    shift128ExtraRightJamming(
        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
 roundAndPack:
    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);

}
7154 
7155 /*----------------------------------------------------------------------------
7156 | Returns the result of subtracting the absolute values of the quadruple-
7157 | precision floating-point values `a' and `b'.  If `zSign' is 1, the
7158 | difference is negated before being returned.  `zSign' is ignored if the
7159 | result is a NaN.  The subtraction is performed according to the IEC/IEEE
7160 | Standard for Binary Floating-Point Arithmetic.
7161 *----------------------------------------------------------------------------*/
7162 
static float128 subFloat128Sigs(float128 a, float128 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
    int32_t expDiff;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    expDiff = aExp - bExp;
    /* Pre-shift both significands left 14 places so the difference keeps
       guard bits for the final normalize-and-round step. */
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    /* Equal exponents from here on. */
    if ( aExp == 0x7FFF ) {
        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
            return propagateFloat128NaN(a, b, status);
        }
        /* Inf - Inf is invalid. */
        float_raise(float_flag_invalid, status);
        return float128_default_nan(status);
    }
    if ( aExp == 0 ) {
        /* Subnormals subtract at the exponent-1 scale. */
        aExp = 1;
        bExp = 1;
    }
    if ( bSig0 < aSig0 ) goto aBigger;
    if ( aSig0 < bSig0 ) goto bBigger;
    if ( bSig1 < aSig1 ) goto aBigger;
    if ( aSig1 < bSig1 ) goto bBigger;
    /* Exactly equal magnitudes: an exact zero difference is negative only
       in round-down mode. */
    return packFloat128(status->float_rounding_mode == float_round_down,
                        0, 0, 0);
 bExpBigger:
    if ( bExp == 0x7FFF ) {
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
    }
    if ( aExp == 0 ) {
        ++expDiff;
    }
    else {
        /* Implicit integer bit, already shifted up 14 places (bit 62). */
        aSig0 |= UINT64_C(0x4000000000000000);
    }
    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
    bSig0 |= UINT64_C(0x4000000000000000);
 bBigger:
    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
    zExp = bExp;
    /* b has the larger magnitude, so the result's sign flips. */
    zSign ^= 1;
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0x7FFF ) {
        if (aSig0 | aSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) {
        --expDiff;
    }
    else {
        bSig0 |= UINT64_C(0x4000000000000000);
    }
    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
    aSig0 |= UINT64_C(0x4000000000000000);
 aBigger:
    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
    zExp = aExp;
 normalizeRoundAndPack:
    /* Compensate for the 14-bit pre-shift (plus one place of slack)
       through the exponent handed to the normalizing packer. */
    --zExp;
    return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
                                         status);

}
7242 
7243 /*----------------------------------------------------------------------------
7244 | Returns the result of adding the quadruple-precision floating-point values
7245 | `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
7246 | for Binary Floating-Point Arithmetic.
7247 *----------------------------------------------------------------------------*/
7248 
7249 float128 float128_add(float128 a, float128 b, float_status *status)
7250 {
7251     bool aSign, bSign;
7252 
7253     aSign = extractFloat128Sign( a );
7254     bSign = extractFloat128Sign( b );
7255     if ( aSign == bSign ) {
7256         return addFloat128Sigs(a, b, aSign, status);
7257     }
7258     else {
7259         return subFloat128Sigs(a, b, aSign, status);
7260     }
7261 
7262 }
7263 
7264 /*----------------------------------------------------------------------------
7265 | Returns the result of subtracting the quadruple-precision floating-point
7266 | values `a' and `b'.  The operation is performed according to the IEC/IEEE
7267 | Standard for Binary Floating-Point Arithmetic.
7268 *----------------------------------------------------------------------------*/
7269 
7270 float128 float128_sub(float128 a, float128 b, float_status *status)
7271 {
7272     bool aSign, bSign;
7273 
7274     aSign = extractFloat128Sign( a );
7275     bSign = extractFloat128Sign( b );
7276     if ( aSign == bSign ) {
7277         return subFloat128Sigs(a, b, aSign, status);
7278     }
7279     else {
7280         return addFloat128Sigs(a, b, aSign, status);
7281     }
7282 
7283 }
7284 
7285 /*----------------------------------------------------------------------------
7286 | Returns the result of multiplying the quadruple-precision floating-point
7287 | values `a' and `b'.  The operation is performed according to the IEC/IEEE
7288 | Standard for Binary Floating-Point Arithmetic.
7289 *----------------------------------------------------------------------------*/
7290 
float128 float128_mul(float128 a, float128 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    bSign = extractFloat128Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        /* a is Inf or NaN. */
        if (    ( aSig0 | aSig1 )
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
            return propagateFloat128NaN(a, b, status);
        }
        /* Inf * 0 is invalid. */
        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
        return packFloat128( zSign, 0x7FFF, 0, 0 );
    }
    if ( bExp == 0x7FFF ) {
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return float128_default_nan(status);
        }
        return packFloat128( zSign, 0x7FFF, 0, 0 );
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    if ( bExp == 0 ) {
        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    zExp = aExp + bExp - 0x4000;
    aSig0 |= UINT64_C(0x0001000000000000);
    /* Shift b's fraction up 16 places; b's implicit integer bit then sits
       at 2^64, so its contribution to the product is exactly a's
       significand added into the high 128 product bits below. */
    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
    /* Fold the lowest 64 product bits into the sticky word. */
    zSig2 |= ( zSig3 != 0 );
    if (UINT64_C( 0x0002000000000000) <= zSig0 ) {
        /* Significand product in [2, 4): renormalize one place. */
        shift128ExtraRightJamming(
            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
        ++zExp;
    }
    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);

}
7347 
7348 /*----------------------------------------------------------------------------
7349 | Returns the result of dividing the quadruple-precision floating-point value
7350 | `a' by the corresponding value `b'.  The operation is performed according to
7351 | the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7352 *----------------------------------------------------------------------------*/
7353 
float128 float128_div(float128 a, float128 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    bSign = extractFloat128Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        if (aSig0 | aSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        if ( bExp == 0x7FFF ) {
            if (bSig0 | bSig1) {
                return propagateFloat128NaN(a, b, status);
            }
            /* Inf / Inf is invalid. */
            goto invalid;
        }
        return packFloat128( zSign, 0x7FFF, 0, 0 );
    }
    if ( bExp == 0x7FFF ) {
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        /* finite / Inf is a signed zero. */
        return packFloat128( zSign, 0, 0, 0 );
    }
    if ( bExp == 0 ) {
        if ( ( bSig0 | bSig1 ) == 0 ) {
            /* 0 / 0 is invalid; non-zero / 0 is a signed infinity with
               the divide-by-zero flag raised. */
            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
 invalid:
                float_raise(float_flag_invalid, status);
                return float128_default_nan(status);
            }
            float_raise(float_flag_divbyzero, status);
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    zExp = aExp - bExp + 0x3FFD;
    shortShift128Left(
        aSig0 | UINT64_C(0x0001000000000000), aSig1, 15, &aSig0, &aSig1 );
    shortShift128Left(
        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
    /* Ensure aSig < bSig so the quotient digit estimates fit in 64 bits. */
    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
        ++zExp;
    }
    /* First 64-bit quotient digit; the estimate may be slightly high, so
       decrement until the partial remainder is non-negative. */
    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
    }
    /* Second digit; exact correction and the sticky bit only matter when
       the estimate is close to a rounding boundary. */
    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
        }
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);

}
7434 
7435 /*----------------------------------------------------------------------------
7436 | Returns the remainder of the quadruple-precision floating-point value `a'
7437 | with respect to the corresponding value `b'.  The operation is performed
7438 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7439 *----------------------------------------------------------------------------*/
7440 
float128 float128_rem(float128 a, float128 b, float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
    int64_t sigMean0;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    if ( aExp == 0x7FFF ) {
        /* NaN operands propagate; rem(Inf, x) is invalid. */
        if (    ( aSig0 | aSig1 )
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
            return propagateFloat128NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        /* rem(finite, Inf) is a itself. */
        return a;
    }
    if ( bExp == 0 ) {
        if ( ( bSig0 | bSig1 ) == 0 ) {
            /* rem(x, 0) is invalid. */
 invalid:
            float_raise(float_flag_invalid, status);
            return float128_default_nan(status);
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    expDiff = aExp - bExp;
    /* |a| < |b| / 2: a is already the remainder. */
    if ( expDiff < -1 ) return a;
    shortShift128Left(
        aSig0 | UINT64_C(0x0001000000000000),
        aSig1,
        15 - ( expDiff < 0 ),
        &aSig0,
        &aSig1
    );
    shortShift128Left(
        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
    q = le128( bSig0, bSig1, aSig0, aSig1 );
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    /* Reduce the exponent difference in 61-bit chunks; each quotient
       estimate is clamped 4 low so the partial remainder stays
       non-negative. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
        expDiff -= 61;
    }
    if ( -64 < expDiff ) {
        /* Final partial quotient, scaled to the remaining exponent
           difference. */
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        q >>= - expDiff;
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
        expDiff += 52;
        if ( expDiff < 0 ) {
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
        }
        else {
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
        }
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
    }
    else {
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
    }
    /* Step one subtraction past zero, then choose between the last two
       partial remainders; the mean decides nearest, with ties broken by
       the quotient's parity (round-to-nearest-even on the quotient). */
    do {
        alternateASig0 = aSig0;
        alternateASig1 = aSig1;
        ++q;
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    } while ( 0 <= (int64_t) aSig0 );
    add128(
        aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
    if (    ( sigMean0 < 0 )
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
        aSig0 = alternateASig0;
        aSig1 = alternateASig1;
    }
    zSign = ( (int64_t) aSig0 < 0 );
    /* A negative remainder takes its magnitude and flips the sign. */
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
    return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
                                         status);
}
7541 
7542 /*----------------------------------------------------------------------------
7543 | Returns the square root of the quadruple-precision floating-point value `a'.
7544 | The operation is performed according to the IEC/IEEE Standard for Binary
7545 | Floating-Point Arithmetic.
7546 *----------------------------------------------------------------------------*/
7547 
float128 float128_sqrt(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        if (aSig0 | aSig1) {
            return propagateFloat128NaN(a, a, status);
        }
        /* sqrt(+Inf) is +Inf; sqrt(-Inf) is invalid. */
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt of a negative number other than -0 is invalid. */
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return float128_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    /* Result exponent is half the unbiased input exponent, rebiased. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
    aSig0 |= UINT64_C(0x0001000000000000);
    /* 32-bit seed estimate, refined to a 64-bit high digit by one
       division step; the input is aligned on exponent parity first. */
    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    /* Correct the high digit until the remainder a - zSig0^2 is
       non-negative. */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Low 64 result bits; exact correction and the sticky bit only
       matter when the estimate is near a rounding boundary. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
    return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);

}
7609 
/*
 * Compare two extended double-precision values a and b, returning a
 * FloatRelation.  When is_quiet is false, any NaN operand raises the
 * invalid exception; when true, only signaling NaNs do.
 */
static inline FloatRelation
floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    /* Malformed explicit-integer-bit encodings are always an invalid,
       unordered comparison. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return float_relation_unordered;
    }
    /* NaN operands (all-ones exponent, non-zero fraction below the
       integer bit) are unordered. */
    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
          ( extractFloatx80Frac( a )<<1 ) ) ||
        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
          ( extractFloatx80Frac( b )<<1 ) )) {
        if (!is_quiet ||
            floatx80_is_signaling_nan(a, status) ||
            floatx80_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloatx80Sign( a );
    bSign = extractFloatx80Sign( b );
    if ( aSign != bSign ) {

        /* Opposite signs: equal only if both are zero.  The uint16_t
           cast of (a.high | b.high) << 1 drops the sign bits and tests
           both 15-bit exponents for zero.  Otherwise the positive
           operand is the greater. */
        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
             ( ( a.low | b.low ) == 0 ) ) {
            /* zero case */
            return float_relation_equal;
        } else {
            return 1 - (2 * aSign);
        }
    } else {
        /* Normalize pseudo-denormals before comparison.  */
        if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
            ++a.high;
        }
        if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
            ++b.high;
        }
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            /* Same sign: magnitude-compare the bit patterns; a set sign
               bit inverts the sense of the comparison. */
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}
7657 
7658 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
7659 {
7660     return floatx80_compare_internal(a, b, 0, status);
7661 }
7662 
7663 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
7664                                      float_status *status)
7665 {
7666     return floatx80_compare_internal(a, b, 1, status);
7667 }
7668 
7669 static inline FloatRelation
7670 float128_compare_internal(float128 a, float128 b, bool is_quiet,
7671                           float_status *status)
7672 {
7673     bool aSign, bSign;
7674 
7675     if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7676           ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7677         ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7678           ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7679         if (!is_quiet ||
7680             float128_is_signaling_nan(a, status) ||
7681             float128_is_signaling_nan(b, status)) {
7682             float_raise(float_flag_invalid, status);
7683         }
7684         return float_relation_unordered;
7685     }
7686     aSign = extractFloat128Sign( a );
7687     bSign = extractFloat128Sign( b );
7688     if ( aSign != bSign ) {
7689         if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7690             /* zero case */
7691             return float_relation_equal;
7692         } else {
7693             return 1 - (2 * aSign);
7694         }
7695     } else {
7696         if (a.low == b.low && a.high == b.high) {
7697             return float_relation_equal;
7698         } else {
7699             return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7700         }
7701     }
7702 }
7703 
7704 FloatRelation float128_compare(float128 a, float128 b, float_status *status)
7705 {
7706     return float128_compare_internal(a, b, 0, status);
7707 }
7708 
7709 FloatRelation float128_compare_quiet(float128 a, float128 b,
7710                                      float_status *status)
7711 {
7712     return float128_compare_internal(a, b, 1, status);
7713 }
7714 
7715 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
7716 {
7717     bool aSign;
7718     int32_t aExp;
7719     uint64_t aSig;
7720 
7721     if (floatx80_invalid_encoding(a)) {
7722         float_raise(float_flag_invalid, status);
7723         return floatx80_default_nan(status);
7724     }
7725     aSig = extractFloatx80Frac( a );
7726     aExp = extractFloatx80Exp( a );
7727     aSign = extractFloatx80Sign( a );
7728 
7729     if ( aExp == 0x7FFF ) {
7730         if ( aSig<<1 ) {
7731             return propagateFloatx80NaN(a, a, status);
7732         }
7733         return a;
7734     }
7735 
7736     if (aExp == 0) {
7737         if (aSig == 0) {
7738             return a;
7739         }
7740         aExp++;
7741     }
7742 
7743     if (n > 0x10000) {
7744         n = 0x10000;
7745     } else if (n < -0x10000) {
7746         n = -0x10000;
7747     }
7748 
7749     aExp += n;
7750     return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7751                                          aSign, aExp, aSig, 0, status);
7752 }
7753 
7754 float128 float128_scalbn(float128 a, int n, float_status *status)
7755 {
7756     bool aSign;
7757     int32_t aExp;
7758     uint64_t aSig0, aSig1;
7759 
7760     aSig1 = extractFloat128Frac1( a );
7761     aSig0 = extractFloat128Frac0( a );
7762     aExp = extractFloat128Exp( a );
7763     aSign = extractFloat128Sign( a );
7764     if ( aExp == 0x7FFF ) {
7765         if ( aSig0 | aSig1 ) {
7766             return propagateFloat128NaN(a, a, status);
7767         }
7768         return a;
7769     }
7770     if (aExp != 0) {
7771         aSig0 |= UINT64_C(0x0001000000000000);
7772     } else if (aSig0 == 0 && aSig1 == 0) {
7773         return a;
7774     } else {
7775         aExp++;
7776     }
7777 
7778     if (n > 0x10000) {
7779         n = 0x10000;
7780     } else if (n < -0x10000) {
7781         n = -0x10000;
7782     }
7783 
7784     aExp += n - 1;
7785     return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
7786                                          , status);
7787 
7788 }
7789 
7790 static void __attribute__((constructor)) softfloat_init(void)
7791 {
7792     union_float64 ua, ub, uc, ur;
7793 
7794     if (QEMU_NO_HARDFLOAT) {
7795         return;
7796     }
7797     /*
7798      * Test that the host's FMA is not obviously broken. For example,
7799      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
7800      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
7801      */
7802     ua.s = 0x0020000000000001ULL;
7803     ub.s = 0x3ca0000000000000ULL;
7804     uc.s = 0x0020000000000000ULL;
7805     ur.h = fma(ua.h, ub.h, uc.h);
7806     if (ur.s != 0x0020000000000001ULL) {
7807         force_soft_fma = true;
7808     }
7809 }
7810