/* xref: /openbmc/qemu/fpu/softfloat.c (revision 9882ccaf) */
1 /*
2  * QEMU float support
3  *
4  * The code in this source file is derived from release 2a of the SoftFloat
5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6  * some later contributions) are provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file after December 1st 2014 will be
14  * taken to be licensed under the Softfloat-2a license unless specifically
15  * indicated otherwise.
16  */
17 
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22 
23 Written by John R. Hauser.  This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704.  Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980.  The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32 
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43 
44 ===============================================================================
45 */
46 
47 /* BSD licensing:
48  * Copyright (c) 2006, Fabrice Bellard
49  * All rights reserved.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions are met:
53  *
54  * 1. Redistributions of source code must retain the above copyright notice,
55  * this list of conditions and the following disclaimer.
56  *
57  * 2. Redistributions in binary form must reproduce the above copyright notice,
58  * this list of conditions and the following disclaimer in the documentation
59  * and/or other materials provided with the distribution.
60  *
61  * 3. Neither the name of the copyright holder nor the names of its contributors
62  * may be used to endorse or promote products derived from this software without
63  * specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75  * THE POSSIBILITY OF SUCH DAMAGE.
76  */
77 
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79  * version 2 or later. See the COPYING file in the top-level directory.
80  */
81 
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83  * target-dependent and needs the TARGET_* macros.
84  */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89 
90 /* We only need stdlib for abort() */
91 
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations.  (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98 
99 /*
100  * Hardfloat
101  *
102  * Fast emulation of guest FP instructions is challenging for two reasons.
103  * First, FP instruction semantics are similar but not identical, particularly
104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
105  * exception flags is not trivial: reading the host's flags register with a
106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107  * and trapping on every FP exception is not fast nor pleasant to work with.
108  *
109  * We address these challenges by leveraging the host FPU for a subset of the
110  * operations. To do this we expand on the idea presented in this paper:
111  *
112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114  *
115  * The idea is thus to leverage the host FPU to (1) compute FP operations
116  * and (2) identify whether FP exceptions occurred while avoiding
117  * expensive exception flag register accesses.
118  *
119  * An important optimization shown in the paper is that given that exception
120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121  * This is particularly useful for the inexact flag, which is very frequently
122  * raised in floating-point workloads.
123  *
124  * We optimize the code further by deferring to soft-fp whenever FP exception
125  * detection might get hairy. Two examples: (1) when at least one operand is
126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127  * and the result is < the minimum normal.
128  */
/*
 * Generate a helper that unconditionally flushes a denormal input to a
 * zero of the same sign and raises float_flag_input_denormal.
 * soft_t is float32 or float64; *a is modified in place.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            /* Replace the denormal with a zero, preserving the sign */ \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK
142 
/*
 * Generate a one-operand flush helper that is a fast no-op unless the
 * status requests flushing inputs to zero.
 */
#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1
155 
/*
 * Generate a two-operand flush helper; like GEN_INPUT_FLUSH1 but
 * flushes both inputs when flush_inputs_to_zero is set.
 */
#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2
169 
/*
 * Generate a three-operand flush helper (used by fused multiply-add
 * style operations with operands a, b, c).
 */
#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
184 
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
/*
 * On x86-64 the native-FP path is enabled only for the double-precision
 * variants — presumably from benchmarking; confirm before changing.
 */
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
/* Default for all other hosts: always use the softfloat primitives. */
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
205 
206 /*
207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208  * float{32,64}_is_infinity when !USE_FP.
209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211  */
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF   1
214 #else
215 # define QEMU_HARDFLOAT_USE_ISINF   0
216 #endif
217 
/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
/* Hardfloat disabled: just flatten the softfloat implementations. */
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
/* noinline keeps the (cold) softfloat path out of the hardfloat caller. */
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
234 
235 static inline bool can_use_fpu(const float_status *s)
236 {
237     if (QEMU_NO_HARDFLOAT) {
238         return false;
239     }
240     return likely(s->float_exception_flags & float_flag_inexact &&
241                   s->float_rounding_mode == float_round_nearest_even);
242 }
243 
244 /*
245  * Hardfloat generation functions. Each operation can have two flavors:
246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247  * most condition checks, or native ones (e.g. fpclassify).
248  *
249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
250  * compiler to propagate constants and inline everything into the callers.
251  *
252  * We only generate functions for operations with two inputs, since only
253  * these are common enough to justify consolidating them into common code.
254  */
255 
/*
 * Union views of a float: 's' is the softfloat bit-pattern type and
 * 'h' is the host FP type the hardfloat path operates on directly.
 */
typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

/* Two-operand predicate deciding whether the hardfloat path applies. */
typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

/* Softfloat and host implementations of a two-operand operation. */
typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);
273 
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
278         /*
279          * Not using a temp variable for consecutive fpclassify calls ends up
280          * generating faster code.
281          */
282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284     }
285     return float32_is_zero_or_normal(a.s) &&
286            float32_is_zero_or_normal(b.s);
287 }
288 
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290 {
291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294     }
295     return float64_is_zero_or_normal(a.s) &&
296            float64_is_zero_or_normal(b.s);
297 }
298 
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307     }
308     return float32_is_zero_or_normal(a.s) &&
309            float32_is_zero_or_normal(b.s) &&
310            float32_is_zero_or_normal(c.s);
311 }
312 
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320     }
321     return float64_is_zero_or_normal(a.s) &&
322            float64_is_zero_or_normal(b.s) &&
323            float64_is_zero_or_normal(c.s);
324 }
325 
326 static inline bool f32_is_inf(union_float32 a)
327 {
328     if (QEMU_HARDFLOAT_USE_ISINF) {
329         return isinf(a.h);
330     }
331     return float32_is_infinity(a.s);
332 }
333 
334 static inline bool f64_is_inf(union_float64 a)
335 {
336     if (QEMU_HARDFLOAT_USE_ISINF) {
337         return isinf(a.h);
338     }
339     return float64_is_infinity(a.s);
340 }
341 
/*
 * Generic two-operand single-precision dispatcher: try the host FPU
 * ('hard'), falling back to the softfloat implementation ('soft') when
 * exception-flag semantics cannot be reproduced natively.  'pre' vets
 * the operands before the hard op; 'post' decides whether a tiny result
 * must be recomputed in softfloat.
 */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    /* Hardfloat requires inexact already set and round-nearest-even. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    /* 'pre' rejects operand combinations the hard path cannot handle. */
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        /* The host produced an infinity: record the overflow flag. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        /*
         * Result is at or below the smallest normal: redo in softfloat
         * so underflow/inexact flags are raised correctly.
         */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
372 
/*
 * Generic two-operand double-precision dispatcher; mirrors float32_gen2:
 * run the host op when safe, otherwise defer to softfloat so the guest
 * exception flags stay correct.
 */
static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    /* Hardfloat requires inexact already set and round-nearest-even. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    /* 'pre' rejects operand combinations the hard path cannot handle. */
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        /* The host produced an infinity: record the overflow flag. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        /*
         * Result is at or below the smallest normal: redo in softfloat
         * so underflow/inexact flags are raised correctly.
         */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
403 
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
407 
408 static inline uint32_t extractFloat32Frac(float32 a)
409 {
410     return float32_val(a) & 0x007FFFFF;
411 }
412 
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
416 
417 static inline int extractFloat32Exp(float32 a)
418 {
419     return (float32_val(a) >> 23) & 0xFF;
420 }
421 
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
425 
426 static inline bool extractFloat32Sign(float32 a)
427 {
428     return float32_val(a) >> 31;
429 }
430 
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
434 
435 static inline uint64_t extractFloat64Frac(float64 a)
436 {
437     return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
438 }
439 
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
443 
444 static inline int extractFloat64Exp(float64 a)
445 {
446     return (float64_val(a) >> 52) & 0x7FF;
447 }
448 
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
452 
453 static inline bool extractFloat64Sign(float64 a)
454 {
455     return float64_val(a) >> 63;
456 }
457 
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified, /* not yet canonicalized */
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
471 
/* Convert a FloatClass value into a one-hot bit for mask tests. */
#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    /* Combined masks, used e.g. by the pick_nan_muladd ab/abc masks. */
    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
484 
485 
/* Simple helpers for checking if, or what kind of, NaN we have */

/* Any NaN: relies on qnan/snan being the last FloatClass enumerators. */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}

/* Signalling NaN only. */
static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

/* Quiet NaN only. */
static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}
501 
502 /*
503  * Structure holding all of the decomposed parts of a float.
504  * The exponent is unbiased and the fraction is normalized.
505  *
506  * The fraction words are stored in big-endian word ordering,
507  * so that truncation from a larger format to a smaller format
508  * can be done simply by ignoring subsequent elements.
509  */
510 
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;       /* unbiased exponent */
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;
527 
/* 128-bit fraction variant; words in big-endian order (hi first). */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;       /* unbiased exponent */
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;
535 
/* 256-bit fraction variant, used for double-width intermediates. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;       /* unbiased exponent */
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;
545 
546 /* These apply to the most significant word of each FloatPartsN. */
547 #define DECOMPOSED_BINARY_POINT    63
548 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
549 
550 /* Structure holding all of the relevant parameters for a format.
551  *   exp_size: the size of the exponent field
552  *   exp_bias: the offset applied to the exponent field
553  *   exp_max: the maximum normalised exponent
554  *   frac_size: the size of the fraction field
555  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
556  * The following are computed based the size of fraction
557  *   frac_lsb: least significant bit of fraction
558  *   frac_lsbm1: the bit below the least significant bit (for rounding)
559  *   round_mask/roundeven_mask: masks used for rounding
560  * The following optional modifiers are available:
561  *   arm_althp: handle ARM Alternative Half Precision
562  */
typedef struct {
    int exp_size;             /* bits in the exponent field */
    int exp_bias;             /* bias applied to the stored exponent */
    int exp_max;              /* maximum stored exponent (all-ones) */
    int frac_size;            /* bits in the fraction field */
    int frac_shift;           /* shift to align with DECOMPOSED_BINARY_POINT */
    uint64_t frac_lsb;        /* least significant fraction bit, shifted */
    uint64_t frac_lsbm1;      /* bit below the lsb, for rounding */
    uint64_t round_mask;      /* bits below the lsb */
    uint64_t roundeven_mask;  /* bits at and below the lsb */
    bool arm_althp;           /* ARM Alternative Half Precision mode */
} FloatFmt;
575 
/*
 * Expand fields based on the size of exponent and fraction.
 * (-F - 1) & 63 is the left-shift that moves an F-bit fraction up
 * against the 63-bit DECOMPOSED_BINARY_POINT, i.e. 63 - F.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = E,                                             \
    .exp_bias       = ((1 << E) - 1) >> 1,                           \
    .exp_max        = (1 << E) - 1,                                  \
    .frac_size      = F,                                             \
    .frac_shift     = (-F - 1) & 63,                                 \
    .frac_lsb       = 1ull << ((-F - 1) & 63),                       \
    .frac_lsbm1     = 1ull << ((-F - 2) & 63),                       \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1,                 \
    .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1
587 
/* binary16: 5-bit exponent, 10-bit fraction. */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* ARM Alternative Half Precision variant of binary16. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* bfloat16: 8-bit exponent, 7-bit fraction. */
static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

/* binary32: 8-bit exponent, 23-bit fraction. */
static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

/* binary64: 11-bit exponent, 52-bit fraction. */
static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

/* binary128: 15-bit exponent, 112-bit fraction. */
static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};
612 
613 /* Unpack a float to parts, but do not canonicalize.  */
614 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
615 {
616     const int f_size = fmt->frac_size;
617     const int e_size = fmt->exp_size;
618 
619     *r = (FloatParts64) {
620         .cls = float_class_unclassified,
621         .sign = extract64(raw, f_size + e_size, 1),
622         .exp = extract64(raw, f_size, e_size),
623         .frac = extract64(raw, 0, f_size)
624     };
625 }
626 
/* Per-format wrappers around unpack_raw64(). */
static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}
646 
/*
 * Unpack a 128-bit float without canonicalizing: sign/exp and the top
 * fraction bits come from f.high, the low 64 fraction bits from f.low.
 */
static void float128_unpack_raw(FloatParts128 *p, float128 f)
{
    /* Fraction bits that live in the high word. */
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;

    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract64(f.high, f_size + e_size, 1),
        .exp = extract64(f.high, f_size, e_size),
        .frac_hi = extract64(f.high, 0, f_size),
        .frac_lo = f.low,
    };
}
660 
661 /* Pack a float from parts, but do not canonicalize.  */
662 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
663 {
664     const int f_size = fmt->frac_size;
665     const int e_size = fmt->exp_size;
666     uint64_t ret;
667 
668     ret = (uint64_t)p->sign << (f_size + e_size);
669     ret = deposit64(ret, f_size, e_size, p->exp);
670     ret = deposit64(ret, 0, f_size, p->frac);
671     return ret;
672 }
673 
/* Per-format wrappers around pack_raw64(). */
static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}
693 
/*
 * Pack a 128-bit float without canonicalizing: the inverse of
 * float128_unpack_raw().
 */
static float128 float128_pack_raw(const FloatParts128 *p)
{
    /* Fraction bits that live in the high word. */
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;
    uint64_t hi;

    hi = (uint64_t)p->sign << (f_size + e_size);
    hi = deposit64(hi, f_size, e_size, p->exp);
    hi = deposit64(hi, 0, f_size, p->frac_hi);
    return make_float128(hi, p->frac_lo);
}
705 
706 /*----------------------------------------------------------------------------
707 | Functions and definitions to determine:  (1) whether tininess for underflow
708 | is detected before or after rounding by default, (2) what (if anything)
709 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
710 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
711 | are propagated from function inputs to output.  These details are target-
712 | specific.
713 *----------------------------------------------------------------------------*/
714 #include "softfloat-specialize.c.inc"
715 
/*
 * Dispatch on the pointed-to parts type: QEMU_GENERIC selects the
 * parts128_ (or parts256_) implementation when P has the wider type,
 * defaulting to parts64_ otherwise.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
725 
726 static void parts64_return_nan(FloatParts64 *a, float_status *s);
727 static void parts128_return_nan(FloatParts128 *a, float_status *s);
728 
729 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
730 
731 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
732                                       float_status *s);
733 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
734                                         float_status *s);
735 
736 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
737 
738 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
739                                              FloatParts64 *c, float_status *s,
740                                              int ab_mask, int abc_mask);
741 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
742                                                FloatParts128 *b,
743                                                FloatParts128 *c,
744                                                float_status *s,
745                                                int ab_mask, int abc_mask);
746 
747 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
748     PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
749 
750 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
751                                  const FloatFmt *fmt);
752 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
753                                   const FloatFmt *fmt);
754 
755 #define parts_canonicalize(A, S, F) \
756     PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
757 
758 static void parts64_uncanon(FloatParts64 *p, float_status *status,
759                             const FloatFmt *fmt);
760 static void parts128_uncanon(FloatParts128 *p, float_status *status,
761                              const FloatFmt *fmt);
762 
763 #define parts_uncanon(A, S, F) \
764     PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
765 
766 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
767 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
768 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
769 
770 #define parts_add_normal(A, B) \
771     PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
772 
773 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
774 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
775 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
776 
777 #define parts_sub_normal(A, B) \
778     PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
779 
780 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
781                                     float_status *s, bool subtract);
782 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
783                                       float_status *s, bool subtract);
784 
785 #define parts_addsub(A, B, S, Z) \
786     PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
787 
788 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
789                                  float_status *s);
790 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
791                                    float_status *s);
792 
793 #define parts_mul(A, B, S) \
794     PARTS_GENERIC_64_128(mul, A)(A, B, S)
795 
796 static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
797                                     FloatParts64 *c, int flags,
798                                     float_status *s);
799 static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
800                                       FloatParts128 *c, int flags,
801                                       float_status *s);
802 
803 #define parts_muladd(A, B, C, Z, S) \
804     PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
805 
806 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
807                                  float_status *s);
808 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
809                                    float_status *s);
810 
811 #define parts_div(A, B, S) \
812     PARTS_GENERIC_64_128(div, A)(A, B, S)
813 
814 /*
815  * Helper functions for softfloat-parts.c.inc, per-size operations.
816  */
817 
818 #define FRAC_GENERIC_64_128(NAME, P) \
819     QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
820 
821 #define FRAC_GENERIC_64_128_256(NAME, P) \
822     QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
823                  (FloatParts128 *, frac128_##NAME), frac64_##NAME)
824 
/* Add 64-bit fractions; return true on carry-out of the high bit. */
static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}

/* Add 128-bit fractions, low word first so the carry ripples upward. */
static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

/* Add 256-bit fractions, from least to most significant word. */
static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

/* Dispatch by size of R to the 64/128/256-bit fraction add. */
#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
849 
/* Add the 64-bit constant C to the fraction; return true on carry-out. */
static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}

/* Add C to a 128-bit fraction; the low-word carry is added to the high word. */
static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}

#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
862 
/* Set every fraction bit (all-ones via unsigned conversion of -1). */
static void frac64_allones(FloatParts64 *a)
{
    a->frac = -1;
}

static void frac128_allones(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = -1;
}

#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
874 
875 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
876 {
877     return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
878 }
879 
/*
 * Three-way compare of 128-bit fractions: -1 if a < b, 0 if equal,
 * +1 if a > b.  High words decide unless they are equal.
 */
static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t ta = a->frac_hi, tb = b->frac_hi;
    if (ta == tb) {
        ta = a->frac_lo, tb = b->frac_lo;
        if (ta == tb) {
            return 0;
        }
    }
    return ta < tb ? -1 : 1;
}

#define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
893 
/* Zero the entire fraction. */
static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
905 
/*
 * Divide 64-bit fractions, A /= B; return true when the caller must
 * decrement the exponent to compensate.  Any nonzero remainder is
 * jammed into the quotient lsb so inexactness is not lost.
 */
static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    /* n0:n1 is the 128-bit numerator (high:low); b->frac the divisor. */
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}
937 
/*
 * Divide 128-bit fractions, A /= B; return true when the caller must
 * decrement the exponent to compensate (i.e. A < B on entry).
 */
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    /* As in frac64_div: pre-shift A so the quotient is exactly N bits. */
    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}
984 
985 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
986 
987 static bool frac64_eqz(FloatParts64 *a)
988 {
989     return a->frac == 0;
990 }
991 
992 static bool frac128_eqz(FloatParts128 *a)
993 {
994     return (a->frac_hi | a->frac_lo) == 0;
995 }
996 
997 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
998 
/* Widening multiply: two N-bit fractions into a 2N-bit product R. */
static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1011 
/* Two's-complement negation of the fraction. */
static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}

/* Multi-word negate: subtract each word from zero, borrowing upward. */
static void frac128_neg(FloatParts128 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1034 
/*
 * Left-justify the fraction (msb set) and return the shift count
 * applied; return the full width if the fraction was zero.
 */
static int frac64_normalize(FloatParts64 *a)
{
    if (a->frac) {
        int shift = clz64(a->frac);
        a->frac <<= shift;
        return shift;
    }
    return 64;
}

static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        int shl = clz64(a->frac_lo);
        /* High word is zero: the shifted low word becomes the high word. */
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}
1060 
/*
 * Left-justify a 256-bit fraction and return the shift count applied;
 * return 256 if the fraction was entirely zero.
 */
static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        /* Shift by whole words first, so that 0 < shl < 64 below. */
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    /* Propagate the remaining sub-word shift through all four words. */
    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1109 
/* Shift the fraction left by C bits; shifted-out bits are discarded. */
static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}

static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    /* Handle the whole-word part of the shift first... */
    if (c & 64) {
        a0 = a1, a1 = 0;
    }

    /* ...then the remaining 0..63 bits across both words. */
    c &= 63;
    if (c) {
        a0 = shl_double(a0, a1, c);
        a1 = a1 << c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1134 
/* Shift the fraction right by C bits; shifted-out bits are discarded. */
static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}

static void frac128_shr(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    /* Handle the whole-word part of the shift first... */
    if (c & 64) {
        a1 = a0, a0 = 0;
    }

    /* ...then the remaining 0..63 bits across both words. */
    c &= 63;
    if (c) {
        a1 = shr_double(a0, a1, c);
        a0 = a0 >> c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1159 
/*
 * Shift the fraction right by C bits, jamming any bits shifted out
 * into the lsb (sticky bit) so that inexactness is not lost.
 */
static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            /* shr_double(a0, 0, c) recovers the bits shifted out. */
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            /* Shift >= width: only the sticky bit can survive. */
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}
1173 
/* 128-bit variant of frac64_shrjam: right shift with sticky-bit jam. */
static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        /* Whole-word shift: the old low word is entirely shifted out. */
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Shift >= width: everything is shifted out. */
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    /* Shift the remaining 0..63 bits, collecting bits shifted out. */
    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    /* Fold all lost bits into the lsb so inexactness is preserved. */
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}
1205 
/* 256-bit variant of frac64_shrjam: right shift with sticky-bit jam. */
static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        /* Whole-word shifts by 128 and/or 64 bits first. */
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Shift >= width: everything is shifted out. */
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    /* Shift the remaining 0..63 bits, collecting bits shifted out. */
    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    /* Fold all lost bits into the lsb so inexactness is preserved. */
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1249 
/* Subtract fractions, R = A - B; return true on borrow-out. */
static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return usub64_overflow(a->frac, b->frac, &r->frac);
}

/* Multi-word subtract, low word first so the borrow ripples upward. */
static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1274 
/* Narrow a fraction to half width, jamming discarded bits into the lsb. */
static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}

static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}

#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1287 
/* Widen a fraction to double width; the new low bits are zero. */
static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}

static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}

/* Note: dispatch is on B (the source), unlike most frac_* macros. */
#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1303 
1304 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1305 #define FloatPartsN    glue(FloatParts,N)
1306 #define FloatPartsW    glue(FloatParts,W)
1307 
1308 #define N 64
1309 #define W 128
1310 
1311 #include "softfloat-parts-addsub.c.inc"
1312 #include "softfloat-parts.c.inc"
1313 
1314 #undef  N
1315 #undef  W
1316 #define N 128
1317 #define W 256
1318 
1319 #include "softfloat-parts-addsub.c.inc"
1320 #include "softfloat-parts.c.inc"
1321 
1322 #undef  N
1323 #undef  W
1324 #define N            256
1325 
1326 #include "softfloat-parts-addsub.c.inc"
1327 
1328 #undef  N
1329 #undef  W
1330 #undef  partsN
1331 #undef  FloatPartsN
1332 #undef  FloatPartsW
1333 
1334 /*
1335  * Pack/unpack routines with a specific FloatFmt.
1336  */
1337 
/* Unpack raw float16 F into parts and canonicalize per PARAMS. */
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}

/* As above, with the standard float16 format parameters. */
static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}

/* Unpack raw bfloat16 F into parts and canonicalize. */
static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}

/* Round parts per PARAMS and repack into a raw float16. */
static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}

/* As above, with the standard float16 format parameters. */
static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}

/* Round parts and repack into a raw bfloat16. */
static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}

/* Unpack raw float32 F into parts and canonicalize. */
static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}

/* Round parts and repack into a raw float32. */
static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}

/* Unpack raw float64 F into parts and canonicalize. */
static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}

/* Round parts and repack into a raw float64. */
static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}

/* Unpack raw float128 F into parts and canonicalize. */
static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}

/* Round parts and repack into a raw float128. */
static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}
1420 
1421 /*
1422  * Addition and subtraction
1423  */
1424 
/*
 * float16 add/sub: unpack both operands to canonical parts, perform
 * the operation, then round and repack the result.
 */
static float16 QEMU_FLATTEN
float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float16_round_pack_canonical(pr, status);
}

float16 float16_add(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, false);
}

float16 float16_sub(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, true);
}
1446 
/* Softfloat add/sub for float32 (slow path behind the hardfloat wrappers). */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float32_round_pack_canonical(pr, status);
}

static float32 soft_f32_add(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, false);
}

static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, true);
}

/* Softfloat add/sub for float64 (slow path behind the hardfloat wrappers). */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float64_round_pack_canonical(pr, status);
}

static float64 soft_f64_add(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, false);
}

static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, true);
}
1490 
/* Host-FPU fast-path implementations used by float32_gen2/float64_gen2. */
static float hard_f32_add(float a, float b)
{
    return a + b;
}

static float hard_f32_sub(float a, float b)
{
    return a - b;
}

static double hard_f64_add(double a, double b)
{
    return a + b;
}

static double hard_f64_sub(double a, double b)
{
    return a - b;
}
1510 
1511 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1512 {
1513     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1514         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1515     }
1516     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1517 }
1518 
/*
 * Post-check for the hardfloat add/sub/mul fast path: return false
 * only when both operands are zero.
 */
static bool f64_addsubmul_post(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
    } else {
        return !(float64_is_zero(a.s) && float64_is_zero(b.s));
    }
}
1527 
/* Dispatch add/sub through the generic hardfloat/softfloat selector. */
static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}

static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsubmul_post);
}
1541 
/* Public float32/float64 add and sub entry points. */
float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}

float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}

float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}

float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}
1565 
/* bfloat16 add/sub via the canonical parts machinery. */
static bfloat16 QEMU_FLATTEN
bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return bfloat16_round_pack_canonical(pr, status);
}

bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, false);
}

bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, true);
}

/* float128 add/sub via the 128-bit canonical parts machinery. */
static float128 QEMU_FLATTEN
float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float128_round_pack_canonical(pr, status);
}

float128 float128_add(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, false);
}

float128 float128_sub(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, true);
}
1609 
1610 /*
1611  * Multiplication
1612  */
1613 
/* float16 multiply via the canonical parts machinery. */
float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float16_round_pack_canonical(pr, status);
}

/* Softfloat multiply for float32 (slow path behind float32_mul). */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_mul(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float32_round_pack_canonical(pr, status);
}

/* Softfloat multiply for float64 (slow path behind float64_mul). */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_mul(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float64_round_pack_canonical(pr, status);
}

/* Host-FPU fast-path multiplies. */
static float hard_f32_mul(float a, float b)
{
    return a * b;
}

static double hard_f64_mul(double a, double b)
{
    return a * b;
}
1658 
/* Public multiply entry points, dispatching hardfloat vs softfloat. */
float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, f32_addsubmul_post);
}

float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, f64_addsubmul_post);
}
1672 
/* bfloat16 multiply via the canonical parts machinery. */
bfloat16 QEMU_FLATTEN
bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return bfloat16_round_pack_canonical(pr, status);
}

/* float128 multiply via the 128-bit canonical parts machinery. */
float128 QEMU_FLATTEN
float128_mul(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float128_round_pack_canonical(pr, status);
}
1696 
1697 /*
1698  * Fused multiply-add
1699  */
1700 
/* Fused multiply-add a*b+c for float16, modified per FLAGS. */
float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
                                    int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    float16_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float16_round_pack_canonical(pr, status);
}

/* Softfloat fused multiply-add for float32 (slow path). */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    float32_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float32_round_pack_canonical(pr, status);
}

/* Softfloat fused multiply-add for float64 (slow path). */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    float64_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float64_round_pack_canonical(pr, status);
}
1741 
/*
 * When true, always take the softfloat muladd path.
 * NOTE(review): not written anywhere in this file; presumably set by a
 * test harness elsewhere — verify at its other references.
 */
static bool force_soft_fma;

/*
 * float32 fused multiply-add: try the host FPU's fmaf() fast path,
 * falling back to softfloat whenever the preconditions do not hold
 * or the result might need special flag handling.
 */
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    /* The hardfloat path is only valid for the status config can_use_fpu
       accepts; halve_result has no host-FPU equivalent. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        /* The product is a correctly-signed zero; add it to c. */
        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /* Result may be subnormal: redo with the original inputs
               on the softfloat path for exact flag accounting. */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}
1814 
1815 float64 QEMU_FLATTEN
1816 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1817 {
1818     union_float64 ua, ub, uc, ur;
1819 
1820     ua.s = xa;
1821     ub.s = xb;
1822     uc.s = xc;
1823 
1824     if (unlikely(!can_use_fpu(s))) {
1825         goto soft;
1826     }
1827     if (unlikely(flags & float_muladd_halve_result)) {
1828         goto soft;
1829     }
1830 
1831     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1832     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1833         goto soft;
1834     }
1835 
1836     if (unlikely(force_soft_fma)) {
1837         goto soft;
1838     }
1839 
1840     /*
1841      * When (a || b) == 0, there's no need to check for under/over flow,
1842      * since we know the addend is (normal || 0) and the product is 0.
1843      */
1844     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1845         union_float64 up;
1846         bool prod_sign;
1847 
1848         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1849         prod_sign ^= !!(flags & float_muladd_negate_product);
1850         up.s = float64_set_sign(float64_zero, prod_sign);
1851 
1852         if (flags & float_muladd_negate_c) {
1853             uc.h = -uc.h;
1854         }
1855         ur.h = up.h + uc.h;
1856     } else {
1857         union_float64 ua_orig = ua;
1858         union_float64 uc_orig = uc;
1859 
1860         if (flags & float_muladd_negate_product) {
1861             ua.h = -ua.h;
1862         }
1863         if (flags & float_muladd_negate_c) {
1864             uc.h = -uc.h;
1865         }
1866 
1867         ur.h = fma(ua.h, ub.h, uc.h);
1868 
1869         if (unlikely(f64_is_inf(ur))) {
1870             float_raise(float_flag_overflow, s);
1871         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
1872             ua = ua_orig;
1873             uc = uc_orig;
1874             goto soft;
1875         }
1876     }
1877     if (flags & float_muladd_negate_result) {
1878         return float64_chs(ur.s);
1879     }
1880     return ur.s;
1881 
1882  soft:
1883     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1884 }
1885 
1886 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1887                                       int flags, float_status *status)
1888 {
1889     FloatParts64 pa, pb, pc, *pr;
1890 
1891     bfloat16_unpack_canonical(&pa, a, status);
1892     bfloat16_unpack_canonical(&pb, b, status);
1893     bfloat16_unpack_canonical(&pc, c, status);
1894     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1895 
1896     return bfloat16_round_pack_canonical(pr, status);
1897 }
1898 
1899 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
1900                                       int flags, float_status *status)
1901 {
1902     FloatParts128 pa, pb, pc, *pr;
1903 
1904     float128_unpack_canonical(&pa, a, status);
1905     float128_unpack_canonical(&pb, b, status);
1906     float128_unpack_canonical(&pc, c, status);
1907     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1908 
1909     return float128_round_pack_canonical(pr, status);
1910 }
1911 
1912 /*
1913  * Division
1914  */
1915 
1916 float16 float16_div(float16 a, float16 b, float_status *status)
1917 {
1918     FloatParts64 pa, pb, *pr;
1919 
1920     float16_unpack_canonical(&pa, a, status);
1921     float16_unpack_canonical(&pb, b, status);
1922     pr = parts_div(&pa, &pb, status);
1923 
1924     return float16_round_pack_canonical(pr, status);
1925 }
1926 
1927 static float32 QEMU_SOFTFLOAT_ATTR
1928 soft_f32_div(float32 a, float32 b, float_status *status)
1929 {
1930     FloatParts64 pa, pb, *pr;
1931 
1932     float32_unpack_canonical(&pa, a, status);
1933     float32_unpack_canonical(&pb, b, status);
1934     pr = parts_div(&pa, &pb, status);
1935 
1936     return float32_round_pack_canonical(pr, status);
1937 }
1938 
1939 static float64 QEMU_SOFTFLOAT_ATTR
1940 soft_f64_div(float64 a, float64 b, float_status *status)
1941 {
1942     FloatParts64 pa, pb, *pr;
1943 
1944     float64_unpack_canonical(&pa, a, status);
1945     float64_unpack_canonical(&pb, b, status);
1946     pr = parts_div(&pa, &pb, status);
1947 
1948     return float64_round_pack_canonical(pr, status);
1949 }
1950 
/* Host-FPU single-precision division, used on the hardfloat fast path. */
static float hard_f32_div(float a, float b)
{
    const float quotient = a / b;
    return quotient;
}

/* Host-FPU double-precision division, used on the hardfloat fast path. */
static double hard_f64_div(double a, double b)
{
    const double quotient = a / b;
    return quotient;
}
1960 
1961 static bool f32_div_pre(union_float32 a, union_float32 b)
1962 {
1963     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1964         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1965                fpclassify(b.h) == FP_NORMAL;
1966     }
1967     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1968 }
1969 
1970 static bool f64_div_pre(union_float64 a, union_float64 b)
1971 {
1972     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1973         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1974                fpclassify(b.h) == FP_NORMAL;
1975     }
1976     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1977 }
1978 
1979 static bool f32_div_post(union_float32 a, union_float32 b)
1980 {
1981     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1982         return fpclassify(a.h) != FP_ZERO;
1983     }
1984     return !float32_is_zero(a.s);
1985 }
1986 
1987 static bool f64_div_post(union_float64 a, union_float64 b)
1988 {
1989     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1990         return fpclassify(a.h) != FP_ZERO;
1991     }
1992     return !float64_is_zero(a.s);
1993 }
1994 
1995 float32 QEMU_FLATTEN
1996 float32_div(float32 a, float32 b, float_status *s)
1997 {
1998     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
1999                         f32_div_pre, f32_div_post);
2000 }
2001 
2002 float64 QEMU_FLATTEN
2003 float64_div(float64 a, float64 b, float_status *s)
2004 {
2005     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2006                         f64_div_pre, f64_div_post);
2007 }
2008 
2009 bfloat16 QEMU_FLATTEN
2010 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2011 {
2012     FloatParts64 pa, pb, *pr;
2013 
2014     bfloat16_unpack_canonical(&pa, a, status);
2015     bfloat16_unpack_canonical(&pb, b, status);
2016     pr = parts_div(&pa, &pb, status);
2017 
2018     return bfloat16_round_pack_canonical(pr, status);
2019 }
2020 
2021 float128 QEMU_FLATTEN
2022 float128_div(float128 a, float128 b, float_status *status)
2023 {
2024     FloatParts128 pa, pb, *pr;
2025 
2026     float128_unpack_canonical(&pa, a, status);
2027     float128_unpack_canonical(&pb, b, status);
2028     pr = parts_div(&pa, &pb, status);
2029 
2030     return float128_round_pack_canonical(pr, status);
2031 }
2032 
2033 /*
2034  * Float to Float conversions
2035  *
2036  * Returns the result of converting one float format to another. The
2037  * conversion is performed according to the IEC/IEEE Standard for
2038  * Binary Floating-Point Arithmetic.
2039  *
2040  * Usually this only needs to take care of raising invalid exceptions
2041  * and handling the conversion on NaNs.
2042  */
2043 
/*
 * Adjust the unpacked value in @a for packing into the Arm
 * "Alternative Half Precision" format, which (per the comments below)
 * has no encodings for NaN or Infinity.
 */
static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
{
    switch (a->cls) {
    case float_class_qnan:
    case float_class_snan:
        /*
         * There is no NaN in the destination format.  Raise Invalid
         * and return a zero with the sign of the input NaN.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_zero;
        break;

    case float_class_inf:
        /*
         * There is no Inf in the destination format.  Raise Invalid
         * and return the maximum normal with the correct sign.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_normal;
        a->exp = float16_params_ahp.exp_max;
        /* All-ones fraction at exp_max: the largest finite AHP value. */
        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
                                  float16_params_ahp.frac_size + 1);
        break;

    case float_class_normal:
    case float_class_zero:
        /* Representable as-is; nothing to do. */
        break;

    default:
        g_assert_not_reached();
    }
}
2077 
2078 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2079 {
2080     if (is_nan(a->cls)) {
2081         parts_return_nan(a, s);
2082     }
2083 }
2084 
2085 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2086 {
2087     if (is_nan(a->cls)) {
2088         parts_return_nan(a, s);
2089     }
2090 }
2091 
2092 #define parts_float_to_float(P, S) \
2093     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2094 
/*
 * Narrow the 128-bit unpacked value @b into the 64-bit unpacked
 * value @a (used by e.g. float128 -> float32/float64 conversions).
 */
static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
                                        float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;

    if (a->cls == float_class_normal) {
        /* Truncate the fraction, jamming discarded bits into the lsb. */
        frac_truncjam(a, b);
    } else if (is_nan(a->cls)) {
        /* Discard the low bits of the NaN. */
        a->frac = b->frac_hi;
        parts_return_nan(a, s);
    }
    /* Zero and Inf carry no fraction; cls/sign/exp suffice. */
}
2110 
/*
 * Widen the 64-bit unpacked value @b into the 128-bit unpacked
 * value @a (used by e.g. float32/float64 -> float128 conversions).
 * Widening is exact, so only NaNs need further processing.
 */
static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
                                       float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;
    frac_widen(a, b);

    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}
2123 
2124 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2125 {
2126     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2127     FloatParts64 p;
2128 
2129     float16a_unpack_canonical(&p, a, s, fmt16);
2130     parts_float_to_float(&p, s);
2131     return float32_round_pack_canonical(&p, s);
2132 }
2133 
2134 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2135 {
2136     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2137     FloatParts64 p;
2138 
2139     float16a_unpack_canonical(&p, a, s, fmt16);
2140     parts_float_to_float(&p, s);
2141     return float64_round_pack_canonical(&p, s);
2142 }
2143 
2144 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2145 {
2146     FloatParts64 p;
2147     const FloatFmt *fmt;
2148 
2149     float32_unpack_canonical(&p, a, s);
2150     if (ieee) {
2151         parts_float_to_float(&p, s);
2152         fmt = &float16_params;
2153     } else {
2154         parts_float_to_ahp(&p, s);
2155         fmt = &float16_params_ahp;
2156     }
2157     return float16a_round_pack_canonical(&p, s, fmt);
2158 }
2159 
2160 static float64 QEMU_SOFTFLOAT_ATTR
2161 soft_float32_to_float64(float32 a, float_status *s)
2162 {
2163     FloatParts64 p;
2164 
2165     float32_unpack_canonical(&p, a, s);
2166     parts_float_to_float(&p, s);
2167     return float64_round_pack_canonical(&p, s);
2168 }
2169 
2170 float64 float32_to_float64(float32 a, float_status *s)
2171 {
2172     if (likely(float32_is_normal(a))) {
2173         /* Widening conversion can never produce inexact results.  */
2174         union_float32 uf;
2175         union_float64 ud;
2176         uf.s = a;
2177         ud.h = uf.h;
2178         return ud.s;
2179     } else if (float32_is_zero(a)) {
2180         return float64_set_sign(float64_zero, float32_is_neg(a));
2181     } else {
2182         return soft_float32_to_float64(a, s);
2183     }
2184 }
2185 
2186 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2187 {
2188     FloatParts64 p;
2189     const FloatFmt *fmt;
2190 
2191     float64_unpack_canonical(&p, a, s);
2192     if (ieee) {
2193         parts_float_to_float(&p, s);
2194         fmt = &float16_params;
2195     } else {
2196         parts_float_to_ahp(&p, s);
2197         fmt = &float16_params_ahp;
2198     }
2199     return float16a_round_pack_canonical(&p, s, fmt);
2200 }
2201 
2202 float32 float64_to_float32(float64 a, float_status *s)
2203 {
2204     FloatParts64 p;
2205 
2206     float64_unpack_canonical(&p, a, s);
2207     parts_float_to_float(&p, s);
2208     return float32_round_pack_canonical(&p, s);
2209 }
2210 
2211 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2212 {
2213     FloatParts64 p;
2214 
2215     bfloat16_unpack_canonical(&p, a, s);
2216     parts_float_to_float(&p, s);
2217     return float32_round_pack_canonical(&p, s);
2218 }
2219 
2220 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2221 {
2222     FloatParts64 p;
2223 
2224     bfloat16_unpack_canonical(&p, a, s);
2225     parts_float_to_float(&p, s);
2226     return float64_round_pack_canonical(&p, s);
2227 }
2228 
2229 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2230 {
2231     FloatParts64 p;
2232 
2233     float32_unpack_canonical(&p, a, s);
2234     parts_float_to_float(&p, s);
2235     return bfloat16_round_pack_canonical(&p, s);
2236 }
2237 
2238 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2239 {
2240     FloatParts64 p;
2241 
2242     float64_unpack_canonical(&p, a, s);
2243     parts_float_to_float(&p, s);
2244     return bfloat16_round_pack_canonical(&p, s);
2245 }
2246 
2247 float32 float128_to_float32(float128 a, float_status *s)
2248 {
2249     FloatParts64 p64;
2250     FloatParts128 p128;
2251 
2252     float128_unpack_canonical(&p128, a, s);
2253     parts_float_to_float_narrow(&p64, &p128, s);
2254     return float32_round_pack_canonical(&p64, s);
2255 }
2256 
2257 float64 float128_to_float64(float128 a, float_status *s)
2258 {
2259     FloatParts64 p64;
2260     FloatParts128 p128;
2261 
2262     float128_unpack_canonical(&p128, a, s);
2263     parts_float_to_float_narrow(&p64, &p128, s);
2264     return float64_round_pack_canonical(&p64, s);
2265 }
2266 
2267 float128 float32_to_float128(float32 a, float_status *s)
2268 {
2269     FloatParts64 p64;
2270     FloatParts128 p128;
2271 
2272     float32_unpack_canonical(&p64, a, s);
2273     parts_float_to_float_widen(&p128, &p64, s);
2274     return float128_round_pack_canonical(&p128, s);
2275 }
2276 
2277 float128 float64_to_float128(float64 a, float_status *s)
2278 {
2279     FloatParts64 p64;
2280     FloatParts128 p128;
2281 
2282     float64_unpack_canonical(&p64, a, s);
2283     parts_float_to_float_widen(&p128, &p64, s);
2284     return float128_round_pack_canonical(&p128, s);
2285 }
2286 
2287 /*
2288  * Rounds the floating-point value `a' to an integer, and returns the
2289  * result as a floating-point value. The operation is performed
2290  * according to the IEC/IEEE Standard for Binary Floating-Point
2291  * Arithmetic.
2292  */
2293 
/*
 * Round the unpacked value @a to an integral floating-point value,
 * honouring the rounding mode @rmode.  @scale is a binary exponent
 * adjustment applied before rounding (as used by the *_scalbn
 * helpers); it is clamped to +/- 0x10000.  The inexact flag is raised
 * whenever nonzero fraction bits are discarded.
 */
static FloatParts64 round_to_int(FloatParts64 a, FloatRoundMode rmode,
                               int scale, float_status *s)
{
    switch (a.cls) {
    case float_class_qnan:
    case float_class_snan:
        /* NaN in: propagate (possibly silenced) NaN out. */
        parts_return_nan(&a, s);
        break;

    case float_class_zero:
    case float_class_inf:
        /* already "integral" */
        break;

    case float_class_normal:
        /* Clamp to avoid overflow when adjusting a.exp below. */
        scale = MIN(MAX(scale, -0x10000), 0x10000);
        a.exp += scale;

        if (a.exp >= DECOMPOSED_BINARY_POINT) {
            /* already integral */
            break;
        }
        if (a.exp < 0) {
            bool one;
            /* all fractional */
            float_raise(float_flag_inexact, s);
            /* Decide whether |a| rounds to 1 or to 0. */
            switch (rmode) {
            case float_round_nearest_even:
                /* A tie (exactly 0.5) goes to the even value, 0. */
                one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
                break;
            case float_round_ties_away:
                one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
                break;
            case float_round_to_zero:
                one = false;
                break;
            case float_round_up:
                one = !a.sign;
                break;
            case float_round_down:
                one = a.sign;
                break;
            case float_round_to_odd:
                /* Any nonzero fraction jams to the odd value, 1. */
                one = true;
                break;
            default:
                g_assert_not_reached();
            }

            if (one) {
                a.frac = DECOMPOSED_IMPLICIT_BIT;
                a.exp = 0;
            } else {
                a.cls = float_class_zero;
            }
        } else {
            /* Mixed integer/fraction: round the raw fraction in place.
             * frac_lsb is the bit weighted 1 in the result;
             * everything below rnd_mask is fractional. */
            uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
            uint64_t frac_lsbm1 = frac_lsb >> 1;
            uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
            uint64_t rnd_mask = rnd_even_mask >> 1;
            uint64_t inc;

            switch (rmode) {
            case float_round_nearest_even:
                /* Suppress the increment on an exact tie to even. */
                inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
                break;
            case float_round_ties_away:
                inc = frac_lsbm1;
                break;
            case float_round_to_zero:
                inc = 0;
                break;
            case float_round_up:
                inc = a.sign ? 0 : rnd_mask;
                break;
            case float_round_down:
                inc = a.sign ? rnd_mask : 0;
                break;
            case float_round_to_odd:
                inc = a.frac & frac_lsb ? 0 : rnd_mask;
                break;
            default:
                g_assert_not_reached();
            }

            if (a.frac & rnd_mask) {
                float_raise(float_flag_inexact, s);
                /* A carry out of the top renormalizes: shift, bump exp. */
                if (uadd64_overflow(a.frac, inc, &a.frac)) {
                    a.frac >>= 1;
                    a.frac |= DECOMPOSED_IMPLICIT_BIT;
                    a.exp++;
                }
                a.frac &= ~rnd_mask;
            }
        }
        break;
    default:
        g_assert_not_reached();
    }
    return a;
}
2395 
2396 float16 float16_round_to_int(float16 a, float_status *s)
2397 {
2398     FloatParts64 pa, pr;
2399 
2400     float16_unpack_canonical(&pa, a, s);
2401     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2402     return float16_round_pack_canonical(&pr, s);
2403 }
2404 
2405 float32 float32_round_to_int(float32 a, float_status *s)
2406 {
2407     FloatParts64 pa, pr;
2408 
2409     float32_unpack_canonical(&pa, a, s);
2410     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2411     return float32_round_pack_canonical(&pr, s);
2412 }
2413 
2414 float64 float64_round_to_int(float64 a, float_status *s)
2415 {
2416     FloatParts64 pa, pr;
2417 
2418     float64_unpack_canonical(&pa, a, s);
2419     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2420     return float64_round_pack_canonical(&pr, s);
2421 }
2422 
2423 /*
2424  * Rounds the bfloat16 value `a' to an integer, and returns the
2425  * result as a bfloat16 value.
2426  */
2427 
2428 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2429 {
2430     FloatParts64 pa, pr;
2431 
2432     bfloat16_unpack_canonical(&pa, a, s);
2433     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2434     return bfloat16_round_pack_canonical(&pr, s);
2435 }
2436 
2437 /*
2438  * Returns the result of converting the floating-point value `a' to
2439  * the two's complement integer format. The conversion is performed
2440  * according to the IEC/IEEE Standard for Binary Floating-Point
2441  * Arithmetic---which means in particular that the conversion is
2442  * rounded according to the current rounding mode. If `a' is a NaN,
2443  * the largest positive integer is returned. Otherwise, if the
2444  * conversion overflows, the largest integer with the same sign as `a'
2445  * is returned.
2446 */
2447 
/*
 * Round @in with @rmode/@scale and pack the result into a signed
 * integer saturated to [@min, @max].  For NaN and out-of-range
 * inputs, flags raised during rounding are discarded and
 * float_flag_invalid is added to the flags pending on entry.
 */
static int64_t round_to_int_and_pack(FloatParts64 in, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s)
{
    uint64_t r;
    /* Snapshot flags so saturation can drop those raised by rounding. */
    int orig_flags = get_float_exception_flags(s);
    FloatParts64 p = round_to_int(in, rmode, scale, s);

    switch (p.cls) {
    case float_class_snan:
    case float_class_qnan:
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return max;
    case float_class_inf:
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return p.sign ? min : max;
    case float_class_zero:
        return 0;
    case float_class_normal:
        if (p.exp <= DECOMPOSED_BINARY_POINT) {
            r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
        } else {
            /* Exponent too large to shift: force saturation below. */
            r = UINT64_MAX;
        }
        if (p.sign) {
            /*
             * Compare against -min in unsigned arithmetic so that
             * INT64_MIN, whose magnitude is not representable as a
             * positive int64_t, is handled correctly.
             */
            if (r <= -(uint64_t) min) {
                return -r;
            } else {
                s->float_exception_flags = orig_flags | float_flag_invalid;
                return min;
            }
        } else {
            if (r <= max) {
                return r;
            } else {
                s->float_exception_flags = orig_flags | float_flag_invalid;
                return max;
            }
        }
    default:
        g_assert_not_reached();
    }
}
2491 
2492 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2493                               float_status *s)
2494 {
2495     FloatParts64 p;
2496 
2497     float16_unpack_canonical(&p, a, s);
2498     return round_to_int_and_pack(p, rmode, scale, INT8_MIN, INT8_MAX, s);
2499 }
2500 
2501 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2502                                 float_status *s)
2503 {
2504     FloatParts64 p;
2505 
2506     float16_unpack_canonical(&p, a, s);
2507     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2508 }
2509 
2510 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2511                                 float_status *s)
2512 {
2513     FloatParts64 p;
2514 
2515     float16_unpack_canonical(&p, a, s);
2516     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2517 }
2518 
2519 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2520                                 float_status *s)
2521 {
2522     FloatParts64 p;
2523 
2524     float16_unpack_canonical(&p, a, s);
2525     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2526 }
2527 
2528 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2529                                 float_status *s)
2530 {
2531     FloatParts64 p;
2532 
2533     float32_unpack_canonical(&p, a, s);
2534     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2535 }
2536 
2537 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2538                                 float_status *s)
2539 {
2540     FloatParts64 p;
2541 
2542     float32_unpack_canonical(&p, a, s);
2543     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2544 }
2545 
2546 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2547                                 float_status *s)
2548 {
2549     FloatParts64 p;
2550 
2551     float32_unpack_canonical(&p, a, s);
2552     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2553 }
2554 
2555 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2556                                 float_status *s)
2557 {
2558     FloatParts64 p;
2559 
2560     float64_unpack_canonical(&p, a, s);
2561     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2562 }
2563 
2564 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2565                                 float_status *s)
2566 {
2567     FloatParts64 p;
2568 
2569     float64_unpack_canonical(&p, a, s);
2570     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2571 }
2572 
2573 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2574                                 float_status *s)
2575 {
2576     FloatParts64 p;
2577 
2578     float64_unpack_canonical(&p, a, s);
2579     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2580 }
2581 
2582 int8_t float16_to_int8(float16 a, float_status *s)
2583 {
2584     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2585 }
2586 
2587 int16_t float16_to_int16(float16 a, float_status *s)
2588 {
2589     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2590 }
2591 
2592 int32_t float16_to_int32(float16 a, float_status *s)
2593 {
2594     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2595 }
2596 
2597 int64_t float16_to_int64(float16 a, float_status *s)
2598 {
2599     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2600 }
2601 
2602 int16_t float32_to_int16(float32 a, float_status *s)
2603 {
2604     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2605 }
2606 
2607 int32_t float32_to_int32(float32 a, float_status *s)
2608 {
2609     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2610 }
2611 
2612 int64_t float32_to_int64(float32 a, float_status *s)
2613 {
2614     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2615 }
2616 
2617 int16_t float64_to_int16(float64 a, float_status *s)
2618 {
2619     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2620 }
2621 
2622 int32_t float64_to_int32(float64 a, float_status *s)
2623 {
2624     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2625 }
2626 
2627 int64_t float64_to_int64(float64 a, float_status *s)
2628 {
2629     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2630 }
2631 
/*
 * Conversions to signed integers using round-to-zero (truncation),
 * ignoring the rounding mode in @s.
 */

int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
2676 
2677 /*
2678  * Returns the result of converting the floating-point value `a' to
2679  * the two's complement integer format.
2680  */
2681 
2682 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2683                                  float_status *s)
2684 {
2685     FloatParts64 p;
2686 
2687     bfloat16_unpack_canonical(&p, a, s);
2688     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2689 }
2690 
2691 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2692                                  float_status *s)
2693 {
2694     FloatParts64 p;
2695 
2696     bfloat16_unpack_canonical(&p, a, s);
2697     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2698 }
2699 
2700 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2701                                  float_status *s)
2702 {
2703     FloatParts64 p;
2704 
2705     bfloat16_unpack_canonical(&p, a, s);
2706     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2707 }
2708 
2709 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2710 {
2711     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2712 }
2713 
2714 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2715 {
2716     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2717 }
2718 
2719 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2720 {
2721     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2722 }
2723 
/* bfloat16 -> signed integer, truncating (round-to-zero). */

int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
2738 
2739 /*
2740  *  Returns the result of converting the floating-point value `a' to
2741  *  the unsigned integer format. The conversion is performed according
2742  *  to the IEC/IEEE Standard for Binary Floating-Point
2743  *  Arithmetic---which means in particular that the conversion is
2744  *  rounded according to the current rounding mode. If `a' is a NaN,
2745  *  the largest unsigned integer is returned. Otherwise, if the
 *  conversion overflows, the largest unsigned integer is returned. If
 *  'a' is negative, the result is rounded and zero is returned; a
 *  negative value that rounds to zero keeps the inexact flag raised by
 *  the rounding, while one that does not round to zero raises the
 *  invalid exception flag instead.
2750  */
2751 
/*
 * Round @in with @rmode/@scale and pack the result into an unsigned
 * integer saturated to [0, @max].  For NaN, negative, and
 * out-of-range inputs, flags raised during rounding are discarded and
 * float_flag_invalid is added to the flags pending on entry.
 */
static uint64_t round_to_uint_and_pack(FloatParts64 in, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s)
{
    /* Snapshot flags so saturation can drop those raised by rounding. */
    int orig_flags = get_float_exception_flags(s);
    FloatParts64 p = round_to_int(in, rmode, scale, s);
    uint64_t r;

    switch (p.cls) {
    case float_class_snan:
    case float_class_qnan:
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return max;
    case float_class_inf:
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return p.sign ? 0 : max;
    case float_class_zero:
        return 0;
    case float_class_normal:
        /* Still negative (and nonzero) after rounding: invalid. */
        if (p.sign) {
            s->float_exception_flags = orig_flags | float_flag_invalid;
            return 0;
        }

        if (p.exp <= DECOMPOSED_BINARY_POINT) {
            r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
        } else {
            s->float_exception_flags = orig_flags | float_flag_invalid;
            return max;
        }

        /* For uint64 this will never trip, but if p.exp is too large
         * to shift a decomposed fraction we shall have exited via the
         * 3rd leg above.
         */
        if (r > max) {
            s->float_exception_flags = orig_flags | float_flag_invalid;
            return max;
        }
        return r;
    default:
        g_assert_not_reached();
    }
}
2796 
/*
 * Float -> unsigned integer conversions with an explicit rounding mode
 * and scale: the input is effectively multiplied by 2**scale before
 * being converted.  Values that do not fit the destination type
 * saturate to its maximum and set float_flag_invalid (see
 * round_to_uint_and_pack).
 */

uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT8_MAX, s);
}

uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
}

uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
}

uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
}

uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
}

uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
}

uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
}

uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
}

uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
}

uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
}
2886 
/*
 * Float -> unsigned integer conversions using the rounding mode
 * currently configured in the float_status.
 */

uint8_t float16_to_uint8(float16 a, float_status *s)
{
    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float16_to_uint32(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float16_to_uint64(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float32_to_uint16(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float32_to_uint32(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float32_to_uint64(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float64_to_uint16(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float64_to_uint32(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float64_to_uint64(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

/*
 * Truncating (round-to-zero) variants, regardless of the rounding
 * mode configured in the float_status.
 */

uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
2981 
2982 /*
2983  *  Returns the result of converting the bfloat16 value `a' to
2984  *  the unsigned integer format.
2985  */
2986 
2987 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2988                                    int scale, float_status *s)
2989 {
2990     FloatParts64 p;
2991 
2992     bfloat16_unpack_canonical(&p, a, s);
2993     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2994 }
2995 
2996 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
2997                                    int scale, float_status *s)
2998 {
2999     FloatParts64 p;
3000 
3001     bfloat16_unpack_canonical(&p, a, s);
3002     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
3003 }
3004 
3005 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3006                                    int scale, float_status *s)
3007 {
3008     FloatParts64 p;
3009 
3010     bfloat16_unpack_canonical(&p, a, s);
3011     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
3012 }
3013 
3014 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3015 {
3016     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3017 }
3018 
3019 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3020 {
3021     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3022 }
3023 
3024 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3025 {
3026     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3027 }
3028 
3029 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3030 {
3031     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3032 }
3033 
3034 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3035 {
3036     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3037 }
3038 
3039 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3040 {
3041     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3042 }
3043 
3044 /*
3045  * Integer to float conversions
3046  *
3047  * Returns the result of converting the two's complement integer `a'
3048  * to the floating-point format. The conversion is performed according
3049  * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3050  */
3051 
3052 static FloatParts64 int_to_float(int64_t a, int scale, float_status *status)
3053 {
3054     FloatParts64 r = { .sign = false };
3055 
3056     if (a == 0) {
3057         r.cls = float_class_zero;
3058     } else {
3059         uint64_t f = a;
3060         int shift;
3061 
3062         r.cls = float_class_normal;
3063         if (a < 0) {
3064             f = -f;
3065             r.sign = true;
3066         }
3067         shift = clz64(f);
3068         scale = MIN(MAX(scale, -0x10000), 0x10000);
3069 
3070         r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
3071         r.frac = f << shift;
3072     }
3073 
3074     return r;
3075 }
3076 
/*
 * Signed integer -> float conversions.  The int32/int16/int8 variants
 * widen to int64_t (always exact) and share the int64 implementation;
 * any rounding happens when packing to the destination format.
 */

float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 pa = int_to_float(a, scale, status);
    return float16_round_pack_canonical(&pa, status);
}

float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int64_to_float16(int64_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int32_to_float16(int32_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int16_to_float16(int16_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int8_to_float16(int8_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 pa = int_to_float(a, scale, status);
    return float32_round_pack_canonical(&pa, status);
}

float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int64_to_float32(int64_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int32_to_float32(int32_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int16_to_float32(int16_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 pa = int_to_float(a, scale, status);
    return float64_round_pack_canonical(&pa, status);
}

float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int64_to_float64(int64_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int32_to_float64(int32_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int16_to_float64(int16_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

/*
 * Returns the result of converting the two's complement integer `a'
 * to the bfloat16 format.
 */

bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 pa = int_to_float(a, scale, status);
    return bfloat16_round_pack_canonical(&pa, status);
}

bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}
3210 
3211 /*
3212  * Unsigned Integer to float conversions
3213  *
3214  * Returns the result of converting the unsigned integer `a' to the
3215  * floating-point format. The conversion is performed according to the
3216  * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3217  */
3218 
3219 static FloatParts64 uint_to_float(uint64_t a, int scale, float_status *status)
3220 {
3221     FloatParts64 r = { .sign = false };
3222     int shift;
3223 
3224     if (a == 0) {
3225         r.cls = float_class_zero;
3226     } else {
3227         scale = MIN(MAX(scale, -0x10000), 0x10000);
3228         shift = clz64(a);
3229         r.cls = float_class_normal;
3230         r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
3231         r.frac = a << shift;
3232     }
3233 
3234     return r;
3235 }
3236 
/*
 * Unsigned integer -> float conversions.  The uint32/uint16/uint8
 * variants widen to uint64_t (always exact) and share the uint64
 * implementation; any rounding happens when packing to the
 * destination format.
 */

float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 pa = uint_to_float(a, scale, status);
    return float16_round_pack_canonical(&pa, status);
}

float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint64_to_float16(uint64_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint32_to_float16(uint32_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint16_to_float16(uint16_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint8_to_float16(uint8_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 pa = uint_to_float(a, scale, status);
    return float32_round_pack_canonical(&pa, status);
}

float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint64_to_float32(uint64_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint32_to_float32(uint32_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint16_to_float32(uint16_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 pa = uint_to_float(a, scale, status);
    return float64_round_pack_canonical(&pa, status);
}

float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint64_to_float64(uint64_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint32_to_float64(uint32_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint16_to_float64(uint16_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

/*
 * Returns the result of converting the unsigned integer `a' to the
 * bfloat16 format.
 */

bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 pa = uint_to_float(a, scale, status);
    return bfloat16_round_pack_canonical(&pa, status);
}

bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}
3370 
3371 /* Float Min/Max */
3372 /* min() and max() functions. These can't be implemented as
3373  * 'compare and pick one input' because that would mishandle
3374  * NaNs and +0 vs -0.
3375  *
3376  * minnum() and maxnum() functions. These are similar to the min()
3377  * and max() functions but if one of the arguments is a QNaN and
3378  * the other is numerical then the numerical argument is returned.
3379  * SNaNs will get quietened before being returned.
 * minnum() and maxnum() correspond to the IEEE 754-2008 minNum()
 * and maxNum() operations. min() and max() are the typical min/max
 * semantics provided by many CPUs which predate that specification.
 *
 * minnummag() and maxnummag() functions correspond to minNumMag()
 * and maxNumMag() from the IEEE-754 2008.
3386  */
3387 static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
3388                                 bool ieee, bool ismag, float_status *s)
3389 {
3390     if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
3391         if (ieee) {
3392             /* Takes two floating-point values `a' and `b', one of
3393              * which is a NaN, and returns the appropriate NaN
3394              * result. If either `a' or `b' is a signaling NaN,
3395              * the invalid exception is raised.
3396              */
3397             if (is_snan(a.cls) || is_snan(b.cls)) {
3398                 return *parts_pick_nan(&a, &b, s);
3399             } else if (is_nan(a.cls) && !is_nan(b.cls)) {
3400                 return b;
3401             } else if (is_nan(b.cls) && !is_nan(a.cls)) {
3402                 return a;
3403             }
3404         }
3405         return *parts_pick_nan(&a, &b, s);
3406     } else {
3407         int a_exp, b_exp;
3408 
3409         switch (a.cls) {
3410         case float_class_normal:
3411             a_exp = a.exp;
3412             break;
3413         case float_class_inf:
3414             a_exp = INT_MAX;
3415             break;
3416         case float_class_zero:
3417             a_exp = INT_MIN;
3418             break;
3419         default:
3420             g_assert_not_reached();
3421             break;
3422         }
3423         switch (b.cls) {
3424         case float_class_normal:
3425             b_exp = b.exp;
3426             break;
3427         case float_class_inf:
3428             b_exp = INT_MAX;
3429             break;
3430         case float_class_zero:
3431             b_exp = INT_MIN;
3432             break;
3433         default:
3434             g_assert_not_reached();
3435             break;
3436         }
3437 
3438         if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
3439             bool a_less = a_exp < b_exp;
3440             if (a_exp == b_exp) {
3441                 a_less = a.frac < b.frac;
3442             }
3443             return a_less ^ ismin ? b : a;
3444         }
3445 
3446         if (a.sign == b.sign) {
3447             bool a_less = a_exp < b_exp;
3448             if (a_exp == b_exp) {
3449                 a_less = a.frac < b.frac;
3450             }
3451             return a.sign ^ a_less ^ ismin ? b : a;
3452         } else {
3453             return a.sign ^ ismin ? b : a;
3454         }
3455     }
3456 }
3457 
/*
 * Expands to the public float<sz>_<name> entry points: unpack both
 * operands, select one via minmax_floats, and repack the winner.
 * "isiee" selects the minnum/maxnum NaN handling and "ismag" the
 * magnitude-based (minnummag/maxnummag) comparison.
 */
#define MINMAX(sz, name, ismin, isiee, ismag)                           \
float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b,      \
                                     float_status *s)                   \
{                                                                       \
    FloatParts64 pa, pb, pr;                                            \
    float ## sz ## _unpack_canonical(&pa, a, s);                        \
    float ## sz ## _unpack_canonical(&pb, b, s);                        \
    pr = minmax_floats(pa, pb, ismin, isiee, ismag, s);                 \
    return float ## sz ## _round_pack_canonical(&pr, s);                \
}

MINMAX(16, min, true, false, false)
MINMAX(16, minnum, true, true, false)
MINMAX(16, minnummag, true, true, true)
MINMAX(16, max, false, false, false)
MINMAX(16, maxnum, false, true, false)
MINMAX(16, maxnummag, false, true, true)

MINMAX(32, min, true, false, false)
MINMAX(32, minnum, true, true, false)
MINMAX(32, minnummag, true, true, true)
MINMAX(32, max, false, false, false)
MINMAX(32, maxnum, false, true, false)
MINMAX(32, maxnummag, false, true, true)

MINMAX(64, min, true, false, false)
MINMAX(64, minnum, true, true, false)
MINMAX(64, minnummag, true, true, true)
MINMAX(64, max, false, false, false)
MINMAX(64, maxnum, false, true, false)
MINMAX(64, maxnummag, false, true, true)

#undef MINMAX
3491 
/* As MINMAX above, but for the bfloat16 type. */
#define BF16_MINMAX(name, ismin, isiee, ismag)                          \
bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s)     \
{                                                                       \
    FloatParts64 pa, pb, pr;                                            \
    bfloat16_unpack_canonical(&pa, a, s);                               \
    bfloat16_unpack_canonical(&pb, b, s);                               \
    pr = minmax_floats(pa, pb, ismin, isiee, ismag, s);                 \
    return bfloat16_round_pack_canonical(&pr, s);                       \
}

BF16_MINMAX(min, true, false, false)
BF16_MINMAX(minnum, true, true, false)
BF16_MINMAX(minnummag, true, true, true)
BF16_MINMAX(max, false, false, false)
BF16_MINMAX(maxnum, false, true, false)
BF16_MINMAX(maxnummag, false, true, true)

#undef BF16_MINMAX
3510 
3511 /* Floating point compare */
3512 static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
3513                                     float_status *s)
3514 {
3515     if (is_nan(a.cls) || is_nan(b.cls)) {
3516         if (!is_quiet ||
3517             a.cls == float_class_snan ||
3518             b.cls == float_class_snan) {
3519             float_raise(float_flag_invalid, s);
3520         }
3521         return float_relation_unordered;
3522     }
3523 
3524     if (a.cls == float_class_zero) {
3525         if (b.cls == float_class_zero) {
3526             return float_relation_equal;
3527         }
3528         return b.sign ? float_relation_greater : float_relation_less;
3529     } else if (b.cls == float_class_zero) {
3530         return a.sign ? float_relation_less : float_relation_greater;
3531     }
3532 
3533     /* The only really important thing about infinity is its sign. If
3534      * both are infinities the sign marks the smallest of the two.
3535      */
3536     if (a.cls == float_class_inf) {
3537         if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
3538             return float_relation_equal;
3539         }
3540         return a.sign ? float_relation_less : float_relation_greater;
3541     } else if (b.cls == float_class_inf) {
3542         return b.sign ? float_relation_greater : float_relation_less;
3543     }
3544 
3545     if (a.sign != b.sign) {
3546         return a.sign ? float_relation_less : float_relation_greater;
3547     }
3548 
3549     if (a.exp == b.exp) {
3550         if (a.frac == b.frac) {
3551             return float_relation_equal;
3552         }
3553         if (a.sign) {
3554             return a.frac > b.frac ?
3555                 float_relation_less : float_relation_greater;
3556         } else {
3557             return a.frac > b.frac ?
3558                 float_relation_greater : float_relation_less;
3559         }
3560     } else {
3561         if (a.sign) {
3562             return a.exp > b.exp ? float_relation_less : float_relation_greater;
3563         } else {
3564             return a.exp > b.exp ? float_relation_greater : float_relation_less;
3565         }
3566     }
3567 }
3568 
/*
 * Expands to the softfloat compare helper for a given width: unpack
 * both operands and defer to compare_floats.  "attr" selects the
 * function attribute (flatten vs. the softfloat default).
 */
#define COMPARE(name, attr, sz)                                         \
static int attr                                                         \
name(float ## sz a, float ## sz b, bool is_quiet, float_status *s)      \
{                                                                       \
    FloatParts64 pa, pb;                                                \
    float ## sz ## _unpack_canonical(&pa, a, s);                        \
    float ## sz ## _unpack_canonical(&pb, b, s);                        \
    return compare_floats(pa, pb, is_quiet, s);                         \
}

COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)

#undef COMPARE
3584 
/*
 * float16 compare entry points (softfloat only; no hardfloat fast
 * path).  The plain compare raises float_flag_invalid for any NaN
 * operand; the quiet compare only for signaling NaNs (see
 * compare_floats).
 */
FloatRelation float16_compare(float16 a, float16 b, float_status *s)
{
    return soft_f16_compare(a, b, false, s);
}

FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
{
    return soft_f16_compare(a, b, true, s);
}
3594 
/*
 * float32 compare with a hardfloat fast path: after flushing inputs
 * (see float32_input_flush2), classify the relation with the host's
 * ISO C comparison macros.  Only the unordered case falls through to
 * the softfloat implementation, which handles exception flags.
 */
static FloatRelation QEMU_FLATTEN
f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /* The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return soft_f32_compare(ua.s, ub.s, is_quiet, s);
}
3623 
/* Signaling compare: raises float_flag_invalid for any NaN operand. */
FloatRelation float32_compare(float32 a, float32 b, float_status *s)
{
    return f32_compare(a, b, false, s);
}

/* Quiet compare: raises float_flag_invalid only for signaling NaNs. */
FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
{
    return f32_compare(a, b, true, s);
}
3633 
/*
 * float64 compare with a hardfloat fast path; mirrors f32_compare.
 * Only the unordered case falls through to the softfloat
 * implementation, which handles exception flags.
 */
static FloatRelation QEMU_FLATTEN
f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /* The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return soft_f64_compare(ua.s, ub.s, is_quiet, s);
}
3662 
/* Signaling compare: raises float_flag_invalid for any NaN operand. */
FloatRelation float64_compare(float64 a, float64 b, float_status *s)
{
    return f64_compare(a, b, false, s);
}

/* Quiet compare: raises float_flag_invalid only for signaling NaNs. */
FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
{
    return f64_compare(a, b, true, s);
}
3672 
/* bfloat16 compare: softfloat only, no hardfloat fast path. */
static FloatRelation QEMU_FLATTEN
soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
{
    FloatParts64 pa, pb;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    return compare_floats(pa, pb, is_quiet, s);
}

/* Signaling compare: raises float_flag_invalid for any NaN operand. */
FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
{
    return soft_bf16_compare(a, b, false, s);
}

/* Quiet compare: raises float_flag_invalid only for signaling NaNs. */
FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
{
    return soft_bf16_compare(a, b, true, s);
}
3692 
3693 /* Multiply A by 2 raised to the power N.  */
3694 static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
3695 {
3696     if (unlikely(is_nan(a.cls))) {
3697         parts_return_nan(&a, s);
3698     }
3699     if (a.cls == float_class_normal) {
3700         /* The largest float type (even though not supported by FloatParts64)
3701          * is float128, which has a 15 bit exponent.  Bounding N to 16 bits
3702          * still allows rounding to infinity, without allowing overflow
3703          * within the int32_t that backs FloatParts64.exp.
3704          */
3705         n = MIN(MAX(n, -0x10000), 0x10000);
3706         a.exp += n;
3707     }
3708     return a;
3709 }
3710 
/*
 * Public scalbn entry points: unpack, adjust the exponent via
 * scalbn_decomposed, and repack with the usual rounding.
 */

float16 float16_scalbn(float16 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float16_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float16_round_pack_canonical(&pr, status);
}

float32 float32_scalbn(float32 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float32_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float32_round_pack_canonical(&pr, status);
}

float64 float64_scalbn(float64 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float64_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float64_round_pack_canonical(&pr, status);
}

bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    bfloat16_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return bfloat16_round_pack_canonical(&pr, status);
}
3746 
3747 /*
3748  * Square Root
3749  *
3750  * The old softfloat code did an approximation step before zeroing in
3751  * on the final result. However for simpleness we just compute the
3752  * square root by iterating down from the implicit bit to enough extra
3753  * bits to ensure we get a correctly rounded result.
3754  *
3755  * This does mean however the calculation is slower than before,
3756  * especially for 64 bit floats.
3757  */
3758 
3759 static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
3760 {
3761     uint64_t a_frac, r_frac, s_frac;
3762     int bit, last_bit;
3763 
3764     if (is_nan(a.cls)) {
3765         parts_return_nan(&a, s);
3766         return a;
3767     }
3768     if (a.cls == float_class_zero) {
3769         return a;  /* sqrt(+-0) = +-0 */
3770     }
3771     if (a.sign) {
3772         float_raise(float_flag_invalid, s);
3773         parts_default_nan(&a, s);
3774         return a;
3775     }
3776     if (a.cls == float_class_inf) {
3777         return a;  /* sqrt(+inf) = +inf */
3778     }
3779 
3780     assert(a.cls == float_class_normal);
3781 
3782     /* We need two overflow bits at the top. Adding room for that is a
3783      * right shift. If the exponent is odd, we can discard the low bit
3784      * by multiplying the fraction by 2; that's a left shift. Combine
3785      * those and we shift right by 1 if the exponent is odd, otherwise 2.
3786      */
3787     a_frac = a.frac >> (2 - (a.exp & 1));
3788     a.exp >>= 1;
3789 
3790     /* Bit-by-bit computation of sqrt.  */
3791     r_frac = 0;
3792     s_frac = 0;
3793 
3794     /* Iterate from implicit bit down to the 3 extra bits to compute a
3795      * properly rounded result. Remember we've inserted two more bits
3796      * at the top, so these positions are two less.
3797      */
3798     bit = DECOMPOSED_BINARY_POINT - 2;
3799     last_bit = MAX(p->frac_shift - 4, 0);
3800     do {
3801         uint64_t q = 1ULL << bit;
3802         uint64_t t_frac = s_frac + q;
3803         if (t_frac <= a_frac) {
3804             s_frac = t_frac + q;
3805             a_frac -= t_frac;
3806             r_frac += q;
3807         }
3808         a_frac <<= 1;
3809     } while (--bit >= last_bit);
3810 
3811     /* Undo the right shift done above. If there is any remaining
3812      * fraction, the result is inexact. Set the sticky bit.
3813      */
3814     a.frac = (r_frac << 2) + (a_frac != 0);
3815 
3816     return a;
3817 }
3818 
3819 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
3820 {
3821     FloatParts64 pa, pr;
3822 
3823     float16_unpack_canonical(&pa, a, status);
3824     pr = sqrt_float(pa, status, &float16_params);
3825     return float16_round_pack_canonical(&pr, status);
3826 }
3827 
3828 static float32 QEMU_SOFTFLOAT_ATTR
3829 soft_f32_sqrt(float32 a, float_status *status)
3830 {
3831     FloatParts64 pa, pr;
3832 
3833     float32_unpack_canonical(&pa, a, status);
3834     pr = sqrt_float(pa, status, &float32_params);
3835     return float32_round_pack_canonical(&pr, status);
3836 }
3837 
3838 static float64 QEMU_SOFTFLOAT_ATTR
3839 soft_f64_sqrt(float64 a, float_status *status)
3840 {
3841     FloatParts64 pa, pr;
3842 
3843     float64_unpack_canonical(&pa, a, status);
3844     pr = sqrt_float(pa, status, &float64_params);
3845     return float64_round_pack_canonical(&pr, status);
3846 }
3847 
/*
 * sqrt with a hardfloat fast path: when the guest FP state allows it,
 * use the host's sqrtf() directly and fall back to soft_f32_sqrt()
 * for any input that could raise flags or need special handling.
 */
float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    /* Only positive zero or positive normal inputs take the host FP
     * path; everything else (NaN, inf, denormal, negative) goes soft.
     * The two arms are the same test, done either with host
     * fpclassify/signbit or with our own bit inspection, chosen at
     * build time by QEMU_HARDFLOAT_1F32_USE_FP.
     */
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}
3874 
/*
 * sqrt with a hardfloat fast path: when the guest FP state allows it,
 * use the host's sqrt() directly and fall back to soft_f64_sqrt()
 * for any input that could raise flags or need special handling.
 */
float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    /* Only positive zero or positive normal inputs take the host FP
     * path; everything else (NaN, inf, denormal, negative) goes soft.
     * See float32_sqrt for why there are two forms of the same test.
     */
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}
3901 
3902 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3903 {
3904     FloatParts64 pa, pr;
3905 
3906     bfloat16_unpack_canonical(&pa, a, status);
3907     pr = sqrt_float(pa, status, &bfloat16_params);
3908     return bfloat16_round_pack_canonical(&pr, status);
3909 }
3910 
3911 /*----------------------------------------------------------------------------
3912 | The pattern for a default generated NaN.
3913 *----------------------------------------------------------------------------*/
3914 
3915 float16 float16_default_nan(float_status *status)
3916 {
3917     FloatParts64 p;
3918 
3919     parts_default_nan(&p, status);
3920     p.frac >>= float16_params.frac_shift;
3921     return float16_pack_raw(&p);
3922 }
3923 
3924 float32 float32_default_nan(float_status *status)
3925 {
3926     FloatParts64 p;
3927 
3928     parts_default_nan(&p, status);
3929     p.frac >>= float32_params.frac_shift;
3930     return float32_pack_raw(&p);
3931 }
3932 
3933 float64 float64_default_nan(float_status *status)
3934 {
3935     FloatParts64 p;
3936 
3937     parts_default_nan(&p, status);
3938     p.frac >>= float64_params.frac_shift;
3939     return float64_pack_raw(&p);
3940 }
3941 
3942 float128 float128_default_nan(float_status *status)
3943 {
3944     FloatParts128 p;
3945 
3946     parts_default_nan(&p, status);
3947     frac_shr(&p, float128_params.frac_shift);
3948     return float128_pack_raw(&p);
3949 }
3950 
3951 bfloat16 bfloat16_default_nan(float_status *status)
3952 {
3953     FloatParts64 p;
3954 
3955     parts_default_nan(&p, status);
3956     p.frac >>= bfloat16_params.frac_shift;
3957     return bfloat16_pack_raw(&p);
3958 }
3959 
3960 /*----------------------------------------------------------------------------
3961 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3962 *----------------------------------------------------------------------------*/
3963 
3964 float16 float16_silence_nan(float16 a, float_status *status)
3965 {
3966     FloatParts64 p;
3967 
3968     float16_unpack_raw(&p, a);
3969     p.frac <<= float16_params.frac_shift;
3970     parts_silence_nan(&p, status);
3971     p.frac >>= float16_params.frac_shift;
3972     return float16_pack_raw(&p);
3973 }
3974 
3975 float32 float32_silence_nan(float32 a, float_status *status)
3976 {
3977     FloatParts64 p;
3978 
3979     float32_unpack_raw(&p, a);
3980     p.frac <<= float32_params.frac_shift;
3981     parts_silence_nan(&p, status);
3982     p.frac >>= float32_params.frac_shift;
3983     return float32_pack_raw(&p);
3984 }
3985 
3986 float64 float64_silence_nan(float64 a, float_status *status)
3987 {
3988     FloatParts64 p;
3989 
3990     float64_unpack_raw(&p, a);
3991     p.frac <<= float64_params.frac_shift;
3992     parts_silence_nan(&p, status);
3993     p.frac >>= float64_params.frac_shift;
3994     return float64_pack_raw(&p);
3995 }
3996 
3997 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3998 {
3999     FloatParts64 p;
4000 
4001     bfloat16_unpack_raw(&p, a);
4002     p.frac <<= bfloat16_params.frac_shift;
4003     parts_silence_nan(&p, status);
4004     p.frac >>= bfloat16_params.frac_shift;
4005     return bfloat16_pack_raw(&p);
4006 }
4007 
4008 float128 float128_silence_nan(float128 a, float_status *status)
4009 {
4010     FloatParts128 p;
4011 
4012     float128_unpack_raw(&p, a);
4013     frac_shl(&p, float128_params.frac_shift);
4014     parts_silence_nan(&p, status);
4015     frac_shr(&p, float128_params.frac_shift);
4016     return float128_pack_raw(&p);
4017 }
4018 
4019 /*----------------------------------------------------------------------------
4020 | If `a' is denormal and we are in flush-to-zero mode then set the
4021 | input-denormal exception and return zero. Otherwise just return the value.
4022 *----------------------------------------------------------------------------*/
4023 
4024 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4025 {
4026     if (p.exp == 0 && p.frac != 0) {
4027         float_raise(float_flag_input_denormal, status);
4028         return true;
4029     }
4030 
4031     return false;
4032 }
4033 
4034 float16 float16_squash_input_denormal(float16 a, float_status *status)
4035 {
4036     if (status->flush_inputs_to_zero) {
4037         FloatParts64 p;
4038 
4039         float16_unpack_raw(&p, a);
4040         if (parts_squash_denormal(p, status)) {
4041             return float16_set_sign(float16_zero, p.sign);
4042         }
4043     }
4044     return a;
4045 }
4046 
4047 float32 float32_squash_input_denormal(float32 a, float_status *status)
4048 {
4049     if (status->flush_inputs_to_zero) {
4050         FloatParts64 p;
4051 
4052         float32_unpack_raw(&p, a);
4053         if (parts_squash_denormal(p, status)) {
4054             return float32_set_sign(float32_zero, p.sign);
4055         }
4056     }
4057     return a;
4058 }
4059 
4060 float64 float64_squash_input_denormal(float64 a, float_status *status)
4061 {
4062     if (status->flush_inputs_to_zero) {
4063         FloatParts64 p;
4064 
4065         float64_unpack_raw(&p, a);
4066         if (parts_squash_denormal(p, status)) {
4067             return float64_set_sign(float64_zero, p.sign);
4068         }
4069     }
4070     return a;
4071 }
4072 
4073 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4074 {
4075     if (status->flush_inputs_to_zero) {
4076         FloatParts64 p;
4077 
4078         bfloat16_unpack_raw(&p, a);
4079         if (parts_squash_denormal(p, status)) {
4080             return bfloat16_set_sign(bfloat16_zero, p.sign);
4081         }
4082     }
4083     return a;
4084 }
4085 
4086 /*----------------------------------------------------------------------------
4087 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
4088 | and 7, and returns the properly rounded 32-bit integer corresponding to the
4089 | input.  If `zSign' is 1, the input is negated before being converted to an
4090 | integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
4091 | is simply rounded to an integer, with the inexact exception raised if the
4092 | input cannot be represented exactly as an integer.  However, if the fixed-
4093 | point input is too large, the invalid exception is raised and the largest
4094 | positive or negative integer is returned.
4095 *----------------------------------------------------------------------------*/
4096 
static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    int32_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Pick the value to add before discarding the 7 fraction bits:
     * half-ulp (0x40) for the nearest modes, a full 0x7f to force
     * rounding away from zero in the directed modes, zero otherwise.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round up only when the result bit would otherwise be even. */
        roundIncrement = absZ & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
    }
    roundBits = absZ & 0x7F;
    absZ = ( absZ + roundIncrement )>>7;
    /* Nearest-even tie (discarded bits exactly half-ulp): clear the
     * low bit so the result is even.
     */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        absZ &= ~1;
    }
    z = absZ;
    if ( zSign ) z = - z;
    /* Out of int32 range if high bits survived the shift or the sign
     * of z disagrees with the requested sign (and z != 0).
     */
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
        float_raise(float_flag_invalid, status);
        return zSign ? INT32_MIN : INT32_MAX;
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
4144 
4145 /*----------------------------------------------------------------------------
4146 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4147 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4148 | and returns the properly rounded 64-bit integer corresponding to the input.
4149 | If `zSign' is 1, the input is negated before being converted to an integer.
4150 | Ordinarily, the fixed-point input is simply rounded to an integer, with
4151 | the inexact exception raised if the input cannot be represented exactly as
4152 | an integer.  However, if the fixed-point input is too large, the invalid
4153 | exception is raised and the largest positive or negative integer is
4154 | returned.
4155 *----------------------------------------------------------------------------*/
4156 
static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
                               float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;
    int64_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* absZ1 holds the fraction below the binary point; decide whether
     * to bump absZ0 by one based on the rounding mode.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Increment when the fraction is >= 1/2 (top bit of absZ1). */
        increment = ((int64_t) absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        /* Round up only when absZ0 would otherwise be even. */
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if ( increment ) {
        ++absZ0;
        if ( absZ0 == 0 ) goto overflow;
        /* Nearest-even tie (fraction exactly 1/2): force even. */
        if (!(absZ1 << 1) && roundNearestEven) {
            absZ0 &= ~1;
        }
    }
    z = absZ0;
    if ( zSign ) z = - z;
    /* Overflow if the sign of z disagrees with the requested sign. */
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
 overflow:
        float_raise(float_flag_invalid, status);
        return zSign ? INT64_MIN : INT64_MAX;
    }
    if (absZ1) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
4206 
4207 /*----------------------------------------------------------------------------
4208 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4209 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4210 | and returns the properly rounded 64-bit unsigned integer corresponding to the
4211 | input.  Ordinarily, the fixed-point input is simply rounded to an integer,
4212 | with the inexact exception raised if the input cannot be represented exactly
4213 | as an integer.  However, if the fixed-point input is too large, the invalid
4214 | exception is raised and the largest unsigned integer is returned.
4215 *----------------------------------------------------------------------------*/
4216 
/* NOTE(review): the return type is int64_t although the value is an
 * unsigned 64-bit result (see the comment block above); callers
 * reinterpret the bits.  Left as-is to preserve the interface.
 */
static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
                                uint64_t absZ1, float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = (roundingMode == float_round_nearest_even);
    /* absZ1 holds the fraction below the binary point; decide whether
     * to bump absZ0 by one based on the rounding mode.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Increment when the fraction is >= 1/2 (top bit of absZ1). */
        increment = ((int64_t)absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        /* Round up only when absZ0 would otherwise be even. */
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if (increment) {
        ++absZ0;
        /* Wrap to zero means the value exceeded UINT64_MAX. */
        if (absZ0 == 0) {
            float_raise(float_flag_invalid, status);
            return UINT64_MAX;
        }
        /* Nearest-even tie (fraction exactly 1/2): force even. */
        if (!(absZ1 << 1) && roundNearestEven) {
            absZ0 &= ~1;
        }
    }

    /* A non-zero negative value cannot be represented unsigned. */
    if (zSign && absZ0) {
        float_raise(float_flag_invalid, status);
        return 0;
    }

    if (absZ1) {
        float_raise(float_flag_inexact, status);
    }
    return absZ0;
}
4266 
4267 /*----------------------------------------------------------------------------
4268 | Normalizes the subnormal single-precision floating-point value represented
4269 | by the denormalized significand `aSig'.  The normalized exponent and
4270 | significand are stored at the locations pointed to by `zExpPtr' and
4271 | `zSigPtr', respectively.
4272 *----------------------------------------------------------------------------*/
4273 
static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /* Shift the leading set bit up to bit 23 (clz32 counts from bit
     * 31, and the significand sits below 8 spare high bits).
     */
    const int8_t shift = clz32(aSig) - 8;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4284 
4285 /*----------------------------------------------------------------------------
4286 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4287 | and significand `zSig', and returns the proper single-precision floating-
4288 | point value corresponding to the abstract input.  Ordinarily, the abstract
4289 | value is simply rounded and packed into the single-precision format, with
4290 | the inexact exception raised if the abstract input cannot be represented
4291 | exactly.  However, if the abstract value is too large, the overflow and
4292 | inexact exceptions are raised and an infinity or maximal finite value is
4293 | returned.  If the abstract value is too small, the input value is rounded to
4294 | a subnormal number, and the underflow and inexact exceptions are raised if
4295 | the abstract input cannot be represented exactly as a subnormal single-
4296 | precision floating-point number.
4297 |     The input significand `zSig' has its binary point between bits 30
4298 | and 29, which is 7 bits to the left of the usual location.  This shifted
4299 | significand must be normalized or smaller.  If `zSig' is not normalized,
4300 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4301 | and it must not require rounding.  In the usual case that `zSig' is
4302 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4303 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4304 | Binary Floating-Point Arithmetic.
4305 *----------------------------------------------------------------------------*/
4306 
static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Value added before dropping the 7 extra fraction bits:
     * half-ulp (0x40) for the nearest modes, 0x7f to force rounding
     * away from zero in the directed modes, zero otherwise.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round up only when the result bit would otherwise be even. */
        roundIncrement = zSig & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
        break;
    }
    roundBits = zSig & 0x7F;
    /* Exponent at or beyond the representable range (the unsigned
     * compare also catches negative zExp for the underflow path).
     */
    if ( 0xFD <= (uint16_t) zExp ) {
        if (    ( 0xFD < zExp )
             || (    ( zExp == 0xFD )
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /* Overflow: to infinity unless the mode rounds toward
             * zero (or is round-to-odd), in which case the largest
             * finite value is produced.
             */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat32(zSign, 0xFF, -!overflow_to_inf);
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat32(zSign, 0, 0);
            }
            /* Tininess detected before rounding, or the value is
             * still below the smallest normal after rounding.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < 0x80000000);
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x7F;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = zSig & 0x80 ? 0 : 0x7f;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>7;
    /* Nearest-even tie (discarded bits exactly half-ulp): clear the
     * low bit so the significand is even.
     */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat32( zSign, zExp, zSig );

}
4383 
4384 /*----------------------------------------------------------------------------
4385 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4386 | and significand `zSig', and returns the proper single-precision floating-
4387 | point value corresponding to the abstract input.  This routine is just like
4388 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4389 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4390 | floating-point exponent.
4391 *----------------------------------------------------------------------------*/
4392 
4393 static float32
4394  normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4395                               float_status *status)
4396 {
4397     int8_t shiftCount;
4398 
4399     shiftCount = clz32(zSig) - 1;
4400     return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4401                                status);
4402 
4403 }
4404 
4405 /*----------------------------------------------------------------------------
4406 | Normalizes the subnormal double-precision floating-point value represented
4407 | by the denormalized significand `aSig'.  The normalized exponent and
4408 | significand are stored at the locations pointed to by `zExpPtr' and
4409 | `zSigPtr', respectively.
4410 *----------------------------------------------------------------------------*/
4411 
static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /* Shift the leading set bit up to bit 52 (clz64 counts from bit
     * 63, and the significand sits below 11 spare high bits).
     */
    const int8_t shift = clz64(aSig) - 11;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4422 
4423 /*----------------------------------------------------------------------------
4424 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4425 | double-precision floating-point value, returning the result.  After being
4426 | shifted into the proper positions, the three fields are simply added
4427 | together to form the result.  This means that any integer portion of `zSig'
4428 | will be added into the exponent.  Since a properly normalized significand
4429 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4430 | than the desired result exponent whenever `zSig' is a complete, normalized
4431 | significand.
4432 *----------------------------------------------------------------------------*/
4433 
4434 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4435 {
4436 
4437     return make_float64(
4438         ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4439 
4440 }
4441 
4442 /*----------------------------------------------------------------------------
4443 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4444 | and significand `zSig', and returns the proper double-precision floating-
4445 | point value corresponding to the abstract input.  Ordinarily, the abstract
4446 | value is simply rounded and packed into the double-precision format, with
4447 | the inexact exception raised if the abstract input cannot be represented
4448 | exactly.  However, if the abstract value is too large, the overflow and
4449 | inexact exceptions are raised and an infinity or maximal finite value is
4450 | returned.  If the abstract value is too small, the input value is rounded to
4451 | a subnormal number, and the underflow and inexact exceptions are raised if
4452 | the abstract input cannot be represented exactly as a subnormal double-
4453 | precision floating-point number.
4454 |     The input significand `zSig' has its binary point between bits 62
4455 | and 61, which is 10 bits to the left of the usual location.  This shifted
4456 | significand must be normalized or smaller.  If `zSig' is not normalized,
4457 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4458 | and it must not require rounding.  In the usual case that `zSig' is
4459 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4460 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4461 | Binary Floating-Point Arithmetic.
4462 *----------------------------------------------------------------------------*/
4463 
static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Value added before dropping the 10 extra fraction bits:
     * half-ulp (0x200) for the nearest modes, 0x3ff to force rounding
     * away from zero in the directed modes, zero otherwise.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x200;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x3ff;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x3ff : 0;
        break;
    case float_round_to_odd:
        /* Round up only when the result bit would otherwise be even. */
        roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
        break;
    default:
        abort();
    }
    roundBits = zSig & 0x3FF;
    /* Exponent at or beyond the representable range (the unsigned
     * compare also catches negative zExp for the underflow path).
     */
    if ( 0x7FD <= (uint16_t) zExp ) {
        if (    ( 0x7FD < zExp )
             || (    ( zExp == 0x7FD )
                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /* Overflow: to infinity unless the mode rounds toward
             * zero (or is round-to-odd), in which case the largest
             * finite value is produced.
             */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat64(zSign, 0, 0);
            }
            /* Tininess detected before rounding, or the value is
             * still below the smallest normal after rounding.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x3FF;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>10;
    /* Nearest-even tie (discarded bits exactly half-ulp): clear the
     * low bit so the significand is even.
     */
    if (!(roundBits ^ 0x200) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat64( zSign, zExp, zSig );

}
4539 
4540 /*----------------------------------------------------------------------------
4541 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4542 | and significand `zSig', and returns the proper double-precision floating-
4543 | point value corresponding to the abstract input.  This routine is just like
4544 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4545 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4546 | floating-point exponent.
4547 *----------------------------------------------------------------------------*/
4548 
4549 static float64
4550  normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4551                               float_status *status)
4552 {
4553     int8_t shiftCount;
4554 
4555     shiftCount = clz64(zSig) - 1;
4556     return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4557                                status);
4558 
4559 }
4560 
4561 /*----------------------------------------------------------------------------
4562 | Normalizes the subnormal extended double-precision floating-point value
4563 | represented by the denormalized significand `aSig'.  The normalized exponent
4564 | and significand are stored at the locations pointed to by `zExpPtr' and
4565 | `zSigPtr', respectively.
4566 *----------------------------------------------------------------------------*/
4567 
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* floatx80 has an explicit integer bit, so the leading set bit is
     * shifted all the way up to bit 63.
     */
    const int8_t shift = clz64(aSig);

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4577 
4578 /*----------------------------------------------------------------------------
4579 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4580 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4581 | and returns the proper extended double-precision floating-point value
4582 | corresponding to the abstract input.  Ordinarily, the abstract value is
4583 | rounded and packed into the extended double-precision format, with the
4584 | inexact exception raised if the abstract input cannot be represented
4585 | exactly.  However, if the abstract value is too large, the overflow and
4586 | inexact exceptions are raised and an infinity or maximal finite value is
4587 | returned.  If the abstract value is too small, the input value is rounded to
4588 | a subnormal number, and the underflow and inexact exceptions are raised if
4589 | the abstract input cannot be represented exactly as a subnormal extended
4590 | double-precision floating-point number.
4591 |     If `roundingPrecision' is 32 or 64, the result is rounded to the same
4592 | number of bits as single or double precision, respectively.  Otherwise, the
4593 | result is rounded to the full precision of the extended double-precision
4594 | format.
4595 |     The input significand must be normalized or smaller.  If the input
4596 | significand is not normalized, `zExp' must be 0; in that case, the result
4597 | returned is a subnormal number, and it must not require rounding.  The
4598 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4599 | Floating-Point Arithmetic.
4600 *----------------------------------------------------------------------------*/
4601 
floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    if ( roundingPrecision == 80 ) goto precision80;
    /*
     * Reduced precision (32 or 64 bits of significand): roundIncrement is
     * half a ULP at the target precision and roundMask selects the bits
     * that will be discarded.
     */
    if ( roundingPrecision == 64 ) {
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
    }
    else if ( roundingPrecision == 32 ) {
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
    }
    else {
        goto precision80;
    }
    /* Fold all of zSig1 into the lowest bit of zSig0 as a sticky bit. */
    zSig0 |= ( zSig1 != 0 );
    /* For directed roundings, adjust the increment by sign of the result. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    /* Unsigned compare catches both zExp > 0x7FFE and zExp <= 0. */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            /* Result is subnormal at the target precision. */
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloatx80(zSign, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < 0 )
                  || (zSig0 <= zSig0 + roundIncrement);
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            /* Rounding may carry into the integer bit: back to exponent 1. */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            /* Ties-to-even: widen the mask to also clear the LSB kept. */
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    /* Normal-range, reduced-precision result. */
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    /* Carry out of bit 63: renormalize to 1.0 * 2^(zExp+1). */
    if ( zSig0 < roundIncrement ) {
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    roundIncrement = roundMask + 1;
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    /*
     * Full 64-bit significand: zSig1 holds the round and sticky bits.
     * Decide whether they push the result up.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || (    ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            roundMask = 0;
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Rounding away from infinity gives the maximal finite value. */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( zExp <= 0 ) {
            /* Subnormal: shift right, jamming lost bits into zSig1. */
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            /* Re-derive the increment decision after the shift. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                /* Exact tie under nearest-even: force an even LSB. */
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                /* Carry into the integer bit: result is normal again. */
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        if ( zSig0 == 0 ) {
            /* Carry out of the significand: renormalize upward. */
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );

}
4789 
4790 /*----------------------------------------------------------------------------
4791 | Takes an abstract floating-point value having sign `zSign', exponent
4792 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4793 | and returns the proper extended double-precision floating-point value
4794 | corresponding to the abstract input.  This routine is just like
4795 | `roundAndPackFloatx80' except that the input significand does not have to be
4796 | normalized.
4797 *----------------------------------------------------------------------------*/
4798 
4799 floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4800                                        bool zSign, int32_t zExp,
4801                                        uint64_t zSig0, uint64_t zSig1,
4802                                        float_status *status)
4803 {
4804     int8_t shiftCount;
4805 
4806     if ( zSig0 == 0 ) {
4807         zSig0 = zSig1;
4808         zSig1 = 0;
4809         zExp -= 64;
4810     }
4811     shiftCount = clz64(zSig0);
4812     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4813     zExp -= shiftCount;
4814     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4815                                 zSig0, zSig1, status);
4816 
4817 }
4818 
4819 /*----------------------------------------------------------------------------
4820 | Returns the least-significant 64 fraction bits of the quadruple-precision
4821 | floating-point value `a'.
4822 *----------------------------------------------------------------------------*/
4823 
4824 static inline uint64_t extractFloat128Frac1( float128 a )
4825 {
4826 
4827     return a.low;
4828 
4829 }
4830 
4831 /*----------------------------------------------------------------------------
4832 | Returns the most-significant 48 fraction bits of the quadruple-precision
4833 | floating-point value `a'.
4834 *----------------------------------------------------------------------------*/
4835 
4836 static inline uint64_t extractFloat128Frac0( float128 a )
4837 {
4838 
4839     return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4840 
4841 }
4842 
4843 /*----------------------------------------------------------------------------
4844 | Returns the exponent bits of the quadruple-precision floating-point value
4845 | `a'.
4846 *----------------------------------------------------------------------------*/
4847 
4848 static inline int32_t extractFloat128Exp( float128 a )
4849 {
4850 
4851     return ( a.high>>48 ) & 0x7FFF;
4852 
4853 }
4854 
4855 /*----------------------------------------------------------------------------
4856 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4857 *----------------------------------------------------------------------------*/
4858 
4859 static inline bool extractFloat128Sign(float128 a)
4860 {
4861     return a.high >> 63;
4862 }
4863 
4864 /*----------------------------------------------------------------------------
4865 | Normalizes the subnormal quadruple-precision floating-point value
4866 | represented by the denormalized significand formed by the concatenation of
4867 | `aSig0' and `aSig1'.  The normalized exponent is stored at the location
4868 | pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
4869 | significand are stored at the location pointed to by `zSig0Ptr', and the
4870 | least significant 64 bits of the normalized significand are stored at the
4871 | location pointed to by `zSig1Ptr'.
4872 *----------------------------------------------------------------------------*/
4873 
static void normalizeFloat128Subnormal(uint64_t aSig0, uint64_t aSig1,
                                       int32_t *zExpPtr, uint64_t *zSig0Ptr,
                                       uint64_t *zSig1Ptr)
{
    int8_t shift;

    if (aSig0 != 0) {
        /* Leading one is in the high word: move it up to bit 48. */
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
    } else {
        /* High word is empty: the low word supplies every significant bit. */
        shift = clz64(aSig1) - 15;
        if (shift < 0) {
            /* Too many bits for the high word alone: split across both. */
            *zSig0Ptr = aSig1 >> -shift;
            *zSig1Ptr = aSig1 << (shift & 63);
        } else {
            *zSig0Ptr = aSig1 << shift;
            *zSig1Ptr = 0;
        }
        *zExpPtr = -shift - 63;
    }
}
4904 
4905 /*----------------------------------------------------------------------------
4906 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4907 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4908 | floating-point value, returning the result.  After being shifted into the
4909 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4910 | added together to form the most significant 32 bits of the result.  This
4911 | means that any integer portion of `zSig0' will be added into the exponent.
4912 | Since a properly normalized significand will have an integer portion equal
4913 | to 1, the `zExp' input should be 1 less than the desired result exponent
4914 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4915 | significand.
4916 *----------------------------------------------------------------------------*/
4917 
4918 static inline float128
4919 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4920 {
4921     float128 z;
4922 
4923     z.low = zSig1;
4924     z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4925     return z;
4926 }
4927 
4928 /*----------------------------------------------------------------------------
4929 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4930 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4931 | and `zSig2', and returns the proper quadruple-precision floating-point value
4932 | corresponding to the abstract input.  Ordinarily, the abstract value is
4933 | simply rounded and packed into the quadruple-precision format, with the
4934 | inexact exception raised if the abstract input cannot be represented
4935 | exactly.  However, if the abstract value is too large, the overflow and
4936 | inexact exceptions are raised and an infinity or maximal finite value is
4937 | returned.  If the abstract value is too small, the input value is rounded to
4938 | a subnormal number, and the underflow and inexact exceptions are raised if
4939 | the abstract input cannot be represented exactly as a subnormal quadruple-
4940 | precision floating-point number.
4941 |     The input significand must be normalized or smaller.  If the input
4942 | significand is not normalized, `zExp' must be 0; in that case, the result
4943 | returned is a subnormal number, and it must not require rounding.  In the
4944 | usual case that the input significand is normalized, `zExp' must be 1 less
4945 | than the ``true'' floating-point exponent.  The handling of underflow and
4946 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4947 *----------------------------------------------------------------------------*/
4948 
static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
                                     uint64_t zSig0, uint64_t zSig1,
                                     uint64_t zSig2, float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Decide whether the round/sticky bits in zSig2 push the result up. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig2 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig2;
        break;
    case float_round_down:
        increment = zSign && zSig2;
        break;
    case float_round_to_odd:
        increment = !(zSig1 & 0x1) && zSig2;
        break;
    default:
        abort();
    }
    /* Unsigned compare catches both huge zExp and negative zExp. */
    if ( 0x7FFD <= (uint32_t) zExp ) {
        if (    ( 0x7FFD < zExp )
             || (    ( zExp == 0x7FFD )
                  && eq128(
                         UINT64_C(0x0001FFFFFFFFFFFF),
                         UINT64_C(0xFFFFFFFFFFFFFFFF),
                         zSig0,
                         zSig1
                     )
                  && increment
                )
           ) {
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Rounding away from infinity yields the maximal finite value. */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
                 || (roundingMode == float_round_to_odd)
               ) {
                return
                    packFloat128(
                        zSign,
                        0x7FFE,
                        UINT64_C(0x0000FFFFFFFFFFFF),
                        UINT64_C(0xFFFFFFFFFFFFFFFF)
                    );
            }
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        if ( zExp < 0 ) {
            /* Result is subnormal. */
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat128(zSign, 0, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || !increment
                  || lt128(zSig0, zSig1,
                           UINT64_C(0x0001FFFFFFFFFFFF),
                           UINT64_C(0xFFFFFFFFFFFFFFFF));
            /* Shift into subnormal position, jamming lost bits into zSig2. */
            shift128ExtraRightJamming(
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
            zExp = 0;
            if (isTiny && zSig2) {
                float_raise(float_flag_underflow, status);
            }
            /* Re-derive the increment decision after the shift. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig2 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig2;
                break;
            case float_round_down:
                increment = zSign && zSig2;
                break;
            case float_round_to_odd:
                increment = !(zSig1 & 0x1) && zSig2;
                break;
            default:
                abort();
            }
        }
    }
    if (zSig2) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
        /* Exact tie under nearest-even: force an even low bit. */
        if ((zSig2 + zSig2 == 0) && roundNearestEven) {
            zSig1 &= ~1;
        }
    }
    else {
        /* A zero significand must be packed with a zero exponent. */
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
    }
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}
5060 
5061 /*----------------------------------------------------------------------------
5062 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
5063 | and significand formed by the concatenation of `zSig0' and `zSig1', and
5064 | returns the proper quadruple-precision floating-point value corresponding
5065 | to the abstract input.  This routine is just like `roundAndPackFloat128'
5066 | except that the input significand has fewer bits and does not have to be
5067 | normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
5068 | point exponent.
5069 *----------------------------------------------------------------------------*/
5070 
5071 static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
5072                                               uint64_t zSig0, uint64_t zSig1,
5073                                               float_status *status)
5074 {
5075     int8_t shiftCount;
5076     uint64_t zSig2;
5077 
5078     if ( zSig0 == 0 ) {
5079         zSig0 = zSig1;
5080         zSig1 = 0;
5081         zExp -= 64;
5082     }
5083     shiftCount = clz64(zSig0) - 15;
5084     if ( 0 <= shiftCount ) {
5085         zSig2 = 0;
5086         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5087     }
5088     else {
5089         shift128ExtraRightJamming(
5090             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
5091     }
5092     zExp -= shiftCount;
5093     return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
5094 
5095 }
5096 
5097 
5098 /*----------------------------------------------------------------------------
5099 | Returns the result of converting the 32-bit two's complement integer `a'
5100 | to the extended double-precision floating-point format.  The conversion
5101 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5102 | Arithmetic.
5103 *----------------------------------------------------------------------------*/
5104 
5105 floatx80 int32_to_floatx80(int32_t a, float_status *status)
5106 {
5107     bool zSign;
5108     uint32_t absA;
5109     int8_t shiftCount;
5110     uint64_t zSig;
5111 
5112     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5113     zSign = ( a < 0 );
5114     absA = zSign ? - a : a;
5115     shiftCount = clz32(absA) + 32;
5116     zSig = absA;
5117     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
5118 
5119 }
5120 
5121 /*----------------------------------------------------------------------------
5122 | Returns the result of converting the 32-bit two's complement integer `a' to
5123 | the quadruple-precision floating-point format.  The conversion is performed
5124 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5125 *----------------------------------------------------------------------------*/
5126 
5127 float128 int32_to_float128(int32_t a, float_status *status)
5128 {
5129     bool zSign;
5130     uint32_t absA;
5131     int8_t shiftCount;
5132     uint64_t zSig0;
5133 
5134     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
5135     zSign = ( a < 0 );
5136     absA = zSign ? - a : a;
5137     shiftCount = clz32(absA) + 17;
5138     zSig0 = absA;
5139     return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
5140 
5141 }
5142 
5143 /*----------------------------------------------------------------------------
5144 | Returns the result of converting the 64-bit two's complement integer `a'
5145 | to the extended double-precision floating-point format.  The conversion
5146 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5147 | Arithmetic.
5148 *----------------------------------------------------------------------------*/
5149 
5150 floatx80 int64_to_floatx80(int64_t a, float_status *status)
5151 {
5152     bool zSign;
5153     uint64_t absA;
5154     int8_t shiftCount;
5155 
5156     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5157     zSign = ( a < 0 );
5158     absA = zSign ? - a : a;
5159     shiftCount = clz64(absA);
5160     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
5161 
5162 }
5163 
5164 /*----------------------------------------------------------------------------
5165 | Returns the result of converting the 64-bit two's complement integer `a' to
5166 | the quadruple-precision floating-point format.  The conversion is performed
5167 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5168 *----------------------------------------------------------------------------*/
5169 
5170 float128 int64_to_float128(int64_t a, float_status *status)
5171 {
5172     bool zSign;
5173     uint64_t absA;
5174     int8_t shiftCount;
5175     int32_t zExp;
5176     uint64_t zSig0, zSig1;
5177 
5178     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
5179     zSign = ( a < 0 );
5180     absA = zSign ? - a : a;
5181     shiftCount = clz64(absA) + 49;
5182     zExp = 0x406E - shiftCount;
5183     if ( 64 <= shiftCount ) {
5184         zSig1 = 0;
5185         zSig0 = absA;
5186         shiftCount -= 64;
5187     }
5188     else {
5189         zSig1 = absA;
5190         zSig0 = 0;
5191     }
5192     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5193     return packFloat128( zSign, zExp, zSig0, zSig1 );
5194 
5195 }
5196 
5197 /*----------------------------------------------------------------------------
5198 | Returns the result of converting the 64-bit unsigned integer `a'
5199 | to the quadruple-precision floating-point format.  The conversion is performed
5200 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5201 *----------------------------------------------------------------------------*/
5202 
5203 float128 uint64_to_float128(uint64_t a, float_status *status)
5204 {
5205     if (a == 0) {
5206         return float128_zero;
5207     }
5208     return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
5209 }
5210 
5211 /*----------------------------------------------------------------------------
5212 | Returns the result of converting the single-precision floating-point value
5213 | `a' to the extended double-precision floating-point format.  The conversion
5214 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5215 | Arithmetic.
5216 *----------------------------------------------------------------------------*/
5217 
floatx80 float32_to_floatx80(float32 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint32_t aSig;

    a = float32_squash_input_denormal(a, status);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    if ( aExp == 0xFF ) {
        if (aSig) {
            /* NaN: convert the payload, then silence it for the new format. */
            floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        /* Infinity. */
        return packFloatx80(aSign,
                            floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        /* Zero stays zero; subnormals are renormalized first. */
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    /*
     * Make the implicit integer bit explicit and rebias: 0x3F80 is the
     * difference between the floatx80 bias (0x3FFF) and float32 bias (0x7F).
     * Shifting by 40 left-justifies the 24-bit significand in 64 bits.
     */
    aSig |= 0x00800000;
    return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );

}
5246 
5247 /*----------------------------------------------------------------------------
5248 | Returns the remainder of the single-precision floating-point value `a'
5249 | with respect to the corresponding value `b'.  The operation is performed
5250 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5251 *----------------------------------------------------------------------------*/
5252 
float32 float32_rem(float32 a, float32 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint32_t aSig, bSig;
    uint32_t q;
    uint64_t aSig64, bSig64, q64;
    uint32_t alternateASig;
    int32_t sigMean;
    a = float32_squash_input_denormal(a, status);
    b = float32_squash_input_denormal(b, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    /* rem(NaN, x), rem(x, NaN) and rem(inf, x) are invalid. */
    if ( aExp == 0xFF ) {
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
            return propagateFloat32NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    /* rem(x, inf) is x itself. */
    if ( bExp == 0xFF ) {
        if (bSig) {
            return propagateFloat32NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) {
        /* rem(x, 0) is invalid; subnormal divisors are renormalized. */
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float32_default_nan(status);
        }
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the implicit integer bits explicit. */
    aSig |= 0x00800000;
    bSig |= 0x00800000;
    if ( expDiff < 32 ) {
        /* Small exponent gap: the quotient fits in 32-bit arithmetic. */
        aSig <<= 8;
        bSig <<= 8;
        if ( expDiff < 0 ) {
            /* |a| is already smaller than |b|/2: a is the remainder. */
            if ( expDiff < -1 ) return a;
            aSig >>= 1;
        }
        q = ( bSig <= aSig );
        if ( q ) aSig -= bSig;
        if ( 0 < expDiff ) {
            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
            q >>= 32 - expDiff;
            bSig >>= 2;
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
        }
        else {
            aSig >>= 2;
            bSig >>= 2;
        }
    }
    else {
        /* Large gap: reduce iteratively with 64-bit estimated quotients. */
        if ( bSig <= aSig ) aSig -= bSig;
        aSig64 = ( (uint64_t) aSig )<<40;
        bSig64 = ( (uint64_t) bSig )<<40;
        expDiff -= 64;
        while ( 0 < expDiff ) {
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
            /* Back the estimate off so it never exceeds the true quotient. */
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
            aSig64 = - ( ( bSig * q64 )<<38 );
            expDiff -= 62;
        }
        expDiff += 64;
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
        q = q64>>( 64 - expDiff );
        bSig <<= 6;
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
    }
    /* Step past zero to find the two candidate remainders around it. */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int32_t) aSig );
    /* Pick the candidate nearer zero; break exact ties toward even q. */
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int32_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
}
5348 
5349 
5350 
5351 /*----------------------------------------------------------------------------
5352 | Returns the binary exponential of the single-precision floating-point value
5353 | `a'. The operation is performed according to the IEC/IEEE Standard for
5354 | Binary Floating-Point Arithmetic.
5355 |
5356 | Uses the following identities:
5357 |
5358 | 1. -------------------------------------------------------------------------
5359 |      x    x*ln(2)
5360 |     2  = e
5361 |
5362 | 2. -------------------------------------------------------------------------
5363 |                      2     3     4     5           n
5364 |      x        x     x     x     x     x           x
5365 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5366 |               1!    2!    3!    4!    5!          n!
5367 *----------------------------------------------------------------------------*/
5368 
/* Taylor-series coefficients 1/n! of e**x for n = 1..15, stored as raw
 * IEEE-754 double-precision bit patterns; the index comment gives n. */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /*  1 */
    const_float64( 0x3fe0000000000000ll ), /*  2 */
    const_float64( 0x3fc5555555555555ll ), /*  3 */
    const_float64( 0x3fa5555555555555ll ), /*  4 */
    const_float64( 0x3f81111111111111ll ), /*  5 */
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};
5387 
5388 float32 float32_exp2(float32 a, float_status *status)
5389 {
5390     bool aSign;
5391     int aExp;
5392     uint32_t aSig;
5393     float64 r, x, xn;
5394     int i;
5395     a = float32_squash_input_denormal(a, status);
5396 
5397     aSig = extractFloat32Frac( a );
5398     aExp = extractFloat32Exp( a );
5399     aSign = extractFloat32Sign( a );
5400 
5401     if ( aExp == 0xFF) {
5402         if (aSig) {
5403             return propagateFloat32NaN(a, float32_zero, status);
5404         }
5405         return (aSign) ? float32_zero : a;
5406     }
5407     if (aExp == 0) {
5408         if (aSig == 0) return float32_one;
5409     }
5410 
5411     float_raise(float_flag_inexact, status);
5412 
5413     /* ******************************* */
5414     /* using float64 for approximation */
5415     /* ******************************* */
5416     x = float32_to_float64(a, status);
5417     x = float64_mul(x, float64_ln2, status);
5418 
5419     xn = x;
5420     r = float64_one;
5421     for (i = 0 ; i < 15 ; i++) {
5422         float64 f;
5423 
5424         f = float64_mul(xn, float32_exp2_coefficients[i], status);
5425         r = float64_add(r, f, status);
5426 
5427         xn = float64_mul(xn, x, status);
5428     }
5429 
5430     return float64_to_float32(r, status);
5431 }
5432 
5433 /*----------------------------------------------------------------------------
5434 | Returns the binary log of the single-precision floating-point value `a'.
5435 | The operation is performed according to the IEC/IEEE Standard for Binary
5436 | Floating-Point Arithmetic.
5437 *----------------------------------------------------------------------------*/
5438 float32 float32_log2(float32 a, float_status *status)
5439 {
5440     bool aSign, zSign;
5441     int aExp;
5442     uint32_t aSig, zSig, i;
5443 
5444     a = float32_squash_input_denormal(a, status);
5445     aSig = extractFloat32Frac( a );
5446     aExp = extractFloat32Exp( a );
5447     aSign = extractFloat32Sign( a );
5448 
5449     if ( aExp == 0 ) {
5450         if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5451         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5452     }
5453     if ( aSign ) {
5454         float_raise(float_flag_invalid, status);
5455         return float32_default_nan(status);
5456     }
5457     if ( aExp == 0xFF ) {
5458         if (aSig) {
5459             return propagateFloat32NaN(a, float32_zero, status);
5460         }
5461         return a;
5462     }
5463 
5464     aExp -= 0x7F;
5465     aSig |= 0x00800000;
5466     zSign = aExp < 0;
5467     zSig = aExp << 23;
5468 
5469     for (i = 1 << 22; i > 0; i >>= 1) {
5470         aSig = ( (uint64_t)aSig * aSig ) >> 23;
5471         if ( aSig & 0x01000000 ) {
5472             aSig >>= 1;
5473             zSig |= i;
5474         }
5475     }
5476 
5477     if ( zSign )
5478         zSig = -zSig;
5479 
5480     return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
5481 }
5482 
5483 /*----------------------------------------------------------------------------
5484 | Returns the result of converting the double-precision floating-point value
5485 | `a' to the extended double-precision floating-point format.  The conversion
5486 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5487 | Arithmetic.
5488 *----------------------------------------------------------------------------*/
5489 
5490 floatx80 float64_to_floatx80(float64 a, float_status *status)
5491 {
5492     bool aSign;
5493     int aExp;
5494     uint64_t aSig;
5495 
5496     a = float64_squash_input_denormal(a, status);
5497     aSig = extractFloat64Frac( a );
5498     aExp = extractFloat64Exp( a );
5499     aSign = extractFloat64Sign( a );
5500     if ( aExp == 0x7FF ) {
5501         if (aSig) {
5502             floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5503                                                status);
5504             return floatx80_silence_nan(res, status);
5505         }
5506         return packFloatx80(aSign,
5507                             floatx80_infinity_high,
5508                             floatx80_infinity_low);
5509     }
5510     if ( aExp == 0 ) {
5511         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5512         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5513     }
5514     return
5515         packFloatx80(
5516             aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
5517 
5518 }
5519 
5520 /*----------------------------------------------------------------------------
5521 | Returns the remainder of the double-precision floating-point value `a'
5522 | with respect to the corresponding value `b'.  The operation is performed
5523 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5524 *----------------------------------------------------------------------------*/
5525 
float64 float64_rem(float64 a, float64 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint64_t aSig, bSig;
    uint64_t q, alternateASig;
    int64_t sigMean;

    a = float64_squash_input_denormal(a, status);
    b = float64_squash_input_denormal(b, status);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    /* rem(NaN, y), rem(x, NaN) propagate; rem(inf, y) is invalid */
    if ( aExp == 0x7FF ) {
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            return propagateFloat64NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    if ( bExp == 0x7FF ) {
        if (bSig) {
            return propagateFloat64NaN(a, b, status);
        }
        return a;    /* rem(finite, inf) = the finite operand */
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* rem(x, 0) is invalid */
            float_raise(float_flag_invalid, status);
            return float64_default_nan(status);
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;    /* rem(+/-0, y) = +/-0 */
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Left-justify both significands with the integer bit made explicit */
    aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
    bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
    if ( expDiff < 0 ) {
        if ( expDiff < -1 ) return a;    /* |a| < |b|/2: a is its own remainder */
        aSig >>= 1;
    }
    /* Handles the expDiff in {-1, 0} case with one explicit subtraction */
    q = ( bSig <= aSig );
    if ( q ) aSig -= bSig;
    /* Reduce up to 62 exponent bits per iteration with an estimated
     * quotient digit; the estimate is biased low by 2 so the partial
     * remainder never goes negative here. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        aSig = - ( ( bSig>>2 ) * q );
        expDiff -= 62;
    }
    /* Final partial reduction for the remaining 0..64 exponent bits */
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        bSig >>= 2;
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
    }
    else {
        aSig >>= 2;
        bSig >>= 2;
    }
    /* Subtract until the remainder goes negative, then choose between the
     * last two candidates: nearest wins, ties go to the even quotient.
     * This yields the IEEE (round-to-nearest) remainder, which may be
     * negative relative to a's sign. */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int64_t) aSig );
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int64_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);

}
5607 
5608 /*----------------------------------------------------------------------------
5609 | Returns the binary log of the double-precision floating-point value `a'.
5610 | The operation is performed according to the IEC/IEEE Standard for Binary
5611 | Floating-Point Arithmetic.
5612 *----------------------------------------------------------------------------*/
float64 float64_log2(float64 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint64_t aSig, aSig0, aSig1, zSig, i;
    a = float64_squash_input_denormal(a, status);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );  /* log2(+/-0) = -inf */
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    if ( aSign ) {
        /* log2 of a negative number is invalid */
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    if ( aExp == 0x7FF ) {
        if (aSig) {
            return propagateFloat64NaN(a, float64_zero, status);
        }
        return a;    /* log2(+inf) = +inf */
    }

    aExp -= 0x3FF;                           /* integer part of the result */
    aSig |= UINT64_C(0x0010000000000000);    /* explicit integer bit */
    zSign = aExp < 0;
    zSig = (uint64_t)aExp << 52;
    /* Generate the fraction bits by repeated squaring: squaring the
     * significand doubles its log2, so a carry into bit 53 means the
     * next result bit is 1. */
    for (i = 1LL << 51; i > 0; i >>= 1) {
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
        if ( aSig & UINT64_C(0x0020000000000000) ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    if ( zSign )
        zSig = -zSig;
    return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
}
5656 
5657 /*----------------------------------------------------------------------------
5658 | Returns the result of converting the extended double-precision floating-
5659 | point value `a' to the 32-bit two's complement integer format.  The
5660 | conversion is performed according to the IEC/IEEE Standard for Binary
5661 | Floating-Point Arithmetic---which means in particular that the conversion
5662 | is rounded according to the current rounding mode.  If `a' is a NaN, the
5663 | largest positive integer is returned.  Otherwise, if the conversion
5664 | overflows, the largest integer with the same sign as `a' is returned.
5665 *----------------------------------------------------------------------------*/
5666 
5667 int32_t floatx80_to_int32(floatx80 a, float_status *status)
5668 {
5669     bool aSign;
5670     int32_t aExp, shiftCount;
5671     uint64_t aSig;
5672 
5673     if (floatx80_invalid_encoding(a)) {
5674         float_raise(float_flag_invalid, status);
5675         return 1 << 31;
5676     }
5677     aSig = extractFloatx80Frac( a );
5678     aExp = extractFloatx80Exp( a );
5679     aSign = extractFloatx80Sign( a );
5680     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5681     shiftCount = 0x4037 - aExp;
5682     if ( shiftCount <= 0 ) shiftCount = 1;
5683     shift64RightJamming( aSig, shiftCount, &aSig );
5684     return roundAndPackInt32(aSign, aSig, status);
5685 
5686 }
5687 
5688 /*----------------------------------------------------------------------------
5689 | Returns the result of converting the extended double-precision floating-
5690 | point value `a' to the 32-bit two's complement integer format.  The
5691 | conversion is performed according to the IEC/IEEE Standard for Binary
5692 | Floating-Point Arithmetic, except that the conversion is always rounded
5693 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5694 | Otherwise, if the conversion overflows, the largest integer with the same
5695 | sign as `a' is returned.
5696 *----------------------------------------------------------------------------*/
5697 
5698 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5699 {
5700     bool aSign;
5701     int32_t aExp, shiftCount;
5702     uint64_t aSig, savedASig;
5703     int32_t z;
5704 
5705     if (floatx80_invalid_encoding(a)) {
5706         float_raise(float_flag_invalid, status);
5707         return 1 << 31;
5708     }
5709     aSig = extractFloatx80Frac( a );
5710     aExp = extractFloatx80Exp( a );
5711     aSign = extractFloatx80Sign( a );
5712     if ( 0x401E < aExp ) {
5713         if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5714         goto invalid;
5715     }
5716     else if ( aExp < 0x3FFF ) {
5717         if (aExp || aSig) {
5718             float_raise(float_flag_inexact, status);
5719         }
5720         return 0;
5721     }
5722     shiftCount = 0x403E - aExp;
5723     savedASig = aSig;
5724     aSig >>= shiftCount;
5725     z = aSig;
5726     if ( aSign ) z = - z;
5727     if ( ( z < 0 ) ^ aSign ) {
5728  invalid:
5729         float_raise(float_flag_invalid, status);
5730         return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5731     }
5732     if ( ( aSig<<shiftCount ) != savedASig ) {
5733         float_raise(float_flag_inexact, status);
5734     }
5735     return z;
5736 
5737 }
5738 
5739 /*----------------------------------------------------------------------------
5740 | Returns the result of converting the extended double-precision floating-
5741 | point value `a' to the 64-bit two's complement integer format.  The
5742 | conversion is performed according to the IEC/IEEE Standard for Binary
5743 | Floating-Point Arithmetic---which means in particular that the conversion
5744 | is rounded according to the current rounding mode.  If `a' is a NaN,
5745 | the largest positive integer is returned.  Otherwise, if the conversion
5746 | overflows, the largest integer with the same sign as `a' is returned.
5747 *----------------------------------------------------------------------------*/
5748 
5749 int64_t floatx80_to_int64(floatx80 a, float_status *status)
5750 {
5751     bool aSign;
5752     int32_t aExp, shiftCount;
5753     uint64_t aSig, aSigExtra;
5754 
5755     if (floatx80_invalid_encoding(a)) {
5756         float_raise(float_flag_invalid, status);
5757         return 1ULL << 63;
5758     }
5759     aSig = extractFloatx80Frac( a );
5760     aExp = extractFloatx80Exp( a );
5761     aSign = extractFloatx80Sign( a );
5762     shiftCount = 0x403E - aExp;
5763     if ( shiftCount <= 0 ) {
5764         if ( shiftCount ) {
5765             float_raise(float_flag_invalid, status);
5766             if (!aSign || floatx80_is_any_nan(a)) {
5767                 return INT64_MAX;
5768             }
5769             return INT64_MIN;
5770         }
5771         aSigExtra = 0;
5772     }
5773     else {
5774         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5775     }
5776     return roundAndPackInt64(aSign, aSig, aSigExtra, status);
5777 
5778 }
5779 
5780 /*----------------------------------------------------------------------------
5781 | Returns the result of converting the extended double-precision floating-
5782 | point value `a' to the 64-bit two's complement integer format.  The
5783 | conversion is performed according to the IEC/IEEE Standard for Binary
5784 | Floating-Point Arithmetic, except that the conversion is always rounded
5785 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5786 | Otherwise, if the conversion overflows, the largest integer with the same
5787 | sign as `a' is returned.
5788 *----------------------------------------------------------------------------*/
5789 
5790 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
5791 {
5792     bool aSign;
5793     int32_t aExp, shiftCount;
5794     uint64_t aSig;
5795     int64_t z;
5796 
5797     if (floatx80_invalid_encoding(a)) {
5798         float_raise(float_flag_invalid, status);
5799         return 1ULL << 63;
5800     }
5801     aSig = extractFloatx80Frac( a );
5802     aExp = extractFloatx80Exp( a );
5803     aSign = extractFloatx80Sign( a );
5804     shiftCount = aExp - 0x403E;
5805     if ( 0 <= shiftCount ) {
5806         aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
5807         if ( ( a.high != 0xC03E ) || aSig ) {
5808             float_raise(float_flag_invalid, status);
5809             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5810                 return INT64_MAX;
5811             }
5812         }
5813         return INT64_MIN;
5814     }
5815     else if ( aExp < 0x3FFF ) {
5816         if (aExp | aSig) {
5817             float_raise(float_flag_inexact, status);
5818         }
5819         return 0;
5820     }
5821     z = aSig>>( - shiftCount );
5822     if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
5823         float_raise(float_flag_inexact, status);
5824     }
5825     if ( aSign ) z = - z;
5826     return z;
5827 
5828 }
5829 
5830 /*----------------------------------------------------------------------------
5831 | Returns the result of converting the extended double-precision floating-
5832 | point value `a' to the single-precision floating-point format.  The
5833 | conversion is performed according to the IEC/IEEE Standard for Binary
5834 | Floating-Point Arithmetic.
5835 *----------------------------------------------------------------------------*/
5836 
5837 float32 floatx80_to_float32(floatx80 a, float_status *status)
5838 {
5839     bool aSign;
5840     int32_t aExp;
5841     uint64_t aSig;
5842 
5843     if (floatx80_invalid_encoding(a)) {
5844         float_raise(float_flag_invalid, status);
5845         return float32_default_nan(status);
5846     }
5847     aSig = extractFloatx80Frac( a );
5848     aExp = extractFloatx80Exp( a );
5849     aSign = extractFloatx80Sign( a );
5850     if ( aExp == 0x7FFF ) {
5851         if ( (uint64_t) ( aSig<<1 ) ) {
5852             float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5853                                              status);
5854             return float32_silence_nan(res, status);
5855         }
5856         return packFloat32( aSign, 0xFF, 0 );
5857     }
5858     shift64RightJamming( aSig, 33, &aSig );
5859     if ( aExp || aSig ) aExp -= 0x3F81;
5860     return roundAndPackFloat32(aSign, aExp, aSig, status);
5861 
5862 }
5863 
5864 /*----------------------------------------------------------------------------
5865 | Returns the result of converting the extended double-precision floating-
5866 | point value `a' to the double-precision floating-point format.  The
5867 | conversion is performed according to the IEC/IEEE Standard for Binary
5868 | Floating-Point Arithmetic.
5869 *----------------------------------------------------------------------------*/
5870 
5871 float64 floatx80_to_float64(floatx80 a, float_status *status)
5872 {
5873     bool aSign;
5874     int32_t aExp;
5875     uint64_t aSig, zSig;
5876 
5877     if (floatx80_invalid_encoding(a)) {
5878         float_raise(float_flag_invalid, status);
5879         return float64_default_nan(status);
5880     }
5881     aSig = extractFloatx80Frac( a );
5882     aExp = extractFloatx80Exp( a );
5883     aSign = extractFloatx80Sign( a );
5884     if ( aExp == 0x7FFF ) {
5885         if ( (uint64_t) ( aSig<<1 ) ) {
5886             float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5887                                              status);
5888             return float64_silence_nan(res, status);
5889         }
5890         return packFloat64( aSign, 0x7FF, 0 );
5891     }
5892     shift64RightJamming( aSig, 1, &zSig );
5893     if ( aExp || aSig ) aExp -= 0x3C01;
5894     return roundAndPackFloat64(aSign, aExp, zSig, status);
5895 
5896 }
5897 
5898 /*----------------------------------------------------------------------------
5899 | Returns the result of converting the extended double-precision floating-
5900 | point value `a' to the quadruple-precision floating-point format.  The
5901 | conversion is performed according to the IEC/IEEE Standard for Binary
5902 | Floating-Point Arithmetic.
5903 *----------------------------------------------------------------------------*/
5904 
5905 float128 floatx80_to_float128(floatx80 a, float_status *status)
5906 {
5907     bool aSign;
5908     int aExp;
5909     uint64_t aSig, zSig0, zSig1;
5910 
5911     if (floatx80_invalid_encoding(a)) {
5912         float_raise(float_flag_invalid, status);
5913         return float128_default_nan(status);
5914     }
5915     aSig = extractFloatx80Frac( a );
5916     aExp = extractFloatx80Exp( a );
5917     aSign = extractFloatx80Sign( a );
5918     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5919         float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5920                                            status);
5921         return float128_silence_nan(res, status);
5922     }
5923     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5924     return packFloat128( aSign, aExp, zSig0, zSig1 );
5925 
5926 }
5927 
5928 /*----------------------------------------------------------------------------
5929 | Rounds the extended double-precision floating-point value `a'
5930 | to the precision provided by floatx80_rounding_precision and returns the
5931 | result as an extended double-precision floating-point value.
5932 | The operation is performed according to the IEC/IEEE Standard for Binary
5933 | Floating-Point Arithmetic.
5934 *----------------------------------------------------------------------------*/
5935 
5936 floatx80 floatx80_round(floatx80 a, float_status *status)
5937 {
5938     return roundAndPackFloatx80(status->floatx80_rounding_precision,
5939                                 extractFloatx80Sign(a),
5940                                 extractFloatx80Exp(a),
5941                                 extractFloatx80Frac(a), 0, status);
5942 }
5943 
5944 /*----------------------------------------------------------------------------
5945 | Rounds the extended double-precision floating-point value `a' to an integer,
| and returns the result as an extended double-precision floating-point
5947 | value.  The operation is performed according to the IEC/IEEE Standard for
5948 | Binary Floating-Point Arithmetic.
5949 *----------------------------------------------------------------------------*/
5950 
5951 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5952 {
5953     bool aSign;
5954     int32_t aExp;
5955     uint64_t lastBitMask, roundBitsMask;
5956     floatx80 z;
5957 
5958     if (floatx80_invalid_encoding(a)) {
5959         float_raise(float_flag_invalid, status);
5960         return floatx80_default_nan(status);
5961     }
5962     aExp = extractFloatx80Exp( a );
5963     if ( 0x403E <= aExp ) {
5964         if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5965             return propagateFloatx80NaN(a, a, status);
5966         }
5967         return a;
5968     }
5969     if ( aExp < 0x3FFF ) {
5970         if (    ( aExp == 0 )
5971              && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
5972             return a;
5973         }
5974         float_raise(float_flag_inexact, status);
5975         aSign = extractFloatx80Sign( a );
5976         switch (status->float_rounding_mode) {
5977          case float_round_nearest_even:
5978             if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5979                ) {
5980                 return
5981                     packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5982             }
5983             break;
5984         case float_round_ties_away:
5985             if (aExp == 0x3FFE) {
5986                 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5987             }
5988             break;
5989          case float_round_down:
5990             return
5991                   aSign ?
5992                       packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5993                 : packFloatx80( 0, 0, 0 );
5994          case float_round_up:
5995             return
5996                   aSign ? packFloatx80( 1, 0, 0 )
5997                 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5998 
5999         case float_round_to_zero:
6000             break;
6001         default:
6002             g_assert_not_reached();
6003         }
6004         return packFloatx80( aSign, 0, 0 );
6005     }
6006     lastBitMask = 1;
6007     lastBitMask <<= 0x403E - aExp;
6008     roundBitsMask = lastBitMask - 1;
6009     z = a;
6010     switch (status->float_rounding_mode) {
6011     case float_round_nearest_even:
6012         z.low += lastBitMask>>1;
6013         if ((z.low & roundBitsMask) == 0) {
6014             z.low &= ~lastBitMask;
6015         }
6016         break;
6017     case float_round_ties_away:
6018         z.low += lastBitMask >> 1;
6019         break;
6020     case float_round_to_zero:
6021         break;
6022     case float_round_up:
6023         if (!extractFloatx80Sign(z)) {
6024             z.low += roundBitsMask;
6025         }
6026         break;
6027     case float_round_down:
6028         if (extractFloatx80Sign(z)) {
6029             z.low += roundBitsMask;
6030         }
6031         break;
6032     default:
6033         abort();
6034     }
6035     z.low &= ~ roundBitsMask;
6036     if ( z.low == 0 ) {
6037         ++z.high;
6038         z.low = UINT64_C(0x8000000000000000);
6039     }
6040     if (z.low != a.low) {
6041         float_raise(float_flag_inexact, status);
6042     }
6043     return z;
6044 
6045 }
6046 
6047 /*----------------------------------------------------------------------------
6048 | Returns the result of adding the absolute values of the extended double-
6049 | precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
6050 | negated before being returned.  `zSign' is ignored if the result is a NaN.
6051 | The addition is performed according to the IEC/IEEE Standard for Binary
6052 | Floating-Point Arithmetic.
6053 *----------------------------------------------------------------------------*/
6054 
/* Adds the magnitudes of `a' and `b' and applies sign `zSign' to the sum;
 * callers (floatx80_add/floatx80_sub) have already arranged the signs. */
static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* a has the larger exponent: align b's significand to it */
        if ( aExp == 0x7FFF ) {
            if ((uint64_t)(aSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;    /* inf + finite = inf */
        }
        if ( bExp == 0 ) --expDiff;    /* subnormal b: true exponent is 1 */
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* b has the larger exponent: align a's significand to it */
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( aExp == 0 ) ++expDiff;    /* subnormal a: true exponent is 1 */
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        /* Equal exponents: no alignment needed */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;    /* inf + inf (same sign) = inf */
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
                /* At least one of the values is a pseudo-denormal,
                 * and there is a carry out of the result.  */
                zExp = 1;
                goto shiftRight1;
            }
            if (zSig0 == 0) {
                return packFloatx80(zSign, 0, 0);
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        goto shiftRight1;
    }
    zSig0 = aSig + bSig;
    /* Top bit set means no carry out: the sum is already normalized */
    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    /* Carry out of the significand: shift right one and bump the exponent */
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= UINT64_C(0x8000000000000000);
    ++zExp;
 roundAndPack:
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6126 
6127 /*----------------------------------------------------------------------------
6128 | Returns the result of subtracting the absolute values of the extended
6129 | double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
6130 | difference is negated before being returned.  `zSign' is ignored if the
6131 | result is a NaN.  The subtraction is performed according to the IEC/IEEE
6132 | Standard for Binary Floating-Point Arithmetic.
6133 *----------------------------------------------------------------------------*/
6134 
/* Subtracts the magnitudes of `a' and `b' (|a| - |b|) and applies sign
 * `zSign' (flipped when |b| wins); callers have already arranged signs. */
static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    /* Equal exponents from here on */
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* inf - inf is invalid */
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        /* subnormals: true exponent is 1 */
        aExp = 1;
        bExp = 1;
    }
    zSig1 = 0;
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    /* Exact cancellation: zero, negative only in round-down mode */
    return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
 bExpBigger:
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* finite - inf = inf with the opposite sign */
        return packFloatx80(zSign ^ 1, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) ++expDiff;    /* subnormal a: true exponent is 1 */
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
 bBigger:
    /* |b| > |a|: compute b - a and flip the result sign */
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
    zExp = bExp;
    zSign ^= 1;
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return a;    /* inf - finite = inf */
    }
    if ( bExp == 0 ) --expDiff;    /* subnormal b: true exponent is 1 */
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
 aBigger:
    /* |a| > |b|: compute a - b, sign unchanged */
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
    zExp = aExp;
 normalizeRoundAndPack:
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         zSign, zExp, zSig0, zSig1, status);
}
6195 
6196 /*----------------------------------------------------------------------------
6197 | Returns the result of adding the extended double-precision floating-point
6198 | values `a' and `b'.  The operation is performed according to the IEC/IEEE
6199 | Standard for Binary Floating-Point Arithmetic.
6200 *----------------------------------------------------------------------------*/
6201 
6202 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
6203 {
6204     bool aSign, bSign;
6205 
6206     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6207         float_raise(float_flag_invalid, status);
6208         return floatx80_default_nan(status);
6209     }
6210     aSign = extractFloatx80Sign( a );
6211     bSign = extractFloatx80Sign( b );
6212     if ( aSign == bSign ) {
6213         return addFloatx80Sigs(a, b, aSign, status);
6214     }
6215     else {
6216         return subFloatx80Sigs(a, b, aSign, status);
6217     }
6218 
6219 }
6220 
6221 /*----------------------------------------------------------------------------
6222 | Returns the result of subtracting the extended double-precision floating-
6223 | point values `a' and `b'.  The operation is performed according to the
6224 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6225 *----------------------------------------------------------------------------*/
6226 
6227 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
6228 {
6229     bool aSign, bSign;
6230 
6231     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6232         float_raise(float_flag_invalid, status);
6233         return floatx80_default_nan(status);
6234     }
6235     aSign = extractFloatx80Sign( a );
6236     bSign = extractFloatx80Sign( b );
6237     if ( aSign == bSign ) {
6238         return subFloatx80Sigs(a, b, aSign, status);
6239     }
6240     else {
6241         return addFloatx80Sigs(a, b, aSign, status);
6242     }
6243 
6244 }
6245 
6246 /*----------------------------------------------------------------------------
6247 | Returns the result of multiplying the extended double-precision floating-
6248 | point values `a' and `b'.  The operation is performed according to the
6249 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6250 *----------------------------------------------------------------------------*/
6251 
floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;

    /* Invalid encodings raise invalid and give the default NaN. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        /* `a' is a NaN or infinity; NaN operands propagate. */
        if (    (uint64_t) ( aSig<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* infinity * zero is an invalid operation. */
        if ( ( bExp | bSig ) == 0 ) goto invalid;
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* zero * infinity is an invalid operation. */
        if ( ( aExp | aSig ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    /* Normalize subnormal operands; zero operands give a signed zero. */
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    zExp = aExp + bExp - 0x3FFE;
    /* 64x64 -> 128-bit product of the significands. */
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
    /* If the product's top bit is clear, shift left one place so the
     * msb of zSig0 is set, and compensate in the exponent. */
    if ( 0 < (int64_t) zSig0 ) {
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
        --zExp;
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6307 
6308 /*----------------------------------------------------------------------------
6309 | Returns the result of dividing the extended double-precision floating-point
6310 | value `a' by the corresponding value `b'.  The operation is performed
6311 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6312 *----------------------------------------------------------------------------*/
6313 
floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    uint64_t rem0, rem1, rem2, term0, term1, term2;

    /* Invalid encodings raise invalid and give the default NaN. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            /* infinity / infinity is an invalid operation. */
            goto invalid;
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* finite / infinity yields a signed zero. */
        return packFloatx80( zSign, 0, 0 );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* 0 / 0 is invalid; otherwise x / 0 raises divide-by-zero
             * and returns a signed infinity. */
            if ( ( aExp | aSig ) == 0 ) {
 invalid:
                float_raise(float_flag_invalid, status);
                return floatx80_default_nan(status);
            }
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(zSign, floatx80_infinity_high,
                                       floatx80_infinity_low);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    zExp = aExp - bExp + 0x3FFE;
    rem1 = 0;
    /* Make the dividend significand smaller than the divisor's so each
     * quotient digit fits in 64 bits; compensate in the exponent. */
    if ( bSig <= aSig ) {
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
        ++zExp;
    }
    /* High quotient word: estimate, then step the estimate down until
     * the partial remainder becomes non-negative. */
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
    mul64To128( bSig, zSig0, &term0, &term1 );
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
    }
    /* Low quotient word; only refine when the estimate is close enough
     * to a rounding boundary to matter, then fold any leftover
     * remainder into the sticky (lsb) bit. */
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
        mul64To128( bSig, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
        }
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6394 
6395 /*----------------------------------------------------------------------------
6396 | Returns the remainder of the extended double-precision floating-point value
6397 | `a' with respect to the corresponding value `b'.  The operation is performed
6398 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6399 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
6400 | the quotient toward zero instead.  '*quotient' is set to the low 64 bits of
6401 | the absolute value of the integer quotient.
6402 *----------------------------------------------------------------------------*/
6403 
floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
                         float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff, aExpOrig;
    uint64_t aSig0, aSig1, bSig;
    uint64_t q, term0, term1, alternateASig0, alternateASig1;

    *quotient = 0;
    /* Invalid encodings raise invalid and give the default NaN. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExpOrig = aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    if ( aExp == 0x7FFF ) {
        /* NaN operands propagate; remainder of an infinity is invalid. */
        if (    (uint64_t) ( aSig0<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if (aExp == 0 && aSig0 >> 63) {
            /*
             * Pseudo-denormal argument must be returned in normalized
             * form.
             */
            return packFloatx80(aSign, 1, aSig0);
        }
        /* finite % infinity leaves `a' unchanged. */
        return a;
    }
    if ( bExp == 0 ) {
        /* Remainder with respect to zero is an invalid operation. */
        if ( bSig == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return a;
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if ( expDiff < 0 ) {
        /* |a| is small relative to |b|: `a' is already the result
         * (always for mod; for rem only when expDiff < -1, otherwise
         * pre-shift so a final quotient bit can be produced). */
        if ( mod || expDiff < -1 ) {
            if (aExp == 1 && aExpOrig == 0) {
                /*
                 * Pseudo-denormal argument must be returned in
                 * normalized form.
                 */
                return packFloatx80(aSign, aExp, aSig0);
            }
            return a;
        }
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
        expDiff = 0;
    }
    /* First quotient bit; reduce the dividend accordingly. */
    *quotient = q = ( bSig <= aSig0 );
    if ( q ) aSig0 -= bSig;
    /* Generate quotient digits 62 bits per iteration while the exponent
     * gap is large; the running remainder lives in aSig0:aSig1.  The
     * estimate is biased low by 2, so it never overshoots. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        mul64To128( bSig, q, &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
        expDiff -= 62;
        *quotient <<= 62;
        *quotient += q;
    }
    /* Final partial quotient: correct the deliberately-low estimate
     * upward until the remainder is below the (shifted) divisor. */
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
            ++q;
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        }
        if (expDiff < 64) {
            *quotient <<= expDiff;
        } else {
            *quotient = 0;
        }
        *quotient += q;
    }
    else {
        term1 = 0;
        term0 = bSig;
    }
    if (!mod) {
        /* IEEE remainder rounds the quotient to nearest: switch to the
         * complementary remainder (b - r, sign flipped) when it is
         * smaller, breaking exact ties on the quotient's low bit. */
        sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
        if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
                || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
                        && ( q & 1 ) )
            ) {
            aSig0 = alternateASig0;
            aSig1 = alternateASig1;
            zSign = ! zSign;
            ++*quotient;
        }
    }
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1, status);

}
6524 
6525 /*----------------------------------------------------------------------------
6526 | Returns the remainder of the extended double-precision floating-point value
6527 | `a' with respect to the corresponding value `b'.  The operation is performed
6528 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6529 *----------------------------------------------------------------------------*/
6530 
6531 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6532 {
6533     uint64_t quotient;
6534     return floatx80_modrem(a, b, false, &quotient, status);
6535 }
6536 
6537 /*----------------------------------------------------------------------------
6538 | Returns the remainder of the extended double-precision floating-point value
6539 | `a' with respect to the corresponding value `b', with the quotient truncated
6540 | toward zero.
6541 *----------------------------------------------------------------------------*/
6542 
6543 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6544 {
6545     uint64_t quotient;
6546     return floatx80_modrem(a, b, true, &quotient, status);
6547 }
6548 
6549 /*----------------------------------------------------------------------------
6550 | Returns the square root of the extended double-precision floating-point
6551 | value `a'.  The operation is performed according to the IEC/IEEE Standard
6552 | for Binary Floating-Point Arithmetic.
6553 *----------------------------------------------------------------------------*/
6554 
floatx80 floatx80_sqrt(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    /* Invalid encodings raise invalid and give the default NaN. */
    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig0 << 1)) {
            return propagateFloatx80NaN(a, a, status);
        }
        /* sqrt(+inf) = +inf; sqrt(-inf) is invalid. */
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt of a negative value other than -0 is invalid. */
        if ( ( aExp | aSig0 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    /* Result exponent is half the unbiased input exponent. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
    /* 32-bit seed for the root, refined through a 128/64 division;
     * the input significand is pre-shifted by the exponent's parity. */
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    /* Step zSig0 down until the remainder a - zSig0^2 is non-negative. */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Low word of the root; only refine when near a rounding boundary,
     * folding any leftover remainder into the sticky bit. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    /* Assemble the 128-bit significand: high word plus shifted low word. */
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= doubleZSig0;
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                0, zExp, zSig0, zSig1, status);
}
6619 
6620 /*----------------------------------------------------------------------------
6621 | Returns the result of converting the quadruple-precision floating-point
6622 | value `a' to the 32-bit two's complement integer format.  The conversion
6623 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6624 | Arithmetic---which means in particular that the conversion is rounded
6625 | according to the current rounding mode.  If `a' is a NaN, the largest
6626 | positive integer is returned.  Otherwise, if the conversion overflows, the
6627 | largest integer with the same sign as `a' is returned.
6628 *----------------------------------------------------------------------------*/
6629 
6630 int32_t float128_to_int32(float128 a, float_status *status)
6631 {
6632     bool aSign;
6633     int32_t aExp, shiftCount;
6634     uint64_t aSig0, aSig1;
6635 
6636     aSig1 = extractFloat128Frac1( a );
6637     aSig0 = extractFloat128Frac0( a );
6638     aExp = extractFloat128Exp( a );
6639     aSign = extractFloat128Sign( a );
6640     if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
6641     if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
6642     aSig0 |= ( aSig1 != 0 );
6643     shiftCount = 0x4028 - aExp;
6644     if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
6645     return roundAndPackInt32(aSign, aSig0, status);
6646 
6647 }
6648 
6649 /*----------------------------------------------------------------------------
6650 | Returns the result of converting the quadruple-precision floating-point
6651 | value `a' to the 32-bit two's complement integer format.  The conversion
6652 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6653 | Arithmetic, except that the conversion is always rounded toward zero.  If
6654 | `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
6655 | conversion overflows, the largest integer with the same sign as `a' is
6656 | returned.
6657 *----------------------------------------------------------------------------*/
6658 
int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig0, aSig1, savedASig;
    int32_t z;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    /* Fold the low fraction word into a sticky bit of the high word. */
    aSig0 |= ( aSig1 != 0 );
    if ( 0x401E < aExp ) {
        /* Too large for int32; NaNs are treated as positive so they
         * saturate to INT32_MAX. */
        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
        goto invalid;
    }
    else if ( aExp < 0x3FFF ) {
        /* Magnitude below 1 truncates to 0; inexact unless exactly 0. */
        if (aExp || aSig0) {
            float_raise(float_flag_inexact, status);
        }
        return 0;
    }
    /* Restore the implicit integer bit and shift out the fraction. */
    aSig0 |= UINT64_C(0x0001000000000000);
    shiftCount = 0x402F - aExp;
    savedASig = aSig0;
    aSig0 >>= shiftCount;
    z = aSig0;
    if ( aSign ) z = - z;
    /* A sign mismatch here means the value overflowed int32 range. */
    if ( ( z < 0 ) ^ aSign ) {
 invalid:
        float_raise(float_flag_invalid, status);
        return aSign ? INT32_MIN : INT32_MAX;
    }
    /* Any bits lost to the shift make the conversion inexact. */
    if ( ( aSig0<<shiftCount ) != savedASig ) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
6698 
6699 /*----------------------------------------------------------------------------
6700 | Returns the result of converting the quadruple-precision floating-point
6701 | value `a' to the 64-bit two's complement integer format.  The conversion
6702 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6703 | Arithmetic---which means in particular that the conversion is rounded
6704 | according to the current rounding mode.  If `a' is a NaN, the largest
6705 | positive integer is returned.  Otherwise, if the conversion overflows, the
6706 | largest integer with the same sign as `a' is returned.
6707 *----------------------------------------------------------------------------*/
6708 
int64_t float128_to_int64(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    /* Restore the implicit integer bit for normal numbers. */
    if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
    shiftCount = 0x402F - aExp;
    if ( shiftCount <= 0 ) {
        if ( 0x403E < aExp ) {
            /* Overflow: positive values and NaNs saturate to INT64_MAX,
             * negative overflow (including -inf) to INT64_MIN. */
            float_raise(float_flag_invalid, status);
            if (    ! aSign
                 || (    ( aExp == 0x7FFF )
                      && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
                    )
               ) {
                return INT64_MAX;
            }
            return INT64_MIN;
        }
        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
    }
    else {
        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
    }
    return roundAndPackInt64(aSign, aSig0, aSig1, status);

}
6741 
6742 /*----------------------------------------------------------------------------
6743 | Returns the result of converting the quadruple-precision floating-point
6744 | value `a' to the 64-bit two's complement integer format.  The conversion
6745 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6746 | Arithmetic, except that the conversion is always rounded toward zero.
6747 | If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
6748 | the conversion overflows, the largest integer with the same sign as `a' is
6749 | returned.
6750 *----------------------------------------------------------------------------*/
6751 
int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig0, aSig1;
    int64_t z;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    /* Restore the implicit integer bit for normal numbers. */
    if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
    shiftCount = aExp - 0x402F;
    if ( 0 < shiftCount ) {
        if ( 0x403E <= aExp ) {
            aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
            /* Special-case values that truncate to exactly -2^63:
             * representable, so only inexact when low bits are set. */
            if (    ( a.high == UINT64_C(0xC03E000000000000) )
                 && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
                if (aSig1) {
                    float_raise(float_flag_inexact, status);
                }
            }
            else {
                /* Overflow: positive values and NaNs saturate to
                 * INT64_MAX, remaining negatives to INT64_MIN. */
                float_raise(float_flag_invalid, status);
                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
                    return INT64_MAX;
                }
            }
            return INT64_MIN;
        }
        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
        /* Low fraction bits shifted out make the result inexact. */
        if ( (uint64_t) ( aSig1<<shiftCount ) ) {
            float_raise(float_flag_inexact, status);
        }
    }
    else {
        if ( aExp < 0x3FFF ) {
            /* Magnitude below 1 truncates to 0; inexact unless 0. */
            if ( aExp | aSig0 | aSig1 ) {
                float_raise(float_flag_inexact, status);
            }
            return 0;
        }
        z = aSig0>>( - shiftCount );
        /* Discarded fraction bits make the result inexact. */
        if (    aSig1
             || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
            float_raise(float_flag_inexact, status);
        }
    }
    if ( aSign ) z = - z;
    return z;

}
6804 
6805 /*----------------------------------------------------------------------------
6806 | Returns the result of converting the quadruple-precision floating-point value
6807 | `a' to the 64-bit unsigned integer format.  The conversion is
6808 | performed according to the IEC/IEEE Standard for Binary Floating-Point
6809 | Arithmetic---which means in particular that the conversion is rounded
6810 | according to the current rounding mode.  If `a' is a NaN, the largest
6811 | positive integer is returned.  If the conversion overflows, the
6812 | largest unsigned integer is returned.  If 'a' is negative, the value is
6813 | rounded and zero is returned; negative values that do not round to zero
6814 | will raise the inexact exception.
6815 *----------------------------------------------------------------------------*/
6816 
uint64_t float128_to_uint64(float128 a, float_status *status)
{
    bool aSign;
    int aExp;
    int shiftCount;
    uint64_t aSig0, aSig1;

    aSig0 = extractFloat128Frac0(a);
    aSig1 = extractFloat128Frac1(a);
    aExp = extractFloat128Exp(a);
    aSign = extractFloat128Sign(a);
    /* Negative values with exponent above 0x3FFE (magnitude that cannot
     * round to zero) are invalid; NaNs saturate to UINT64_MAX.  Smaller
     * negatives fall through to be rounded (possibly to 0) below. */
    if (aSign && (aExp > 0x3FFE)) {
        float_raise(float_flag_invalid, status);
        if (float128_is_any_nan(a)) {
            return UINT64_MAX;
        } else {
            return 0;
        }
    }
    /* Restore the implicit integer bit for normal numbers. */
    if (aExp) {
        aSig0 |= UINT64_C(0x0001000000000000);
    }
    shiftCount = 0x402F - aExp;
    if (shiftCount <= 0) {
        /* Values at or above 2^64 (and +inf) overflow to UINT64_MAX. */
        if (0x403E < aExp) {
            float_raise(float_flag_invalid, status);
            return UINT64_MAX;
        }
        shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
    } else {
        shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
    }
    return roundAndPackUint64(aSign, aSig0, aSig1, status);
}
6851 
6852 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6853 {
6854     uint64_t v;
6855     signed char current_rounding_mode = status->float_rounding_mode;
6856 
6857     set_float_rounding_mode(float_round_to_zero, status);
6858     v = float128_to_uint64(a, status);
6859     set_float_rounding_mode(current_rounding_mode, status);
6860 
6861     return v;
6862 }
6863 
6864 /*----------------------------------------------------------------------------
6865 | Returns the result of converting the quadruple-precision floating-point
6866 | value `a' to the 32-bit unsigned integer format.  The conversion
6867 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6868 | Arithmetic except that the conversion is always rounded toward zero.
6869 | If `a' is a NaN, the largest positive integer is returned.  Otherwise,
6870 | if the conversion overflows, the largest unsigned integer is returned.
6871 | If 'a' is negative, the value is rounded and zero is returned; negative
6872 | values that do not round to zero will raise the inexact exception.
6873 *----------------------------------------------------------------------------*/
6874 
6875 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6876 {
6877     uint64_t v;
6878     uint32_t res;
6879     int old_exc_flags = get_float_exception_flags(status);
6880 
6881     v = float128_to_uint64_round_to_zero(a, status);
6882     if (v > 0xffffffff) {
6883         res = 0xffffffff;
6884     } else {
6885         return v;
6886     }
6887     set_float_exception_flags(old_exc_flags, status);
6888     float_raise(float_flag_invalid, status);
6889     return res;
6890 }
6891 
6892 /*----------------------------------------------------------------------------
6893 | Returns the result of converting the quadruple-precision floating-point value
6894 | `a' to the 32-bit unsigned integer format.  The conversion is
6895 | performed according to the IEC/IEEE Standard for Binary Floating-Point
6896 | Arithmetic---which means in particular that the conversion is rounded
6897 | according to the current rounding mode.  If `a' is a NaN, the largest
6898 | positive integer is returned.  If the conversion overflows, the
6899 | largest unsigned integer is returned.  If 'a' is negative, the value is
6900 | rounded and zero is returned; negative values that do not round to zero
6901 | will raise the inexact exception.
6902 *----------------------------------------------------------------------------*/
6903 
6904 uint32_t float128_to_uint32(float128 a, float_status *status)
6905 {
6906     uint64_t v;
6907     uint32_t res;
6908     int old_exc_flags = get_float_exception_flags(status);
6909 
6910     v = float128_to_uint64(a, status);
6911     if (v > 0xffffffff) {
6912         res = 0xffffffff;
6913     } else {
6914         return v;
6915     }
6916     set_float_exception_flags(old_exc_flags, status);
6917     float_raise(float_flag_invalid, status);
6918     return res;
6919 }
6920 
6921 /*----------------------------------------------------------------------------
6922 | Returns the result of converting the quadruple-precision floating-point
6923 | value `a' to the extended double-precision floating-point format.  The
6924 | conversion is performed according to the IEC/IEEE Standard for Binary
6925 | Floating-Point Arithmetic.
6926 *----------------------------------------------------------------------------*/
6927 
floatx80 float128_to_floatx80(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        if ( aSig0 | aSig1 ) {
            /* NaN payloads are carried across via the common-NaN form,
             * and the result is quietened. */
            floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        return packFloatx80(aSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    else {
        /* Restore the implicit integer bit for normal numbers. */
        aSig0 |= UINT64_C(0x0001000000000000);
    }
    /* Move the integer bit from bit 48 of aSig0 up to bit 63 to form
     * the explicit-integer-bit extended-precision significand. */
    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
    return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);

}
6958 
6959 /*----------------------------------------------------------------------------
6960 | Rounds the quadruple-precision floating-point value `a' to an integer, and
6961 | returns the result as a quadruple-precision floating-point value.  The
6962 | operation is performed according to the IEC/IEEE Standard for Binary
6963 | Floating-Point Arithmetic.
6964 *----------------------------------------------------------------------------*/
6965 
float128 float128_round_to_int(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t lastBitMask, roundBitsMask;
    float128 z;

    aExp = extractFloat128Exp( a );
    if ( 0x402F <= aExp ) {
        /*
         * Exponent >= 0x402F (unbiased >= 48): all fraction bits, if any,
         * lie within the low 64-bit word of the significand.
         */
        if ( 0x406F <= aExp ) {
            /*
             * Exponent >= 0x406F (unbiased >= 112): the value is already an
             * integer, an infinity, or a NaN.  NaNs propagate; everything
             * else is returned unchanged.
             */
            if (    ( aExp == 0x7FFF )
                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
               ) {
                return propagateFloat128NaN(a, a, status);
            }
            return a;
        }
        /*
         * lastBitMask selects the least-significant integer bit inside
         * z.low.  When aExp == 0x402F the shift totals 64, so the mask
         * wraps to 0, meaning that bit is actually the lsb of z.high.
         */
        lastBitMask = 1;
        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
        roundBitsMask = lastBitMask - 1;
        z = a;
        switch (status->float_rounding_mode) {
        case float_round_nearest_even:
            if ( lastBitMask ) {
                /* Add half an ulp, then clear the last bit on an exact tie. */
                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
            }
            else {
                /*
                 * lastBitMask == 0: the rounding bit is the msb of z.low;
                 * a set msb rounds up into z.high, with ties-to-even fixup.
                 */
                if ( (int64_t) z.low < 0 ) {
                    ++z.high;
                    if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
                }
            }
            break;
        case float_round_ties_away:
            /* Like nearest-even but without the tie-breaking adjustment. */
            if (lastBitMask) {
                add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
            } else {
                if ((int64_t) z.low < 0) {
                    ++z.high;
                }
            }
            break;
        case float_round_to_zero:
            break;
        case float_round_up:
            /* Round away from zero only for positive values. */
            if (!extractFloat128Sign(z)) {
                add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
            }
            break;
        case float_round_down:
            /* Round away from zero only for negative values. */
            if (extractFloat128Sign(z)) {
                add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
            }
            break;
        case float_round_to_odd:
            /*
             * Note that if lastBitMask == 0, the last bit is the lsb
             * of high, and roundBitsMask == -1.
             */
            if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
                add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
            }
            break;
        default:
            abort();
        }
        /* Discard the fraction bits that were rounded away. */
        z.low &= ~ roundBitsMask;
    }
    else {
        if ( aExp < 0x3FFF ) {
            /*
             * |a| < 1: the result is zero or +/-1 depending on the rounding
             * mode.  Exact zeros pass through without raising inexact.
             */
            if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
            float_raise(float_flag_inexact, status);
            aSign = extractFloat128Sign( a );
            switch (status->float_rounding_mode) {
            case float_round_nearest_even:
                /* Only values strictly above 0.5 in magnitude reach 1. */
                if (    ( aExp == 0x3FFE )
                     && (   extractFloat128Frac0( a )
                          | extractFloat128Frac1( a ) )
                   ) {
                    return packFloat128( aSign, 0x3FFF, 0, 0 );
                }
                break;
            case float_round_ties_away:
                /* 0.5 and above in magnitude round to 1. */
                if (aExp == 0x3FFE) {
                    return packFloat128(aSign, 0x3FFF, 0, 0);
                }
                break;
            case float_round_down:
                return
                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
                    : packFloat128( 0, 0, 0, 0 );
            case float_round_up:
                return
                      aSign ? packFloat128( 1, 0, 0, 0 )
                    : packFloat128( 0, 0x3FFF, 0, 0 );

            case float_round_to_odd:
                /* Any nonzero fraction forces the odd integer 1. */
                return packFloat128(aSign, 0x3FFF, 0, 0);

            case float_round_to_zero:
                break;
            }
            return packFloat128( aSign, 0, 0, 0 );
        }
        /*
         * 0x3FFF <= aExp < 0x402F: fraction bits span z.high; the whole
         * low word is fractional and is zeroed in the result.
         */
        lastBitMask = 1;
        lastBitMask <<= 0x402F - aExp;
        roundBitsMask = lastBitMask - 1;
        z.low = 0;
        z.high = a.high;
        switch (status->float_rounding_mode) {
        case float_round_nearest_even:
            z.high += lastBitMask>>1;
            /* On an exact tie (all round bits and a.low zero), force even. */
            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
                z.high &= ~ lastBitMask;
            }
            break;
        case float_round_ties_away:
            z.high += lastBitMask>>1;
            break;
        case float_round_to_zero:
            break;
        case float_round_up:
            if (!extractFloat128Sign(z)) {
                /* Fold a nonzero a.low into a sticky bit before rounding up. */
                z.high |= ( a.low != 0 );
                z.high += roundBitsMask;
            }
            break;
        case float_round_down:
            if (extractFloat128Sign(z)) {
                z.high |= (a.low != 0);
                z.high += roundBitsMask;
            }
            break;
        case float_round_to_odd:
            if ((z.high & lastBitMask) == 0) {
                z.high |= (a.low != 0);
                z.high += roundBitsMask;
            }
            break;
        default:
            abort();
        }
        z.high &= ~ roundBitsMask;
    }
    /* Any change in the bit pattern means the result was inexact. */
    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
7117 
7118 /*----------------------------------------------------------------------------
7119 | Returns the remainder of the quadruple-precision floating-point value `a'
7120 | with respect to the corresponding value `b'.  The operation is performed
7121 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7122 *----------------------------------------------------------------------------*/
7123 
float128 float128_rem(float128 a, float128 b, float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
    int64_t sigMean0;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    if ( aExp == 0x7FFF ) {
        /* a is NaN (propagate) or infinity (rem(inf, x) is invalid). */
        if (    ( aSig0 | aSig1 )
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
            return propagateFloat128NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        /* b is NaN (propagate) or infinity (rem(x, inf) == x). */
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) {
        /* rem(x, 0) is invalid; subnormal divisors are normalized first. */
        if ( ( bSig0 | bSig1 ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return float128_default_nan(status);
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    expDiff = aExp - bExp;
    /* |a| < |b|/2: a is already the remainder. */
    if ( expDiff < -1 ) return a;
    /*
     * Restore the implicit integer bits and align the significands; when
     * expDiff == -1 the dividend is shifted one bit less to keep it below
     * the divisor.
     */
    shortShift128Left(
        aSig0 | UINT64_C(0x0001000000000000),
        aSig1,
        15 - ( expDiff < 0 ),
        &aSig0,
        &aSig1
    );
    shortShift128Left(
        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
    /* First quotient bit: subtract b once if it fits into a. */
    q = le128( bSig0, bSig1, aSig0, aSig1 );
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    expDiff -= 64;
    /*
     * Reduce the dividend in 61-bit chunks: estimate a (deliberately low)
     * partial quotient, subtract q*b, and shift the residue up.
     */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
        expDiff -= 61;
    }
    if ( -64 < expDiff ) {
        /* Final partial step: scale q down to the remaining exponent gap. */
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        q >>= - expDiff;
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
        expDiff += 52;
        if ( expDiff < 0 ) {
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
        }
        else {
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
        }
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
    }
    else {
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
    }
    /*
     * Subtract b until the residue goes negative, remembering the last
     * non-negative residue (alternateASig*) and counting in q.
     */
    do {
        alternateASig0 = aSig0;
        alternateASig1 = aSig1;
        ++q;
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    } while ( 0 <= (int64_t) aSig0 );
    /*
     * Round-to-nearest-even on the quotient: pick whichever residue is
     * closer to zero; on an exact tie choose the one giving an even q.
     */
    add128(
        aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
    if (    ( sigMean0 < 0 )
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
        aSig0 = alternateASig0;
        aSig1 = alternateASig1;
    }
    zSign = ( (int64_t) aSig0 < 0 );
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
    return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
                                         status);
}
7224 
7225 /*----------------------------------------------------------------------------
7226 | Returns the square root of the quadruple-precision floating-point value `a'.
7227 | The operation is performed according to the IEC/IEEE Standard for Binary
7228 | Floating-Point Arithmetic.
7229 *----------------------------------------------------------------------------*/
7230 
float128 float128_sqrt(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        /* sqrt(NaN) propagates; sqrt(+inf) = +inf; sqrt(-inf) is invalid. */
        if (aSig0 | aSig1) {
            return propagateFloat128NaN(a, a, status);
        }
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt(-0) = -0; any other negative operand is invalid. */
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return float128_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    /* Result exponent is half the operand's unbiased exponent. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
    aSig0 |= UINT64_C(0x0001000000000000);
    /* Seed the root with a 32-bit estimate, then refine the high 64 bits. */
    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    /* Correct any overestimate: decrement until a - zSig0^2 >= 0. */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Estimate the low 64 bits of the root from the remainder. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
        /*
         * zSig1 is close enough to a rounding boundary that the estimate
         * must be verified exactly against the 192-bit remainder.
         */
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        /* Fold any nonzero remainder into the sticky bit. */
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
    return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);

}
7292 
/*
 * Compare two extended-precision values.  When `is_quiet' is false any NaN
 * operand raises the invalid flag; when true only signaling NaNs do.
 */
static inline FloatRelation
floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    /* Non-canonical encodings always compare unordered and raise invalid. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return float_relation_unordered;
    }
    /*
     * A max exponent with any fraction bit below the explicit integer bit
     * (frac<<1 nonzero) marks a NaN; infinities fall through.
     */
    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
          ( extractFloatx80Frac( a )<<1 ) ) ||
        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
          ( extractFloatx80Frac( b )<<1 ) )) {
        if (!is_quiet ||
            floatx80_is_signaling_nan(a, status) ||
            floatx80_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloatx80Sign( a );
    bSign = extractFloatx80Sign( b );
    if ( aSign != bSign ) {

        /*
         * Opposite signs: equal only when both operands are zero.  The
         * uint16_t cast of (high<<1) discards the sign bits before the test.
         */
        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
             ( ( a.low | b.low ) == 0 ) ) {
            /* zero case */
            return float_relation_equal;
        } else {
            /* The non-negative operand is the greater one. */
            return 1 - (2 * aSign);
        }
    } else {
        /* Normalize pseudo-denormals before comparison.  */
        if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
            ++a.high;
        }
        if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
            ++b.high;
        }
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            /* Same sign: compare magnitudes, inverting the order when
             * both operands are negative. */
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}
7340 
7341 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
7342 {
7343     return floatx80_compare_internal(a, b, 0, status);
7344 }
7345 
7346 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
7347                                      float_status *status)
7348 {
7349     return floatx80_compare_internal(a, b, 1, status);
7350 }
7351 
/*
 * Compare two quadruple-precision values.  When `is_quiet' is false any NaN
 * operand raises the invalid flag; when true only signaling NaNs do.
 */
static inline FloatRelation
float128_compare_internal(float128 a, float128 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    /* A max exponent with any fraction bit set marks a NaN -> unordered. */
    if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
          ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
        ( ( extractFloat128Exp( b ) == 0x7fff ) &&
          ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
        if (!is_quiet ||
            float128_is_signaling_nan(a, status) ||
            float128_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloat128Sign( a );
    bSign = extractFloat128Sign( b );
    if ( aSign != bSign ) {
        /*
         * Opposite signs: equal only when both are zero (the <<1 drops
         * the sign bits before the all-zero test).
         */
        if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
            /* zero case */
            return float_relation_equal;
        } else {
            /* The non-negative operand is the greater one. */
            return 1 - (2 * aSign);
        }
    } else {
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            /* Same sign: compare magnitudes, inverting the order when
             * both operands are negative. */
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}
7386 
7387 FloatRelation float128_compare(float128 a, float128 b, float_status *status)
7388 {
7389     return float128_compare_internal(a, b, 0, status);
7390 }
7391 
7392 FloatRelation float128_compare_quiet(float128 a, float128 b,
7393                                      float_status *status)
7394 {
7395     return float128_compare_internal(a, b, 1, status);
7396 }
7397 
7398 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
7399 {
7400     bool aSign;
7401     int32_t aExp;
7402     uint64_t aSig;
7403 
7404     if (floatx80_invalid_encoding(a)) {
7405         float_raise(float_flag_invalid, status);
7406         return floatx80_default_nan(status);
7407     }
7408     aSig = extractFloatx80Frac( a );
7409     aExp = extractFloatx80Exp( a );
7410     aSign = extractFloatx80Sign( a );
7411 
7412     if ( aExp == 0x7FFF ) {
7413         if ( aSig<<1 ) {
7414             return propagateFloatx80NaN(a, a, status);
7415         }
7416         return a;
7417     }
7418 
7419     if (aExp == 0) {
7420         if (aSig == 0) {
7421             return a;
7422         }
7423         aExp++;
7424     }
7425 
7426     if (n > 0x10000) {
7427         n = 0x10000;
7428     } else if (n < -0x10000) {
7429         n = -0x10000;
7430     }
7431 
7432     aExp += n;
7433     return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7434                                          aSign, aExp, aSig, 0, status);
7435 }
7436 
7437 float128 float128_scalbn(float128 a, int n, float_status *status)
7438 {
7439     bool aSign;
7440     int32_t aExp;
7441     uint64_t aSig0, aSig1;
7442 
7443     aSig1 = extractFloat128Frac1( a );
7444     aSig0 = extractFloat128Frac0( a );
7445     aExp = extractFloat128Exp( a );
7446     aSign = extractFloat128Sign( a );
7447     if ( aExp == 0x7FFF ) {
7448         if ( aSig0 | aSig1 ) {
7449             return propagateFloat128NaN(a, a, status);
7450         }
7451         return a;
7452     }
7453     if (aExp != 0) {
7454         aSig0 |= UINT64_C(0x0001000000000000);
7455     } else if (aSig0 == 0 && aSig1 == 0) {
7456         return a;
7457     } else {
7458         aExp++;
7459     }
7460 
7461     if (n > 0x10000) {
7462         n = 0x10000;
7463     } else if (n < -0x10000) {
7464         n = -0x10000;
7465     }
7466 
7467     aExp += n - 1;
7468     return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
7469                                          , status);
7470 
7471 }
7472 
7473 static void __attribute__((constructor)) softfloat_init(void)
7474 {
7475     union_float64 ua, ub, uc, ur;
7476 
7477     if (QEMU_NO_HARDFLOAT) {
7478         return;
7479     }
7480     /*
7481      * Test that the host's FMA is not obviously broken. For example,
7482      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
7483      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
7484      */
7485     ua.s = 0x0020000000000001ULL;
7486     ub.s = 0x3ca0000000000000ULL;
7487     uc.s = 0x0020000000000000ULL;
7488     ur.h = fma(ua.h, ub.h, uc.h);
7489     if (ur.s != 0x0020000000000001ULL) {
7490         force_soft_fma = true;
7491     }
7492 }
7493