xref: /openbmc/qemu/fpu/softfloat.c (revision d7754940d78a7d5bfb13531afa9a67f8c57e987e)
1 /*
2  * QEMU float support
3  *
4  * The code in this source file is derived from release 2a of the SoftFloat
5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6  * some later contributions) are provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file after December 1st 2014 will be
14  * taken to be licensed under the Softfloat-2a license unless specifically
15  * indicated otherwise.
16  */
17 
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22 
23 Written by John R. Hauser.  This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704.  Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980.  The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32 
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43 
44 ===============================================================================
45 */
46 
47 /* BSD licensing:
48  * Copyright (c) 2006, Fabrice Bellard
49  * All rights reserved.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions are met:
53  *
54  * 1. Redistributions of source code must retain the above copyright notice,
55  * this list of conditions and the following disclaimer.
56  *
57  * 2. Redistributions in binary form must reproduce the above copyright notice,
58  * this list of conditions and the following disclaimer in the documentation
59  * and/or other materials provided with the distribution.
60  *
61  * 3. Neither the name of the copyright holder nor the names of its contributors
62  * may be used to endorse or promote products derived from this software without
63  * specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75  * THE POSSIBILITY OF SUCH DAMAGE.
76  */
77 
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79  * version 2 or later. See the COPYING file in the top-level directory.
80  */
81 
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83  * target-dependent and needs the TARGET_* macros.
84  */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89 
90 /* We only need stdlib for abort() */
91 
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations.  (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98 
99 /*
100  * Hardfloat
101  *
102  * Fast emulation of guest FP instructions is challenging for two reasons.
103  * First, FP instruction semantics are similar but not identical, particularly
104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
105  * exception flags is not trivial: reading the host's flags register with a
106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 * and trapping on every FP exception is neither fast nor pleasant to work with.
108  *
109  * We address these challenges by leveraging the host FPU for a subset of the
110  * operations. To do this we expand on the idea presented in this paper:
111  *
112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114  *
115  * The idea is thus to leverage the host FPU to (1) compute FP operations
116  * and (2) identify whether FP exceptions occurred while avoiding
117  * expensive exception flag register accesses.
118  *
119  * An important optimization shown in the paper is that given that exception
120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121  * This is particularly useful for the inexact flag, which is very frequently
122  * raised in floating-point workloads.
123  *
124  * We optimize the code further by deferring to soft-fp whenever FP exception
125  * detection might get hairy. Two examples: (1) when at least one operand is
126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127  * and the result is < the minimum normal.
128  */
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
130     static inline void name(soft_t *a, float_status *s)                 \
131     {                                                                   \
132         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
133             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
134                                      soft_t ## _is_neg(*a));            \
135             float_raise(float_flag_input_denormal, s);                  \
136         }                                                               \
137     }
138 
GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck,float32)139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141 #undef GEN_INPUT_FLUSH__NOCHECK
142 
143 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
144     static inline void name(soft_t *a, float_status *s) \
145     {                                                   \
146         if (likely(!s->flush_inputs_to_zero)) {         \
147             return;                                     \
148         }                                               \
149         soft_t ## _input_flush__nocheck(a, s);          \
150     }
151 
152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154 #undef GEN_INPUT_FLUSH1
155 
156 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
157     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
158     {                                                                   \
159         if (likely(!s->flush_inputs_to_zero)) {                         \
160             return;                                                     \
161         }                                                               \
162         soft_t ## _input_flush__nocheck(a, s);                          \
163         soft_t ## _input_flush__nocheck(b, s);                          \
164     }
165 
166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168 #undef GEN_INPUT_FLUSH2
169 
170 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
171     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172     {                                                                   \
173         if (likely(!s->flush_inputs_to_zero)) {                         \
174             return;                                                     \
175         }                                                               \
176         soft_t ## _input_flush__nocheck(a, s);                          \
177         soft_t ## _input_flush__nocheck(b, s);                          \
178         soft_t ## _input_flush__nocheck(c, s);                          \
179     }
180 
181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183 #undef GEN_INPUT_FLUSH3
184 
/*
 * Select, per (number of inputs, float size) combination, whether the
 * generated hardfloat functions classify operands with fpclassify() (1) or
 * with the float32/64_* primitives (0).
 *
 * The single-precision variants use the softfloat primitives on every host;
 * only the double-precision variants switch to fpclassify(), and only on
 * x86_64.
 */
#define QEMU_HARDFLOAT_1F32_USE_FP 0
#define QEMU_HARDFLOAT_2F32_USE_FP 0
#define QEMU_HARDFLOAT_3F32_USE_FP 0

#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
205 
/*
 * QEMU_HARDFLOAT_USE_ISINF selects isinf() instead of
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64 isinf() can yield a ~6% speedup; on power64 it reduces
 * fp-bench performance by up to 50%, so every other host keeps the
 * softfloat primitive.
 */
#if !defined(__x86_64__) && !defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   0
#else
# define QEMU_HARDFLOAT_USE_ISINF   1
#endif
217 
/*
 * Hardfloat relies on the inexact flag being already set, so it cannot be
 * used on targets that clear the FP flags before most FP operations.  It is
 * also disabled under -ffast-math, with a build-time warning.
 *
 * QEMU_SOFTFLOAT_ATTR additionally carries noinline only when hardfloat is
 * enabled.
 */
#if defined(__FAST_MATH__)
# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
#endif

#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
234 
235 static inline bool can_use_fpu(const float_status *s)
236 {
237     if (QEMU_NO_HARDFLOAT) {
238         return false;
239     }
240     return likely(s->float_exception_flags & float_flag_inexact &&
241                   s->float_rounding_mode == float_round_nearest_even);
242 }
243 
244 /*
245  * Hardfloat generation functions. Each operation can have two flavors:
246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247  * most condition checks, or native ones (e.g. fpclassify).
248  *
249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
250  * compiler to propagate constants and inline everything into the callers.
251  *
252  * We only generate functions for operations with two inputs, since only
253  * these are common enough to justify consolidating them into common code.
254  */
255 
256 typedef union {
257     float32 s;
258     float h;
259 } union_float32;
260 
261 typedef union {
262     float64 s;
263     double h;
264 } union_float64;
265 
266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268 
269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271 typedef float   (*hard_f32_op2_fn)(float a, float b);
272 typedef double  (*hard_f64_op2_fn)(double a, double b);
273 
274 /* 2-input is-zero-or-normal */
f32_is_zon2(union_float32 a,union_float32 b)275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
278         /*
279          * Not using a temp variable for consecutive fpclassify calls ends up
280          * generating faster code.
281          */
282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284     }
285     return float32_is_zero_or_normal(a.s) &&
286            float32_is_zero_or_normal(b.s);
287 }
288 
/* Double-precision 2-input is-zero-or-normal check. */
static inline bool f64_is_zon2(union_float64 a, union_float64 b)
{
    if (!QEMU_HARDFLOAT_2F64_USE_FP) {
        return float64_is_zero_or_normal(a.s) &&
               float64_is_zero_or_normal(b.s);
    }
    /* Host classification; no temporaries, mirroring f32_is_zon2. */
    if (fpclassify(a.h) != FP_NORMAL && fpclassify(a.h) != FP_ZERO) {
        return false;
    }
    return fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO;
}
298 
299 /* 3-input is-zero-or-normal */
300 static inline
f32_is_zon3(union_float32 a,union_float32 b,union_float32 c)301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307     }
308     return float32_is_zero_or_normal(a.s) &&
309            float32_is_zero_or_normal(b.s) &&
310            float32_is_zero_or_normal(c.s);
311 }
312 
313 static inline
f64_is_zon3(union_float64 a,union_float64 b,union_float64 c)314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320     }
321     return float64_is_zero_or_normal(a.s) &&
322            float64_is_zero_or_normal(b.s) &&
323            float64_is_zero_or_normal(c.s);
324 }
325 
/*
 * Infinity test for a single-precision value, using the host isinf() or the
 * softfloat primitive as selected by QEMU_HARDFLOAT_USE_ISINF.
 */
static inline bool f32_is_inf(union_float32 a)
{
    if (!QEMU_HARDFLOAT_USE_ISINF) {
        return float32_is_infinity(a.s);
    }
    return isinf(a.h);
}
333 
/*
 * Infinity test for a double-precision value, using the host isinf() or the
 * softfloat primitive as selected by QEMU_HARDFLOAT_USE_ISINF.
 */
static inline bool f64_is_inf(union_float64 a)
{
    if (!QEMU_HARDFLOAT_USE_ISINF) {
        return float64_is_infinity(a.s);
    }
    return isinf(a.h);
}
341 
342 static inline float32
float32_gen2(float32 xa,float32 xb,float_status * s,hard_f32_op2_fn hard,soft_f32_op2_fn soft,f32_check_fn pre,f32_check_fn post)343 float32_gen2(float32 xa, float32 xb, float_status *s,
344              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
345              f32_check_fn pre, f32_check_fn post)
346 {
347     union_float32 ua, ub, ur;
348 
349     ua.s = xa;
350     ub.s = xb;
351 
352     if (unlikely(!can_use_fpu(s))) {
353         goto soft;
354     }
355 
356     float32_input_flush2(&ua.s, &ub.s, s);
357     if (unlikely(!pre(ua, ub))) {
358         goto soft;
359     }
360 
361     ur.h = hard(ua.h, ub.h);
362     if (unlikely(f32_is_inf(ur))) {
363         float_raise(float_flag_overflow, s);
364     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365         goto soft;
366     }
367     return ur.s;
368 
369  soft:
370     return soft(ua.s, ub.s, s);
371 }
372 
373 static inline float64
float64_gen2(float64 xa,float64 xb,float_status * s,hard_f64_op2_fn hard,soft_f64_op2_fn soft,f64_check_fn pre,f64_check_fn post)374 float64_gen2(float64 xa, float64 xb, float_status *s,
375              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
376              f64_check_fn pre, f64_check_fn post)
377 {
378     union_float64 ua, ub, ur;
379 
380     ua.s = xa;
381     ub.s = xb;
382 
383     if (unlikely(!can_use_fpu(s))) {
384         goto soft;
385     }
386 
387     float64_input_flush2(&ua.s, &ub.s, s);
388     if (unlikely(!pre(ua, ub))) {
389         goto soft;
390     }
391 
392     ur.h = hard(ua.h, ub.h);
393     if (unlikely(f64_is_inf(ur))) {
394         float_raise(float_flag_overflow, s);
395     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396         goto soft;
397     }
398     return ur.s;
399 
400  soft:
401     return soft(ua.s, ub.s, s);
402 }
403 
/*
 * Classification of a floating point number.  float_class_qnan and every
 * enumerator after it is a NaN, so cls >= float_class_qnan tests for any
 * NaN.  The ordering of the enumerators is therefore significant.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* first NaN class; all NaNs from here */
    float_class_snan,
} FloatClass;
417 
418 #define float_cmask(bit)  (1u << (bit))
419 
420 enum {
421     float_cmask_zero    = float_cmask(float_class_zero),
422     float_cmask_normal  = float_cmask(float_class_normal),
423     float_cmask_inf     = float_cmask(float_class_inf),
424     float_cmask_qnan    = float_cmask(float_class_qnan),
425     float_cmask_snan    = float_cmask(float_class_snan),
426 
427     float_cmask_infzero = float_cmask_zero | float_cmask_inf,
428     float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
429 };
430 
/* Flags for parts_minmax.  Each flag is a distinct bit. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1 << 0,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 1 << 1,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 1 << 2,
    /*
     * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
     * operations.
     */
    minmax_isnumber = 1 << 3,
};
445 
446 /* Simple helpers for checking if, or what kind of, NaN we have */
is_nan(FloatClass c)447 static inline __attribute__((unused)) bool is_nan(FloatClass c)
448 {
449     return unlikely(c >= float_class_qnan);
450 }
451 
/* True only for the signaling NaN class. */
static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return float_class_snan == c;
}
456 
/* True only for the quiet NaN class. */
static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return float_class_qnan == c;
}
461 
462 /*
463  * Structure holding all of the decomposed parts of a float.
464  * The exponent is unbiased and the fraction is normalized.
465  *
466  * The fraction words are stored in big-endian word ordering,
467  * so that truncation from a larger format to a smaller format
468  * can be done simply by ignoring subsequent elements.
469  */
470 
471 typedef struct {
472     FloatClass cls;
473     bool sign;
474     int32_t exp;
475     union {
476         /* Routines that know the structure may reference the singular name. */
477         uint64_t frac;
478         /*
479          * Routines expanded with multiple structures reference "hi" and "lo"
480          * depending on the operation.  In FloatParts64, "hi" and "lo" are
481          * both the same word and aliased here.
482          */
483         uint64_t frac_hi;
484         uint64_t frac_lo;
485     };
486 } FloatParts64;
487 
488 typedef struct {
489     FloatClass cls;
490     bool sign;
491     int32_t exp;
492     uint64_t frac_hi;
493     uint64_t frac_lo;
494 } FloatParts128;
495 
496 typedef struct {
497     FloatClass cls;
498     bool sign;
499     int32_t exp;
500     uint64_t frac_hi;
501     uint64_t frac_hm;  /* high-middle */
502     uint64_t frac_lm;  /* low-middle */
503     uint64_t frac_lo;
504 } FloatParts256;
505 
/*
 * These apply to the most significant word of each FloatPartsN:
 * the bit index of the binary point, and the mask of that bit.
 */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
509 
/*
 * Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_re_bias: initialised by FLOAT_PARAMS_ to 2^(E-1) + 2^(E-2) for an
 *                E-bit exponent.  NOTE(review): its consumers are not
 *                visible in this part of the file; confirm the meaning.
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following is computed based on the size of the fraction:
 *   round_mask: bits below lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 *   m68k_denormal: explicit integer bit for extended precision may be 1
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_re_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    bool m68k_denormal;
    uint64_t round_mask;
} FloatFmt;
533 
/*
 * Expand FloatFmt fields based on the size of exponent and fraction.
 *
 * FLOAT_PARAMS_(E) fills the exponent-related fields for an E-bit exponent:
 *   exp_size    = E
 *   exp_bias    = (2^E - 1) / 2
 *   exp_re_bias = 2^(E-1) + 2^(E-2)
 *   exp_max     = 2^E - 1
 *
 * Every macro argument is parenthesised so that an expression argument
 * (e.g. "20 + 3") expands with the intended precedence.
 */
#define FLOAT_PARAMS_(E)                                        \
    .exp_size       = (E),                                      \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                    \
    .exp_re_bias    = (1 << ((E) - 1)) + (1 << ((E) - 2)),      \
    .exp_max        = (1 << (E)) - 1

/*
 * FLOAT_PARAMS(E, F) additionally fills the fraction-related fields for an
 * F-bit fraction:
 *   frac_size  = F
 *   frac_shift = (-F - 1) mod 64
 *   round_mask = all bits below frac_shift
 */
#define FLOAT_PARAMS(E, F)                                      \
    FLOAT_PARAMS_(E),                                           \
    .frac_size      = (F),                                      \
    .frac_shift     = (-(F) - 1) & 63,                          \
    .round_mask     = (1ull << ((-(F) - 1) & 63)) - 1
546 
547 static const FloatFmt float16_params = {
548     FLOAT_PARAMS(5, 10)
549 };
550 
551 static const FloatFmt float16_params_ahp = {
552     FLOAT_PARAMS(5, 10),
553     .arm_althp = true
554 };
555 
556 static const FloatFmt bfloat16_params = {
557     FLOAT_PARAMS(8, 7)
558 };
559 
560 static const FloatFmt float32_params = {
561     FLOAT_PARAMS(8, 23)
562 };
563 
564 static const FloatFmt float64_params = {
565     FLOAT_PARAMS(11, 52)
566 };
567 
568 static const FloatFmt float128_params = {
569     FLOAT_PARAMS(15, 112)
570 };
571 
/*
 * Expand FloatFmt fields for floatx80 with a 15-bit exponent and rounding
 * precision R.  frac_shift is always 0.  R == 64 is special-cased: frac_size
 * becomes 63 and round_mask is all ones; otherwise frac_size is R and
 * round_mask covers the bits below bit ((-R - 1) mod 64).
 *
 * R is parenthesised at every use so that an expression argument expands
 * with the intended precedence.
 */
#define FLOATX80_PARAMS(R)                      \
    FLOAT_PARAMS_(15),                          \
    .frac_size = (R) == 64 ? 63 : (R),          \
    .frac_shift = 0,                            \
    .round_mask = (R) == 64 ? -1 : (1ull << ((-(R) - 1) & 63)) - 1
577 
578 static const FloatFmt floatx80_params[3] = {
579     [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
580     [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
581     [floatx80_precision_x] = {
582         FLOATX80_PARAMS(64),
583 #ifdef TARGET_M68K
584         .m68k_denormal = true,
585 #endif
586     },
587 };
588 
589 /* Unpack a float to parts, but do not canonicalize.  */
unpack_raw64(FloatParts64 * r,const FloatFmt * fmt,uint64_t raw)590 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
591 {
592     const int f_size = fmt->frac_size;
593     const int e_size = fmt->exp_size;
594 
595     *r = (FloatParts64) {
596         .cls = float_class_unclassified,
597         .sign = extract64(raw, f_size + e_size, 1),
598         .exp = extract64(raw, f_size, e_size),
599         .frac = extract64(raw, 0, f_size)
600     };
601 }
602 
float16_unpack_raw(FloatParts64 * p,float16 f)603 static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
604 {
605     unpack_raw64(p, &float16_params, f);
606 }
607 
bfloat16_unpack_raw(FloatParts64 * p,bfloat16 f)608 static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
609 {
610     unpack_raw64(p, &bfloat16_params, f);
611 }
612 
float32_unpack_raw(FloatParts64 * p,float32 f)613 static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f)
614 {
615     unpack_raw64(p, &float32_params, f);
616 }
617 
float64_unpack_raw(FloatParts64 * p,float64 f)618 static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f)
619 {
620     unpack_raw64(p, &float64_params, f);
621 }
622 
floatx80_unpack_raw(FloatParts128 * p,floatx80 f)623 static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
624 {
625     *p = (FloatParts128) {
626         .cls = float_class_unclassified,
627         .sign = extract32(f.high, 15, 1),
628         .exp = extract32(f.high, 0, 15),
629         .frac_hi = f.low
630     };
631 }
632 
float128_unpack_raw(FloatParts128 * p,float128 f)633 static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f)
634 {
635     const int f_size = float128_params.frac_size - 64;
636     const int e_size = float128_params.exp_size;
637 
638     *p = (FloatParts128) {
639         .cls = float_class_unclassified,
640         .sign = extract64(f.high, f_size + e_size, 1),
641         .exp = extract64(f.high, f_size, e_size),
642         .frac_hi = extract64(f.high, 0, f_size),
643         .frac_lo = f.low,
644     };
645 }
646 
647 /* Pack a float from parts, but do not canonicalize.  */
pack_raw64(const FloatParts64 * p,const FloatFmt * fmt)648 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
649 {
650     const int f_size = fmt->frac_size;
651     const int e_size = fmt->exp_size;
652     uint64_t ret;
653 
654     ret = (uint64_t)p->sign << (f_size + e_size);
655     ret = deposit64(ret, f_size, e_size, p->exp);
656     ret = deposit64(ret, 0, f_size, p->frac);
657     return ret;
658 }
659 
float16_pack_raw(const FloatParts64 * p)660 static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
661 {
662     return make_float16(pack_raw64(p, &float16_params));
663 }
664 
bfloat16_pack_raw(const FloatParts64 * p)665 static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p)
666 {
667     return pack_raw64(p, &bfloat16_params);
668 }
669 
float32_pack_raw(const FloatParts64 * p)670 static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p)
671 {
672     return make_float32(pack_raw64(p, &float32_params));
673 }
674 
float64_pack_raw(const FloatParts64 * p)675 static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p)
676 {
677     return make_float64(pack_raw64(p, &float64_params));
678 }
679 
float128_pack_raw(const FloatParts128 * p)680 static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p)
681 {
682     const int f_size = float128_params.frac_size - 64;
683     const int e_size = float128_params.exp_size;
684     uint64_t hi;
685 
686     hi = (uint64_t)p->sign << (f_size + e_size);
687     hi = deposit64(hi, f_size, e_size, p->exp);
688     hi = deposit64(hi, 0, f_size, p->frac_hi);
689     return make_float128(hi, p->frac_lo);
690 }
691 
692 /*----------------------------------------------------------------------------
693 | Functions and definitions to determine:  (1) whether tininess for underflow
694 | is detected before or after rounding by default, (2) what (if anything)
695 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
696 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
697 | are propagated from function inputs to output.  These details are target-
698 | specific.
699 *----------------------------------------------------------------------------*/
700 #include "softfloat-specialize.c.inc"
701 
/*
 * Select parts64_NAME or parts128_NAME according to the static type of P,
 * which must be a FloatParts64 * or a FloatParts128 *.
 */
#define PARTS_GENERIC_64_128(NAME, P)           \
    _Generic((P),                               \
             FloatParts64 *: parts64_##NAME,    \
             FloatParts128 *: parts128_##NAME)

/* As above, additionally accepting FloatParts256 * -> parts256_NAME. */
#define PARTS_GENERIC_64_128_256(NAME, P)       \
    _Generic((P),                               \
             FloatParts64 *: parts64_##NAME,    \
             FloatParts128 *: parts128_##NAME,  \
             FloatParts256 *: parts256_##NAME)
710 
711 #define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
712 #define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
713 
714 static void parts64_return_nan(FloatParts64 *a, float_status *s);
715 static void parts128_return_nan(FloatParts128 *a, float_status *s);
716 
717 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
718 
719 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
720                                       float_status *s);
721 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
722                                         float_status *s);
723 
724 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
725 
726 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
727                                              FloatParts64 *c, float_status *s,
728                                              int ab_mask, int abc_mask);
729 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
730                                                FloatParts128 *b,
731                                                FloatParts128 *c,
732                                                float_status *s,
733                                                int ab_mask, int abc_mask);
734 
735 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
736     PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
737 
738 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
739                                  const FloatFmt *fmt);
740 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
741                                   const FloatFmt *fmt);
742 
743 #define parts_canonicalize(A, S, F) \
744     PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
745 
746 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
747                                    const FloatFmt *fmt);
748 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
749                                     const FloatFmt *fmt);
750 
751 #define parts_uncanon_normal(A, S, F) \
752     PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
753 
754 static void parts64_uncanon(FloatParts64 *p, float_status *status,
755                             const FloatFmt *fmt);
756 static void parts128_uncanon(FloatParts128 *p, float_status *status,
757                              const FloatFmt *fmt);
758 
759 #define parts_uncanon(A, S, F) \
760     PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
761 
/*
 * Arithmetic on FloatParts.
 *
 * add_normal and sub_normal are the only operations here that also have
 * a FloatParts256 variant; this matches the 256-bit instantiation further
 * down, which includes only softfloat-parts-addsub.c.inc.
 */
static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

/* Combined add/subtract; the bool argument selects subtraction. */
static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

/* Three-operand multiply-add; FLAGS is an int whose meaning is defined elsewhere. */
static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

/*
 * Remainder.  MOD_QUOT has the same type as the mod_quot argument of
 * frac64_modrem/frac128_modrem below.
 * NOTE(review): presumably it is passed straight through to them -- confirm.
 */
static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
                                    uint64_t *mod_quot, float_status *s);
static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
                                      uint64_t *mod_quot, float_status *s);

#define parts_modrem(A, B, Q, S) \
    PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
817 
/*
 * Square root, rounding to integer and float -> integer conversions.
 * All of these operate on a FloatParts value in place or convert it to a
 * 64-bit integer; the definitions are not in this part of the file.
 */
static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_sqrt(A, S, F) \
    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)

/* Round-to-integer for normal inputs; takes an explicit rounding mode. */
static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)

/* Float -> signed integer; the caller supplies the [min, max] bounds. */
static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);

#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)

/* Float -> unsigned integer; the caller supplies the upper bound. */
static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)

/*
 * Float -> signed integer, "modulo" flavour.
 * NOTE(review): bitsm1 presumably is the result width minus one -- confirm.
 */
static int64_t parts64_float_to_sint_modulo(FloatParts64 *p,
                                            FloatRoundMode rmode,
                                            int bitsm1, float_status *s);
static int64_t parts128_float_to_sint_modulo(FloatParts128 *p,
                                             FloatRoundMode rmode,
                                             int bitsm1, float_status *s);

#define parts_float_to_sint_modulo(P, R, M, S) \
    PARTS_GENERIC_64_128(float_to_sint_modulo, P)(P, R, M, S)
871 
872 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
873                                   int scale, float_status *s);
874 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
875                                    int scale, float_status *s);
876 
877 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
878     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
879 
880 #define parts_sint_to_float(P, I, Z, S) \
881     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
882 
/* Unsigned 64-bit integer -> FloatParts conversion; result is written to P. */
static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)

/* Min/max selection; FLAGS is an int whose meaning is defined elsewhere. */
static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)

/*
 * Comparison returning a FloatRelation.
 * NOTE(review): the bool q presumably selects the quiet comparison -- confirm.
 */
static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
                                     float_status *s, bool q);
static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool q);

#define parts_compare(A, B, S, Q) \
    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)

/* Scale A in place by the integer N. */
static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);

#define parts_scalbn(A, N, S) \
    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)

/* Base-2 logarithm of A, computed in place. */
static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_log2(A, S, F) \
    PARTS_GENERIC_64_128(log2, A)(A, S, F)
918 
/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 *
 * FRAC_GENERIC_64_128(NAME, P) evaluates to frac64_NAME or frac128_NAME
 * according to whether P is a FloatParts64 * or a FloatParts128 *.
 * FRAC_GENERIC_64_128_256 additionally maps FloatParts256 * to
 * frac256_NAME.  The selection uses C11 _Generic, so P is only inspected
 * for its type; any other pointer type is a compile-time error because
 * there is no default association.
 */

#define FRAC_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME, \
                  FloatParts256 *: frac256_##NAME)
931 
frac64_add(FloatParts64 * r,FloatParts64 * a,FloatParts64 * b)932 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
933 {
934     return uadd64_overflow(a->frac, b->frac, &r->frac);
935 }
936 
frac128_add(FloatParts128 * r,FloatParts128 * a,FloatParts128 * b)937 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
938 {
939     bool c = 0;
940     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
941     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
942     return c;
943 }
944 
frac256_add(FloatParts256 * r,FloatParts256 * a,FloatParts256 * b)945 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
946 {
947     bool c = 0;
948     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
949     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
950     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
951     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
952     return c;
953 }
954 
955 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
956 
frac64_addi(FloatParts64 * r,FloatParts64 * a,uint64_t c)957 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
958 {
959     return uadd64_overflow(a->frac, c, &r->frac);
960 }
961 
frac128_addi(FloatParts128 * r,FloatParts128 * a,uint64_t c)962 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
963 {
964     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
965     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
966 }
967 
968 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
969 
frac64_allones(FloatParts64 * a)970 static void frac64_allones(FloatParts64 *a)
971 {
972     a->frac = -1;
973 }
974 
frac128_allones(FloatParts128 * a)975 static void frac128_allones(FloatParts128 *a)
976 {
977     a->frac_hi = a->frac_lo = -1;
978 }
979 
980 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
981 
frac64_cmp(FloatParts64 * a,FloatParts64 * b)982 static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
983 {
984     return (a->frac == b->frac ? float_relation_equal
985             : a->frac < b->frac ? float_relation_less
986             : float_relation_greater);
987 }
988 
frac128_cmp(FloatParts128 * a,FloatParts128 * b)989 static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
990 {
991     uint64_t ta = a->frac_hi, tb = b->frac_hi;
992     if (ta == tb) {
993         ta = a->frac_lo, tb = b->frac_lo;
994         if (ta == tb) {
995             return float_relation_equal;
996         }
997     }
998     return ta < tb ? float_relation_less : float_relation_greater;
999 }
1000 
1001 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
1002 
frac64_clear(FloatParts64 * a)1003 static void frac64_clear(FloatParts64 *a)
1004 {
1005     a->frac = 0;
1006 }
1007 
frac128_clear(FloatParts128 * a)1008 static void frac128_clear(FloatParts128 *a)
1009 {
1010     a->frac_hi = a->frac_lo = 0;
1011 }
1012 
1013 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
1014 
/*
 * Divide the fraction of A by the fraction of B, leaving the quotient in
 * the fraction of A.
 *
 * We want a 2*N / N-bit division to produce exactly an N-bit result, so
 * that no precision is lost and no renormalization is needed afterward.
 * If A.frac < B.frac, dividing the pre-shifted numerator would produce
 * only an (N-1)-bit result, so in that case the numerator is used one bit
 * further to the left to obtain an N-bit result, and true is returned so
 * that the caller can decrement the exponent to match.
 *
 * The udiv_qrnnd algorithm requires normalization, i.e. the msb of the
 * denominator must be set, which is already true here.
 *
 * The lsb of the quotient is set when the remainder is non-zero, so that
 * inexactness is not lost.
 */
static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    const bool a_lt_b = a->frac < b->frac;
    uint64_t num_hi, num_lo;
    uint64_t quotient, remainder;

    if (a_lt_b) {
        /* Numerator is A.frac followed by 64 zero bits. */
        num_hi = a->frac;
        num_lo = 0;
    } else {
        /* Same 128-bit value, shifted right by one bit. */
        num_hi = a->frac >> 1;
        num_lo = a->frac << 63;
    }

    quotient = udiv_qrnnd(&remainder, num_hi, num_lo, b->frac);

    /* Sticky bit: fold any remainder into the lsb. */
    a->frac = quotient | (remainder != 0);

    return a_lt_b;
}
1046 
/*
 * Divide the 128-bit fraction of A by the 128-bit fraction of B, leaving
 * the 128-bit quotient in the fraction of A.
 *
 * Returns true when A's fraction compares less than B's (lt128); in the
 * other case A's fraction is shifted right by one bit before dividing.
 * This mirrors frac64_div, whose comment explains the exponent
 * adjustment expected of the caller.
 *
 * The quotient is built 64 bits at a time: each word is first estimated
 * from the high word of B only and then corrected downward.  The lsb of
 * the low quotient word is set if any remainder is left.
 */
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        /* A >= B: pre-shift the numerator right by one bit. */
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

/* Dispatch on the type of A. */
#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1095 
frac64_eqz(FloatParts64 * a)1096 static bool frac64_eqz(FloatParts64 *a)
1097 {
1098     return a->frac == 0;
1099 }
1100 
frac128_eqz(FloatParts128 * a)1101 static bool frac128_eqz(FloatParts128 *a)
1102 {
1103     return (a->frac_hi | a->frac_lo) == 0;
1104 }
1105 
1106 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1107 
frac64_mulw(FloatParts128 * r,FloatParts64 * a,FloatParts64 * b)1108 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1109 {
1110     mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1111 }
1112 
frac128_mulw(FloatParts256 * r,FloatParts128 * a,FloatParts128 * b)1113 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1114 {
1115     mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1116                 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1117 }
1118 
1119 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1120 
frac64_neg(FloatParts64 * a)1121 static void frac64_neg(FloatParts64 *a)
1122 {
1123     a->frac = -a->frac;
1124 }
1125 
frac128_neg(FloatParts128 * a)1126 static void frac128_neg(FloatParts128 *a)
1127 {
1128     bool c = 0;
1129     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1130     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1131 }
1132 
frac256_neg(FloatParts256 * a)1133 static void frac256_neg(FloatParts256 *a)
1134 {
1135     bool c = 0;
1136     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1137     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1138     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1139     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1140 }
1141 
1142 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1143 
frac64_normalize(FloatParts64 * a)1144 static int frac64_normalize(FloatParts64 *a)
1145 {
1146     if (a->frac) {
1147         int shift = clz64(a->frac);
1148         a->frac <<= shift;
1149         return shift;
1150     }
1151     return 64;
1152 }
1153 
frac128_normalize(FloatParts128 * a)1154 static int frac128_normalize(FloatParts128 *a)
1155 {
1156     if (a->frac_hi) {
1157         int shl = clz64(a->frac_hi);
1158         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1159         a->frac_lo <<= shl;
1160         return shl;
1161     } else if (a->frac_lo) {
1162         int shl = clz64(a->frac_lo);
1163         a->frac_hi = a->frac_lo << shl;
1164         a->frac_lo = 0;
1165         return shl + 64;
1166     }
1167     return 128;
1168 }
1169 
/*
 * 256-bit variant of frac_normalize: shift the fraction of A left until
 * the msb of frac_hi is set and return the shift count.  Returns 0
 * without writing anything when the fraction is already normalized, and
 * 256 when all four words are zero.
 *
 * The shift is split into a whole-word move (0, 64, 128 or 192 bits) and
 * a sub-word shift of clz64() bits of the new top word.
 */
static int frac256_normalize(FloatParts256 *a)
{
    uint64_t w0 = a->frac_hi, w1 = a->frac_hm;
    uint64_t w2 = a->frac_lm, w3 = a->frac_lo;
    int word_shift = 0;
    int bit_shift;

    if (likely(w0)) {
        /* Top word is populated: no whole-word move is needed. */
    } else if (w1) {
        word_shift = 64;
        w0 = w1, w1 = w2, w2 = w3, w3 = 0;
    } else if (w2) {
        word_shift = 128;
        w0 = w2, w1 = w3, w2 = 0, w3 = 0;
    } else if (w3) {
        word_shift = 192;
        w0 = w3, w1 = 0, w2 = 0, w3 = 0;
    } else {
        /* Everything is zero; the stored words are already correct. */
        return 256;
    }

    bit_shift = clz64(w0);
    if (bit_shift == 0) {
        if (word_shift == 0) {
            /* Already normalized: nothing was changed. */
            return 0;
        }
    } else {
        w0 = shl_double(w0, w1, bit_shift);
        w1 = shl_double(w1, w2, bit_shift);
        w2 = shl_double(w2, w3, bit_shift);
        w3 <<= bit_shift;
    }

    a->frac_hi = w0;
    a->frac_hm = w1;
    a->frac_lm = w2;
    a->frac_lo = w3;
    return word_shift + bit_shift;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1218 
/*
 * Reduce A modulo B, in place in A.
 *
 * @a: dividend; a->frac and a->exp are read.  On return a->frac and
 *     a->exp hold the result, or a->cls is set to float_class_zero when
 *     the remainder is exactly zero.  a->sign may be flipped (see below).
 * @b: divisor; only b->frac and b->exp are read.
 * @mod_quot: selects the flavour of the operation.
 *     - non-NULL: *mod_quot receives the accumulated quotient (only the
 *       bits that fit in 64 bits) and the remainder is left as computed.
 *     - NULL: the remainder R is replaced by (B - R), with a->sign
 *       flipped, when (B - R) < R, or when they are equal and the last
 *       quotient word is odd.
 *
 * If the exponent of A is more than one below that of B, A is returned
 * unchanged and *mod_quot (if given) is set to 0.
 */
static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
{
    uint64_t a0, a1, b0, t0, t1, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    /* The running remainder is the 128-bit value a0:a1. */
    a0 = a->frac;
    a1 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        /* Align A with B by dropping one bit; continue as if exponents match. */
        a0 >>= 1;
        exp_diff = 0;
    }

    /* First quotient bit: one conditional subtraction of B. */
    b0 = b->frac;
    quot = q = b0 <= a0;
    if (q) {
        a0 -= b0;
    }

    /*
     * Consume the exponent difference 62 bits at a time.  Each estimate is
     * lowered by 2 (clamped at 0) before use.
     * NOTE(review): presumably this keeps the partial remainder
     * non-negative when estimateDiv128To64 overestimates -- confirm.
     */
    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? q - 2 : 0;
        mul64To128(b0, q, &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(a0, a1, 62, &a0, &a1);
        exp_diff -= 62;
        quot = (quot << 62) + q;
    }

    /* Final partial step for the remaining 1..64 bits, if any. */
    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
        mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        /* t0:t1 = B positioned to match the remainder. */
        shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
        /* Correct the lowered estimate by repeated subtraction. */
        while (le128(t0, t1, a0, a1)) {
            ++q;
            sub128(a0, a1, t0, t1, &a0, &a1);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        /* t = B - R; take it instead of R if smaller, or on a tie with odd q. */
        sub128(t0, t1, a0, a1, &t0, &t1);
        if (lt128(t0, t1, a0, a1) ||
            (eq128(t0, t1, a0, a1) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a->sign = !a->sign;
        }
    }

    /* Renormalize the 128-bit remainder so that the msb of a0 is set. */
    if (likely(a0)) {
        shift = clz64(a0);
        shortShift128Left(a0, a1, shift, &a0, &a1);
    } else if (likely(a1)) {
        shift = clz64(a1);
        a0 = a1 << shift;
        a1 = 0;
        shift += 64;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    /* Fold the discarded low word into the lsb as a sticky bit. */
    a->frac = a0 | (a1 != 0);
}
1301 
/*
 * 128-bit variant of frac64_modrem: reduce A modulo B, in place in A.
 *
 * @a: dividend; frac_hi/frac_lo and exp are read.  On return they hold
 *     the result, or a->cls is set to float_class_zero when the
 *     remainder is exactly zero.  a->sign may be flipped (see below).
 * @b: divisor; only its fraction words and exp are read.
 * @mod_quot: if non-NULL, receives the accumulated quotient (only the
 *     bits that fit in 64 bits) and the remainder is left as computed.
 *     If NULL, the remainder R is replaced by (B - R), with a->sign
 *     flipped, when (B - R) < R, or when they are equal and the last
 *     quotient word is odd.
 *
 * If the exponent of A is more than one below that of B, A is returned
 * unchanged and *mod_quot (if given) is set to 0.
 */
static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
                           uint64_t *mod_quot)
{
    uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    /* The running remainder is the 192-bit value a0:a1:a2. */
    a0 = a->frac_hi;
    a1 = a->frac_lo;
    a2 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        /* Align A with B by dropping one bit; continue as if exponents match. */
        shift128Right(a0, a1, 1, &a0, &a1);
        exp_diff = 0;
    }

    b0 = b->frac_hi;
    b1 = b->frac_lo;

    /* First quotient bit: one conditional subtraction of B. */
    quot = q = le128(b0, b1, a0, a1);
    if (q) {
        sub128(a0, a1, b0, b1, &a0, &a1);
    }

    /*
     * Consume the exponent difference 61 bits at a time.  Each estimate,
     * taken against the high word of B only, is lowered by 4 (clamped at
     * 0) before use.
     * NOTE(review): presumably this keeps the partial remainder
     * non-negative when estimateDiv128To64 overestimates -- confirm.
     */
    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? q - 4 : 0;
        mul128By64To192(b0, b1, q, &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
        exp_diff -= 61;
        quot = (quot << 61) + q;
    }

    /* Final partial step for the remaining 1..64 bits, if any. */
    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
        mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        /* t0:t1:t2 = B positioned to match the remainder. */
        shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
        /* Correct the lowered estimate by repeated subtraction. */
        while (le192(t0, t1, t2, a0, a1, a2)) {
            ++q;
            sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = b1;
        t2 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        /* t = B - R; take it instead of R if smaller, or on a tie with odd q. */
        sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
        if (lt192(t0, t1, t2, a0, a1, a2) ||
            (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a2 = t2;
            a->sign = !a->sign;
        }
    }

    /* Renormalize the 192-bit remainder so that the msb of a0 is set. */
    if (likely(a0)) {
        shift = clz64(a0);
        shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
    } else if (likely(a1)) {
        shift = clz64(a1);
        shortShift128Left(a1, a2, shift, &a0, &a1);
        a2 = 0;
        shift += 64;
    } else if (likely(a2)) {
        shift = clz64(a2);
        a0 = a2 << shift;
        a1 = a2 = 0;
        shift += 128;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac_hi = a0;
    /* Fold the discarded third word into the lsb as a sticky bit. */
    a->frac_lo = a1 | (a2 != 0);
}

/* Dispatch on the type of A. */
#define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1398 
frac64_shl(FloatParts64 * a,int c)1399 static void frac64_shl(FloatParts64 *a, int c)
1400 {
1401     a->frac <<= c;
1402 }
1403 
frac128_shl(FloatParts128 * a,int c)1404 static void frac128_shl(FloatParts128 *a, int c)
1405 {
1406     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1407 
1408     if (c & 64) {
1409         a0 = a1, a1 = 0;
1410     }
1411 
1412     c &= 63;
1413     if (c) {
1414         a0 = shl_double(a0, a1, c);
1415         a1 = a1 << c;
1416     }
1417 
1418     a->frac_hi = a0;
1419     a->frac_lo = a1;
1420 }
1421 
1422 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1423 
frac64_shr(FloatParts64 * a,int c)1424 static void frac64_shr(FloatParts64 *a, int c)
1425 {
1426     a->frac >>= c;
1427 }
1428 
frac128_shr(FloatParts128 * a,int c)1429 static void frac128_shr(FloatParts128 *a, int c)
1430 {
1431     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1432 
1433     if (c & 64) {
1434         a1 = a0, a0 = 0;
1435     }
1436 
1437     c &= 63;
1438     if (c) {
1439         a1 = shr_double(a0, a1, c);
1440         a0 = a0 >> c;
1441     }
1442 
1443     a->frac_hi = a0;
1444     a->frac_lo = a1;
1445 }
1446 
1447 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1448 
/*
 * Shift the fraction of A right by C bits, "jamming" the lost bits: the
 * lsb of the result is set if any bit shifted out was non-zero.
 *
 * C == 0 leaves A untouched.  For C >= 64 the result is just the sticky
 * bit, i.e. 1 if the fraction was non-zero and 0 otherwise.
 */
static void frac64_shrjam(FloatParts64 *a, int c)
{
    const uint64_t frac = a->frac;

    if (c == 0) {
        return;
    }

    if (likely(c < 64)) {
        /* shr_double(frac, 0, c) yields exactly the bits that fall off. */
        bool sticky = shr_double(frac, 0, c) != 0;

        a->frac = (frac >> c) | sticky;
    } else {
        a->frac = (frac != 0);
    }
}
1462 
/*
 * 128-bit variant of frac_shrjam: shift the fraction of A right by C
 * bits and set the lsb of frac_lo if any non-zero bit was shifted out.
 *
 * C == 0 leaves A untouched.  For C >= 128 the result is zero apart from
 * the sticky bit.
 */
static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t hi = a->frac_hi, lo = a->frac_lo;
    uint64_t lost = 0;

    if (unlikely(c == 0)) {
        return;
    }

    if (likely(c < 128)) {
        if (c >= 64) {
            /* Whole-word move: the old low word is lost entirely. */
            lost = lo;
            lo = hi;
            hi = 0;
            c &= 63;
        }
        if (c != 0) {
            /* Sub-word shift; collect the bits falling off the low word. */
            lost |= shr_double(lo, 0, c);
            lo = shr_double(hi, lo, c);
            hi = hi >> c;
        }
    } else {
        lost = hi | lo;
        hi = lo = 0;
    }

    a->frac_lo = lo | (lost != 0);
    a->frac_hi = hi;
}
1494 
/*
 * 256-bit variant of frac_shrjam: shift the fraction of A right by C
 * bits and set the lsb of frac_lo if any non-zero bit was shifted out.
 *
 * C == 0 leaves A untouched.  For C >= 256 the result is zero apart from
 * the sticky bit.  Otherwise the shift is decomposed into an optional
 * 128-bit move, an optional 64-bit move and a sub-word shift.
 */
static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t w0 = a->frac_hi, w1 = a->frac_hm;
    uint64_t w2 = a->frac_lm, w3 = a->frac_lo;
    uint64_t lost = 0;

    if (unlikely(c == 0)) {
        return;
    }

    if (likely(c < 256)) {
        if (c >= 64) {
            if (unlikely(c & 128)) {
                /* Drop the two low words. */
                lost |= w2 | w3;
                w3 = w1, w2 = w0, w1 = 0, w0 = 0;
            }
            if (unlikely(c & 64)) {
                /* Drop one more word. */
                lost |= w3;
                w3 = w2, w2 = w1, w1 = w0, w0 = 0;
            }
            c &= 63;
        }
        if (c != 0) {
            /* Sub-word shift; collect the bits falling off the low word. */
            lost |= shr_double(w3, 0, c);
            w3 = shr_double(w2, w3, c);
            w2 = shr_double(w1, w2, c);
            w1 = shr_double(w0, w1, c);
            w0 = w0 >> c;
        }
    } else {
        lost = w0 | w1 | w2 | w3;
        w0 = w1 = w2 = w3 = 0;
    }

    a->frac_lo = w3 | (lost != 0);
    a->frac_lm = w2;
    a->frac_hm = w1;
    a->frac_hi = w0;
}

#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1538 
frac64_sub(FloatParts64 * r,FloatParts64 * a,FloatParts64 * b)1539 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1540 {
1541     return usub64_overflow(a->frac, b->frac, &r->frac);
1542 }
1543 
frac128_sub(FloatParts128 * r,FloatParts128 * a,FloatParts128 * b)1544 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1545 {
1546     bool c = 0;
1547     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1548     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1549     return c;
1550 }
1551 
frac256_sub(FloatParts256 * r,FloatParts256 * a,FloatParts256 * b)1552 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1553 {
1554     bool c = 0;
1555     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1556     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1557     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1558     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1559     return c;
1560 }
1561 
1562 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1563 
frac64_truncjam(FloatParts64 * r,FloatParts128 * a)1564 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1565 {
1566     r->frac = a->frac_hi | (a->frac_lo != 0);
1567 }
1568 
frac128_truncjam(FloatParts128 * r,FloatParts256 * a)1569 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1570 {
1571     r->frac_hi = a->frac_hi;
1572     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1573 }
1574 
1575 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1576 
frac64_widen(FloatParts128 * r,FloatParts64 * a)1577 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1578 {
1579     r->frac_hi = a->frac;
1580     r->frac_lo = 0;
1581 }
1582 
frac128_widen(FloatParts256 * r,FloatParts128 * a)1583 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1584 {
1585     r->frac_hi = a->frac_hi;
1586     r->frac_hm = a->frac_lo;
1587     r->frac_lm = 0;
1588     r->frac_lo = 0;
1589 }
1590 
1591 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1592 
/*
 * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa,
 * giving the 128 entries below.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licensed.
 */
static const uint16_t rsqrt_tab[128] = {
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};
1616 
/*
 * Instantiate the templated implementations.
 *
 * The .c.inc files are written in terms of partsN(), FloatPartsN and
 * FloatPartsW; N is the width being instantiated and W the next wider
 * one.  glue() is defined outside this file.
 * NOTE(review): presumably glue() token-pastes its two arguments, so that
 * partsN(add) becomes parts64_add when N is 64 -- confirm.
 */
#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN    glue(FloatParts,N)
#define FloatPartsW    glue(FloatParts,W)

/* Pass 1: 64-bit parts, widening to 128 bits. */
#define N 64
#define W 128

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

/* Pass 2: 128-bit parts, widening to 256 bits. */
#undef  N
#undef  W
#define N 128
#define W 256

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

/*
 * Pass 3: 256-bit parts.  Only the add/sub template is included and no
 * wider type W is defined for this pass.
 */
#undef  N
#undef  W
#define N            256

#include "softfloat-parts-addsub.c.inc"

/*
 * Tear down the template parameters.  W is not defined at this point;
 * #undef of an undefined macro is harmless.
 */
#undef  N
#undef  W
#undef  partsN
#undef  FloatPartsN
#undef  FloatPartsW
1646 
1647 /*
1648  * Pack/unpack routines with a specific FloatFmt.
1649  */
1650 
/*
 * Unpack the float16 value F into P and canonicalize it according to the
 * caller-supplied format PARAMS.
 */
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}

/* As float16a_unpack_canonical, with the format fixed to float16_params. */
static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}

/* Unpack the bfloat16 value F into P and canonicalize it with bfloat16_params. */
static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}
1670 
float16a_round_pack_canonical(FloatParts64 * p,float_status * s,const FloatFmt * params)1671 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1672                                              float_status *s,
1673                                              const FloatFmt *params)
1674 {
1675     parts_uncanon(p, s, params);
1676     return float16_pack_raw(p);
1677 }
1678 
float16_round_pack_canonical(FloatParts64 * p,float_status * s)1679 static float16 float16_round_pack_canonical(FloatParts64 *p,
1680                                             float_status *s)
1681 {
1682     return float16a_round_pack_canonical(p, s, &float16_params);
1683 }
1684 
bfloat16_round_pack_canonical(FloatParts64 * p,float_status * s)1685 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1686                                               float_status *s)
1687 {
1688     parts_uncanon(p, s, &bfloat16_params);
1689     return bfloat16_pack_raw(p);
1690 }
1691 
/*
 * Unpack the float32 value f into p with float32_unpack_raw(), then
 * canonicalize p using float32_params.
 */
static void
float32_unpack_canonical(FloatParts64 *p, float32 f, float_status *s)
{
    const FloatFmt *fmt = &float32_params;

    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, fmt);
}
1698 
float32_round_pack_canonical(FloatParts64 * p,float_status * s)1699 static float32 float32_round_pack_canonical(FloatParts64 *p,
1700                                             float_status *s)
1701 {
1702     parts_uncanon(p, s, &float32_params);
1703     return float32_pack_raw(p);
1704 }
1705 
/*
 * Unpack the float64 value f into p with float64_unpack_raw(), then
 * canonicalize p using float64_params.
 */
static void
float64_unpack_canonical(FloatParts64 *p, float64 f, float_status *s)
{
    const FloatFmt *fmt = &float64_params;

    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, fmt);
}
1712 
float64_round_pack_canonical(FloatParts64 * p,float_status * s)1713 static float64 float64_round_pack_canonical(FloatParts64 *p,
1714                                             float_status *s)
1715 {
1716     parts_uncanon(p, s, &float64_params);
1717     return float64_pack_raw(p);
1718 }
1719 
float64r32_round_pack_canonical(FloatParts64 * p,float_status * s)1720 static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1721                                                float_status *s)
1722 {
1723     parts_uncanon(p, s, &float32_params);
1724 
1725     /*
1726      * In parts_uncanon, we placed the fraction for float32 at the lsb.
1727      * We need to adjust the fraction higher so that the least N bits are
1728      * zero, and the fraction is adjacent to the float64 implicit bit.
1729      */
1730     switch (p->cls) {
1731     case float_class_normal:
1732         if (unlikely(p->exp == 0)) {
1733             /*
1734              * The result is denormal for float32, but can be represented
1735              * in normalized form for float64.  Adjust, per canonicalize.
1736              */
1737             int shift = frac_normalize(p);
1738             p->exp = (float32_params.frac_shift -
1739                       float32_params.exp_bias - shift + 1 +
1740                       float64_params.exp_bias);
1741             frac_shr(p, float64_params.frac_shift);
1742         } else {
1743             frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1744             p->exp += float64_params.exp_bias - float32_params.exp_bias;
1745         }
1746         break;
1747     case float_class_snan:
1748     case float_class_qnan:
1749         frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1750         p->exp = float64_params.exp_max;
1751         break;
1752     case float_class_inf:
1753         p->exp = float64_params.exp_max;
1754         break;
1755     case float_class_zero:
1756         break;
1757     default:
1758         g_assert_not_reached();
1759     }
1760 
1761     return float64_pack_raw(p);
1762 }
1763 
/*
 * Unpack the float128 value f into p with float128_unpack_raw(), then
 * canonicalize p using float128_params.
 */
static void
float128_unpack_canonical(FloatParts128 *p, float128 f, float_status *s)
{
    const FloatFmt *fmt = &float128_params;

    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, fmt);
}
1770 
float128_round_pack_canonical(FloatParts128 * p,float_status * s)1771 static float128 float128_round_pack_canonical(FloatParts128 *p,
1772                                               float_status *s)
1773 {
1774     parts_uncanon(p, s, &float128_params);
1775     return float128_pack_raw(p);
1776 }
1777 
1778 /* Returns false if the encoding is invalid. */
floatx80_unpack_canonical(FloatParts128 * p,floatx80 f,float_status * s)1779 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1780                                       float_status *s)
1781 {
1782     /* Ensure rounding precision is set before beginning. */
1783     switch (s->floatx80_rounding_precision) {
1784     case floatx80_precision_x:
1785     case floatx80_precision_d:
1786     case floatx80_precision_s:
1787         break;
1788     default:
1789         g_assert_not_reached();
1790     }
1791 
1792     if (unlikely(floatx80_invalid_encoding(f))) {
1793         float_raise(float_flag_invalid, s);
1794         return false;
1795     }
1796 
1797     floatx80_unpack_raw(p, f);
1798 
1799     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1800         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1801     } else {
1802         /* The explicit integer bit is ignored, after invalid checks. */
1803         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1804         p->cls = (p->frac_hi == 0 ? float_class_inf
1805                   : parts_is_snan_frac(p->frac_hi, s)
1806                   ? float_class_snan : float_class_qnan);
1807     }
1808     return true;
1809 }
1810 
floatx80_round_pack_canonical(FloatParts128 * p,float_status * s)1811 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1812                                               float_status *s)
1813 {
1814     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1815     uint64_t frac;
1816     int exp;
1817 
1818     switch (p->cls) {
1819     case float_class_normal:
1820         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1821             parts_uncanon_normal(p, s, fmt);
1822             frac = p->frac_hi;
1823             exp = p->exp;
1824         } else {
1825             FloatParts64 p64;
1826 
1827             p64.sign = p->sign;
1828             p64.exp = p->exp;
1829             frac_truncjam(&p64, p);
1830             parts_uncanon_normal(&p64, s, fmt);
1831             frac = p64.frac;
1832             exp = p64.exp;
1833         }
1834         if (exp != fmt->exp_max) {
1835             break;
1836         }
1837         /* rounded to inf -- fall through to set frac correctly */
1838 
1839     case float_class_inf:
1840         /* x86 and m68k differ in the setting of the integer bit. */
1841         frac = floatx80_infinity_low;
1842         exp = fmt->exp_max;
1843         break;
1844 
1845     case float_class_zero:
1846         frac = 0;
1847         exp = 0;
1848         break;
1849 
1850     case float_class_snan:
1851     case float_class_qnan:
1852         /* NaNs have the integer bit set. */
1853         frac = p->frac_hi | (1ull << 63);
1854         exp = fmt->exp_max;
1855         break;
1856 
1857     default:
1858         g_assert_not_reached();
1859     }
1860 
1861     return packFloatx80(p->sign, exp, frac);
1862 }
1863 
1864 /*
1865  * Addition and subtraction
1866  */
1867 
1868 static float16 QEMU_FLATTEN
float16_addsub(float16 a,float16 b,float_status * status,bool subtract)1869 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1870 {
1871     FloatParts64 pa, pb, *pr;
1872 
1873     float16_unpack_canonical(&pa, a, status);
1874     float16_unpack_canonical(&pb, b, status);
1875     pr = parts_addsub(&pa, &pb, status, subtract);
1876 
1877     return float16_round_pack_canonical(pr, status);
1878 }
1879 
/* float16 addition: float16_addsub() with subtract = false. */
float16 float16_add(float16 a, float16 b, float_status *status)
{
    const bool subtract = false;

    return float16_addsub(a, b, status, subtract);
}
1884 
/* float16 subtraction: float16_addsub() with subtract = true. */
float16 float16_sub(float16 a, float16 b, float_status *status)
{
    const bool subtract = true;

    return float16_addsub(a, b, status, subtract);
}
1889 
1890 static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_addsub(float32 a,float32 b,float_status * status,bool subtract)1891 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1892 {
1893     FloatParts64 pa, pb, *pr;
1894 
1895     float32_unpack_canonical(&pa, a, status);
1896     float32_unpack_canonical(&pb, b, status);
1897     pr = parts_addsub(&pa, &pb, status, subtract);
1898 
1899     return float32_round_pack_canonical(pr, status);
1900 }
1901 
soft_f32_add(float32 a,float32 b,float_status * status)1902 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1903 {
1904     return soft_f32_addsub(a, b, status, false);
1905 }
1906 
soft_f32_sub(float32 a,float32 b,float_status * status)1907 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1908 {
1909     return soft_f32_addsub(a, b, status, true);
1910 }
1911 
1912 static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_addsub(float64 a,float64 b,float_status * status,bool subtract)1913 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1914 {
1915     FloatParts64 pa, pb, *pr;
1916 
1917     float64_unpack_canonical(&pa, a, status);
1918     float64_unpack_canonical(&pb, b, status);
1919     pr = parts_addsub(&pa, &pb, status, subtract);
1920 
1921     return float64_round_pack_canonical(pr, status);
1922 }
1923 
soft_f64_add(float64 a,float64 b,float_status * status)1924 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1925 {
1926     return soft_f64_addsub(a, b, status, false);
1927 }
1928 
soft_f64_sub(float64 a,float64 b,float_status * status)1929 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1930 {
1931     return soft_f64_addsub(a, b, status, true);
1932 }
1933 
/* Host single-precision addition: returns a + b. */
static float hard_f32_add(float a, float b)
{
    const float sum = a + b;

    return sum;
}
1938 
/* Host single-precision subtraction: returns a - b. */
static float hard_f32_sub(float a, float b)
{
    const float diff = a - b;

    return diff;
}
1943 
/* Host double-precision addition: returns a + b. */
static double hard_f64_add(double a, double b)
{
    const double sum = a + b;

    return sum;
}
1948 
/* Host double-precision subtraction: returns a - b. */
static double hard_f64_sub(double a, double b)
{
    const double diff = a - b;

    return diff;
}
1953 
/*
 * Returns false only when both a and b are zero, true otherwise.  The
 * zero test uses the host fpclassify() when QEMU_HARDFLOAT_2F32_USE_FP
 * is set and float32_is_zero() otherwise.
 */
static bool f32_addsubmul_post(union_float32 a, union_float32 b)
{
    bool both_zero;

    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        both_zero = fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO;
    } else {
        both_zero = float32_is_zero(a.s) && float32_is_zero(b.s);
    }
    return !both_zero;
}
1961 
/*
 * Returns true when at least one of a and b is non-zero, false when both
 * are zero.  The zero test uses the host fpclassify() when
 * QEMU_HARDFLOAT_2F64_USE_FP is set and float64_is_zero() otherwise.
 */
static bool f64_addsubmul_post(union_float64 a, union_float64 b)
{
    if (!QEMU_HARDFLOAT_2F64_USE_FP) {
        return !float64_is_zero(a.s) || !float64_is_zero(b.s);
    }
    return fpclassify(a.h) != FP_ZERO || fpclassify(b.h) != FP_ZERO;
}
1970 
float32_addsub(float32 a,float32 b,float_status * s,hard_f32_op2_fn hard,soft_f32_op2_fn soft)1971 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1972                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1973 {
1974     return float32_gen2(a, b, s, hard, soft,
1975                         f32_is_zon2, f32_addsubmul_post);
1976 }
1977 
float64_addsub(float64 a,float64 b,float_status * s,hard_f64_op2_fn hard,soft_f64_op2_fn soft)1978 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1979                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1980 {
1981     return float64_gen2(a, b, s, hard, soft,
1982                         f64_is_zon2, f64_addsubmul_post);
1983 }
1984 
1985 float32 QEMU_FLATTEN
float32_add(float32 a,float32 b,float_status * s)1986 float32_add(float32 a, float32 b, float_status *s)
1987 {
1988     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1989 }
1990 
1991 float32 QEMU_FLATTEN
float32_sub(float32 a,float32 b,float_status * s)1992 float32_sub(float32 a, float32 b, float_status *s)
1993 {
1994     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1995 }
1996 
1997 float64 QEMU_FLATTEN
float64_add(float64 a,float64 b,float_status * s)1998 float64_add(float64 a, float64 b, float_status *s)
1999 {
2000     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
2001 }
2002 
2003 float64 QEMU_FLATTEN
float64_sub(float64 a,float64 b,float_status * s)2004 float64_sub(float64 a, float64 b, float_status *s)
2005 {
2006     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
2007 }
2008 
float64r32_addsub(float64 a,float64 b,float_status * status,bool subtract)2009 static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
2010                                  bool subtract)
2011 {
2012     FloatParts64 pa, pb, *pr;
2013 
2014     float64_unpack_canonical(&pa, a, status);
2015     float64_unpack_canonical(&pb, b, status);
2016     pr = parts_addsub(&pa, &pb, status, subtract);
2017 
2018     return float64r32_round_pack_canonical(pr, status);
2019 }
2020 
/* float64r32 addition: float64r32_addsub() with subtract = false. */
float64 float64r32_add(float64 a, float64 b, float_status *status)
{
    const bool subtract = false;

    return float64r32_addsub(a, b, status, subtract);
}
2025 
/* float64r32 subtraction: float64r32_addsub() with subtract = true. */
float64 float64r32_sub(float64 a, float64 b, float_status *status)
{
    const bool subtract = true;

    return float64r32_addsub(a, b, status, subtract);
}
2030 
2031 static bfloat16 QEMU_FLATTEN
bfloat16_addsub(bfloat16 a,bfloat16 b,float_status * status,bool subtract)2032 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2033 {
2034     FloatParts64 pa, pb, *pr;
2035 
2036     bfloat16_unpack_canonical(&pa, a, status);
2037     bfloat16_unpack_canonical(&pb, b, status);
2038     pr = parts_addsub(&pa, &pb, status, subtract);
2039 
2040     return bfloat16_round_pack_canonical(pr, status);
2041 }
2042 
/* bfloat16 addition: bfloat16_addsub() with subtract = false. */
bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
{
    const bool subtract = false;

    return bfloat16_addsub(a, b, status, subtract);
}
2047 
/* bfloat16 subtraction: bfloat16_addsub() with subtract = true. */
bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
{
    const bool subtract = true;

    return bfloat16_addsub(a, b, status, subtract);
}
2052 
2053 static float128 QEMU_FLATTEN
float128_addsub(float128 a,float128 b,float_status * status,bool subtract)2054 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2055 {
2056     FloatParts128 pa, pb, *pr;
2057 
2058     float128_unpack_canonical(&pa, a, status);
2059     float128_unpack_canonical(&pb, b, status);
2060     pr = parts_addsub(&pa, &pb, status, subtract);
2061 
2062     return float128_round_pack_canonical(pr, status);
2063 }
2064 
/* float128 addition: float128_addsub() with subtract = false. */
float128 float128_add(float128 a, float128 b, float_status *status)
{
    const bool subtract = false;

    return float128_addsub(a, b, status, subtract);
}
2069 
/* float128 subtraction: float128_addsub() with subtract = true. */
float128 float128_sub(float128 a, float128 b, float_status *status)
{
    const bool subtract = true;

    return float128_addsub(a, b, status, subtract);
}
2074 
2075 static floatx80 QEMU_FLATTEN
floatx80_addsub(floatx80 a,floatx80 b,float_status * status,bool subtract)2076 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2077 {
2078     FloatParts128 pa, pb, *pr;
2079 
2080     if (!floatx80_unpack_canonical(&pa, a, status) ||
2081         !floatx80_unpack_canonical(&pb, b, status)) {
2082         return floatx80_default_nan(status);
2083     }
2084 
2085     pr = parts_addsub(&pa, &pb, status, subtract);
2086     return floatx80_round_pack_canonical(pr, status);
2087 }
2088 
/* floatx80 addition: floatx80_addsub() with subtract = false. */
floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
{
    const bool subtract = false;

    return floatx80_addsub(a, b, status, subtract);
}
2093 
/* floatx80 subtraction: floatx80_addsub() with subtract = true. */
floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
{
    const bool subtract = true;

    return floatx80_addsub(a, b, status, subtract);
}
2098 
2099 /*
2100  * Multiplication
2101  */
2102 
float16_mul(float16 a,float16 b,float_status * status)2103 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2104 {
2105     FloatParts64 pa, pb, *pr;
2106 
2107     float16_unpack_canonical(&pa, a, status);
2108     float16_unpack_canonical(&pb, b, status);
2109     pr = parts_mul(&pa, &pb, status);
2110 
2111     return float16_round_pack_canonical(pr, status);
2112 }
2113 
2114 static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_mul(float32 a,float32 b,float_status * status)2115 soft_f32_mul(float32 a, float32 b, float_status *status)
2116 {
2117     FloatParts64 pa, pb, *pr;
2118 
2119     float32_unpack_canonical(&pa, a, status);
2120     float32_unpack_canonical(&pb, b, status);
2121     pr = parts_mul(&pa, &pb, status);
2122 
2123     return float32_round_pack_canonical(pr, status);
2124 }
2125 
2126 static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_mul(float64 a,float64 b,float_status * status)2127 soft_f64_mul(float64 a, float64 b, float_status *status)
2128 {
2129     FloatParts64 pa, pb, *pr;
2130 
2131     float64_unpack_canonical(&pa, a, status);
2132     float64_unpack_canonical(&pb, b, status);
2133     pr = parts_mul(&pa, &pb, status);
2134 
2135     return float64_round_pack_canonical(pr, status);
2136 }
2137 
/* Host single-precision multiplication: returns a * b. */
static float hard_f32_mul(float a, float b)
{
    const float product = a * b;

    return product;
}
2142 
/* Host double-precision multiplication: returns a * b. */
static double hard_f64_mul(double a, double b)
{
    const double product = a * b;

    return product;
}
2147 
2148 float32 QEMU_FLATTEN
float32_mul(float32 a,float32 b,float_status * s)2149 float32_mul(float32 a, float32 b, float_status *s)
2150 {
2151     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2152                         f32_is_zon2, f32_addsubmul_post);
2153 }
2154 
2155 float64 QEMU_FLATTEN
float64_mul(float64 a,float64 b,float_status * s)2156 float64_mul(float64 a, float64 b, float_status *s)
2157 {
2158     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2159                         f64_is_zon2, f64_addsubmul_post);
2160 }
2161 
/*
 * Multiply two float64 operands, rounding the product through
 * float64r32_round_pack_canonical() instead of the plain float64 packer.
 */
float64 float64r32_mul(float64 a, float64 b, float_status *status)
{
    FloatParts64 lhs;
    FloatParts64 rhs;
    FloatParts64 *prod;

    float64_unpack_canonical(&lhs, a, status);
    float64_unpack_canonical(&rhs, b, status);

    prod = parts_mul(&lhs, &rhs, status);
    return float64r32_round_pack_canonical(prod, status);
}
2172 
2173 bfloat16 QEMU_FLATTEN
bfloat16_mul(bfloat16 a,bfloat16 b,float_status * status)2174 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2175 {
2176     FloatParts64 pa, pb, *pr;
2177 
2178     bfloat16_unpack_canonical(&pa, a, status);
2179     bfloat16_unpack_canonical(&pb, b, status);
2180     pr = parts_mul(&pa, &pb, status);
2181 
2182     return bfloat16_round_pack_canonical(pr, status);
2183 }
2184 
2185 float128 QEMU_FLATTEN
float128_mul(float128 a,float128 b,float_status * status)2186 float128_mul(float128 a, float128 b, float_status *status)
2187 {
2188     FloatParts128 pa, pb, *pr;
2189 
2190     float128_unpack_canonical(&pa, a, status);
2191     float128_unpack_canonical(&pb, b, status);
2192     pr = parts_mul(&pa, &pb, status);
2193 
2194     return float128_round_pack_canonical(pr, status);
2195 }
2196 
2197 floatx80 QEMU_FLATTEN
floatx80_mul(floatx80 a,floatx80 b,float_status * status)2198 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2199 {
2200     FloatParts128 pa, pb, *pr;
2201 
2202     if (!floatx80_unpack_canonical(&pa, a, status) ||
2203         !floatx80_unpack_canonical(&pb, b, status)) {
2204         return floatx80_default_nan(status);
2205     }
2206 
2207     pr = parts_mul(&pa, &pb, status);
2208     return floatx80_round_pack_canonical(pr, status);
2209 }
2210 
2211 /*
2212  * Fused multiply-add
2213  */
2214 
float16_muladd(float16 a,float16 b,float16 c,int flags,float_status * status)2215 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
2216                                     int flags, float_status *status)
2217 {
2218     FloatParts64 pa, pb, pc, *pr;
2219 
2220     float16_unpack_canonical(&pa, a, status);
2221     float16_unpack_canonical(&pb, b, status);
2222     float16_unpack_canonical(&pc, c, status);
2223     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2224 
2225     return float16_round_pack_canonical(pr, status);
2226 }
2227 
2228 static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_muladd(float32 a,float32 b,float32 c,int flags,float_status * status)2229 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2230                 float_status *status)
2231 {
2232     FloatParts64 pa, pb, pc, *pr;
2233 
2234     float32_unpack_canonical(&pa, a, status);
2235     float32_unpack_canonical(&pb, b, status);
2236     float32_unpack_canonical(&pc, c, status);
2237     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2238 
2239     return float32_round_pack_canonical(pr, status);
2240 }
2241 
2242 static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_muladd(float64 a,float64 b,float64 c,int flags,float_status * status)2243 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2244                 float_status *status)
2245 {
2246     FloatParts64 pa, pb, pc, *pr;
2247 
2248     float64_unpack_canonical(&pa, a, status);
2249     float64_unpack_canonical(&pb, b, status);
2250     float64_unpack_canonical(&pc, c, status);
2251     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2252 
2253     return float64_round_pack_canonical(pr, status);
2254 }
2255 
/*
 * When true, float32_muladd() and float64_muladd() skip the host FMA and
 * always use the softfloat implementation.  Zero-initialized (false);
 * NOTE(review): the code that sets it is outside this part of the file.
 */
static bool force_soft_fma;
2257 
2258 float32 QEMU_FLATTEN
float32_muladd(float32 xa,float32 xb,float32 xc,int flags,float_status * s)2259 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2260 {
2261     union_float32 ua, ub, uc, ur;
2262 
2263     ua.s = xa;
2264     ub.s = xb;
2265     uc.s = xc;
2266 
2267     if (unlikely(!can_use_fpu(s))) {
2268         goto soft;
2269     }
2270     if (unlikely(flags & float_muladd_halve_result)) {
2271         goto soft;
2272     }
2273 
2274     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2275     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2276         goto soft;
2277     }
2278 
2279     if (unlikely(force_soft_fma)) {
2280         goto soft;
2281     }
2282 
2283     /*
2284      * When (a || b) == 0, there's no need to check for under/over flow,
2285      * since we know the addend is (normal || 0) and the product is 0.
2286      */
2287     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2288         union_float32 up;
2289         bool prod_sign;
2290 
2291         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2292         prod_sign ^= !!(flags & float_muladd_negate_product);
2293         up.s = float32_set_sign(float32_zero, prod_sign);
2294 
2295         if (flags & float_muladd_negate_c) {
2296             uc.h = -uc.h;
2297         }
2298         ur.h = up.h + uc.h;
2299     } else {
2300         union_float32 ua_orig = ua;
2301         union_float32 uc_orig = uc;
2302 
2303         if (flags & float_muladd_negate_product) {
2304             ua.h = -ua.h;
2305         }
2306         if (flags & float_muladd_negate_c) {
2307             uc.h = -uc.h;
2308         }
2309 
2310         ur.h = fmaf(ua.h, ub.h, uc.h);
2311 
2312         if (unlikely(f32_is_inf(ur))) {
2313             float_raise(float_flag_overflow, s);
2314         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2315             ua = ua_orig;
2316             uc = uc_orig;
2317             goto soft;
2318         }
2319     }
2320     if (flags & float_muladd_negate_result) {
2321         return float32_chs(ur.s);
2322     }
2323     return ur.s;
2324 
2325  soft:
2326     return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2327 }
2328 
2329 float64 QEMU_FLATTEN
float64_muladd(float64 xa,float64 xb,float64 xc,int flags,float_status * s)2330 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2331 {
2332     union_float64 ua, ub, uc, ur;
2333 
2334     ua.s = xa;
2335     ub.s = xb;
2336     uc.s = xc;
2337 
2338     if (unlikely(!can_use_fpu(s))) {
2339         goto soft;
2340     }
2341     if (unlikely(flags & float_muladd_halve_result)) {
2342         goto soft;
2343     }
2344 
2345     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2346     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2347         goto soft;
2348     }
2349 
2350     if (unlikely(force_soft_fma)) {
2351         goto soft;
2352     }
2353 
2354     /*
2355      * When (a || b) == 0, there's no need to check for under/over flow,
2356      * since we know the addend is (normal || 0) and the product is 0.
2357      */
2358     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2359         union_float64 up;
2360         bool prod_sign;
2361 
2362         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2363         prod_sign ^= !!(flags & float_muladd_negate_product);
2364         up.s = float64_set_sign(float64_zero, prod_sign);
2365 
2366         if (flags & float_muladd_negate_c) {
2367             uc.h = -uc.h;
2368         }
2369         ur.h = up.h + uc.h;
2370     } else {
2371         union_float64 ua_orig = ua;
2372         union_float64 uc_orig = uc;
2373 
2374         if (flags & float_muladd_negate_product) {
2375             ua.h = -ua.h;
2376         }
2377         if (flags & float_muladd_negate_c) {
2378             uc.h = -uc.h;
2379         }
2380 
2381         ur.h = fma(ua.h, ub.h, uc.h);
2382 
2383         if (unlikely(f64_is_inf(ur))) {
2384             float_raise(float_flag_overflow, s);
2385         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2386             ua = ua_orig;
2387             uc = uc_orig;
2388             goto soft;
2389         }
2390     }
2391     if (flags & float_muladd_negate_result) {
2392         return float64_chs(ur.s);
2393     }
2394     return ur.s;
2395 
2396  soft:
2397     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2398 }
2399 
/*
 * Fused multiply-add on float64 operands, rounding the result through
 * float64r32_round_pack_canonical() instead of the plain float64 packer.
 */
float64
float64r32_muladd(float64 a, float64 b, float64 c, int flags,
                  float_status *status)
{
    FloatParts64 mul_a;
    FloatParts64 mul_b;
    FloatParts64 addend;
    FloatParts64 *res;

    float64_unpack_canonical(&mul_a, a, status);
    float64_unpack_canonical(&mul_b, b, status);
    float64_unpack_canonical(&addend, c, status);

    res = parts_muladd(&mul_a, &mul_b, &addend, flags, status);
    return float64r32_round_pack_canonical(res, status);
}
2412 
bfloat16_muladd(bfloat16 a,bfloat16 b,bfloat16 c,int flags,float_status * status)2413 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2414                                       int flags, float_status *status)
2415 {
2416     FloatParts64 pa, pb, pc, *pr;
2417 
2418     bfloat16_unpack_canonical(&pa, a, status);
2419     bfloat16_unpack_canonical(&pb, b, status);
2420     bfloat16_unpack_canonical(&pc, c, status);
2421     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2422 
2423     return bfloat16_round_pack_canonical(pr, status);
2424 }
2425 
float128_muladd(float128 a,float128 b,float128 c,int flags,float_status * status)2426 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2427                                       int flags, float_status *status)
2428 {
2429     FloatParts128 pa, pb, pc, *pr;
2430 
2431     float128_unpack_canonical(&pa, a, status);
2432     float128_unpack_canonical(&pb, b, status);
2433     float128_unpack_canonical(&pc, c, status);
2434     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2435 
2436     return float128_round_pack_canonical(pr, status);
2437 }
2438 
2439 /*
2440  * Division
2441  */
2442 
/*
 * float16 division: unpack both operands, divide them with parts_div(),
 * then round and pack the quotient.
 */
float16 float16_div(float16 a, float16 b, float_status *status)
{
    FloatParts64 num;
    FloatParts64 den;
    FloatParts64 *quot;

    float16_unpack_canonical(&num, a, status);
    float16_unpack_canonical(&den, b, status);

    quot = parts_div(&num, &den, status);
    return float16_round_pack_canonical(quot, status);
}
2453 
2454 static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_div(float32 a,float32 b,float_status * status)2455 soft_f32_div(float32 a, float32 b, float_status *status)
2456 {
2457     FloatParts64 pa, pb, *pr;
2458 
2459     float32_unpack_canonical(&pa, a, status);
2460     float32_unpack_canonical(&pb, b, status);
2461     pr = parts_div(&pa, &pb, status);
2462 
2463     return float32_round_pack_canonical(pr, status);
2464 }
2465 
2466 static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_div(float64 a,float64 b,float_status * status)2467 soft_f64_div(float64 a, float64 b, float_status *status)
2468 {
2469     FloatParts64 pa, pb, *pr;
2470 
2471     float64_unpack_canonical(&pa, a, status);
2472     float64_unpack_canonical(&pb, b, status);
2473     pr = parts_div(&pa, &pb, status);
2474 
2475     return float64_round_pack_canonical(pr, status);
2476 }
2477 
/* Host single-precision division: returns a / b. */
static float hard_f32_div(float a, float b)
{
    const float quotient = a / b;

    return quotient;
}
2482 
/* Host double-precision division: returns a / b. */
static double hard_f64_div(double a, double b)
{
    const double quotient = a / b;

    return quotient;
}
2487 
/*
 * Returns true when a is zero or normal and b is normal.  Uses the host
 * fpclassify() when QEMU_HARDFLOAT_2F32_USE_FP is set and the float32
 * classification helpers otherwise.
 */
static bool f32_div_pre(union_float32 a, union_float32 b)
{
    if (!QEMU_HARDFLOAT_2F32_USE_FP) {
        return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
    }

    if (fpclassify(a.h) != FP_NORMAL && fpclassify(a.h) != FP_ZERO) {
        return false;
    }
    return fpclassify(b.h) == FP_NORMAL;
}
2496 
/*
 * Returns true when a is zero or normal and b is normal.  Uses the host
 * fpclassify() when QEMU_HARDFLOAT_2F64_USE_FP is set and the float64
 * classification helpers otherwise.
 */
static bool f64_div_pre(union_float64 a, union_float64 b)
{
    if (!QEMU_HARDFLOAT_2F64_USE_FP) {
        return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
    }

    if (fpclassify(a.h) != FP_NORMAL && fpclassify(a.h) != FP_ZERO) {
        return false;
    }
    return fpclassify(b.h) == FP_NORMAL;
}
2505 
/*
 * Returns true when a is non-zero; b is not examined.  Uses the host
 * fpclassify() when QEMU_HARDFLOAT_2F32_USE_FP is set and
 * float32_is_zero() otherwise.
 */
static bool f32_div_post(union_float32 a, union_float32 b)
{
    bool a_is_zero;

    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        a_is_zero = fpclassify(a.h) == FP_ZERO;
    } else {
        a_is_zero = float32_is_zero(a.s);
    }
    return !a_is_zero;
}
2513 
/*
 * Returns true when a is non-zero; b is not examined.  Uses the host
 * fpclassify() when QEMU_HARDFLOAT_2F64_USE_FP is set and
 * float64_is_zero() otherwise.
 */
static bool f64_div_post(union_float64 a, union_float64 b)
{
    bool a_is_zero;

    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        a_is_zero = fpclassify(a.h) == FP_ZERO;
    } else {
        a_is_zero = float64_is_zero(a.s);
    }
    return !a_is_zero;
}
2521 
2522 float32 QEMU_FLATTEN
float32_div(float32 a,float32 b,float_status * s)2523 float32_div(float32 a, float32 b, float_status *s)
2524 {
2525     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2526                         f32_div_pre, f32_div_post);
2527 }
2528 
2529 float64 QEMU_FLATTEN
float64_div(float64 a,float64 b,float_status * s)2530 float64_div(float64 a, float64 b, float_status *s)
2531 {
2532     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2533                         f64_div_pre, f64_div_post);
2534 }
2535 
/*
 * Divide two float64 operands, rounding the quotient through
 * float64r32_round_pack_canonical() instead of the plain float64 packer.
 */
float64 float64r32_div(float64 a, float64 b, float_status *status)
{
    FloatParts64 num;
    FloatParts64 den;
    FloatParts64 *quot;

    float64_unpack_canonical(&num, a, status);
    float64_unpack_canonical(&den, b, status);

    quot = parts_div(&num, &den, status);
    return float64r32_round_pack_canonical(quot, status);
}
2546 
2547 bfloat16 QEMU_FLATTEN
bfloat16_div(bfloat16 a,bfloat16 b,float_status * status)2548 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2549 {
2550     FloatParts64 pa, pb, *pr;
2551 
2552     bfloat16_unpack_canonical(&pa, a, status);
2553     bfloat16_unpack_canonical(&pb, b, status);
2554     pr = parts_div(&pa, &pb, status);
2555 
2556     return bfloat16_round_pack_canonical(pr, status);
2557 }
2558 
2559 float128 QEMU_FLATTEN
float128_div(float128 a,float128 b,float_status * status)2560 float128_div(float128 a, float128 b, float_status *status)
2561 {
2562     FloatParts128 pa, pb, *pr;
2563 
2564     float128_unpack_canonical(&pa, a, status);
2565     float128_unpack_canonical(&pb, b, status);
2566     pr = parts_div(&pa, &pb, status);
2567 
2568     return float128_round_pack_canonical(pr, status);
2569 }
2570 
/*
 * floatx80 division.  If either operand fails to unpack, the default NaN
 * is returned; otherwise the quotient from parts_div() is rounded and
 * packed back to floatx80.
 */
floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    FloatParts128 lhs, rhs;
    FloatParts128 *res;

    /* @b is only unpacked when @a unpacked successfully. */
    if (!floatx80_unpack_canonical(&lhs, a, status)) {
        return floatx80_default_nan(status);
    }
    if (!floatx80_unpack_canonical(&rhs, b, status)) {
        return floatx80_default_nan(status);
    }

    res = parts_div(&lhs, &rhs, status);
    return floatx80_round_pack_canonical(res, status);
}
2583 
2584 /*
2585  * Remainder
2586  */
2587 
/*
 * float32 remainder: computed by parts_modrem() with no quotient output
 * requested, then rounded and packed back to float32.
 */
float32 float32_rem(float32 a, float32 b, float_status *status)
{
    FloatParts64 lhs, rhs;
    FloatParts64 *res;

    float32_unpack_canonical(&lhs, a, status);
    float32_unpack_canonical(&rhs, b, status);

    res = parts_modrem(&lhs, &rhs, NULL, status);
    return float32_round_pack_canonical(res, status);
}
2598 
/*
 * float64 remainder: computed by parts_modrem() with no quotient output
 * requested, then rounded and packed back to float64.
 */
float64 float64_rem(float64 a, float64 b, float_status *status)
{
    FloatParts64 lhs, rhs;
    FloatParts64 *res;

    float64_unpack_canonical(&lhs, a, status);
    float64_unpack_canonical(&rhs, b, status);

    res = parts_modrem(&lhs, &rhs, NULL, status);
    return float64_round_pack_canonical(res, status);
}
2609 
/*
 * float128 remainder: computed by parts_modrem() with no quotient output
 * requested, then rounded and packed back to float128.
 */
float128 float128_rem(float128 a, float128 b, float_status *status)
{
    FloatParts128 lhs, rhs;
    FloatParts128 *res;

    float128_unpack_canonical(&lhs, a, status);
    float128_unpack_canonical(&rhs, b, status);

    res = parts_modrem(&lhs, &rhs, NULL, status);
    return float128_round_pack_canonical(res, status);
}
2620 
2621 /*
2622  * Returns the remainder of the extended double-precision floating-point value
2623  * `a' with respect to the corresponding value `b'.
2624  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2625  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
2626  * the remainder based on truncating the quotient toward zero instead and
2627  * *quotient is set to the low 64 bits of the absolute value of the integer
2628  * quotient.
2629  */
floatx80_modrem(floatx80 a,floatx80 b,bool mod,uint64_t * quotient,float_status * status)2630 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2631                          uint64_t *quotient, float_status *status)
2632 {
2633     FloatParts128 pa, pb, *pr;
2634 
2635     *quotient = 0;
2636     if (!floatx80_unpack_canonical(&pa, a, status) ||
2637         !floatx80_unpack_canonical(&pb, b, status)) {
2638         return floatx80_default_nan(status);
2639     }
2640     pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2641 
2642     return floatx80_round_pack_canonical(pr, status);
2643 }
2644 
/*
 * floatx80 remainder: floatx80_modrem() with 'mod' false.  The quotient
 * output is discarded.
 */
floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
{
    uint64_t unused_quotient;
    floatx80 rem;

    rem = floatx80_modrem(a, b, false, &unused_quotient, status);
    return rem;
}
2650 
/*
 * floatx80 modulus: floatx80_modrem() with 'mod' true.  The quotient
 * output is discarded.
 */
floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
{
    uint64_t unused_quotient;
    floatx80 rem;

    rem = floatx80_modrem(a, b, true, &unused_quotient, status);
    return rem;
}
2656 
2657 /*
2658  * Float to Float conversions
2659  *
2660  * Returns the result of converting one float format to another. The
2661  * conversion is performed according to the IEC/IEEE Standard for
2662  * Binary Floating-Point Arithmetic.
2663  *
2664  * Usually this only needs to take care of raising invalid exceptions
2665  * and handling the conversion on NaNs.
2666  */
2667 
/*
 * Adjust canonical parts that are about to be packed with
 * float16_params_ahp.  Normals and zeros pass through untouched; NaN and
 * Inf inputs raise Invalid and are replaced in place, as described at
 * each case below.
 */
static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
{
    switch (a->cls) {
    case float_class_zero:
    case float_class_normal:
        /* Nothing to adjust. */
        return;

    case float_class_inf:
        /*
         * The destination format has no Inf: raise Invalid and
         * substitute the maximum normal, keeping the sign.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_normal;
        a->exp = float16_params_ahp.exp_max;
        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
                                  float16_params_ahp.frac_size + 1);
        return;

    case float_class_snan:
        float_raise(float_flag_invalid_snan, s);
        /* fall through */
    case float_class_qnan:
        /*
         * The destination format has no NaN: raise Invalid and
         * substitute a zero, keeping the sign of the input NaN.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_zero;
        return;

    default:
        g_assert_not_reached();
    }
}
2703 
/*
 * Format-conversion step for 64-bit parts: only NaNs need work, which is
 * delegated to parts_return_nan().
 */
static void parts64_float_to_float(FloatParts64 *a, float_status *s)
{
    if (!is_nan(a->cls)) {
        return;
    }
    parts_return_nan(a, s);
}
2710 
/*
 * Format-conversion step for 128-bit parts: only NaNs need work, which is
 * delegated to parts_return_nan().
 */
static void parts128_float_to_float(FloatParts128 *a, float_status *s)
{
    if (!is_nan(a->cls)) {
        return;
    }
    parts_return_nan(a, s);
}
2717 
/*
 * Same-width conversion step; PARTS_GENERIC_64_128 picks the
 * float_to_float variant from the type of PARTS.
 */
#define parts_float_to_float(PARTS, STATUS) \
    PARTS_GENERIC_64_128(float_to_float, PARTS)(PARTS, STATUS)
2720 
/*
 * Fill the 64-bit parts @a from the 128-bit parts @b.  Class, sign and
 * exponent are copied.  The fraction is only produced for normals (via
 * frac_truncjam) and NaNs (high word only, then parts_return_nan); for
 * any other class a->frac is left unwritten.
 */
static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
                                        float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;

    if (a->cls == float_class_normal) {
        frac_truncjam(a, b);
        return;
    }

    if (is_nan(a->cls)) {
        /* The low bits of the NaN payload are dropped. */
        a->frac = b->frac_hi;
        parts_return_nan(a, s);
    }
}
2736 
/*
 * Fill the 128-bit parts @a from the 64-bit parts @b: copy class, sign
 * and exponent, widen the fraction with frac_widen(), and pass NaNs
 * through parts_return_nan().
 */
static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
                                       float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;
    frac_widen(a, b);

    if (!is_nan(a->cls)) {
        return;
    }
    parts_return_nan(a, s);
}
2749 
float16_to_float32(float16 a,bool ieee,float_status * s)2750 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2751 {
2752     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2753     FloatParts64 p;
2754 
2755     float16a_unpack_canonical(&p, a, s, fmt16);
2756     parts_float_to_float(&p, s);
2757     return float32_round_pack_canonical(&p, s);
2758 }
2759 
float16_to_float64(float16 a,bool ieee,float_status * s)2760 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2761 {
2762     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2763     FloatParts64 p;
2764 
2765     float16a_unpack_canonical(&p, a, s, fmt16);
2766     parts_float_to_float(&p, s);
2767     return float64_round_pack_canonical(&p, s);
2768 }
2769 
float32_to_float16(float32 a,bool ieee,float_status * s)2770 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2771 {
2772     FloatParts64 p;
2773     const FloatFmt *fmt;
2774 
2775     float32_unpack_canonical(&p, a, s);
2776     if (ieee) {
2777         parts_float_to_float(&p, s);
2778         fmt = &float16_params;
2779     } else {
2780         parts_float_to_ahp(&p, s);
2781         fmt = &float16_params_ahp;
2782     }
2783     return float16a_round_pack_canonical(&p, s, fmt);
2784 }
2785 
2786 static float64 QEMU_SOFTFLOAT_ATTR
soft_float32_to_float64(float32 a,float_status * s)2787 soft_float32_to_float64(float32 a, float_status *s)
2788 {
2789     FloatParts64 p;
2790 
2791     float32_unpack_canonical(&p, a, s);
2792     parts_float_to_float(&p, s);
2793     return float64_round_pack_canonical(&p, s);
2794 }
2795 
float32_to_float64(float32 a,float_status * s)2796 float64 float32_to_float64(float32 a, float_status *s)
2797 {
2798     if (likely(float32_is_normal(a))) {
2799         /* Widening conversion can never produce inexact results.  */
2800         union_float32 uf;
2801         union_float64 ud;
2802         uf.s = a;
2803         ud.h = uf.h;
2804         return ud.s;
2805     } else if (float32_is_zero(a)) {
2806         return float64_set_sign(float64_zero, float32_is_neg(a));
2807     } else {
2808         return soft_float32_to_float64(a, s);
2809     }
2810 }
2811 
float64_to_float16(float64 a,bool ieee,float_status * s)2812 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2813 {
2814     FloatParts64 p;
2815     const FloatFmt *fmt;
2816 
2817     float64_unpack_canonical(&p, a, s);
2818     if (ieee) {
2819         parts_float_to_float(&p, s);
2820         fmt = &float16_params;
2821     } else {
2822         parts_float_to_ahp(&p, s);
2823         fmt = &float16_params_ahp;
2824     }
2825     return float16a_round_pack_canonical(&p, s, fmt);
2826 }
2827 
float64_to_float32(float64 a,float_status * s)2828 float32 float64_to_float32(float64 a, float_status *s)
2829 {
2830     FloatParts64 p;
2831 
2832     float64_unpack_canonical(&p, a, s);
2833     parts_float_to_float(&p, s);
2834     return float32_round_pack_canonical(&p, s);
2835 }
2836 
bfloat16_to_float32(bfloat16 a,float_status * s)2837 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2838 {
2839     FloatParts64 p;
2840 
2841     bfloat16_unpack_canonical(&p, a, s);
2842     parts_float_to_float(&p, s);
2843     return float32_round_pack_canonical(&p, s);
2844 }
2845 
bfloat16_to_float64(bfloat16 a,float_status * s)2846 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2847 {
2848     FloatParts64 p;
2849 
2850     bfloat16_unpack_canonical(&p, a, s);
2851     parts_float_to_float(&p, s);
2852     return float64_round_pack_canonical(&p, s);
2853 }
2854 
float32_to_bfloat16(float32 a,float_status * s)2855 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2856 {
2857     FloatParts64 p;
2858 
2859     float32_unpack_canonical(&p, a, s);
2860     parts_float_to_float(&p, s);
2861     return bfloat16_round_pack_canonical(&p, s);
2862 }
2863 
float64_to_bfloat16(float64 a,float_status * s)2864 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2865 {
2866     FloatParts64 p;
2867 
2868     float64_unpack_canonical(&p, a, s);
2869     parts_float_to_float(&p, s);
2870     return bfloat16_round_pack_canonical(&p, s);
2871 }
2872 
float128_to_float32(float128 a,float_status * s)2873 float32 float128_to_float32(float128 a, float_status *s)
2874 {
2875     FloatParts64 p64;
2876     FloatParts128 p128;
2877 
2878     float128_unpack_canonical(&p128, a, s);
2879     parts_float_to_float_narrow(&p64, &p128, s);
2880     return float32_round_pack_canonical(&p64, s);
2881 }
2882 
float128_to_float64(float128 a,float_status * s)2883 float64 float128_to_float64(float128 a, float_status *s)
2884 {
2885     FloatParts64 p64;
2886     FloatParts128 p128;
2887 
2888     float128_unpack_canonical(&p128, a, s);
2889     parts_float_to_float_narrow(&p64, &p128, s);
2890     return float64_round_pack_canonical(&p64, s);
2891 }
2892 
float32_to_float128(float32 a,float_status * s)2893 float128 float32_to_float128(float32 a, float_status *s)
2894 {
2895     FloatParts64 p64;
2896     FloatParts128 p128;
2897 
2898     float32_unpack_canonical(&p64, a, s);
2899     parts_float_to_float_widen(&p128, &p64, s);
2900     return float128_round_pack_canonical(&p128, s);
2901 }
2902 
float64_to_float128(float64 a,float_status * s)2903 float128 float64_to_float128(float64 a, float_status *s)
2904 {
2905     FloatParts64 p64;
2906     FloatParts128 p128;
2907 
2908     float64_unpack_canonical(&p64, a, s);
2909     parts_float_to_float_widen(&p128, &p64, s);
2910     return float128_round_pack_canonical(&p128, s);
2911 }
2912 
floatx80_to_float32(floatx80 a,float_status * s)2913 float32 floatx80_to_float32(floatx80 a, float_status *s)
2914 {
2915     FloatParts64 p64;
2916     FloatParts128 p128;
2917 
2918     if (floatx80_unpack_canonical(&p128, a, s)) {
2919         parts_float_to_float_narrow(&p64, &p128, s);
2920     } else {
2921         parts_default_nan(&p64, s);
2922     }
2923     return float32_round_pack_canonical(&p64, s);
2924 }
2925 
floatx80_to_float64(floatx80 a,float_status * s)2926 float64 floatx80_to_float64(floatx80 a, float_status *s)
2927 {
2928     FloatParts64 p64;
2929     FloatParts128 p128;
2930 
2931     if (floatx80_unpack_canonical(&p128, a, s)) {
2932         parts_float_to_float_narrow(&p64, &p128, s);
2933     } else {
2934         parts_default_nan(&p64, s);
2935     }
2936     return float64_round_pack_canonical(&p64, s);
2937 }
2938 
floatx80_to_float128(floatx80 a,float_status * s)2939 float128 floatx80_to_float128(floatx80 a, float_status *s)
2940 {
2941     FloatParts128 p;
2942 
2943     if (floatx80_unpack_canonical(&p, a, s)) {
2944         parts_float_to_float(&p, s);
2945     } else {
2946         parts_default_nan(&p, s);
2947     }
2948     return float128_round_pack_canonical(&p, s);
2949 }
2950 
float32_to_floatx80(float32 a,float_status * s)2951 floatx80 float32_to_floatx80(float32 a, float_status *s)
2952 {
2953     FloatParts64 p64;
2954     FloatParts128 p128;
2955 
2956     float32_unpack_canonical(&p64, a, s);
2957     parts_float_to_float_widen(&p128, &p64, s);
2958     return floatx80_round_pack_canonical(&p128, s);
2959 }
2960 
float64_to_floatx80(float64 a,float_status * s)2961 floatx80 float64_to_floatx80(float64 a, float_status *s)
2962 {
2963     FloatParts64 p64;
2964     FloatParts128 p128;
2965 
2966     float64_unpack_canonical(&p64, a, s);
2967     parts_float_to_float_widen(&p128, &p64, s);
2968     return floatx80_round_pack_canonical(&p128, s);
2969 }
2970 
float128_to_floatx80(float128 a,float_status * s)2971 floatx80 float128_to_floatx80(float128 a, float_status *s)
2972 {
2973     FloatParts128 p;
2974 
2975     float128_unpack_canonical(&p, a, s);
2976     parts_float_to_float(&p, s);
2977     return floatx80_round_pack_canonical(&p, s);
2978 }
2979 
2980 /*
2981  * Round to integral value
2982  */
2983 
/*
 * Round a float16 to an integral value in the same format, using the
 * rounding mode held in @s and a scale argument of 0.
 */
float16 float16_round_to_int(float16 a, float_status *s)
{
    FloatParts64 parts;

    float16_unpack_canonical(&parts, a, s);
    parts_round_to_int(&parts, s->float_rounding_mode, 0, s,
                       &float16_params);

    return float16_round_pack_canonical(&parts, s);
}
2992 
/*
 * Round a float32 to an integral value in the same format, using the
 * rounding mode held in @s and a scale argument of 0.
 */
float32 float32_round_to_int(float32 a, float_status *s)
{
    FloatParts64 parts;

    float32_unpack_canonical(&parts, a, s);
    parts_round_to_int(&parts, s->float_rounding_mode, 0, s,
                       &float32_params);

    return float32_round_pack_canonical(&parts, s);
}
3001 
/*
 * Round a float64 to an integral value in the same format, using the
 * rounding mode held in @s and a scale argument of 0.
 */
float64 float64_round_to_int(float64 a, float_status *s)
{
    FloatParts64 parts;

    float64_unpack_canonical(&parts, a, s);
    parts_round_to_int(&parts, s->float_rounding_mode, 0, s,
                       &float64_params);

    return float64_round_pack_canonical(&parts, s);
}
3010 
/*
 * Round a bfloat16 to an integral value in the same format, using the
 * rounding mode held in @s and a scale argument of 0.
 */
bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
{
    FloatParts64 parts;

    bfloat16_unpack_canonical(&parts, a, s);
    parts_round_to_int(&parts, s->float_rounding_mode, 0, s,
                       &bfloat16_params);

    return bfloat16_round_pack_canonical(&parts, s);
}
3019 
/*
 * Round a float128 to an integral value in the same format, using the
 * rounding mode held in @s and a scale argument of 0.
 */
float128 float128_round_to_int(float128 a, float_status *s)
{
    FloatParts128 parts;

    float128_unpack_canonical(&parts, a, s);
    parts_round_to_int(&parts, s->float_rounding_mode, 0, s,
                       &float128_params);

    return float128_round_pack_canonical(&parts, s);
}
3028 
/*
 * Round a floatx80 to an integral value in the same format.  The format
 * parameters are selected from floatx80_params[] by the status's
 * floatx80_rounding_precision.  An input that fails to unpack yields the
 * default NaN.
 */
floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
{
    FloatParts128 parts;
    bool unpacked = floatx80_unpack_canonical(&parts, a, status);

    if (!unpacked) {
        return floatx80_default_nan(status);
    }

    parts_round_to_int(&parts, status->float_rounding_mode, 0, status,
                       &floatx80_params[status->floatx80_rounding_precision]);

    return floatx80_round_pack_canonical(&parts, status);
}
3041 
3042 /*
3043  * Floating-point to signed integer conversions
3044  */
3045 
/*
 * Convert float16 to int8_t with the given rounding mode and scale,
 * passing INT8_MIN/INT8_MAX to parts_float_to_sint() as result bounds.
 */
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                              float_status *s)
{
    FloatParts64 parts;
    int8_t result;

    float16_unpack_canonical(&parts, a, s);
    result = parts_float_to_sint(&parts, rmode, scale, INT8_MIN, INT8_MAX, s);
    return result;
}
3054 
/*
 * Convert float16 to int16_t with the given rounding mode and scale,
 * passing INT16_MIN/INT16_MAX to parts_float_to_sint() as result bounds.
 */
int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 parts;
    int16_t result;

    float16_unpack_canonical(&parts, a, s);
    result = parts_float_to_sint(&parts, rmode, scale,
                                 INT16_MIN, INT16_MAX, s);
    return result;
}
3063 
/*
 * Convert float16 to int32_t with the given rounding mode and scale,
 * passing INT32_MIN/INT32_MAX to parts_float_to_sint() as result bounds.
 */
int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 parts;
    int32_t result;

    float16_unpack_canonical(&parts, a, s);
    result = parts_float_to_sint(&parts, rmode, scale,
                                 INT32_MIN, INT32_MAX, s);
    return result;
}
3072 
/*
 * Convert float16 to int64_t with the given rounding mode and scale,
 * passing INT64_MIN/INT64_MAX to parts_float_to_sint() as result bounds.
 */
int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 parts;
    int64_t result;

    float16_unpack_canonical(&parts, a, s);
    result = parts_float_to_sint(&parts, rmode, scale,
                                 INT64_MIN, INT64_MAX, s);
    return result;
}
3081 
/*
 * Convert float32 to int16_t with the given rounding mode and scale,
 * passing INT16_MIN/INT16_MAX to parts_float_to_sint() as result bounds.
 */
int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 parts;
    int16_t result;

    float32_unpack_canonical(&parts, a, s);
    result = parts_float_to_sint(&parts, rmode, scale,
                                 INT16_MIN, INT16_MAX, s);
    return result;
}
3090 
/*
 * Convert float32 to int32_t with the given rounding mode and scale,
 * passing INT32_MIN/INT32_MAX to parts_float_to_sint() as result bounds.
 */
int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 parts;
    int32_t result;

    float32_unpack_canonical(&parts, a, s);
    result = parts_float_to_sint(&parts, rmode, scale,
                                 INT32_MIN, INT32_MAX, s);
    return result;
}
3099 
/*
 * Convert float32 to int64_t with the given rounding mode and scale,
 * passing INT64_MIN/INT64_MAX to parts_float_to_sint() as result bounds.
 */
int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 parts;
    int64_t result;

    float32_unpack_canonical(&parts, a, s);
    result = parts_float_to_sint(&parts, rmode, scale,
                                 INT64_MIN, INT64_MAX, s);
    return result;
}
3108 
/*
 * Convert float64 to int16_t with the given rounding mode and scale,
 * passing INT16_MIN/INT16_MAX to parts_float_to_sint() as result bounds.
 */
int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 parts;
    int16_t result;

    float64_unpack_canonical(&parts, a, s);
    result = parts_float_to_sint(&parts, rmode, scale,
                                 INT16_MIN, INT16_MAX, s);
    return result;
}
3117 
/*
 * Convert float64 to int32_t with the given rounding mode and scale,
 * passing INT32_MIN/INT32_MAX to parts_float_to_sint() as result bounds.
 */
int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 parts;
    int32_t result;

    float64_unpack_canonical(&parts, a, s);
    result = parts_float_to_sint(&parts, rmode, scale,
                                 INT32_MIN, INT32_MAX, s);
    return result;
}
3126 
/*
 * Convert float64 to int64_t with the given rounding mode and scale,
 * passing INT64_MIN/INT64_MAX to parts_float_to_sint() as result bounds.
 */
int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 parts;
    int64_t result;

    float64_unpack_canonical(&parts, a, s);
    result = parts_float_to_sint(&parts, rmode, scale,
                                 INT64_MIN, INT64_MAX, s);
    return result;
}
3135 
/*
 * Convert bfloat16 to int8_t with the given rounding mode and scale,
 * passing INT8_MIN/INT8_MAX to parts_float_to_sint() as result bounds.
 */
int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                               float_status *s)
{
    FloatParts64 parts;
    int8_t result;

    bfloat16_unpack_canonical(&parts, a, s);
    result = parts_float_to_sint(&parts, rmode, scale, INT8_MIN, INT8_MAX, s);
    return result;
}
3144 
/*
 * Convert bfloat16 to int16_t with the given rounding mode and scale,
 * passing INT16_MIN/INT16_MAX to parts_float_to_sint() as result bounds.
 */
int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 parts;
    int16_t result;

    bfloat16_unpack_canonical(&parts, a, s);
    result = parts_float_to_sint(&parts, rmode, scale,
                                 INT16_MIN, INT16_MAX, s);
    return result;
}
3153 
/*
 * Convert bfloat16 to int32_t with the given rounding mode and scale,
 * passing INT32_MIN/INT32_MAX to parts_float_to_sint() as result bounds.
 */
int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 parts;
    int32_t result;

    bfloat16_unpack_canonical(&parts, a, s);
    result = parts_float_to_sint(&parts, rmode, scale,
                                 INT32_MIN, INT32_MAX, s);
    return result;
}
3162 
/*
 * Convert bfloat16 to int64_t with the given rounding mode and scale,
 * passing INT64_MIN/INT64_MAX to parts_float_to_sint() as result bounds.
 */
int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 parts;
    int64_t result;

    bfloat16_unpack_canonical(&parts, a, s);
    result = parts_float_to_sint(&parts, rmode, scale,
                                 INT64_MIN, INT64_MAX, s);
    return result;
}
3171 
/*
 * Convert float128 to int32_t with the given rounding mode and scale,
 * passing INT32_MIN/INT32_MAX to parts_float_to_sint() as result bounds.
 */
static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 parts;
    int32_t result;

    float128_unpack_canonical(&parts, a, s);
    result = parts_float_to_sint(&parts, rmode, scale,
                                 INT32_MIN, INT32_MAX, s);
    return result;
}
3180 
/*
 * Convert float128 to int64_t with the given rounding mode and scale,
 * passing INT64_MIN/INT64_MAX to parts_float_to_sint() as result bounds.
 */
static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 parts;
    int64_t result;

    float128_unpack_canonical(&parts, a, s);
    result = parts_float_to_sint(&parts, rmode, scale,
                                 INT64_MIN, INT64_MAX, s);
    return result;
}
3189 
float128_to_int128_scalbn(float128 a,FloatRoundMode rmode,int scale,float_status * s)3190 static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3191                                         int scale, float_status *s)
3192 {
3193     int flags = 0;
3194     Int128 r;
3195     FloatParts128 p;
3196 
3197     float128_unpack_canonical(&p, a, s);
3198 
3199     switch (p.cls) {
3200     case float_class_snan:
3201         flags |= float_flag_invalid_snan;
3202         /* fall through */
3203     case float_class_qnan:
3204         flags |= float_flag_invalid;
3205         r = UINT128_MAX;
3206         break;
3207 
3208     case float_class_inf:
3209         flags = float_flag_invalid | float_flag_invalid_cvti;
3210         r = p.sign ? INT128_MIN : INT128_MAX;
3211         break;
3212 
3213     case float_class_zero:
3214         return int128_zero();
3215 
3216     case float_class_normal:
3217         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3218             flags = float_flag_inexact;
3219         }
3220 
3221         if (p.exp < 127) {
3222             int shift = 127 - p.exp;
3223             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3224             if (p.sign) {
3225                 r = int128_neg(r);
3226             }
3227         } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3228                    p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
3229             r = INT128_MIN;
3230         } else {
3231             flags = float_flag_invalid | float_flag_invalid_cvti;
3232             r = p.sign ? INT128_MIN : INT128_MAX;
3233         }
3234         break;
3235 
3236     default:
3237         g_assert_not_reached();
3238     }
3239 
3240     float_raise(flags, s);
3241     return r;
3242 }
3243 
/*
 * Convert floatx80 to int32_t with the given rounding mode and scale.
 * An input that fails to unpack is replaced by the default NaN parts
 * before the conversion.
 */
static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 parts;
    bool unpacked = floatx80_unpack_canonical(&parts, a, s);

    if (!unpacked) {
        parts_default_nan(&parts, s);
    }

    return parts_float_to_sint(&parts, rmode, scale, INT32_MIN, INT32_MAX, s);
}
3254 
/*
 * Convert floatx80 to int64_t with the given rounding mode and scale.
 * An input that fails to unpack is replaced by the default NaN parts
 * before the conversion.
 */
static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 parts;
    bool unpacked = floatx80_unpack_canonical(&parts, a, s);

    if (!unpacked) {
        parts_default_nan(&parts, s);
    }

    return parts_float_to_sint(&parts, rmode, scale, INT64_MIN, INT64_MAX, s);
}
3265 
/* float16 -> int8_t using the rounding mode held in @s and scale 0. */
int8_t float16_to_int8(float16 a, float_status *s)
{
    const FloatRoundMode mode = s->float_rounding_mode;

    return float16_to_int8_scalbn(a, mode, 0, s);
}
3270 
/* float16 -> int16_t using the rounding mode held in @s and scale 0. */
int16_t float16_to_int16(float16 a, float_status *s)
{
    const FloatRoundMode mode = s->float_rounding_mode;

    return float16_to_int16_scalbn(a, mode, 0, s);
}
3275 
/* float16 -> int32_t using the rounding mode held in @s and scale 0. */
int32_t float16_to_int32(float16 a, float_status *s)
{
    const FloatRoundMode mode = s->float_rounding_mode;

    return float16_to_int32_scalbn(a, mode, 0, s);
}
3280 
/* float16 -> int64_t using the rounding mode held in @s and scale 0. */
int64_t float16_to_int64(float16 a, float_status *s)
{
    const FloatRoundMode mode = s->float_rounding_mode;

    return float16_to_int64_scalbn(a, mode, 0, s);
}
3285 
/* float32 -> int16_t using the rounding mode held in @s and scale 0. */
int16_t float32_to_int16(float32 a, float_status *s)
{
    const FloatRoundMode mode = s->float_rounding_mode;

    return float32_to_int16_scalbn(a, mode, 0, s);
}
3290 
/* float32 -> int32_t using the rounding mode held in @s and scale 0. */
int32_t float32_to_int32(float32 a, float_status *s)
{
    const FloatRoundMode mode = s->float_rounding_mode;

    return float32_to_int32_scalbn(a, mode, 0, s);
}
3295 
/* float32 -> int64_t using the rounding mode held in @s and scale 0. */
int64_t float32_to_int64(float32 a, float_status *s)
{
    const FloatRoundMode mode = s->float_rounding_mode;

    return float32_to_int64_scalbn(a, mode, 0, s);
}
3300 
/* float64 -> int16_t using the rounding mode held in @s and scale 0. */
int16_t float64_to_int16(float64 a, float_status *s)
{
    const FloatRoundMode mode = s->float_rounding_mode;

    return float64_to_int16_scalbn(a, mode, 0, s);
}
3305 
/* float64 -> int32_t using the rounding mode held in @s and scale 0. */
int32_t float64_to_int32(float64 a, float_status *s)
{
    const FloatRoundMode mode = s->float_rounding_mode;

    return float64_to_int32_scalbn(a, mode, 0, s);
}
3310 
/* float64 -> int64_t using the rounding mode held in @s and scale 0. */
int64_t float64_to_int64(float64 a, float_status *s)
{
    const FloatRoundMode mode = s->float_rounding_mode;

    return float64_to_int64_scalbn(a, mode, 0, s);
}
3315 
/* float128 -> int32_t using the rounding mode held in @s and scale 0. */
int32_t float128_to_int32(float128 a, float_status *s)
{
    const FloatRoundMode mode = s->float_rounding_mode;

    return float128_to_int32_scalbn(a, mode, 0, s);
}
3320 
/* float128 -> int64_t using the rounding mode held in @s and scale 0. */
int64_t float128_to_int64(float128 a, float_status *s)
{
    const FloatRoundMode mode = s->float_rounding_mode;

    return float128_to_int64_scalbn(a, mode, 0, s);
}
3325 
float128_to_int128(float128 a,float_status * s)3326 Int128 float128_to_int128(float128 a, float_status *s)
3327 {
3328     return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
3329 }
3330 
/* floatx80 -> int32_t using the rounding mode held in @s and scale 0. */
int32_t floatx80_to_int32(floatx80 a, float_status *s)
{
    const FloatRoundMode mode = s->float_rounding_mode;

    return floatx80_to_int32_scalbn(a, mode, 0, s);
}
3335 
/* floatx80 -> int64_t using the rounding mode held in @s and scale 0. */
int64_t floatx80_to_int64(floatx80 a, float_status *s)
{
    const FloatRoundMode mode = s->float_rounding_mode;

    return floatx80_to_int64_scalbn(a, mode, 0, s);
}
3340 
/* float16 -> int16_t with float_round_to_zero and scale 0. */
int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
    int16_t result = float16_to_int16_scalbn(a, float_round_to_zero, 0, s);

    return result;
}
3345 
/* float16 -> int32_t with float_round_to_zero and scale 0. */
int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
{
    int32_t result = float16_to_int32_scalbn(a, float_round_to_zero, 0, s);

    return result;
}
3350 
/* float16 -> int64_t with float_round_to_zero and scale 0. */
int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
{
    int64_t result = float16_to_int64_scalbn(a, float_round_to_zero, 0, s);

    return result;
}
3355 
/* float32 -> int16_t with float_round_to_zero and scale 0. */
int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
{
    int16_t result = float32_to_int16_scalbn(a, float_round_to_zero, 0, s);

    return result;
}
3360 
/* float32 -> int32_t with float_round_to_zero and scale 0. */
int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
{
    int32_t result = float32_to_int32_scalbn(a, float_round_to_zero, 0, s);

    return result;
}
3365 
/* float32 -> int64_t with float_round_to_zero and scale 0. */
int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
{
    int64_t result = float32_to_int64_scalbn(a, float_round_to_zero, 0, s);

    return result;
}
3370 
/* float64 -> int16_t with float_round_to_zero and scale 0. */
int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
{
    int16_t result = float64_to_int16_scalbn(a, float_round_to_zero, 0, s);

    return result;
}
3375 
/* float64 -> int32_t with float_round_to_zero and scale 0. */
int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
{
    int32_t result = float64_to_int32_scalbn(a, float_round_to_zero, 0, s);

    return result;
}
3380 
/* float64 -> int64_t with float_round_to_zero and scale 0. */
int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
{
    int64_t result = float64_to_int64_scalbn(a, float_round_to_zero, 0, s);

    return result;
}
3385 
/* float128 -> int32_t with float_round_to_zero and scale 0. */
int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
{
    int32_t result = float128_to_int32_scalbn(a, float_round_to_zero, 0, s);

    return result;
}
3390 
/* float128 -> int64_t with float_round_to_zero and scale 0. */
int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
{
    int64_t result = float128_to_int64_scalbn(a, float_round_to_zero, 0, s);

    return result;
}
3395 
float128_to_int128_round_to_zero(float128 a,float_status * s)3396 Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3397 {
3398     return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3399 }
3400 
floatx80_to_int32_round_to_zero(floatx80 a,float_status * s)3401 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3402 {
3403     return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3404 }
3405 
floatx80_to_int64_round_to_zero(floatx80 a,float_status * s)3406 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3407 {
3408     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3409 }
3410 
bfloat16_to_int8(bfloat16 a,float_status * s)3411 int8_t bfloat16_to_int8(bfloat16 a, float_status *s)
3412 {
3413     return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3414 }
3415 
bfloat16_to_int16(bfloat16 a,float_status * s)3416 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3417 {
3418     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3419 }
3420 
bfloat16_to_int32(bfloat16 a,float_status * s)3421 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3422 {
3423     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3424 }
3425 
bfloat16_to_int64(bfloat16 a,float_status * s)3426 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3427 {
3428     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3429 }
3430 
bfloat16_to_int8_round_to_zero(bfloat16 a,float_status * s)3431 int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s)
3432 {
3433     return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s);
3434 }
3435 
bfloat16_to_int16_round_to_zero(bfloat16 a,float_status * s)3436 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3437 {
3438     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3439 }
3440 
bfloat16_to_int32_round_to_zero(bfloat16 a,float_status * s)3441 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3442 {
3443     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3444 }
3445 
bfloat16_to_int64_round_to_zero(bfloat16 a,float_status * s)3446 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3447 {
3448     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3449 }
3450 
float64_to_int32_modulo(float64 a,FloatRoundMode rmode,float_status * s)3451 int32_t float64_to_int32_modulo(float64 a, FloatRoundMode rmode,
3452                                 float_status *s)
3453 {
3454     FloatParts64 p;
3455 
3456     float64_unpack_canonical(&p, a, s);
3457     return parts_float_to_sint_modulo(&p, rmode, 31, s);
3458 }
3459 
float64_to_int64_modulo(float64 a,FloatRoundMode rmode,float_status * s)3460 int64_t float64_to_int64_modulo(float64 a, FloatRoundMode rmode,
3461                                 float_status *s)
3462 {
3463     FloatParts64 p;
3464 
3465     float64_unpack_canonical(&p, a, s);
3466     return parts_float_to_sint_modulo(&p, rmode, 63, s);
3467 }
3468 
3469 /*
3470  * Floating-point to unsigned integer conversions
3471  */
3472 
float16_to_uint8_scalbn(float16 a,FloatRoundMode rmode,int scale,float_status * s)3473 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3474                                 float_status *s)
3475 {
3476     FloatParts64 p;
3477 
3478     float16_unpack_canonical(&p, a, s);
3479     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3480 }
3481 
float16_to_uint16_scalbn(float16 a,FloatRoundMode rmode,int scale,float_status * s)3482 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3483                                   float_status *s)
3484 {
3485     FloatParts64 p;
3486 
3487     float16_unpack_canonical(&p, a, s);
3488     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3489 }
3490 
float16_to_uint32_scalbn(float16 a,FloatRoundMode rmode,int scale,float_status * s)3491 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3492                                   float_status *s)
3493 {
3494     FloatParts64 p;
3495 
3496     float16_unpack_canonical(&p, a, s);
3497     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3498 }
3499 
float16_to_uint64_scalbn(float16 a,FloatRoundMode rmode,int scale,float_status * s)3500 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3501                                   float_status *s)
3502 {
3503     FloatParts64 p;
3504 
3505     float16_unpack_canonical(&p, a, s);
3506     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3507 }
3508 
float32_to_uint16_scalbn(float32 a,FloatRoundMode rmode,int scale,float_status * s)3509 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3510                                   float_status *s)
3511 {
3512     FloatParts64 p;
3513 
3514     float32_unpack_canonical(&p, a, s);
3515     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3516 }
3517 
float32_to_uint32_scalbn(float32 a,FloatRoundMode rmode,int scale,float_status * s)3518 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3519                                   float_status *s)
3520 {
3521     FloatParts64 p;
3522 
3523     float32_unpack_canonical(&p, a, s);
3524     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3525 }
3526 
float32_to_uint64_scalbn(float32 a,FloatRoundMode rmode,int scale,float_status * s)3527 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3528                                   float_status *s)
3529 {
3530     FloatParts64 p;
3531 
3532     float32_unpack_canonical(&p, a, s);
3533     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3534 }
3535 
float64_to_uint16_scalbn(float64 a,FloatRoundMode rmode,int scale,float_status * s)3536 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3537                                   float_status *s)
3538 {
3539     FloatParts64 p;
3540 
3541     float64_unpack_canonical(&p, a, s);
3542     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3543 }
3544 
float64_to_uint32_scalbn(float64 a,FloatRoundMode rmode,int scale,float_status * s)3545 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3546                                   float_status *s)
3547 {
3548     FloatParts64 p;
3549 
3550     float64_unpack_canonical(&p, a, s);
3551     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3552 }
3553 
float64_to_uint64_scalbn(float64 a,FloatRoundMode rmode,int scale,float_status * s)3554 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3555                                   float_status *s)
3556 {
3557     FloatParts64 p;
3558 
3559     float64_unpack_canonical(&p, a, s);
3560     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3561 }
3562 
bfloat16_to_uint8_scalbn(bfloat16 a,FloatRoundMode rmode,int scale,float_status * s)3563 uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode,
3564                                  int scale, float_status *s)
3565 {
3566     FloatParts64 p;
3567 
3568     bfloat16_unpack_canonical(&p, a, s);
3569     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3570 }
3571 
bfloat16_to_uint16_scalbn(bfloat16 a,FloatRoundMode rmode,int scale,float_status * s)3572 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3573                                    int scale, float_status *s)
3574 {
3575     FloatParts64 p;
3576 
3577     bfloat16_unpack_canonical(&p, a, s);
3578     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3579 }
3580 
bfloat16_to_uint32_scalbn(bfloat16 a,FloatRoundMode rmode,int scale,float_status * s)3581 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3582                                    int scale, float_status *s)
3583 {
3584     FloatParts64 p;
3585 
3586     bfloat16_unpack_canonical(&p, a, s);
3587     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3588 }
3589 
bfloat16_to_uint64_scalbn(bfloat16 a,FloatRoundMode rmode,int scale,float_status * s)3590 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3591                                    int scale, float_status *s)
3592 {
3593     FloatParts64 p;
3594 
3595     bfloat16_unpack_canonical(&p, a, s);
3596     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3597 }
3598 
float128_to_uint32_scalbn(float128 a,FloatRoundMode rmode,int scale,float_status * s)3599 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3600                                           int scale, float_status *s)
3601 {
3602     FloatParts128 p;
3603 
3604     float128_unpack_canonical(&p, a, s);
3605     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3606 }
3607 
float128_to_uint64_scalbn(float128 a,FloatRoundMode rmode,int scale,float_status * s)3608 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3609                                           int scale, float_status *s)
3610 {
3611     FloatParts128 p;
3612 
3613     float128_unpack_canonical(&p, a, s);
3614     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3615 }
3616 
float128_to_uint128_scalbn(float128 a,FloatRoundMode rmode,int scale,float_status * s)3617 static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3618                                          int scale, float_status *s)
3619 {
3620     int flags = 0;
3621     Int128 r;
3622     FloatParts128 p;
3623 
3624     float128_unpack_canonical(&p, a, s);
3625 
3626     switch (p.cls) {
3627     case float_class_snan:
3628         flags |= float_flag_invalid_snan;
3629         /* fall through */
3630     case float_class_qnan:
3631         flags |= float_flag_invalid;
3632         r = UINT128_MAX;
3633         break;
3634 
3635     case float_class_inf:
3636         flags = float_flag_invalid | float_flag_invalid_cvti;
3637         r = p.sign ? int128_zero() : UINT128_MAX;
3638         break;
3639 
3640     case float_class_zero:
3641         return int128_zero();
3642 
3643     case float_class_normal:
3644         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3645             flags = float_flag_inexact;
3646             if (p.cls == float_class_zero) {
3647                 r = int128_zero();
3648                 break;
3649             }
3650         }
3651 
3652         if (p.sign) {
3653             flags = float_flag_invalid | float_flag_invalid_cvti;
3654             r = int128_zero();
3655         } else if (p.exp <= 127) {
3656             int shift = 127 - p.exp;
3657             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3658         } else {
3659             flags = float_flag_invalid | float_flag_invalid_cvti;
3660             r = UINT128_MAX;
3661         }
3662         break;
3663 
3664     default:
3665         g_assert_not_reached();
3666     }
3667 
3668     float_raise(flags, s);
3669     return r;
3670 }
3671 
float16_to_uint8(float16 a,float_status * s)3672 uint8_t float16_to_uint8(float16 a, float_status *s)
3673 {
3674     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3675 }
3676 
float16_to_uint16(float16 a,float_status * s)3677 uint16_t float16_to_uint16(float16 a, float_status *s)
3678 {
3679     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3680 }
3681 
float16_to_uint32(float16 a,float_status * s)3682 uint32_t float16_to_uint32(float16 a, float_status *s)
3683 {
3684     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3685 }
3686 
float16_to_uint64(float16 a,float_status * s)3687 uint64_t float16_to_uint64(float16 a, float_status *s)
3688 {
3689     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3690 }
3691 
float32_to_uint16(float32 a,float_status * s)3692 uint16_t float32_to_uint16(float32 a, float_status *s)
3693 {
3694     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3695 }
3696 
float32_to_uint32(float32 a,float_status * s)3697 uint32_t float32_to_uint32(float32 a, float_status *s)
3698 {
3699     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3700 }
3701 
float32_to_uint64(float32 a,float_status * s)3702 uint64_t float32_to_uint64(float32 a, float_status *s)
3703 {
3704     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3705 }
3706 
float64_to_uint16(float64 a,float_status * s)3707 uint16_t float64_to_uint16(float64 a, float_status *s)
3708 {
3709     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3710 }
3711 
float64_to_uint32(float64 a,float_status * s)3712 uint32_t float64_to_uint32(float64 a, float_status *s)
3713 {
3714     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3715 }
3716 
float64_to_uint64(float64 a,float_status * s)3717 uint64_t float64_to_uint64(float64 a, float_status *s)
3718 {
3719     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3720 }
3721 
float128_to_uint32(float128 a,float_status * s)3722 uint32_t float128_to_uint32(float128 a, float_status *s)
3723 {
3724     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3725 }
3726 
float128_to_uint64(float128 a,float_status * s)3727 uint64_t float128_to_uint64(float128 a, float_status *s)
3728 {
3729     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3730 }
3731 
float128_to_uint128(float128 a,float_status * s)3732 Int128 float128_to_uint128(float128 a, float_status *s)
3733 {
3734     return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3735 }
3736 
float16_to_uint16_round_to_zero(float16 a,float_status * s)3737 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3738 {
3739     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3740 }
3741 
float16_to_uint32_round_to_zero(float16 a,float_status * s)3742 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3743 {
3744     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3745 }
3746 
float16_to_uint64_round_to_zero(float16 a,float_status * s)3747 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3748 {
3749     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3750 }
3751 
float32_to_uint16_round_to_zero(float32 a,float_status * s)3752 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3753 {
3754     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3755 }
3756 
float32_to_uint32_round_to_zero(float32 a,float_status * s)3757 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3758 {
3759     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3760 }
3761 
float32_to_uint64_round_to_zero(float32 a,float_status * s)3762 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3763 {
3764     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3765 }
3766 
float64_to_uint16_round_to_zero(float64 a,float_status * s)3767 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3768 {
3769     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3770 }
3771 
float64_to_uint32_round_to_zero(float64 a,float_status * s)3772 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3773 {
3774     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3775 }
3776 
float64_to_uint64_round_to_zero(float64 a,float_status * s)3777 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3778 {
3779     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3780 }
3781 
float128_to_uint32_round_to_zero(float128 a,float_status * s)3782 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3783 {
3784     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3785 }
3786 
float128_to_uint64_round_to_zero(float128 a,float_status * s)3787 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3788 {
3789     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3790 }
3791 
float128_to_uint128_round_to_zero(float128 a,float_status * s)3792 Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3793 {
3794     return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3795 }
3796 
bfloat16_to_uint8(bfloat16 a,float_status * s)3797 uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s)
3798 {
3799     return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3800 }
3801 
bfloat16_to_uint16(bfloat16 a,float_status * s)3802 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3803 {
3804     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3805 }
3806 
bfloat16_to_uint32(bfloat16 a,float_status * s)3807 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3808 {
3809     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3810 }
3811 
bfloat16_to_uint64(bfloat16 a,float_status * s)3812 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3813 {
3814     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3815 }
3816 
bfloat16_to_uint8_round_to_zero(bfloat16 a,float_status * s)3817 uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s)
3818 {
3819     return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s);
3820 }
3821 
bfloat16_to_uint16_round_to_zero(bfloat16 a,float_status * s)3822 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3823 {
3824     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3825 }
3826 
bfloat16_to_uint32_round_to_zero(bfloat16 a,float_status * s)3827 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3828 {
3829     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3830 }
3831 
bfloat16_to_uint64_round_to_zero(bfloat16 a,float_status * s)3832 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3833 {
3834     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3835 }
3836 
3837 /*
3838  * Signed integer to floating-point conversions
3839  */
3840 
int64_to_float16_scalbn(int64_t a,int scale,float_status * status)3841 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3842 {
3843     FloatParts64 p;
3844 
3845     parts_sint_to_float(&p, a, scale, status);
3846     return float16_round_pack_canonical(&p, status);
3847 }
3848 
int32_to_float16_scalbn(int32_t a,int scale,float_status * status)3849 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3850 {
3851     return int64_to_float16_scalbn(a, scale, status);
3852 }
3853 
int16_to_float16_scalbn(int16_t a,int scale,float_status * status)3854 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3855 {
3856     return int64_to_float16_scalbn(a, scale, status);
3857 }
3858 
int64_to_float16(int64_t a,float_status * status)3859 float16 int64_to_float16(int64_t a, float_status *status)
3860 {
3861     return int64_to_float16_scalbn(a, 0, status);
3862 }
3863 
int32_to_float16(int32_t a,float_status * status)3864 float16 int32_to_float16(int32_t a, float_status *status)
3865 {
3866     return int64_to_float16_scalbn(a, 0, status);
3867 }
3868 
int16_to_float16(int16_t a,float_status * status)3869 float16 int16_to_float16(int16_t a, float_status *status)
3870 {
3871     return int64_to_float16_scalbn(a, 0, status);
3872 }
3873 
int8_to_float16(int8_t a,float_status * status)3874 float16 int8_to_float16(int8_t a, float_status *status)
3875 {
3876     return int64_to_float16_scalbn(a, 0, status);
3877 }
3878 
int64_to_float32_scalbn(int64_t a,int scale,float_status * status)3879 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3880 {
3881     FloatParts64 p;
3882 
3883     /* Without scaling, there are no overflow concerns. */
3884     if (likely(scale == 0) && can_use_fpu(status)) {
3885         union_float32 ur;
3886         ur.h = a;
3887         return ur.s;
3888     }
3889 
3890     parts64_sint_to_float(&p, a, scale, status);
3891     return float32_round_pack_canonical(&p, status);
3892 }
3893 
int32_to_float32_scalbn(int32_t a,int scale,float_status * status)3894 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3895 {
3896     return int64_to_float32_scalbn(a, scale, status);
3897 }
3898 
int16_to_float32_scalbn(int16_t a,int scale,float_status * status)3899 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3900 {
3901     return int64_to_float32_scalbn(a, scale, status);
3902 }
3903 
int64_to_float32(int64_t a,float_status * status)3904 float32 int64_to_float32(int64_t a, float_status *status)
3905 {
3906     return int64_to_float32_scalbn(a, 0, status);
3907 }
3908 
int32_to_float32(int32_t a,float_status * status)3909 float32 int32_to_float32(int32_t a, float_status *status)
3910 {
3911     return int64_to_float32_scalbn(a, 0, status);
3912 }
3913 
int16_to_float32(int16_t a,float_status * status)3914 float32 int16_to_float32(int16_t a, float_status *status)
3915 {
3916     return int64_to_float32_scalbn(a, 0, status);
3917 }
3918 
int64_to_float64_scalbn(int64_t a,int scale,float_status * status)3919 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3920 {
3921     FloatParts64 p;
3922 
3923     /* Without scaling, there are no overflow concerns. */
3924     if (likely(scale == 0) && can_use_fpu(status)) {
3925         union_float64 ur;
3926         ur.h = a;
3927         return ur.s;
3928     }
3929 
3930     parts_sint_to_float(&p, a, scale, status);
3931     return float64_round_pack_canonical(&p, status);
3932 }
3933 
int32_to_float64_scalbn(int32_t a,int scale,float_status * status)3934 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3935 {
3936     return int64_to_float64_scalbn(a, scale, status);
3937 }
3938 
int16_to_float64_scalbn(int16_t a,int scale,float_status * status)3939 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3940 {
3941     return int64_to_float64_scalbn(a, scale, status);
3942 }
3943 
int64_to_float64(int64_t a,float_status * status)3944 float64 int64_to_float64(int64_t a, float_status *status)
3945 {
3946     return int64_to_float64_scalbn(a, 0, status);
3947 }
3948 
int32_to_float64(int32_t a,float_status * status)3949 float64 int32_to_float64(int32_t a, float_status *status)
3950 {
3951     return int64_to_float64_scalbn(a, 0, status);
3952 }
3953 
int16_to_float64(int16_t a,float_status * status)3954 float64 int16_to_float64(int16_t a, float_status *status)
3955 {
3956     return int64_to_float64_scalbn(a, 0, status);
3957 }
3958 
int64_to_bfloat16_scalbn(int64_t a,int scale,float_status * status)3959 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3960 {
3961     FloatParts64 p;
3962 
3963     parts_sint_to_float(&p, a, scale, status);
3964     return bfloat16_round_pack_canonical(&p, status);
3965 }
3966 
int32_to_bfloat16_scalbn(int32_t a,int scale,float_status * status)3967 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3968 {
3969     return int64_to_bfloat16_scalbn(a, scale, status);
3970 }
3971 
int16_to_bfloat16_scalbn(int16_t a,int scale,float_status * status)3972 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3973 {
3974     return int64_to_bfloat16_scalbn(a, scale, status);
3975 }
3976 
int8_to_bfloat16_scalbn(int8_t a,int scale,float_status * status)3977 bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status)
3978 {
3979     return int64_to_bfloat16_scalbn(a, scale, status);
3980 }
3981 
int64_to_bfloat16(int64_t a,float_status * status)3982 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3983 {
3984     return int64_to_bfloat16_scalbn(a, 0, status);
3985 }
3986 
int32_to_bfloat16(int32_t a,float_status * status)3987 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3988 {
3989     return int64_to_bfloat16_scalbn(a, 0, status);
3990 }
3991 
int16_to_bfloat16(int16_t a,float_status * status)3992 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3993 {
3994     return int64_to_bfloat16_scalbn(a, 0, status);
3995 }
3996 
int8_to_bfloat16(int8_t a,float_status * status)3997 bfloat16 int8_to_bfloat16(int8_t a, float_status *status)
3998 {
3999     return int64_to_bfloat16_scalbn(a, 0, status);
4000 }
4001 
int128_to_float128(Int128 a,float_status * status)4002 float128 int128_to_float128(Int128 a, float_status *status)
4003 {
4004     FloatParts128 p = { };
4005     int shift;
4006 
4007     if (int128_nz(a)) {
4008         p.cls = float_class_normal;
4009         if (!int128_nonneg(a)) {
4010             p.sign = true;
4011             a = int128_neg(a);
4012         }
4013 
4014         shift = clz64(int128_gethi(a));
4015         if (shift == 64) {
4016             shift += clz64(int128_getlo(a));
4017         }
4018 
4019         p.exp = 127 - shift;
4020         a = int128_lshift(a, shift);
4021 
4022         p.frac_hi = int128_gethi(a);
4023         p.frac_lo = int128_getlo(a);
4024     } else {
4025         p.cls = float_class_zero;
4026     }
4027 
4028     return float128_round_pack_canonical(&p, status);
4029 }
4030 
int64_to_float128(int64_t a,float_status * status)4031 float128 int64_to_float128(int64_t a, float_status *status)
4032 {
4033     FloatParts128 p;
4034 
4035     parts_sint_to_float(&p, a, 0, status);
4036     return float128_round_pack_canonical(&p, status);
4037 }
4038 
int32_to_float128(int32_t a,float_status * status)4039 float128 int32_to_float128(int32_t a, float_status *status)
4040 {
4041     return int64_to_float128(a, status);
4042 }
4043 
int64_to_floatx80(int64_t a,float_status * status)4044 floatx80 int64_to_floatx80(int64_t a, float_status *status)
4045 {
4046     FloatParts128 p;
4047 
4048     parts_sint_to_float(&p, a, 0, status);
4049     return floatx80_round_pack_canonical(&p, status);
4050 }
4051 
int32_to_floatx80(int32_t a,float_status * status)4052 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4053 {
4054     return int64_to_floatx80(a, status);
4055 }
4056 
/*
 * Unsigned integer to floating-point conversions
 */
4060 
uint64_to_float16_scalbn(uint64_t a,int scale,float_status * status)4061 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
4062 {
4063     FloatParts64 p;
4064 
4065     parts_uint_to_float(&p, a, scale, status);
4066     return float16_round_pack_canonical(&p, status);
4067 }
4068 
uint32_to_float16_scalbn(uint32_t a,int scale,float_status * status)4069 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
4070 {
4071     return uint64_to_float16_scalbn(a, scale, status);
4072 }
4073 
uint16_to_float16_scalbn(uint16_t a,int scale,float_status * status)4074 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
4075 {
4076     return uint64_to_float16_scalbn(a, scale, status);
4077 }
4078 
uint64_to_float16(uint64_t a,float_status * status)4079 float16 uint64_to_float16(uint64_t a, float_status *status)
4080 {
4081     return uint64_to_float16_scalbn(a, 0, status);
4082 }
4083 
uint32_to_float16(uint32_t a,float_status * status)4084 float16 uint32_to_float16(uint32_t a, float_status *status)
4085 {
4086     return uint64_to_float16_scalbn(a, 0, status);
4087 }
4088 
uint16_to_float16(uint16_t a,float_status * status)4089 float16 uint16_to_float16(uint16_t a, float_status *status)
4090 {
4091     return uint64_to_float16_scalbn(a, 0, status);
4092 }
4093 
uint8_to_float16(uint8_t a,float_status * status)4094 float16 uint8_to_float16(uint8_t a, float_status *status)
4095 {
4096     return uint64_to_float16_scalbn(a, 0, status);
4097 }
4098 
uint64_to_float32_scalbn(uint64_t a,int scale,float_status * status)4099 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
4100 {
4101     FloatParts64 p;
4102 
4103     /* Without scaling, there are no overflow concerns. */
4104     if (likely(scale == 0) && can_use_fpu(status)) {
4105         union_float32 ur;
4106         ur.h = a;
4107         return ur.s;
4108     }
4109 
4110     parts_uint_to_float(&p, a, scale, status);
4111     return float32_round_pack_canonical(&p, status);
4112 }
4113 
uint32_to_float32_scalbn(uint32_t a,int scale,float_status * status)4114 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4115 {
4116     return uint64_to_float32_scalbn(a, scale, status);
4117 }
4118 
uint16_to_float32_scalbn(uint16_t a,int scale,float_status * status)4119 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4120 {
4121     return uint64_to_float32_scalbn(a, scale, status);
4122 }
4123 
uint64_to_float32(uint64_t a,float_status * status)4124 float32 uint64_to_float32(uint64_t a, float_status *status)
4125 {
4126     return uint64_to_float32_scalbn(a, 0, status);
4127 }
4128 
uint32_to_float32(uint32_t a,float_status * status)4129 float32 uint32_to_float32(uint32_t a, float_status *status)
4130 {
4131     return uint64_to_float32_scalbn(a, 0, status);
4132 }
4133 
uint16_to_float32(uint16_t a,float_status * status)4134 float32 uint16_to_float32(uint16_t a, float_status *status)
4135 {
4136     return uint64_to_float32_scalbn(a, 0, status);
4137 }
4138 
uint64_to_float64_scalbn(uint64_t a,int scale,float_status * status)4139 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
4140 {
4141     FloatParts64 p;
4142 
4143     /* Without scaling, there are no overflow concerns. */
4144     if (likely(scale == 0) && can_use_fpu(status)) {
4145         union_float64 ur;
4146         ur.h = a;
4147         return ur.s;
4148     }
4149 
4150     parts_uint_to_float(&p, a, scale, status);
4151     return float64_round_pack_canonical(&p, status);
4152 }
4153 
uint32_to_float64_scalbn(uint32_t a,int scale,float_status * status)4154 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4155 {
4156     return uint64_to_float64_scalbn(a, scale, status);
4157 }
4158 
uint16_to_float64_scalbn(uint16_t a,int scale,float_status * status)4159 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4160 {
4161     return uint64_to_float64_scalbn(a, scale, status);
4162 }
4163 
uint64_to_float64(uint64_t a,float_status * status)4164 float64 uint64_to_float64(uint64_t a, float_status *status)
4165 {
4166     return uint64_to_float64_scalbn(a, 0, status);
4167 }
4168 
uint32_to_float64(uint32_t a,float_status * status)4169 float64 uint32_to_float64(uint32_t a, float_status *status)
4170 {
4171     return uint64_to_float64_scalbn(a, 0, status);
4172 }
4173 
uint16_to_float64(uint16_t a,float_status * status)4174 float64 uint16_to_float64(uint16_t a, float_status *status)
4175 {
4176     return uint64_to_float64_scalbn(a, 0, status);
4177 }
4178 
uint64_to_bfloat16_scalbn(uint64_t a,int scale,float_status * status)4179 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4180 {
4181     FloatParts64 p;
4182 
4183     parts_uint_to_float(&p, a, scale, status);
4184     return bfloat16_round_pack_canonical(&p, status);
4185 }
4186 
uint32_to_bfloat16_scalbn(uint32_t a,int scale,float_status * status)4187 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4188 {
4189     return uint64_to_bfloat16_scalbn(a, scale, status);
4190 }
4191 
uint16_to_bfloat16_scalbn(uint16_t a,int scale,float_status * status)4192 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4193 {
4194     return uint64_to_bfloat16_scalbn(a, scale, status);
4195 }
4196 
uint8_to_bfloat16_scalbn(uint8_t a,int scale,float_status * status)4197 bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status)
4198 {
4199     return uint64_to_bfloat16_scalbn(a, scale, status);
4200 }
4201 
uint64_to_bfloat16(uint64_t a,float_status * status)4202 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4203 {
4204     return uint64_to_bfloat16_scalbn(a, 0, status);
4205 }
4206 
uint32_to_bfloat16(uint32_t a,float_status * status)4207 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4208 {
4209     return uint64_to_bfloat16_scalbn(a, 0, status);
4210 }
4211 
uint16_to_bfloat16(uint16_t a,float_status * status)4212 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4213 {
4214     return uint64_to_bfloat16_scalbn(a, 0, status);
4215 }
4216 
uint8_to_bfloat16(uint8_t a,float_status * status)4217 bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status)
4218 {
4219     return uint64_to_bfloat16_scalbn(a, 0, status);
4220 }
4221 
uint64_to_float128(uint64_t a,float_status * status)4222 float128 uint64_to_float128(uint64_t a, float_status *status)
4223 {
4224     FloatParts128 p;
4225 
4226     parts_uint_to_float(&p, a, 0, status);
4227     return float128_round_pack_canonical(&p, status);
4228 }
4229 
uint128_to_float128(Int128 a,float_status * status)4230 float128 uint128_to_float128(Int128 a, float_status *status)
4231 {
4232     FloatParts128 p = { };
4233     int shift;
4234 
4235     if (int128_nz(a)) {
4236         p.cls = float_class_normal;
4237 
4238         shift = clz64(int128_gethi(a));
4239         if (shift == 64) {
4240             shift += clz64(int128_getlo(a));
4241         }
4242 
4243         p.exp = 127 - shift;
4244         a = int128_lshift(a, shift);
4245 
4246         p.frac_hi = int128_gethi(a);
4247         p.frac_lo = int128_getlo(a);
4248     } else {
4249         p.cls = float_class_zero;
4250     }
4251 
4252     return float128_round_pack_canonical(&p, status);
4253 }
4254 
4255 /*
4256  * Minimum and maximum
4257  */
4258 
float16_minmax(float16 a,float16 b,float_status * s,int flags)4259 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4260 {
4261     FloatParts64 pa, pb, *pr;
4262 
4263     float16_unpack_canonical(&pa, a, s);
4264     float16_unpack_canonical(&pb, b, s);
4265     pr = parts_minmax(&pa, &pb, s, flags);
4266 
4267     return float16_round_pack_canonical(pr, s);
4268 }
4269 
bfloat16_minmax(bfloat16 a,bfloat16 b,float_status * s,int flags)4270 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4271                                 float_status *s, int flags)
4272 {
4273     FloatParts64 pa, pb, *pr;
4274 
4275     bfloat16_unpack_canonical(&pa, a, s);
4276     bfloat16_unpack_canonical(&pb, b, s);
4277     pr = parts_minmax(&pa, &pb, s, flags);
4278 
4279     return bfloat16_round_pack_canonical(pr, s);
4280 }
4281 
float32_minmax(float32 a,float32 b,float_status * s,int flags)4282 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4283 {
4284     FloatParts64 pa, pb, *pr;
4285 
4286     float32_unpack_canonical(&pa, a, s);
4287     float32_unpack_canonical(&pb, b, s);
4288     pr = parts_minmax(&pa, &pb, s, flags);
4289 
4290     return float32_round_pack_canonical(pr, s);
4291 }
4292 
float64_minmax(float64 a,float64 b,float_status * s,int flags)4293 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4294 {
4295     FloatParts64 pa, pb, *pr;
4296 
4297     float64_unpack_canonical(&pa, a, s);
4298     float64_unpack_canonical(&pb, b, s);
4299     pr = parts_minmax(&pa, &pb, s, flags);
4300 
4301     return float64_round_pack_canonical(pr, s);
4302 }
4303 
float128_minmax(float128 a,float128 b,float_status * s,int flags)4304 static float128 float128_minmax(float128 a, float128 b,
4305                                 float_status *s, int flags)
4306 {
4307     FloatParts128 pa, pb, *pr;
4308 
4309     float128_unpack_canonical(&pa, a, s);
4310     float128_unpack_canonical(&pb, b, s);
4311     pr = parts_minmax(&pa, &pb, s, flags);
4312 
4313     return float128_round_pack_canonical(pr, s);
4314 }
4315 
4316 #define MINMAX_1(type, name, flags) \
4317     type type##_##name(type a, type b, float_status *s) \
4318     { return type##_minmax(a, b, s, flags); }
4319 
4320 #define MINMAX_2(type) \
4321     MINMAX_1(type, max, 0)                                                \
4322     MINMAX_1(type, maxnum, minmax_isnum)                                  \
4323     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
4324     MINMAX_1(type, maximum_number, minmax_isnumber)                       \
4325     MINMAX_1(type, min, minmax_ismin)                                     \
4326     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
4327     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4328     MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \
4329 
4330 MINMAX_2(float16)
MINMAX_2(bfloat16)4331 MINMAX_2(bfloat16)
4332 MINMAX_2(float32)
4333 MINMAX_2(float64)
4334 MINMAX_2(float128)
4335 
4336 #undef MINMAX_1
4337 #undef MINMAX_2
4338 
4339 /*
4340  * Floating point compare
4341  */
4342 
4343 static FloatRelation QEMU_FLATTEN
4344 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4345 {
4346     FloatParts64 pa, pb;
4347 
4348     float16_unpack_canonical(&pa, a, s);
4349     float16_unpack_canonical(&pb, b, s);
4350     return parts_compare(&pa, &pb, s, is_quiet);
4351 }
4352 
float16_compare(float16 a,float16 b,float_status * s)4353 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4354 {
4355     return float16_do_compare(a, b, s, false);
4356 }
4357 
float16_compare_quiet(float16 a,float16 b,float_status * s)4358 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4359 {
4360     return float16_do_compare(a, b, s, true);
4361 }
4362 
4363 static FloatRelation QEMU_SOFTFLOAT_ATTR
float32_do_compare(float32 a,float32 b,float_status * s,bool is_quiet)4364 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4365 {
4366     FloatParts64 pa, pb;
4367 
4368     float32_unpack_canonical(&pa, a, s);
4369     float32_unpack_canonical(&pb, b, s);
4370     return parts_compare(&pa, &pb, s, is_quiet);
4371 }
4372 
4373 static FloatRelation QEMU_FLATTEN
float32_hs_compare(float32 xa,float32 xb,float_status * s,bool is_quiet)4374 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
4375 {
4376     union_float32 ua, ub;
4377 
4378     ua.s = xa;
4379     ub.s = xb;
4380 
4381     if (QEMU_NO_HARDFLOAT) {
4382         goto soft;
4383     }
4384 
4385     float32_input_flush2(&ua.s, &ub.s, s);
4386     if (isgreaterequal(ua.h, ub.h)) {
4387         if (isgreater(ua.h, ub.h)) {
4388             return float_relation_greater;
4389         }
4390         return float_relation_equal;
4391     }
4392     if (likely(isless(ua.h, ub.h))) {
4393         return float_relation_less;
4394     }
4395     /*
4396      * The only condition remaining is unordered.
4397      * Fall through to set flags.
4398      */
4399  soft:
4400     return float32_do_compare(ua.s, ub.s, s, is_quiet);
4401 }
4402 
float32_compare(float32 a,float32 b,float_status * s)4403 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4404 {
4405     return float32_hs_compare(a, b, s, false);
4406 }
4407 
float32_compare_quiet(float32 a,float32 b,float_status * s)4408 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4409 {
4410     return float32_hs_compare(a, b, s, true);
4411 }
4412 
4413 static FloatRelation QEMU_SOFTFLOAT_ATTR
float64_do_compare(float64 a,float64 b,float_status * s,bool is_quiet)4414 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4415 {
4416     FloatParts64 pa, pb;
4417 
4418     float64_unpack_canonical(&pa, a, s);
4419     float64_unpack_canonical(&pb, b, s);
4420     return parts_compare(&pa, &pb, s, is_quiet);
4421 }
4422 
4423 static FloatRelation QEMU_FLATTEN
float64_hs_compare(float64 xa,float64 xb,float_status * s,bool is_quiet)4424 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4425 {
4426     union_float64 ua, ub;
4427 
4428     ua.s = xa;
4429     ub.s = xb;
4430 
4431     if (QEMU_NO_HARDFLOAT) {
4432         goto soft;
4433     }
4434 
4435     float64_input_flush2(&ua.s, &ub.s, s);
4436     if (isgreaterequal(ua.h, ub.h)) {
4437         if (isgreater(ua.h, ub.h)) {
4438             return float_relation_greater;
4439         }
4440         return float_relation_equal;
4441     }
4442     if (likely(isless(ua.h, ub.h))) {
4443         return float_relation_less;
4444     }
4445     /*
4446      * The only condition remaining is unordered.
4447      * Fall through to set flags.
4448      */
4449  soft:
4450     return float64_do_compare(ua.s, ub.s, s, is_quiet);
4451 }
4452 
float64_compare(float64 a,float64 b,float_status * s)4453 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4454 {
4455     return float64_hs_compare(a, b, s, false);
4456 }
4457 
float64_compare_quiet(float64 a,float64 b,float_status * s)4458 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4459 {
4460     return float64_hs_compare(a, b, s, true);
4461 }
4462 
4463 static FloatRelation QEMU_FLATTEN
bfloat16_do_compare(bfloat16 a,bfloat16 b,float_status * s,bool is_quiet)4464 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4465 {
4466     FloatParts64 pa, pb;
4467 
4468     bfloat16_unpack_canonical(&pa, a, s);
4469     bfloat16_unpack_canonical(&pb, b, s);
4470     return parts_compare(&pa, &pb, s, is_quiet);
4471 }
4472 
bfloat16_compare(bfloat16 a,bfloat16 b,float_status * s)4473 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4474 {
4475     return bfloat16_do_compare(a, b, s, false);
4476 }
4477 
bfloat16_compare_quiet(bfloat16 a,bfloat16 b,float_status * s)4478 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4479 {
4480     return bfloat16_do_compare(a, b, s, true);
4481 }
4482 
4483 static FloatRelation QEMU_FLATTEN
float128_do_compare(float128 a,float128 b,float_status * s,bool is_quiet)4484 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4485 {
4486     FloatParts128 pa, pb;
4487 
4488     float128_unpack_canonical(&pa, a, s);
4489     float128_unpack_canonical(&pb, b, s);
4490     return parts_compare(&pa, &pb, s, is_quiet);
4491 }
4492 
float128_compare(float128 a,float128 b,float_status * s)4493 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4494 {
4495     return float128_do_compare(a, b, s, false);
4496 }
4497 
float128_compare_quiet(float128 a,float128 b,float_status * s)4498 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4499 {
4500     return float128_do_compare(a, b, s, true);
4501 }
4502 
4503 static FloatRelation QEMU_FLATTEN
floatx80_do_compare(floatx80 a,floatx80 b,float_status * s,bool is_quiet)4504 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4505 {
4506     FloatParts128 pa, pb;
4507 
4508     if (!floatx80_unpack_canonical(&pa, a, s) ||
4509         !floatx80_unpack_canonical(&pb, b, s)) {
4510         return float_relation_unordered;
4511     }
4512     return parts_compare(&pa, &pb, s, is_quiet);
4513 }
4514 
floatx80_compare(floatx80 a,floatx80 b,float_status * s)4515 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4516 {
4517     return floatx80_do_compare(a, b, s, false);
4518 }
4519 
floatx80_compare_quiet(floatx80 a,floatx80 b,float_status * s)4520 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4521 {
4522     return floatx80_do_compare(a, b, s, true);
4523 }
4524 
4525 /*
4526  * Scale by 2**N
4527  */
4528 
/* Unpack `a', apply parts_scalbn() with `n', then round and repack. */
float16 float16_scalbn(float16 a, int n, float_status *status)
{
    FloatParts64 parts;

    float16_unpack_canonical(&parts, a, status);
    parts_scalbn(&parts, n, status);

    return float16_round_pack_canonical(&parts, status);
}
4537 
/* Unpack `a', apply parts_scalbn() with `n', then round and repack. */
float32 float32_scalbn(float32 a, int n, float_status *status)
{
    FloatParts64 parts;

    float32_unpack_canonical(&parts, a, status);
    parts_scalbn(&parts, n, status);

    return float32_round_pack_canonical(&parts, status);
}
4546 
/* Unpack `a', apply parts_scalbn() with `n', then round and repack. */
float64 float64_scalbn(float64 a, int n, float_status *status)
{
    FloatParts64 parts;

    float64_unpack_canonical(&parts, a, status);
    parts_scalbn(&parts, n, status);

    return float64_round_pack_canonical(&parts, status);
}
4555 
/* Unpack `a', apply parts_scalbn() with `n', then round and repack. */
bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
    FloatParts64 parts;

    bfloat16_unpack_canonical(&parts, a, status);
    parts_scalbn(&parts, n, status);

    return bfloat16_round_pack_canonical(&parts, status);
}
4564 
/*
 * Unpack `a' into 128-bit parts, apply parts_scalbn() with `n', then
 * round and repack.
 */
float128 float128_scalbn(float128 a, int n, float_status *status)
{
    FloatParts128 parts;

    float128_unpack_canonical(&parts, a, status);
    parts_scalbn(&parts, n, status);

    return float128_round_pack_canonical(&parts, status);
}
4573 
/*
 * floatx80 scalbn.  If floatx80_unpack_canonical() rejects `a', the default
 * NaN is returned; otherwise parts_scalbn() is applied with `n' and the
 * result is rounded and repacked.
 */
floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
{
    FloatParts128 parts;

    if (floatx80_unpack_canonical(&parts, a, status)) {
        parts_scalbn(&parts, n, status);
        return floatx80_round_pack_canonical(&parts, status);
    }

    return floatx80_default_nan(status);
}
4584 
4585 /*
4586  * Square Root
4587  */
4588 
float16_sqrt(float16 a,float_status * status)4589 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4590 {
4591     FloatParts64 p;
4592 
4593     float16_unpack_canonical(&p, a, status);
4594     parts_sqrt(&p, status, &float16_params);
4595     return float16_round_pack_canonical(&p, status);
4596 }
4597 
4598 static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_sqrt(float32 a,float_status * status)4599 soft_f32_sqrt(float32 a, float_status *status)
4600 {
4601     FloatParts64 p;
4602 
4603     float32_unpack_canonical(&p, a, status);
4604     parts_sqrt(&p, status, &float32_params);
4605     return float32_round_pack_canonical(&p, status);
4606 }
4607 
4608 static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_sqrt(float64 a,float_status * status)4609 soft_f64_sqrt(float64 a, float_status *status)
4610 {
4611     FloatParts64 p;
4612 
4613     float64_unpack_canonical(&p, a, status);
4614     parts_sqrt(&p, status, &float64_params);
4615     return float64_round_pack_canonical(&p, status);
4616 }
4617 
float32_sqrt(float32 xa,float_status * s)4618 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4619 {
4620     union_float32 ua, ur;
4621 
4622     ua.s = xa;
4623     if (unlikely(!can_use_fpu(s))) {
4624         goto soft;
4625     }
4626 
4627     float32_input_flush1(&ua.s, s);
4628     if (QEMU_HARDFLOAT_1F32_USE_FP) {
4629         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4630                        fpclassify(ua.h) == FP_ZERO) ||
4631                      signbit(ua.h))) {
4632             goto soft;
4633         }
4634     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4635                         float32_is_neg(ua.s))) {
4636         goto soft;
4637     }
4638     ur.h = sqrtf(ua.h);
4639     return ur.s;
4640 
4641  soft:
4642     return soft_f32_sqrt(ua.s, s);
4643 }
4644 
float64_sqrt(float64 xa,float_status * s)4645 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4646 {
4647     union_float64 ua, ur;
4648 
4649     ua.s = xa;
4650     if (unlikely(!can_use_fpu(s))) {
4651         goto soft;
4652     }
4653 
4654     float64_input_flush1(&ua.s, s);
4655     if (QEMU_HARDFLOAT_1F64_USE_FP) {
4656         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4657                        fpclassify(ua.h) == FP_ZERO) ||
4658                      signbit(ua.h))) {
4659             goto soft;
4660         }
4661     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4662                         float64_is_neg(ua.s))) {
4663         goto soft;
4664     }
4665     ur.h = sqrt(ua.h);
4666     return ur.s;
4667 
4668  soft:
4669     return soft_f64_sqrt(ua.s, s);
4670 }
4671 
/*
 * Square root of a float64 operand, computed with the float64 format
 * parameters but packed through float64r32_round_pack_canonical().
 */
float64 float64r32_sqrt(float64 a, float_status *status)
{
    FloatParts64 parts;

    float64_unpack_canonical(&parts, a, status);
    parts_sqrt(&parts, status, &float64_params);

    return float64r32_round_pack_canonical(&parts, status);
}
4680 
bfloat16_sqrt(bfloat16 a,float_status * status)4681 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4682 {
4683     FloatParts64 p;
4684 
4685     bfloat16_unpack_canonical(&p, a, status);
4686     parts_sqrt(&p, status, &bfloat16_params);
4687     return bfloat16_round_pack_canonical(&p, status);
4688 }
4689 
float128_sqrt(float128 a,float_status * status)4690 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4691 {
4692     FloatParts128 p;
4693 
4694     float128_unpack_canonical(&p, a, status);
4695     parts_sqrt(&p, status, &float128_params);
4696     return float128_round_pack_canonical(&p, status);
4697 }
4698 
/*
 * floatx80 square root.  If floatx80_unpack_canonical() rejects `a', the
 * default NaN is returned.  Otherwise parts_sqrt() runs with the
 * floatx80_params entry selected by the status' current
 * floatx80_rounding_precision, and the result is rounded and repacked.
 */
floatx80 floatx80_sqrt(floatx80 a, float_status *s)
{
    FloatParts128 parts;

    if (floatx80_unpack_canonical(&parts, a, s)) {
        parts_sqrt(&parts, s, &floatx80_params[s->floatx80_rounding_precision]);
        return floatx80_round_pack_canonical(&parts, s);
    }

    return floatx80_default_nan(s);
}
4709 
4710 /*
4711  * log2
4712  */
/*
 * Unpack `a', run parts_log2() with the float32 format parameters, then
 * round and repack.
 */
float32 float32_log2(float32 a, float_status *status)
{
    FloatParts64 parts;

    float32_unpack_canonical(&parts, a, status);
    parts_log2(&parts, status, &float32_params);

    return float32_round_pack_canonical(&parts, status);
}
4721 
/*
 * Unpack `a', run parts_log2() with the float64 format parameters, then
 * round and repack.
 */
float64 float64_log2(float64 a, float_status *status)
{
    FloatParts64 parts;

    float64_unpack_canonical(&parts, a, status);
    parts_log2(&parts, status, &float64_params);

    return float64_round_pack_canonical(&parts, status);
}
4730 
4731 /*----------------------------------------------------------------------------
4732 | The pattern for a default generated NaN.
4733 *----------------------------------------------------------------------------*/
4734 
float16_default_nan(float_status * status)4735 float16 float16_default_nan(float_status *status)
4736 {
4737     FloatParts64 p;
4738 
4739     parts_default_nan(&p, status);
4740     p.frac >>= float16_params.frac_shift;
4741     return float16_pack_raw(&p);
4742 }
4743 
float32_default_nan(float_status * status)4744 float32 float32_default_nan(float_status *status)
4745 {
4746     FloatParts64 p;
4747 
4748     parts_default_nan(&p, status);
4749     p.frac >>= float32_params.frac_shift;
4750     return float32_pack_raw(&p);
4751 }
4752 
float64_default_nan(float_status * status)4753 float64 float64_default_nan(float_status *status)
4754 {
4755     FloatParts64 p;
4756 
4757     parts_default_nan(&p, status);
4758     p.frac >>= float64_params.frac_shift;
4759     return float64_pack_raw(&p);
4760 }
4761 
float128_default_nan(float_status * status)4762 float128 float128_default_nan(float_status *status)
4763 {
4764     FloatParts128 p;
4765 
4766     parts_default_nan(&p, status);
4767     frac_shr(&p, float128_params.frac_shift);
4768     return float128_pack_raw(&p);
4769 }
4770 
bfloat16_default_nan(float_status * status)4771 bfloat16 bfloat16_default_nan(float_status *status)
4772 {
4773     FloatParts64 p;
4774 
4775     parts_default_nan(&p, status);
4776     p.frac >>= bfloat16_params.frac_shift;
4777     return bfloat16_pack_raw(&p);
4778 }
4779 
4780 /*----------------------------------------------------------------------------
4781 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4782 *----------------------------------------------------------------------------*/
4783 
/*
 * Raw-unpack `a', move the fraction up by the float16 frac_shift so that
 * parts_silence_nan() can operate on it, then shift it back and repack.
 */
float16 float16_silence_nan(float16 a, float_status *status)
{
    FloatParts64 raw;

    float16_unpack_raw(&raw, a);

    raw.frac = raw.frac << float16_params.frac_shift;
    parts_silence_nan(&raw, status);
    raw.frac = raw.frac >> float16_params.frac_shift;

    return float16_pack_raw(&raw);
}
4794 
/*
 * Raw-unpack `a', move the fraction up by the float32 frac_shift so that
 * parts_silence_nan() can operate on it, then shift it back and repack.
 */
float32 float32_silence_nan(float32 a, float_status *status)
{
    FloatParts64 raw;

    float32_unpack_raw(&raw, a);

    raw.frac = raw.frac << float32_params.frac_shift;
    parts_silence_nan(&raw, status);
    raw.frac = raw.frac >> float32_params.frac_shift;

    return float32_pack_raw(&raw);
}
4805 
/*
 * Raw-unpack `a', move the fraction up by the float64 frac_shift so that
 * parts_silence_nan() can operate on it, then shift it back and repack.
 */
float64 float64_silence_nan(float64 a, float_status *status)
{
    FloatParts64 raw;

    float64_unpack_raw(&raw, a);

    raw.frac = raw.frac << float64_params.frac_shift;
    parts_silence_nan(&raw, status);
    raw.frac = raw.frac >> float64_params.frac_shift;

    return float64_pack_raw(&raw);
}
4816 
/*
 * Raw-unpack `a', move the fraction up by the bfloat16 frac_shift so that
 * parts_silence_nan() can operate on it, then shift it back and repack.
 */
bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
    FloatParts64 raw;

    bfloat16_unpack_raw(&raw, a);

    raw.frac = raw.frac << bfloat16_params.frac_shift;
    parts_silence_nan(&raw, status);
    raw.frac = raw.frac >> bfloat16_params.frac_shift;

    return bfloat16_pack_raw(&raw);
}
4827 
/*
 * Raw-unpack `a' into 128-bit parts, move the fraction up by the float128
 * frac_shift with frac_shl() so that parts_silence_nan() can operate on
 * it, then shift it back with frac_shr() and repack.
 */
float128 float128_silence_nan(float128 a, float_status *status)
{
    FloatParts128 raw;

    float128_unpack_raw(&raw, a);

    frac_shl(&raw, float128_params.frac_shift);
    parts_silence_nan(&raw, status);
    frac_shr(&raw, float128_params.frac_shift);

    return float128_pack_raw(&raw);
}
4838 
4839 /*----------------------------------------------------------------------------
4840 | If `a' is denormal and we are in flush-to-zero mode then set the
4841 | input-denormal exception and return zero. Otherwise just return the value.
4842 *----------------------------------------------------------------------------*/
4843 
/*
 * Reports whether the raw parts `p' have a zero exponent together with a
 * non-zero fraction.  When they do, float_flag_input_denormal is raised
 * on `status' before returning true.
 */
static bool parts_squash_denormal(FloatParts64 p, float_status *status)
{
    bool is_denormal = (p.exp == 0) && (p.frac != 0);

    if (is_denormal) {
        float_raise(float_flag_input_denormal, status);
    }

    return is_denormal;
}
4853 
/*
 * Returns `a' unchanged unless status->flush_inputs_to_zero is set and
 * parts_squash_denormal() reports a denormal; in that case a float16 zero
 * carrying the sign of `a' is returned.
 */
float16 float16_squash_input_denormal(float16 a, float_status *status)
{
    FloatParts64 raw;

    if (!status->flush_inputs_to_zero) {
        return a;
    }

    float16_unpack_raw(&raw, a);
    if (!parts_squash_denormal(raw, status)) {
        return a;
    }

    return float16_set_sign(float16_zero, raw.sign);
}
4866 
/*
 * Returns `a' unchanged unless status->flush_inputs_to_zero is set and
 * parts_squash_denormal() reports a denormal; in that case a float32 zero
 * carrying the sign of `a' is returned.
 */
float32 float32_squash_input_denormal(float32 a, float_status *status)
{
    FloatParts64 raw;

    if (!status->flush_inputs_to_zero) {
        return a;
    }

    float32_unpack_raw(&raw, a);
    if (!parts_squash_denormal(raw, status)) {
        return a;
    }

    return float32_set_sign(float32_zero, raw.sign);
}
4879 
/*
 * Returns `a' unchanged unless status->flush_inputs_to_zero is set and
 * parts_squash_denormal() reports a denormal; in that case a float64 zero
 * carrying the sign of `a' is returned.
 */
float64 float64_squash_input_denormal(float64 a, float_status *status)
{
    FloatParts64 raw;

    if (!status->flush_inputs_to_zero) {
        return a;
    }

    float64_unpack_raw(&raw, a);
    if (!parts_squash_denormal(raw, status)) {
        return a;
    }

    return float64_set_sign(float64_zero, raw.sign);
}
4892 
/*
 * Returns `a' unchanged unless status->flush_inputs_to_zero is set and
 * parts_squash_denormal() reports a denormal; in that case a bfloat16 zero
 * carrying the sign of `a' is returned.
 */
bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
{
    FloatParts64 raw;

    if (!status->flush_inputs_to_zero) {
        return a;
    }

    bfloat16_unpack_raw(&raw, a);
    if (!parts_squash_denormal(raw, status)) {
        return a;
    }

    return bfloat16_set_sign(bfloat16_zero, raw.sign);
}
4905 
4906 /*----------------------------------------------------------------------------
4907 | Normalizes the subnormal extended double-precision floating-point value
4908 | represented by the denormalized significand `aSig'.  The normalized exponent
4909 | and significand are stored at the locations pointed to by `zExpPtr' and
4910 | `zSigPtr', respectively.
4911 *----------------------------------------------------------------------------*/
4912 
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /*
     * Number of leading zero bits in the significand.
     * NOTE(review): presumably callers only pass a non-zero aSig; a count
     * of 64 would make the shift below undefined -- confirm at call sites.
     */
    int8_t leadingZeros = clz64(aSig);

    /* Left-justify the significand and set the exponent to 1 - shift. */
    *zSigPtr = aSig << leadingZeros;
    *zExpPtr = 1 - leadingZeros;
}
4922 
4923 /*----------------------------------------------------------------------------
4924 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4925 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4926 | and returns the proper extended double-precision floating-point value
4927 | corresponding to the abstract input.  Ordinarily, the abstract value is
4928 | rounded and packed into the extended double-precision format, with the
4929 | inexact exception raised if the abstract input cannot be represented
4930 | exactly.  However, if the abstract value is too large, the overflow and
4931 | inexact exceptions are raised and an infinity or maximal finite value is
4932 | returned.  If the abstract value is too small, the input value is rounded to
4933 | a subnormal number, and the underflow and inexact exceptions are raised if
4934 | the abstract input cannot be represented exactly as a subnormal extended
4935 | double-precision floating-point number.
4936 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4937 | the result is rounded to the same number of bits as single or double
4938 | precision, respectively.  Otherwise, the result is rounded to the full
4939 | precision of the extended double-precision format.
4940 |     The input significand must be normalized or smaller.  If the input
4941 | significand is not normalized, `zExp' must be 0; in that case, the result
4942 | returned is a subnormal number, and it must not require rounding.  The
4943 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4944 | Floating-Point Arithmetic.
4945 *----------------------------------------------------------------------------*/
4946 
roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,bool zSign,int32_t zExp,uint64_t zSig0,uint64_t zSig1,float_status * status)4947 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4948                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4949                               float_status *status)
4950 {
4951     FloatRoundMode roundingMode;
4952     bool roundNearestEven, increment, isTiny;
4953     int64_t roundIncrement, roundMask, roundBits;
4954 
4955     roundingMode = status->float_rounding_mode;
4956     roundNearestEven = ( roundingMode == float_round_nearest_even );
4957     switch (roundingPrecision) {
4958     case floatx80_precision_x:
4959         goto precision80;
4960     case floatx80_precision_d:
4961         roundIncrement = UINT64_C(0x0000000000000400);
4962         roundMask = UINT64_C(0x00000000000007FF);
4963         break;
4964     case floatx80_precision_s:
4965         roundIncrement = UINT64_C(0x0000008000000000);
4966         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4967         break;
4968     default:
4969         g_assert_not_reached();
4970     }
4971     zSig0 |= ( zSig1 != 0 );
4972     switch (roundingMode) {
4973     case float_round_nearest_even:
4974     case float_round_ties_away:
4975         break;
4976     case float_round_to_zero:
4977         roundIncrement = 0;
4978         break;
4979     case float_round_up:
4980         roundIncrement = zSign ? 0 : roundMask;
4981         break;
4982     case float_round_down:
4983         roundIncrement = zSign ? roundMask : 0;
4984         break;
4985     default:
4986         abort();
4987     }
4988     roundBits = zSig0 & roundMask;
4989     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4990         if (    ( 0x7FFE < zExp )
4991              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4992            ) {
4993             goto overflow;
4994         }
4995         if ( zExp <= 0 ) {
4996             if (status->flush_to_zero) {
4997                 float_raise(float_flag_output_denormal, status);
4998                 return packFloatx80(zSign, 0, 0);
4999             }
5000             isTiny = status->tininess_before_rounding
5001                   || (zExp < 0 )
5002                   || (zSig0 <= zSig0 + roundIncrement);
5003             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
5004             zExp = 0;
5005             roundBits = zSig0 & roundMask;
5006             if (isTiny && roundBits) {
5007                 float_raise(float_flag_underflow, status);
5008             }
5009             if (roundBits) {
5010                 float_raise(float_flag_inexact, status);
5011             }
5012             zSig0 += roundIncrement;
5013             if ( (int64_t) zSig0 < 0 ) zExp = 1;
5014             roundIncrement = roundMask + 1;
5015             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5016                 roundMask |= roundIncrement;
5017             }
5018             zSig0 &= ~ roundMask;
5019             return packFloatx80( zSign, zExp, zSig0 );
5020         }
5021     }
5022     if (roundBits) {
5023         float_raise(float_flag_inexact, status);
5024     }
5025     zSig0 += roundIncrement;
5026     if ( zSig0 < roundIncrement ) {
5027         ++zExp;
5028         zSig0 = UINT64_C(0x8000000000000000);
5029     }
5030     roundIncrement = roundMask + 1;
5031     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5032         roundMask |= roundIncrement;
5033     }
5034     zSig0 &= ~ roundMask;
5035     if ( zSig0 == 0 ) zExp = 0;
5036     return packFloatx80( zSign, zExp, zSig0 );
5037  precision80:
5038     switch (roundingMode) {
5039     case float_round_nearest_even:
5040     case float_round_ties_away:
5041         increment = ((int64_t)zSig1 < 0);
5042         break;
5043     case float_round_to_zero:
5044         increment = 0;
5045         break;
5046     case float_round_up:
5047         increment = !zSign && zSig1;
5048         break;
5049     case float_round_down:
5050         increment = zSign && zSig1;
5051         break;
5052     default:
5053         abort();
5054     }
5055     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
5056         if (    ( 0x7FFE < zExp )
5057              || (    ( zExp == 0x7FFE )
5058                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
5059                   && increment
5060                 )
5061            ) {
5062             roundMask = 0;
5063  overflow:
5064             float_raise(float_flag_overflow | float_flag_inexact, status);
5065             if (    ( roundingMode == float_round_to_zero )
5066                  || ( zSign && ( roundingMode == float_round_up ) )
5067                  || ( ! zSign && ( roundingMode == float_round_down ) )
5068                ) {
5069                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
5070             }
5071             return packFloatx80(zSign,
5072                                 floatx80_infinity_high,
5073                                 floatx80_infinity_low);
5074         }
5075         if ( zExp <= 0 ) {
5076             isTiny = status->tininess_before_rounding
5077                   || (zExp < 0)
5078                   || !increment
5079                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
5080             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
5081             zExp = 0;
5082             if (isTiny && zSig1) {
5083                 float_raise(float_flag_underflow, status);
5084             }
5085             if (zSig1) {
5086                 float_raise(float_flag_inexact, status);
5087             }
5088             switch (roundingMode) {
5089             case float_round_nearest_even:
5090             case float_round_ties_away:
5091                 increment = ((int64_t)zSig1 < 0);
5092                 break;
5093             case float_round_to_zero:
5094                 increment = 0;
5095                 break;
5096             case float_round_up:
5097                 increment = !zSign && zSig1;
5098                 break;
5099             case float_round_down:
5100                 increment = zSign && zSig1;
5101                 break;
5102             default:
5103                 abort();
5104             }
5105             if ( increment ) {
5106                 ++zSig0;
5107                 if (!(zSig1 << 1) && roundNearestEven) {
5108                     zSig0 &= ~1;
5109                 }
5110                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
5111             }
5112             return packFloatx80( zSign, zExp, zSig0 );
5113         }
5114     }
5115     if (zSig1) {
5116         float_raise(float_flag_inexact, status);
5117     }
5118     if ( increment ) {
5119         ++zSig0;
5120         if ( zSig0 == 0 ) {
5121             ++zExp;
5122             zSig0 = UINT64_C(0x8000000000000000);
5123         }
5124         else {
5125             if (!(zSig1 << 1) && roundNearestEven) {
5126                 zSig0 &= ~1;
5127             }
5128         }
5129     }
5130     else {
5131         if ( zSig0 == 0 ) zExp = 0;
5132     }
5133     return packFloatx80( zSign, zExp, zSig0 );
5134 
5135 }
5136 
5137 /*----------------------------------------------------------------------------
5138 | Takes an abstract floating-point value having sign `zSign', exponent
5139 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5140 | and returns the proper extended double-precision floating-point value
5141 | corresponding to the abstract input.  This routine is just like
5142 | `roundAndPackFloatx80' except that the input significand does not have to be
5143 | normalized.
5144 *----------------------------------------------------------------------------*/
5145 
normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,bool zSign,int32_t zExp,uint64_t zSig0,uint64_t zSig1,float_status * status)5146 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
5147                                        bool zSign, int32_t zExp,
5148                                        uint64_t zSig0, uint64_t zSig1,
5149                                        float_status *status)
5150 {
5151     int8_t shiftCount;
5152 
5153     if ( zSig0 == 0 ) {
5154         zSig0 = zSig1;
5155         zSig1 = 0;
5156         zExp -= 64;
5157     }
5158     shiftCount = clz64(zSig0);
5159     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5160     zExp -= shiftCount;
5161     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
5162                                 zSig0, zSig1, status);
5163 
5164 }
5165 
5166 /*----------------------------------------------------------------------------
5167 | Returns the binary exponential of the single-precision floating-point value
5168 | `a'. The operation is performed according to the IEC/IEEE Standard for
5169 | Binary Floating-Point Arithmetic.
5170 |
5171 | Uses the following identities:
5172 |
5173 | 1. -------------------------------------------------------------------------
5174 |      x    x*ln(2)
5175 |     2  = e
5176 |
5177 | 2. -------------------------------------------------------------------------
5178 |                      2     3     4     5           n
5179 |      x        x     x     x     x     x           x
5180 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5181 |               1!    2!    3!    4!    5!          n!
5182 *----------------------------------------------------------------------------*/
5183 
5184 static const float64 float32_exp2_coefficients[15] =
5185 {
5186     const_float64( 0x3ff0000000000000ll ), /*  1 */
5187     const_float64( 0x3fe0000000000000ll ), /*  2 */
5188     const_float64( 0x3fc5555555555555ll ), /*  3 */
5189     const_float64( 0x3fa5555555555555ll ), /*  4 */
5190     const_float64( 0x3f81111111111111ll ), /*  5 */
5191     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
5192     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
5193     const_float64( 0x3efa01a01a01a01all ), /*  8 */
5194     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
5195     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5196     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5197     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5198     const_float64( 0x3de6124613a86d09ll ), /* 13 */
5199     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5200     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5201 };
5202 
/*----------------------------------------------------------------------------
| Computes 2 raised to the single-precision value `a'.
|
| Special inputs are answered directly: a NaN goes through parts_return_nan(),
| +infinity is returned unchanged, -infinity yields float32_zero and a zero
| input yields float32_one.  Every other input raises float_flag_inexact and
| is evaluated as e**(a * ln2): starting from 1, the terms
| coefficient[n] * (a * ln2)**(n + 1) are accumulated for each entry of
| float32_exp2_coefficients, and the sum is rounded and packed as a float32.
*----------------------------------------------------------------------------*/

float32 float32_exp2(float32 a, float_status *status)
{
    const int termCount = sizeof(float32_exp2_coefficients)
                        / sizeof(float32_exp2_coefficients[0]);
    FloatParts64 arg, argPower, factor, sum;
    int term;

    float32_unpack_canonical(&arg, a, status);

    if (unlikely(arg.cls != float_class_normal)) {
        if (arg.cls == float_class_snan || arg.cls == float_class_qnan) {
            parts_return_nan(&arg, status);
            return float32_round_pack_canonical(&arg, status);
        }
        if (arg.cls == float_class_inf) {
            /* 2**(-inf) is zero; 2**(+inf) stays +inf. */
            if (arg.sign) {
                return float32_zero;
            }
            return a;
        }
        if (arg.cls == float_class_zero) {
            return float32_one;
        }
        g_assert_not_reached();
    }

    float_raise(float_flag_inexact, status);

    /* Rescale the argument: arg = a * ln(2), so that 2**a == e**arg. */
    float64_unpack_canonical(&factor, float64_ln2, status);
    arg = *parts_mul(&arg, &factor, status);
    argPower = arg;

    /* sum = 1 + coefficient[0]*arg + coefficient[1]*arg**2 + ... */
    float64_unpack_canonical(&sum, float64_one, status);
    for (term = 0; term < termCount; term++) {
        float64_unpack_canonical(&factor, float32_exp2_coefficients[term],
                                 status);
        sum = *parts_muladd(&factor, &argPower, &sum, 0, status);
        /* Advance to the next power of the argument. */
        argPower = *parts_mul(&argPower, &arg, status);
    }

    return float32_round_pack_canonical(&sum, status);
}
5240 
5241 /*----------------------------------------------------------------------------
5242 | Rounds the extended double-precision floating-point value `a'
5243 | to the precision provided by floatx80_rounding_precision and returns the
5244 | result as an extended double-precision floating-point value.
5245 | The operation is performed according to the IEC/IEEE Standard for Binary
5246 | Floating-Point Arithmetic.
5247 *----------------------------------------------------------------------------*/
5248 
floatx80_round(floatx80 a,float_status * status)5249 floatx80 floatx80_round(floatx80 a, float_status *status)
5250 {
5251     FloatParts128 p;
5252 
5253     if (!floatx80_unpack_canonical(&p, a, status)) {
5254         return floatx80_default_nan(status);
5255     }
5256     return floatx80_round_pack_canonical(&p, status);
5257 }
5258 
softfloat_init(void)5259 static void __attribute__((constructor)) softfloat_init(void)
5260 {
5261     union_float64 ua, ub, uc, ur;
5262 
5263     if (QEMU_NO_HARDFLOAT) {
5264         return;
5265     }
5266     /*
5267      * Test that the host's FMA is not obviously broken. For example,
5268      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5269      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5270      */
5271     ua.s = 0x0020000000000001ULL;
5272     ub.s = 0x3ca0000000000000ULL;
5273     uc.s = 0x0020000000000000ULL;
5274     ur.h = fma(ua.h, ub.h, uc.h);
5275     if (ur.s != 0x0020000000000001ULL) {
5276         force_soft_fma = true;
5277     }
5278 }
5279