xref: /openbmc/qemu/fpu/softfloat.c (revision 9a4e273ddec3927920c5958d2226c6b38b543336)
1 /*
2  * QEMU float support
3  *
4  * The code in this source file is derived from release 2a of the SoftFloat
5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6  * some later contributions) are provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file after December 1st 2014 will be
14  * taken to be licensed under the Softfloat-2a license unless specifically
15  * indicated otherwise.
16  */
17 
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22 
23 Written by John R. Hauser.  This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704.  Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980.  The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32 
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43 
44 ===============================================================================
45 */
46 
47 /* BSD licensing:
48  * Copyright (c) 2006, Fabrice Bellard
49  * All rights reserved.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions are met:
53  *
54  * 1. Redistributions of source code must retain the above copyright notice,
55  * this list of conditions and the following disclaimer.
56  *
57  * 2. Redistributions in binary form must reproduce the above copyright notice,
58  * this list of conditions and the following disclaimer in the documentation
59  * and/or other materials provided with the distribution.
60  *
61  * 3. Neither the name of the copyright holder nor the names of its contributors
62  * may be used to endorse or promote products derived from this software without
63  * specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75  * THE POSSIBILITY OF SUCH DAMAGE.
76  */
77 
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79  * version 2 or later. See the COPYING file in the top-level directory.
80  */
81 
82 #include "qemu/osdep.h"
83 #include <math.h>
84 #include "qemu/bitops.h"
85 #include "fpu/softfloat.h"
86 
87 /* We only need stdlib for abort() */
88 
89 /*----------------------------------------------------------------------------
90 | Primitive arithmetic functions, including multi-word arithmetic, and
91 | division and square root approximations.  (Can be specialized to target if
92 | desired.)
93 *----------------------------------------------------------------------------*/
94 #include "fpu/softfloat-macros.h"
95 
96 /*
97  * Hardfloat
98  *
99  * Fast emulation of guest FP instructions is challenging for two reasons.
100  * First, FP instruction semantics are similar but not identical, particularly
101  * when handling NaNs. Second, emulating at reasonable speed the guest FP
102  * exception flags is not trivial: reading the host's flags register with a
103  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
104  * and trapping on every FP exception is not fast nor pleasant to work with.
105  *
106  * We address these challenges by leveraging the host FPU for a subset of the
107  * operations. To do this we expand on the idea presented in this paper:
108  *
109  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
110  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
111  *
112  * The idea is thus to leverage the host FPU to (1) compute FP operations
113  * and (2) identify whether FP exceptions occurred while avoiding
114  * expensive exception flag register accesses.
115  *
116  * An important optimization shown in the paper is that given that exception
117  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
118  * This is particularly useful for the inexact flag, which is very frequently
119  * raised in floating-point workloads.
120  *
121  * We optimize the code further by deferring to soft-fp whenever FP exception
122  * detection might get hairy. Two examples: (1) when at least one operand is
123  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
124  * and the result is < the minimum normal.
125  */
/*
 * Generate a helper that flushes a denormal input to a signed zero and
 * raises float_flag_input_denormal_flushed.  No flush_inputs_to_zero
 * check is performed here -- callers (the FLUSH1/2/3 variants below)
 * are responsible for that gate.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            /* Preserve the sign while squashing the value to zero. */  \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal_flushed, s);          \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK
139 
/*
 * One-operand input flush: early-out in the common case where
 * flush_inputs_to_zero is disabled, otherwise defer to the
 * unchecked helper above.
 */
#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1
152 
/*
 * Two-operand input flush: single flush_inputs_to_zero check, then
 * both operands are flushed via the unchecked helper.
 */
#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2
166 
/*
 * Three-operand input flush (used by fused multiply-add paths):
 * single flush_inputs_to_zero check, then all three operands are
 * flushed via the unchecked helper.
 */
#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
181 
182 /*
183  * Choose whether to use fpclassify or float32/64_* primitives in the generated
184  * hardfloat functions. Each combination of number of inputs and float size
185  * gets its own value.
186  */
#if defined(__x86_64__)
/*
 * On x86-64 only the float64 cases use fpclassify; presumably the
 * float32 softfloat primitives were measured faster there -- confirm
 * with fp-bench before changing.
 */
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
/* All other hosts: use the softfloat bit-test primitives throughout. */
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
/* noinline keeps the slow softfloat path out of the hardfloat fast path. */
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
229 
230 static inline bool can_use_fpu(const float_status *s)
231 {
232     if (QEMU_NO_HARDFLOAT) {
233         return false;
234     }
235     return likely(s->float_exception_flags & float_flag_inexact &&
236                   s->float_rounding_mode == float_round_nearest_even);
237 }
238 
239 /*
240  * Hardfloat generation functions. Each operation can have two flavors:
241  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
242  * most condition checks, or native ones (e.g. fpclassify).
243  *
244  * The flavor is chosen by the callers. Instead of using macros, we rely on the
245  * compiler to propagate constants and inline everything into the callers.
246  *
247  * We only generate functions for operations with two inputs, since only
248  * these are common enough to justify consolidating them into common code.
249  */
250 
/*
 * Type-punning views of a float value: .s is the softfloat
 * representation, .h is the host-native type occupying the same bits.
 */
typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

/* Predicate over two operands, deciding pre/post hardfloat conditions. */
typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

/* Softfloat and host-native implementations of a 2-input operation. */
typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);
268 
269 /* 2-input is-zero-or-normal */
f32_is_zon2(union_float32 a,union_float32 b)270 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
271 {
272     if (QEMU_HARDFLOAT_2F32_USE_FP) {
273         /*
274          * Not using a temp variable for consecutive fpclassify calls ends up
275          * generating faster code.
276          */
277         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
278                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
279     }
280     return float32_is_zero_or_normal(a.s) &&
281            float32_is_zero_or_normal(b.s);
282 }
283 
f64_is_zon2(union_float64 a,union_float64 b)284 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
285 {
286     if (QEMU_HARDFLOAT_2F64_USE_FP) {
287         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
288                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
289     }
290     return float64_is_zero_or_normal(a.s) &&
291            float64_is_zero_or_normal(b.s);
292 }
293 
294 /* 3-input is-zero-or-normal */
295 static inline
f32_is_zon3(union_float32 a,union_float32 b,union_float32 c)296 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
297 {
298     if (QEMU_HARDFLOAT_3F32_USE_FP) {
299         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
300                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
301                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
302     }
303     return float32_is_zero_or_normal(a.s) &&
304            float32_is_zero_or_normal(b.s) &&
305            float32_is_zero_or_normal(c.s);
306 }
307 
308 static inline
f64_is_zon3(union_float64 a,union_float64 b,union_float64 c)309 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
310 {
311     if (QEMU_HARDFLOAT_3F64_USE_FP) {
312         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
313                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
314                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
315     }
316     return float64_is_zero_or_normal(a.s) &&
317            float64_is_zero_or_normal(b.s) &&
318            float64_is_zero_or_normal(c.s);
319 }
320 
f32_is_inf(union_float32 a)321 static inline bool f32_is_inf(union_float32 a)
322 {
323     if (QEMU_HARDFLOAT_USE_ISINF) {
324         return isinf(a.h);
325     }
326     return float32_is_infinity(a.s);
327 }
328 
f64_is_inf(union_float64 a)329 static inline bool f64_is_inf(union_float64 a)
330 {
331     if (QEMU_HARDFLOAT_USE_ISINF) {
332         return isinf(a.h);
333     }
334     return float64_is_infinity(a.s);
335 }
336 
/*
 * Generic two-operand float32 operation: try the host FPU first and
 * fall back to softfloat whenever exception detection would be
 * unreliable.
 *
 * @hard/@soft: host-native and softfloat implementations of the op.
 * @pre:  operands must satisfy this check before the hard path runs.
 * @post: consulted when the hard result's magnitude is <= FLT_MIN;
 *        returning true forces recomputation in softfloat so that
 *        underflow/inexact flags come out right.
 */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    /* Hardfloat disabled or guest state unsuitable: go soft. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    /* Apply flush-to-zero semantics before either path sees the inputs. */
    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        /* pre() accepted the operands, so an infinite result is overflow. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        /* Result at or below the smallest normal: let softfloat decide. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
367 
/*
 * Generic two-operand float64 operation: host-FPU fast path with
 * softfloat fallback.  Mirrors float32_gen2 above; see that function
 * for the meaning of @pre and @post.
 */
static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    /* Hardfloat disabled or guest state unsuitable: go soft. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    /* Apply flush-to-zero semantics before either path sees the inputs. */
    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        /* pre() accepted the operands, so an infinite result is overflow. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        /* Result at or below the smallest normal: let softfloat decide. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
398 
399 /*
400  * Classify a floating point number. Everything above float_class_qnan
401  * is a NaN so cls >= float_class_qnan is any NaN.
402  *
403  * Note that we canonicalize denormals, so most code should treat
404  * class_normal and class_denormal identically.
405  */
406 
/* Packed so a FloatClass fits in one byte inside FloatPartsN. */
typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified, /* not yet run through parts_canonicalize */
    float_class_zero,
    float_class_normal,
    float_class_denormal, /* input was a non-squashed denormal */
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,  /* keep last: is_nan() relies on cls >= qnan */
} FloatClass;
416 
/* Convert a FloatClass into a bit for mask-based class testing. */
#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_denormal = float_cmask(float_class_denormal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    /* Composite masks for common multi-class tests. */
    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
    float_cmask_anynorm = float_cmask_normal | float_cmask_denormal,
};
431 
/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
    /*
     * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
     * operations.
     */
    minmax_isnumber = 8,
};
446 
/* Simple helpers for checking if, or what kind of, NaN we have */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    /* Relies on FloatClass ordering: every class >= qnan is a NaN. */
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}
462 
463 /*
464  * Return true if the float_cmask has only normals in it
465  * (including input denormals that were canonicalized)
466  */
cmask_is_only_normals(int cmask)467 static inline bool cmask_is_only_normals(int cmask)
468 {
469     return !(cmask & ~float_cmask_anynorm);
470 }
471 
is_anynorm(FloatClass c)472 static inline bool is_anynorm(FloatClass c)
473 {
474     return float_cmask(c) & float_cmask_anynorm;
475 }
476 
477 /*
478  * Structure holding all of the decomposed parts of a float.
479  * The exponent is unbiased and the fraction is normalized.
480  *
481  * The fraction words are stored in big-endian word ordering,
482  * so that truncation from a larger format to a smaller format
483  * can be done simply by ignoring subsequent elements.
484  */
485 
typedef struct {
    FloatClass cls;     /* classification after canonicalization */
    bool sign;          /* sign bit */
    int32_t exp;        /* unbiased exponent */
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

/* As FloatParts64, with a 128-bit fraction in two big-endian words. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

/* As FloatParts64, with a 256-bit fraction in four big-endian words. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;
520 
521 /* These apply to the most significant word of each FloatPartsN. */
522 #define DECOMPOSED_BINARY_POINT    63
523 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
524 
525 /* Structure holding all of the relevant parameters for a format.
526  *   exp_size: the size of the exponent field
527  *   exp_bias: the offset applied to the exponent field
528  *   exp_max: the maximum normalised exponent
529  *   frac_size: the size of the fraction field
530  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
531  * The following are computed based the size of fraction
532  *   round_mask: bits below lsb which must be rounded
533  * The following optional modifiers are available:
534  *   arm_althp: handle ARM Alternative Half Precision
535  *   has_explicit_bit: has an explicit integer bit; this affects whether
536  *   the float_status floatx80_behaviour handling applies
537  */
/* See the block comment above for the meaning of each field. */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_re_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    bool has_explicit_bit;
    uint64_t round_mask;
} FloatFmt;

/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E)                                \
    .exp_size       = E,                                \
    .exp_bias       = ((1 << E) - 1) >> 1,              \
    .exp_re_bias    = (1 << (E - 1)) + (1 << (E - 2)),  \
    .exp_max        = (1 << E) - 1

/* (-F - 1) & 63 == 63 - F for F in [0, 63]: bits below the lsb. */
#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E),                                   \
    .frac_size      = F,                                \
    .frac_shift     = (-F - 1) & 63,                    \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1
562 
/* IEEE binary16: 5-bit exponent, 10-bit fraction. */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* ARM Alternative Half Precision: binary16 layout, no inf/NaN encodings. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* bfloat16: binary32 exponent range with a truncated 7-bit fraction. */
static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

/* IEEE binary32. */
static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

/* IEEE binary64. */
static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

/* IEEE binary128. */
static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};

/*
 * x87 extended double at rounding precision R.  At R == 64 the whole
 * fraction is kept (63 stored bits plus the explicit integer bit) and
 * nothing is rounded away, hence the all-ones round_mask.
 */
#define FLOATX80_PARAMS(R)              \
    FLOAT_PARAMS_(15),                  \
    .frac_size = R == 64 ? 63 : R,      \
    .frac_shift = 0,                    \
    .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1

static const FloatFmt floatx80_params[3] = {
    [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
    [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
    [floatx80_precision_x] = {
        FLOATX80_PARAMS(64),
        .has_explicit_bit = true,
    },
};
602 
603 /* Unpack a float to parts, but do not canonicalize.  */
/*
 * Unpack a raw <= 64-bit float image into parts, without
 * canonicalizing: the caller gets the raw biased exponent and
 * fraction field, and cls is left unclassified.
 */
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
{
    const int e_size = fmt->exp_size;
    const int f_size = fmt->frac_size;

    /* Layout from lsb: fraction, exponent, then the sign bit on top. */
    *r = (FloatParts64) {
        .cls = float_class_unclassified,
        .frac = extract64(raw, 0, f_size),
        .exp = extract64(raw, f_size, e_size),
        .sign = extract64(raw, f_size + e_size, 1),
    };
}
616 
/* Per-format wrappers binding unpack_raw64 to each format descriptor. */
static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}
636 
/*
 * Unpack a floatx80 without canonicalizing.  The 16-bit sign+exponent
 * live in f.high; the 64-bit significand (explicit integer bit
 * included) is f.low and becomes frac_hi.
 */
static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
{
    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract32(f.high, 15, 1),
        .exp = extract32(f.high, 0, 15),
        .frac_hi = f.low
    };
}
646 
/*
 * Unpack a float128 without canonicalizing.  The high word holds the
 * sign, exponent, and the top (112 - 64) fraction bits; the low word
 * is the remaining 64 fraction bits.
 */
static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;

    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract64(f.high, f_size + e_size, 1),
        .exp = extract64(f.high, f_size, e_size),
        .frac_hi = extract64(f.high, 0, f_size),
        .frac_lo = f.low,
    };
}
660 
661 /* Pack a float from parts, but do not canonicalize.  */
pack_raw64(const FloatParts64 * p,const FloatFmt * fmt)662 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
663 {
664     const int f_size = fmt->frac_size;
665     const int e_size = fmt->exp_size;
666     uint64_t ret;
667 
668     ret = (uint64_t)p->sign << (f_size + e_size);
669     ret = deposit64(ret, f_size, e_size, p->exp);
670     ret = deposit64(ret, 0, f_size, p->frac);
671     return ret;
672 }
673 
/* Per-format wrappers binding pack_raw64 to each format descriptor. */
static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}
693 
/*
 * Pack a float128 without canonicalizing: the mirror image of
 * float128_unpack_raw above.
 */
static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;
    uint64_t hi;

    hi = (uint64_t)p->sign << (f_size + e_size);
    hi = deposit64(hi, f_size, e_size, p->exp);
    hi = deposit64(hi, 0, f_size, p->frac_hi);
    return make_float128(hi, p->frac_lo);
}
705 
706 /*----------------------------------------------------------------------------
707 | Functions and definitions to determine:  (1) whether tininess for underflow
708 | is detected before or after rounding by default, (2) what (if anything)
709 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
710 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
711 | are propagated from function inputs to output.  These details are target-
712 | specific.
713 *----------------------------------------------------------------------------*/
714 #include "softfloat-specialize.c.inc"
715 
/*
 * Dispatch to the parts64/parts128 (and, where defined, parts256)
 * implementation of an operation based on the static pointer type of
 * the argument, using C11 _Generic selection.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME, \
                  FloatParts256 *: parts256_##NAME)

#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
727 
728 static void parts64_return_nan(FloatParts64 *a, float_status *s);
729 static void parts128_return_nan(FloatParts128 *a, float_status *s);
730 
731 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
732 
733 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
734                                       float_status *s);
735 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
736                                         float_status *s);
737 
738 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
739 
740 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
741                                              FloatParts64 *c, float_status *s,
742                                              int ab_mask, int abc_mask);
743 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
744                                                FloatParts128 *b,
745                                                FloatParts128 *c,
746                                                float_status *s,
747                                                int ab_mask, int abc_mask);
748 
749 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
750     PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
751 
752 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
753                                  const FloatFmt *fmt);
754 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
755                                   const FloatFmt *fmt);
756 
757 #define parts_canonicalize(A, S, F) \
758     PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
759 
760 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
761                                    const FloatFmt *fmt);
762 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
763                                     const FloatFmt *fmt);
764 
765 #define parts_uncanon_normal(A, S, F) \
766     PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
767 
768 static void parts64_uncanon(FloatParts64 *p, float_status *status,
769                             const FloatFmt *fmt);
770 static void parts128_uncanon(FloatParts128 *p, float_status *status,
771                              const FloatFmt *fmt);
772 
773 #define parts_uncanon(A, S, F) \
774     PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
775 
776 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
777 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
778 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
779 
780 #define parts_add_normal(A, B) \
781     PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
782 
783 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
784 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
785 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
786 
787 #define parts_sub_normal(A, B) \
788     PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
789 
790 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
791                                     float_status *s, bool subtract);
792 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
793                                       float_status *s, bool subtract);
794 
795 #define parts_addsub(A, B, S, Z) \
796     PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
797 
798 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
799                                  float_status *s);
800 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
801                                    float_status *s);
802 
803 #define parts_mul(A, B, S) \
804     PARTS_GENERIC_64_128(mul, A)(A, B, S)
805 
806 static FloatParts64 *parts64_muladd_scalbn(FloatParts64 *a, FloatParts64 *b,
807                                            FloatParts64 *c, int scale,
808                                            int flags, float_status *s);
809 static FloatParts128 *parts128_muladd_scalbn(FloatParts128 *a, FloatParts128 *b,
810                                              FloatParts128 *c, int scale,
811                                              int flags, float_status *s);
812 
813 #define parts_muladd_scalbn(A, B, C, Z, Y, S) \
814     PARTS_GENERIC_64_128(muladd_scalbn, A)(A, B, C, Z, Y, S)
815 
816 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
817                                  float_status *s);
818 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
819                                    float_status *s);
820 
821 #define parts_div(A, B, S) \
822     PARTS_GENERIC_64_128(div, A)(A, B, S)
823 
824 static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
825                                     uint64_t *mod_quot, float_status *s);
826 static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
827                                       uint64_t *mod_quot, float_status *s);
828 
829 #define parts_modrem(A, B, Q, S) \
830     PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
831 
832 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
833 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
834 
835 #define parts_sqrt(A, S, F) \
836     PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
837 
838 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
839                                         int scale, int frac_size);
840 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
841                                          int scale, int frac_size);
842 
843 #define parts_round_to_int_normal(A, R, C, F) \
844     PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
845 
846 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
847                                  int scale, float_status *s,
848                                  const FloatFmt *fmt);
849 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
850                                   int scale, float_status *s,
851                                   const FloatFmt *fmt);
852 
853 #define parts_round_to_int(A, R, C, S, F) \
854     PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
855 
856 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
857                                      int scale, int64_t min, int64_t max,
858                                      float_status *s);
859 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
860                                      int scale, int64_t min, int64_t max,
861                                      float_status *s);
862 
863 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
864     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
865 
866 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
867                                       int scale, uint64_t max,
868                                       float_status *s);
869 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
870                                        int scale, uint64_t max,
871                                        float_status *s);
872 
873 #define parts_float_to_uint(P, R, Z, M, S) \
874     PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
875 
876 static int64_t parts64_float_to_sint_modulo(FloatParts64 *p,
877                                             FloatRoundMode rmode,
878                                             int bitsm1, float_status *s);
879 static int64_t parts128_float_to_sint_modulo(FloatParts128 *p,
880                                              FloatRoundMode rmode,
881                                              int bitsm1, float_status *s);
882 
883 #define parts_float_to_sint_modulo(P, R, M, S) \
884     PARTS_GENERIC_64_128(float_to_sint_modulo, P)(P, R, M, S)
885 
886 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
887                                   int scale, float_status *s);
888 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
889                                    int scale, float_status *s);
890 
891 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
892     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
893 
894 #define parts_sint_to_float(P, I, Z, S) \
895     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
896 
897 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
898                                   int scale, float_status *s);
899 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
900                                    int scale, float_status *s);
901 
902 #define parts_uint_to_float(P, I, Z, S) \
903     PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
904 
905 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
906                                     float_status *s, int flags);
907 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
908                                       float_status *s, int flags);
909 
910 #define parts_minmax(A, B, S, F) \
911     PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
912 
913 static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
914                                      float_status *s, bool q);
915 static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
916                                       float_status *s, bool q);
917 
918 #define parts_compare(A, B, S, Q) \
919     PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
920 
921 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
922 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
923 
924 #define parts_scalbn(A, N, S) \
925     PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
926 
927 static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
928 static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
929 
930 #define parts_log2(A, S, F) \
931     PARTS_GENERIC_64_128(log2, A)(A, S, F)
932 
933 /*
934  * Helper functions for softfloat-parts.c.inc, per-size operations.
935  */
936 
937 #define FRAC_GENERIC_64_128(NAME, P) \
938     _Generic((P), FloatParts64 *: frac64_##NAME, \
939                   FloatParts128 *: frac128_##NAME)
940 
941 #define FRAC_GENERIC_64_128_256(NAME, P) \
942     _Generic((P), FloatParts64 *: frac64_##NAME, \
943                   FloatParts128 *: frac128_##NAME, \
944                   FloatParts256 *: frac256_##NAME)
945 
frac64_add(FloatParts64 * r,FloatParts64 * a,FloatParts64 * b)946 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
947 {
948     return uadd64_overflow(a->frac, b->frac, &r->frac);
949 }
950 
frac128_add(FloatParts128 * r,FloatParts128 * a,FloatParts128 * b)951 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
952 {
953     bool c = 0;
954     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
955     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
956     return c;
957 }
958 
frac256_add(FloatParts256 * r,FloatParts256 * a,FloatParts256 * b)959 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
960 {
961     bool c = 0;
962     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
963     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
964     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
965     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
966     return c;
967 }
968 
969 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
970 
frac64_addi(FloatParts64 * r,FloatParts64 * a,uint64_t c)971 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
972 {
973     return uadd64_overflow(a->frac, c, &r->frac);
974 }
975 
frac128_addi(FloatParts128 * r,FloatParts128 * a,uint64_t c)976 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
977 {
978     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
979     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
980 }
981 
982 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
983 
/* Set every bit of the fraction of A. */
static void frac64_allones(FloatParts64 *a)
{
    a->frac = UINT64_MAX;
}
988 
/* Set every bit of the 128-bit fraction of A. */
static void frac128_allones(FloatParts128 *a)
{
    a->frac_lo = UINT64_MAX;
    a->frac_hi = UINT64_MAX;
}
993 
994 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
995 
frac64_cmp(FloatParts64 * a,FloatParts64 * b)996 static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
997 {
998     return (a->frac == b->frac ? float_relation_equal
999             : a->frac < b->frac ? float_relation_less
1000             : float_relation_greater);
1001 }
1002 
frac128_cmp(FloatParts128 * a,FloatParts128 * b)1003 static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
1004 {
1005     uint64_t ta = a->frac_hi, tb = b->frac_hi;
1006     if (ta == tb) {
1007         ta = a->frac_lo, tb = b->frac_lo;
1008         if (ta == tb) {
1009             return float_relation_equal;
1010         }
1011     }
1012     return ta < tb ? float_relation_less : float_relation_greater;
1013 }
1014 
1015 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
1016 
/* Zero the fraction of A. */
static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}
1021 
/* Zero the 128-bit fraction of A. */
static void frac128_clear(FloatParts128 *a)
{
    a->frac_lo = 0;
    a->frac_hi = 0;
}
1026 
1027 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
1028 
frac64_div(FloatParts64 * a,FloatParts64 * b)1029 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
1030 {
1031     uint64_t n1, n0, r, q;
1032     bool ret;
1033 
1034     /*
1035      * We want a 2*N / N-bit division to produce exactly an N-bit
1036      * result, so that we do not lose any precision and so that we
1037      * do not have to renormalize afterward.  If A.frac < B.frac,
1038      * then division would produce an (N-1)-bit result; shift A left
1039      * by one to produce the an N-bit result, and return true to
1040      * decrement the exponent to match.
1041      *
1042      * The udiv_qrnnd algorithm that we're using requires normalization,
1043      * i.e. the msb of the denominator must be set, which is already true.
1044      */
1045     ret = a->frac < b->frac;
1046     if (ret) {
1047         n0 = a->frac;
1048         n1 = 0;
1049     } else {
1050         n0 = a->frac >> 1;
1051         n1 = a->frac << 63;
1052     }
1053     q = udiv_qrnnd(&r, n0, n1, b->frac);
1054 
1055     /* Set lsb if there is a remainder, to set inexact. */
1056     a->frac = q | (r != 0);
1057 
1058     return ret;
1059 }
1060 
/*
 * Divide the 128-bit fraction of A by that of B, in place, producing
 * exactly a 128-bit quotient.  Returns true when the result exponent
 * must be decremented (i.e. A.frac < B.frac); any non-zero remainder
 * is jammed into the lsb so that inexact is raised.
 */
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    /* As in frac64_div: pre-shift A right by one when A >= B so the
     * quotient is exactly 128 bits; otherwise the exponent drops. */
    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        /* r0 non-zero means the 192-bit subtraction wrapped negative. */
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}
1107 
1108 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1109 
frac64_eqz(FloatParts64 * a)1110 static bool frac64_eqz(FloatParts64 *a)
1111 {
1112     return a->frac == 0;
1113 }
1114 
frac128_eqz(FloatParts128 * a)1115 static bool frac128_eqz(FloatParts128 *a)
1116 {
1117     return (a->frac_hi | a->frac_lo) == 0;
1118 }
1119 
1120 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1121 
/* Widening multiply: store the full 128-bit product of the fractions
 * of A and B into R. */
static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}
1126 
/* Widening multiply: store the full 256-bit product of the 128-bit
 * fractions of A and B into R. */
static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}
1132 
1133 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1134 
/* Two's-complement negate the fraction of A in place. */
static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}
1139 
/* Two's-complement negate the 128-bit fraction of A in place,
 * i.e. compute 0 - A.frac with borrow propagation. */
static void frac128_neg(FloatParts128 *a)
{
    bool borrow = 0;

    a->frac_lo = usub64_borrow(0, a->frac_lo, &borrow);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &borrow);
}
1146 
/* Two's-complement negate the 256-bit fraction of A in place,
 * i.e. compute 0 - A.frac with borrow propagation. */
static void frac256_neg(FloatParts256 *a)
{
    bool borrow = 0;

    a->frac_lo = usub64_borrow(0, a->frac_lo, &borrow);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &borrow);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &borrow);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &borrow);
}
1155 
1156 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1157 
/* Shift the fraction of A left until its msb is set; return the shift
 * count.  A zero fraction returns 64. */
static int frac64_normalize(FloatParts64 *a)
{
    int shift;

    if (a->frac == 0) {
        return 64;
    }
    shift = clz64(a->frac);
    a->frac <<= shift;
    return shift;
}
1167 
/* Shift the 128-bit fraction of A left until the msb of the high word
 * is set; return the shift count.  A zero fraction returns 128. */
static int frac128_normalize(FloatParts128 *a)
{
    int shl;

    if (a->frac_hi != 0) {
        shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    }
    if (a->frac_lo != 0) {
        /* High word empty: the low word supplies everything. */
        shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}
1183 
/*
 * Shift the 256-bit fraction of A left until the msb of the top word
 * is set; return the shift count.  A zero fraction returns 256.
 */
static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            /* Already normalized. */
            return 0;
        }
        ret = shl;
    } else {
        /* Skip leading all-zero words, 64 bits at a time. */
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            /* Entirely zero: nothing to shift. */
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    /* Apply the residual 1..63 bit shift across all four words. */
    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}
1230 
1231 #define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1232 
/*
 * Compute the modulus/remainder of A by B, in place.  When MOD_QUOT is
 * non-NULL the low bits of the integer quotient are stored there and the
 * truncated remainder is kept; when NULL, the remainder closest to zero
 * is selected (ties broken on the quotient's low bit), flipping A's sign
 * when the complement is chosen.  A zero result sets float_class_zero.
 */
static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
{
    uint64_t a0, a1, b0, t0, t1, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac;
    a1 = 0;

    if (exp_diff < -1) {
        /* |A| is much smaller than |B|: A is already the remainder. */
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        /* Align A with B by pre-shifting its fraction right one. */
        a0 >>= 1;
        exp_diff = 0;
    }

    b0 = b->frac;
    /* First quotient bit: 1 iff B's fraction fits into A's. */
    quot = q = b0 <= a0;
    if (q) {
        a0 -= b0;
    }

    /* Produce quotient bits in chunks of 62 while the exponent gap
     * still exceeds one 64-bit word. */
    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        /* Back the estimate off by 2 so it can never be too large. */
        q = q > 2 ? q - 2 : 0;
        mul64To128(b0, q, &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(a0, a1, 62, &a0, &a1);
        exp_diff -= 62;
        quot = (quot << 62) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        /* Final partial chunk of fewer than 64 quotient bits. */
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
        mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
        /* Correct the (conservatively low) estimate upward. */
        while (le128(t0, t1, a0, a1)) {
            ++q;
            sub128(a0, a1, t0, t1, &a0, &a1);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        /* Remainder mode: pick whichever of R and B-R is nearer zero,
         * ties going to the even quotient; flipping picks -R. */
        sub128(t0, t1, a0, a1, &t0, &t1);
        if (lt128(t0, t1, a0, a1) ||
            (eq128(t0, t1, a0, a1) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a->sign = !a->sign;
        }
    }

    /* Renormalize the remainder and rebuild exponent/fraction. */
    if (likely(a0)) {
        shift = clz64(a0);
        shortShift128Left(a0, a1, shift, &a0, &a1);
    } else if (likely(a1)) {
        shift = clz64(a1);
        a0 = a1 << shift;
        a1 = 0;
        shift += 64;
    } else {
        /* Exact multiple: the remainder is zero. */
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    /* Jam any residual low bits into the lsb (sticky). */
    a->frac = a0 | (a1 != 0);
}
1315 
/*
 * 128-bit counterpart of frac64_modrem: compute the modulus/remainder
 * of A by B in place.  When MOD_QUOT is non-NULL the low bits of the
 * integer quotient are stored there and the truncated remainder kept;
 * when NULL, the remainder closest to zero is selected (ties on the
 * quotient's low bit), flipping A's sign when the complement is chosen.
 */
static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
                           uint64_t *mod_quot)
{
    uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac_hi;
    a1 = a->frac_lo;
    a2 = 0;

    if (exp_diff < -1) {
        /* |A| is much smaller than |B|: A is already the remainder. */
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        /* Align A with B by pre-shifting its fraction right one. */
        shift128Right(a0, a1, 1, &a0, &a1);
        exp_diff = 0;
    }

    b0 = b->frac_hi;
    b1 = b->frac_lo;

    /* First quotient bit: 1 iff B's fraction fits into A's. */
    quot = q = le128(b0, b1, a0, a1);
    if (q) {
        sub128(a0, a1, b0, b1, &a0, &a1);
    }

    /* Produce quotient bits in chunks of 61 while the exponent gap
     * still exceeds one 64-bit word. */
    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        /* Back the estimate off by 4 so it can never be too large. */
        q = q > 4 ? q - 4 : 0;
        mul128By64To192(b0, b1, q, &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
        exp_diff -= 61;
        quot = (quot << 61) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        /* Final partial chunk of fewer than 64 quotient bits. */
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
        mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
        /* Correct the (conservatively low) estimate upward. */
        while (le192(t0, t1, t2, a0, a1, a2)) {
            ++q;
            sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = b1;
        t2 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        /* Remainder mode: pick whichever of R and B-R is nearer zero,
         * ties going to the even quotient; flipping picks -R. */
        sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
        if (lt192(t0, t1, t2, a0, a1, a2) ||
            (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a2 = t2;
            a->sign = !a->sign;
        }
    }

    /* Renormalize the remainder and rebuild exponent/fraction. */
    if (likely(a0)) {
        shift = clz64(a0);
        shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
    } else if (likely(a1)) {
        shift = clz64(a1);
        shortShift128Left(a1, a2, shift, &a0, &a1);
        a2 = 0;
        shift += 64;
    } else if (likely(a2)) {
        shift = clz64(a2);
        a0 = a2 << shift;
        a1 = a2 = 0;
        shift += 128;
    } else {
        /* Exact multiple: the remainder is zero. */
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac_hi = a0;
    /* Jam any residual low bits into the lsb (sticky). */
    a->frac_lo = a1 | (a2 != 0);
}
1410 
1411 #define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1412 
/* Shift the fraction of A left by C bits.
 * NOTE(review): callers appear to pass 0 <= c < 64; a shift of 64 or
 * more would be UB here — confirm against softfloat-parts.c.inc. */
static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}
1417 
/* Shift the 128-bit fraction of A left by C bits. */
static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t hi = a->frac_hi, lo = a->frac_lo;

    /* A shift of 64+ moves the low word wholesale into the high word. */
    if (c & 64) {
        hi = lo;
        lo = 0;
    }

    /* Then apply the residual sub-word shift, if any. */
    c &= 63;
    if (c != 0) {
        hi = shl_double(hi, lo, c);
        lo = lo << c;
    }

    a->frac_hi = hi;
    a->frac_lo = lo;
}
1435 
1436 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1437 
/* Shift the fraction of A right by C bits, discarding shifted-out bits.
 * NOTE(review): callers appear to pass 0 <= c < 64; a shift of 64 or
 * more would be UB here — confirm against softfloat-parts.c.inc. */
static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}
1442 
/* Shift the 128-bit fraction of A right by C bits, discarding
 * shifted-out bits. */
static void frac128_shr(FloatParts128 *a, int c)
{
    uint64_t hi = a->frac_hi, lo = a->frac_lo;

    /* A shift of 64+ moves the high word wholesale into the low word. */
    if (c & 64) {
        lo = hi;
        hi = 0;
    }

    /* Then apply the residual sub-word shift, if any. */
    c &= 63;
    if (c != 0) {
        lo = shr_double(hi, lo, c);
        hi = hi >> c;
    }

    a->frac_hi = hi;
    a->frac_lo = lo;
}
1460 
1461 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1462 
/* Shift the fraction of A right by C, jamming any bits shifted out
 * into the lsb (sticky), so that inexactness is not lost. */
static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t frac = a->frac;

    if (c == 0) {
        return;
    }
    if (c < 64) {
        /* shr_double(frac, 0, c) recovers exactly the bits shifted out. */
        a->frac = (frac >> c) | (shr_double(frac, 0, c) != 0);
    } else {
        /* Everything shifts out; only stickiness remains. */
        a->frac = (frac != 0);
    }
}
1476 
/*
 * Shift the 128-bit fraction of A right by C, jamming any bits shifted
 * out into the lsb (sticky), so that inexactness is not lost.
 */
static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        /* No shift: fraction unchanged. */
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        /* Whole-word shift: the old low word becomes entirely sticky. */
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Everything shifts out; only stickiness remains. */
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    /* Residual 1..63 bit shift; collect the bits falling off a1. */
    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}
1508 
/*
 * Shift the 256-bit fraction of A right by C, jamming any bits shifted
 * out into the lsb (sticky), so that inexactness is not lost.
 */
static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        /* No shift: fraction unchanged. */
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        /* Whole-word shifts first, folding dropped words into sticky. */
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Everything shifts out; only stickiness remains. */
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    /* Residual 1..63 bit shift; collect the bits falling off a3. */
    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}
1550 
1551 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1552 
frac64_sub(FloatParts64 * r,FloatParts64 * a,FloatParts64 * b)1553 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1554 {
1555     return usub64_overflow(a->frac, b->frac, &r->frac);
1556 }
1557 
frac128_sub(FloatParts128 * r,FloatParts128 * a,FloatParts128 * b)1558 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1559 {
1560     bool c = 0;
1561     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1562     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1563     return c;
1564 }
1565 
frac256_sub(FloatParts256 * r,FloatParts256 * a,FloatParts256 * b)1566 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1567 {
1568     bool c = 0;
1569     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1570     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1571     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1572     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1573     return c;
1574 }
1575 
1576 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1577 
/* Narrow a 128-bit fraction to 64 bits, jamming the discarded low
 * word into the lsb (sticky). */
static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}
1582 
/* Narrow a 256-bit fraction to 128 bits, jamming the two discarded
 * low words into the lsb (sticky). */
static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    uint64_t sticky = (a->frac_lm | a->frac_lo) != 0;

    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | sticky;
}
1588 
1589 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1590 
/* Widen a 64-bit fraction to 128 bits, zero-filling the low word. */
static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}
1596 
/* Widen a 128-bit fraction to 256 bits, zero-filling the low words. */
static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}
1604 
1605 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1606 
/*
 * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licenced.
 */
static const uint16_t rsqrt_tab[128] = {
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};
1630 
1631 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1632 #define FloatPartsN    glue(FloatParts,N)
1633 #define FloatPartsW    glue(FloatParts,W)
1634 
1635 #define N 64
1636 #define W 128
1637 
1638 #include "softfloat-parts-addsub.c.inc"
1639 #include "softfloat-parts.c.inc"
1640 
1641 #undef  N
1642 #undef  W
1643 #define N 128
1644 #define W 256
1645 
1646 #include "softfloat-parts-addsub.c.inc"
1647 #include "softfloat-parts.c.inc"
1648 
1649 #undef  N
1650 #undef  W
1651 #define N            256
1652 
1653 #include "softfloat-parts-addsub.c.inc"
1654 
1655 #undef  N
1656 #undef  W
1657 #undef  partsN
1658 #undef  FloatPartsN
1659 #undef  FloatPartsW
1660 
1661 /*
1662  * Pack/unpack routines with a specific FloatFmt.
1663  */
1664 
/* Unpack the raw float16 F into canonical parts *P, interpreting the
 * bit layout according to PARAMS. */
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}
1671 
/* Unpack F into canonical parts *P using the standard float16 layout. */
static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    const FloatFmt *fmt = &float16_params;

    float16a_unpack_canonical(p, f, s, fmt);
}
1677 
bfloat16_unpack_canonical(FloatParts64 * p,bfloat16 f,float_status * s)1678 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1679                                       float_status *s)
1680 {
1681     bfloat16_unpack_raw(p, f);
1682     parts_canonicalize(p, s, &bfloat16_params);
1683 }
1684 
float16a_round_pack_canonical(FloatParts64 * p,float_status * s,const FloatFmt * params)1685 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1686                                              float_status *s,
1687                                              const FloatFmt *params)
1688 {
1689     parts_uncanon(p, s, params);
1690     return float16_pack_raw(p);
1691 }
1692 
float16_round_pack_canonical(FloatParts64 * p,float_status * s)1693 static float16 float16_round_pack_canonical(FloatParts64 *p,
1694                                             float_status *s)
1695 {
1696     return float16a_round_pack_canonical(p, s, &float16_params);
1697 }
1698 
bfloat16_round_pack_canonical(FloatParts64 * p,float_status * s)1699 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1700                                               float_status *s)
1701 {
1702     parts_uncanon(p, s, &bfloat16_params);
1703     return bfloat16_pack_raw(p);
1704 }
1705 
/* Unpack an IEEE single-precision value into canonical parts. */
static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}

/* Round and repack canonical parts as an IEEE single-precision value. */
static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}

/* Unpack an IEEE double-precision value into canonical parts. */
static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}

/* Round and repack canonical parts as an IEEE double-precision value. */
static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}
1733 
/*
 * Pack parts that were rounded to float32 range/precision into a float64
 * bit pattern ("float64r32": a double-format register holding a value
 * rounded to single precision).
 */
static float64 float64r32_pack_raw(FloatParts64 *p)
{
    /*
     * In parts_uncanon, we placed the fraction for float32 at the lsb.
     * We need to adjust the fraction higher so that the least N bits are
     * zero, and the fraction is adjacent to the float64 implicit bit.
     */
    switch (p->cls) {
    case float_class_normal:
    case float_class_denormal:
        if (unlikely(p->exp == 0)) {
            /*
             * The result is denormal for float32, but can be represented
             * in normalized form for float64.  Adjust, per canonicalize.
             */
            int shift = frac_normalize(p);
            p->exp = (float32_params.frac_shift -
                      float32_params.exp_bias - shift + 1 +
                      float64_params.exp_bias);
            frac_shr(p, float64_params.frac_shift);
        } else {
            /* Re-bias the exponent and move the fraction into place. */
            frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
            p->exp += float64_params.exp_bias - float32_params.exp_bias;
        }
        break;
    case float_class_snan:
    case float_class_qnan:
        /* Shift the NaN payload into float64 position; exponent all ones. */
        frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
        p->exp = float64_params.exp_max;
        break;
    case float_class_inf:
        p->exp = float64_params.exp_max;
        break;
    case float_class_zero:
        break;
    default:
        g_assert_not_reached();
    }

    return float64_pack_raw(p);
}
1775 
/* Round parts to float32 range/precision, then pack as float64 bits. */
static float64 float64r32_round_pack_canonical(FloatParts64 *p,
                                               float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float64r32_pack_raw(p);
}

/* Unpack an IEEE quad-precision value into canonical parts. */
static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}

/* Round and repack canonical parts as an IEEE quad-precision value. */
static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}
1796 
/* Returns false if the encoding is invalid. */
static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
                                      float_status *s)
{
    /* Ensure rounding precision is set before beginning. */
    switch (s->floatx80_rounding_precision) {
    case floatx80_precision_x:
    case floatx80_precision_d:
    case floatx80_precision_s:
        break;
    default:
        g_assert_not_reached();
    }

    /* Reject invalid encodings (e.g. x86 unnormals) with float_flag_invalid. */
    if (unlikely(floatx80_invalid_encoding(f, s))) {
        float_raise(float_flag_invalid, s);
        return false;
    }

    floatx80_unpack_raw(p, f);

    if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
        parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
    } else {
        /* The explicit integer bit is ignored, after invalid checks. */
        p->frac_hi &= MAKE_64BIT_MASK(0, 63);
        p->cls = (p->frac_hi == 0 ? float_class_inf
                  : parts_is_snan_frac(p->frac_hi, s)
                  ? float_class_snan : float_class_qnan);
    }
    return true;
}
1829 
/*
 * Round canonical parts at the currently selected x87 rounding precision
 * (extended, double, or single fraction width) and pack as a floatx80.
 */
static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
    uint64_t frac;
    int exp;

    switch (p->cls) {
    case float_class_normal:
    case float_class_denormal:
        if (s->floatx80_rounding_precision == floatx80_precision_x) {
            parts_uncanon_normal(p, s, fmt);
            frac = p->frac_hi;
            exp = p->exp;
        } else {
            /* Reduced precision: truncate-and-jam into 64 bits, then round. */
            FloatParts64 p64;

            p64.sign = p->sign;
            p64.exp = p->exp;
            frac_truncjam(&p64, p);
            parts_uncanon_normal(&p64, s, fmt);
            frac = p64.frac;
            exp = p64.exp;
        }
        if (exp != fmt->exp_max) {
            break;
        }
        /* rounded to inf -- fall through to set frac correctly */

    case float_class_inf:
        /* x86 and m68k differ in the setting of the integer bit. */
        frac = s->floatx80_behaviour & floatx80_default_inf_int_bit_is_zero ?
            0 : (1ULL << 63);
        exp = fmt->exp_max;
        break;

    case float_class_zero:
        frac = 0;
        exp = 0;
        break;

    case float_class_snan:
    case float_class_qnan:
        /* NaNs have the integer bit set. */
        frac = p->frac_hi | (1ull << 63);
        exp = fmt->exp_max;
        break;

    default:
        g_assert_not_reached();
    }

    return packFloatx80(p->sign, exp, frac);
}
1884 
1885 /*
1886  * Addition and subtraction
1887  */
1888 
/* Shared float16 add/sub implementation; @subtract selects a - b. */
static float16 QEMU_FLATTEN
float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float16_round_pack_canonical(pr, status);
}

float16 float16_add(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, false);
}

float16 float16_sub(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, true);
}
1910 
1911 static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_addsub(float32 a,float32 b,float_status * status,bool subtract)1912 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1913 {
1914     FloatParts64 pa, pb, *pr;
1915 
1916     float32_unpack_canonical(&pa, a, status);
1917     float32_unpack_canonical(&pb, b, status);
1918     pr = parts_addsub(&pa, &pb, status, subtract);
1919 
1920     return float32_round_pack_canonical(pr, status);
1921 }
1922 
/* Softfloat add/sub entry points passed to the hardfloat dispatcher. */
static float32 soft_f32_add(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, false);
}

static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, true);
}
1932 
1933 static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_addsub(float64 a,float64 b,float_status * status,bool subtract)1934 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1935 {
1936     FloatParts64 pa, pb, *pr;
1937 
1938     float64_unpack_canonical(&pa, a, status);
1939     float64_unpack_canonical(&pb, b, status);
1940     pr = parts_addsub(&pa, &pb, status, subtract);
1941 
1942     return float64_round_pack_canonical(pr, status);
1943 }
1944 
/* Softfloat add/sub entry points passed to the hardfloat dispatcher. */
static float64 soft_f64_add(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, false);
}

static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, true);
}
1954 
/* Host-FPU add/sub helpers used by the hardfloat fast path. */
static float hard_f32_add(float a, float b)
{
    float sum = a + b;
    return sum;
}

static float hard_f32_sub(float a, float b)
{
    float diff = a - b;
    return diff;
}

static double hard_f64_add(double a, double b)
{
    double sum = a + b;
    return sum;
}

static double hard_f64_sub(double a, double b)
{
    double diff = a - b;
    return diff;
}
1974 
f32_addsubmul_post(union_float32 a,union_float32 b)1975 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1976 {
1977     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1978         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1979     }
1980     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1981 }
1982 
f64_addsubmul_post(union_float64 a,union_float64 b)1983 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1984 {
1985     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1986         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1987     } else {
1988         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1989     }
1990 }
1991 
/* Dispatch float32 add/sub between the hardfloat and softfloat paths. */
static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}

/* Dispatch float64 add/sub between the hardfloat and softfloat paths. */
static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsubmul_post);
}

float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}

float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}

float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}

float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}
2029 
/* float64 add/sub with the result rounded to float32 precision. */
static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
                                 bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float64r32_round_pack_canonical(pr, status);
}

float64 float64r32_add(float64 a, float64 b, float_status *status)
{
    return float64r32_addsub(a, b, status, false);
}

float64 float64r32_sub(float64 a, float64 b, float_status *status)
{
    return float64r32_addsub(a, b, status, true);
}
2051 
/* Shared bfloat16 add/sub implementation; @subtract selects a - b. */
static bfloat16 QEMU_FLATTEN
bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return bfloat16_round_pack_canonical(pr, status);
}

bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, false);
}

bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, true);
}
2073 
/* Shared float128 add/sub implementation; @subtract selects a - b. */
static float128 QEMU_FLATTEN
float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float128_round_pack_canonical(pr, status);
}

float128 float128_add(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, false);
}

float128 float128_sub(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, true);
}
2095 
/* floatx80 add/sub; an invalid operand encoding yields the default NaN. */
static floatx80 QEMU_FLATTEN
floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
{
    FloatParts128 pa, pb, *pr;

    if (!floatx80_unpack_canonical(&pa, a, status) ||
        !floatx80_unpack_canonical(&pb, b, status)) {
        return floatx80_default_nan(status);
    }

    pr = parts_addsub(&pa, &pb, status, subtract);
    return floatx80_round_pack_canonical(pr, status);
}

floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
{
    return floatx80_addsub(a, b, status, false);
}

floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
{
    return floatx80_addsub(a, b, status, true);
}
2119 
2120 /*
2121  * Multiplication
2122  */
2123 
/* IEEE half-precision multiply via the generic parts code. */
float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float16_round_pack_canonical(pr, status);
}

/* Softfloat fallback for float32 multiplication. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_mul(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float32_round_pack_canonical(pr, status);
}

/* Softfloat fallback for float64 multiplication. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_mul(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float64_round_pack_canonical(pr, status);
}
2158 
/* Host-FPU multiply helpers used by the hardfloat fast path. */
static float hard_f32_mul(float a, float b)
{
    float prod = a * b;
    return prod;
}

static double hard_f64_mul(double a, double b)
{
    double prod = a * b;
    return prod;
}
2168 
/* Public multiply entry points: hardfloat fast path with soft fallback. */
float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, f32_addsubmul_post);
}

float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, f64_addsubmul_post);
}
2182 
/* float64 multiply with the result rounded to float32 precision. */
float64 float64r32_mul(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float64r32_round_pack_canonical(pr, status);
}

/* bfloat16 multiply via the generic parts code. */
bfloat16 QEMU_FLATTEN
bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return bfloat16_round_pack_canonical(pr, status);
}

/* float128 multiply via the generic parts code. */
float128 QEMU_FLATTEN
float128_mul(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float128_round_pack_canonical(pr, status);
}
2217 
/* floatx80 multiply; an invalid operand encoding yields the default NaN. */
floatx80 QEMU_FLATTEN
floatx80_mul(floatx80 a, floatx80 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    if (!floatx80_unpack_canonical(&pa, a, status) ||
        !floatx80_unpack_canonical(&pb, b, status)) {
        return floatx80_default_nan(status);
    }

    pr = parts_mul(&pa, &pb, status);
    return floatx80_round_pack_canonical(pr, status);
}
2231 
2232 /*
2233  * Fused multiply-add
2234  */
2235 
/*
 * Compute (a * b * 2**scale) + c for float16 with a single rounding.
 * @flags is a bitmask of float_muladd_* modifiers.
 */
float16 QEMU_FLATTEN
float16_muladd_scalbn(float16 a, float16 b, float16 c,
                      int scale, int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    float16_unpack_canonical(&pc, c, status);
    pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);

    /* Round before applying negate result. */
    parts_uncanon(pr, status, &float16_params);
    if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) {
        pr->sign ^= 1;
    }
    return float16_pack_raw(pr);
}

float16 float16_muladd(float16 a, float16 b, float16 c,
                       int flags, float_status *status)
{
    return float16_muladd_scalbn(a, b, c, 0, flags, status);
}
2260 
/*
 * Compute (a * b * 2**scale) + c for float32 with a single rounding.
 * Negation of the result is applied after rounding, skipped for NaNs.
 */
float32 QEMU_SOFTFLOAT_ATTR
float32_muladd_scalbn(float32 a, float32 b, float32 c,
                      int scale, int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    float32_unpack_canonical(&pc, c, status);
    pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);

    /* Round before applying negate result. */
    parts_uncanon(pr, status, &float32_params);
    if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) {
        pr->sign ^= 1;
    }
    return float32_pack_raw(pr);
}

/* As above, for float64. */
float64 QEMU_SOFTFLOAT_ATTR
float64_muladd_scalbn(float64 a, float64 b, float64 c,
                      int scale, int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    float64_unpack_canonical(&pc, c, status);
    pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);

    /* Round before applying negate result. */
    parts_uncanon(pr, status, &float64_params);
    if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) {
        pr->sign ^= 1;
    }
    return float64_pack_raw(pr);
}
2298 
2299 static bool force_soft_fma;
2300 
/*
 * Fused multiply-add: a * b + c with one rounding, using the host FPU
 * when the status flags and operand classes permit, otherwise falling
 * back to softfloat.
 */
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* Target-specific 0*x+c semantics cannot use the host fma. */
    if (unlikely(flags & float_muladd_suppress_add_product_zero)) {
        goto soft;
    }

    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /* Possible underflow: redo with softfloat for exact flags. */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return float32_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
}
2371 
/*
 * Fused multiply-add: a * b + c with one rounding, using the host FPU
 * when the status flags and operand classes permit, otherwise falling
 * back to softfloat.
 */
float64 QEMU_FLATTEN
float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
{
    union_float64 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
        union_float64 up;
        bool prod_sign;

        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float64_set_sign(float64_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float64 ua_orig = ua;
        union_float64 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fma(ua.h, ub.h, uc.h);

        if (unlikely(f64_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
            /*
             * Possible underflow: redo with softfloat for exact flags.
             * NOTE(review): threshold is FLT_MIN rather than DBL_MIN,
             * which is conservative (extra soft fallbacks for results
             * in (DBL_MIN, FLT_MIN]) -- confirm this is intentional.
             */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float64_chs(ur.s);
    }
    return ur.s;

 soft:
    return float64_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
}
2439 
/* Fused multiply-add with the result rounded to float32 precision. */
float64 float64r32_muladd(float64 a, float64 b, float64 c,
                          int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    float64_unpack_canonical(&pc, c, status);
    pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);

    /* Round before applying negate result. */
    parts_uncanon(pr, status, &float32_params);
    if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) {
        pr->sign ^= 1;
    }
    return float64r32_pack_raw(pr);
}

/* bfloat16 fused multiply-add via the generic parts code. */
bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
                                      int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    bfloat16_unpack_canonical(&pc, c, status);
    pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);

    /* Round before applying negate result. */
    parts_uncanon(pr, status, &bfloat16_params);
    if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) {
        pr->sign ^= 1;
    }
    return bfloat16_pack_raw(pr);
}

/* float128 fused multiply-add via the generic parts code. */
float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
                                      int flags, float_status *status)
{
    FloatParts128 pa, pb, pc, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    float128_unpack_canonical(&pc, c, status);
    pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);

    /* Round before applying negate result. */
    parts_uncanon(pr, status, &float128_params);
    if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) {
        pr->sign ^= 1;
    }
    return float128_pack_raw(pr);
}
2493 
2494 /*
2495  * Division
2496  */
2497 
/* IEEE half-precision divide via the generic parts code. */
float16 float16_div(float16 a, float16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float16_round_pack_canonical(pr, status);
}

/* Softfloat fallback for float32 division. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_div(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float32_round_pack_canonical(pr, status);
}

/* Softfloat fallback for float64 division. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_div(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float64_round_pack_canonical(pr, status);
}
2532 
/* Hardfloat primitive: perform the division on the host FPU. */
static float hard_f32_div(float a, float b)
{
    return a / b;
}

static double hard_f64_div(double a, double b)
{
    return a / b;
}
2542 
/*
 * Hardfloat precondition for division: take the fast path only when
 * the dividend is zero or normal and the divisor is normal.  The
 * QEMU_HARDFLOAT_2F32_USE_FP branch classifies via the host 'float'
 * view (a.h); otherwise the soft representation (a.s) is used.
 */
static bool f32_div_pre(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               fpclassify(b.h) == FP_NORMAL;
    }
    return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
}

/* Same precondition as f32_div_pre, for float64. */
static bool f64_div_pre(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               fpclassify(b.h) == FP_NORMAL;
    }
    return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
}
2560 
/*
 * Hardfloat postcondition for division: accept the fast-path result
 * only if 'a' is non-zero.  'b' is unused but kept so the signature
 * matches the generic post-check callback taken by float32_gen2.
 */
static bool f32_div_post(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        return fpclassify(a.h) != FP_ZERO;
    }
    return !float32_is_zero(a.s);
}

/* Same postcondition as f32_div_post, for float64. */
static bool f64_div_post(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return fpclassify(a.h) != FP_ZERO;
    }
    return !float64_is_zero(a.s);
}
2576 
/*
 * Public division entry points: dispatch between the host-FPU fast
 * path and the softfloat fallback via the generic two-operand helper.
 */
float32 QEMU_FLATTEN
float32_div(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
                        f32_div_pre, f32_div_post);
}

float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
                        f64_div_pre, f64_div_post);
}
2590 
/*
 * Divide float64 operands but round/pack the result with float32
 * precision (the "r32" variant), returning it in float64 format.
 */
float64 float64r32_div(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float64r32_round_pack_canonical(pr, status);
}

/* Divide two bfloat16 values. */
bfloat16 QEMU_FLATTEN
bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return bfloat16_round_pack_canonical(pr, status);
}
2613 
/* Divide two float128 values. */
float128 QEMU_FLATTEN
float128_div(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float128_round_pack_canonical(pr, status);
}

/*
 * Divide two floatx80 values.  Unlike the other formats, floatx80
 * unpacking can fail (invalid encodings); in that case the default
 * NaN is returned.
 */
floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    if (!floatx80_unpack_canonical(&pa, a, status) ||
        !floatx80_unpack_canonical(&pb, b, status)) {
        return floatx80_default_nan(status);
    }

    pr = parts_div(&pa, &pb, status);
    return floatx80_round_pack_canonical(pr, status);
}
2638 
2639 /*
2640  * Remainder
2641  */
2642 
/*
 * IEEE remainder.  A NULL quotient pointer selects remainder (rather
 * than mod) semantics in parts_modrem; see the floatx80_modrem comment
 * below for the distinction.
 */
float32 float32_rem(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_modrem(&pa, &pb, NULL, status);

    return float32_round_pack_canonical(pr, status);
}

float64 float64_rem(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_modrem(&pa, &pb, NULL, status);

    return float64_round_pack_canonical(pr, status);
}

float128 float128_rem(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_modrem(&pa, &pb, NULL, status);

    return float128_round_pack_canonical(pr, status);
}
2675 
2676 /*
2677  * Returns the remainder of the extended double-precision floating-point value
2678  * `a' with respect to the corresponding value `b'.
2679  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2680  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
2681  * the remainder based on truncating the quotient toward zero instead and
2682  * *quotient is set to the low 64 bits of the absolute value of the integer
2683  * quotient.
2684  */
floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
                         uint64_t *quotient, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    /* Clear the quotient up front so it is defined on all exit paths. */
    *quotient = 0;
    if (!floatx80_unpack_canonical(&pa, a, status) ||
        !floatx80_unpack_canonical(&pb, b, status)) {
        return floatx80_default_nan(status);
    }
    /* Only the 'mod' variant reports the truncated integer quotient. */
    pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);

    return floatx80_round_pack_canonical(pr, status);
}
2699 
/* IEEE remainder for floatx80; the integer quotient is discarded. */
floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
{
    uint64_t quotient;
    return floatx80_modrem(a, b, false, &quotient, status);
}

/* Truncating (fmod-style) remainder for floatx80. */
floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
{
    uint64_t quotient;
    return floatx80_modrem(a, b, true, &quotient, status);
}
2711 
2712 /*
2713  * Float to Float conversions
2714  *
2715  * Returns the result of converting one float format to another. The
2716  * conversion is performed according to the IEC/IEEE Standard for
2717  * Binary Floating-Point Arithmetic.
2718  *
2719  * Usually this only needs to take care of raising invalid exceptions
2720  * and handling the conversion on NaNs.
2721  */
2722 
/*
 * Adjust unpacked parts for conversion to the alternative
 * half-precision (AHP) format, which has no NaN or Inf encodings:
 * NaNs become signed zero and Inf becomes the maximum normal, both
 * raising Invalid.  Normals, zeros and denormals pass through.
 */
static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
{
    switch (a->cls) {
    case float_class_snan:
        float_raise(float_flag_invalid_snan, s);
        /* fall through */
    case float_class_qnan:
        /*
         * There is no NaN in the destination format.  Raise Invalid
         * and return a zero with the sign of the input NaN.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_zero;
        break;

    case float_class_inf:
        /*
         * There is no Inf in the destination format.  Raise Invalid
         * and return the maximum normal with the correct sign.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_normal;
        a->exp = float16_params_ahp.exp_max;
        /* All-ones fraction, including the implicit bit position. */
        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
                                  float16_params_ahp.frac_size + 1);
        break;

    case float_class_denormal:
        float_raise(float_flag_input_denormal_used, s);
        break;
    case float_class_normal:
    case float_class_zero:
        break;

    default:
        g_assert_not_reached();
    }
}
2761 
/*
 * In-place conversion between same-width formats: only NaNs need
 * reprocessing, and the use of a denormal input is recorded.
 */
static void parts64_float_to_float(FloatParts64 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
    if (a->cls == float_class_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }
}

static void parts128_float_to_float(FloatParts128 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
    if (a->cls == float_class_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }
}

/* Select the 64- or 128-bit implementation from the type of P. */
#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2784 
/*
 * Narrow 128-bit unpacked parts 'b' into 64-bit parts 'a'.  The
 * fraction is truncated with the discarded bits jammed into the low
 * bit (frac_truncjam) so later rounding remains correct.
 */
static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
                                        float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;

    switch (a->cls) {
    case float_class_denormal:
        float_raise(float_flag_input_denormal_used, s);
        /* fall through */
    case float_class_normal:
        frac_truncjam(a, b);
        break;
    case float_class_snan:
    case float_class_qnan:
        /* Discard the low bits of the NaN. */
        a->frac = b->frac_hi;
        parts_return_nan(a, s);
        break;
    default:
        /* Zero and Inf carry no fraction to convert. */
        break;
    }
}
2809 
/*
 * Widen 64-bit unpacked parts 'b' into 128-bit parts 'a'.  Widening
 * the fraction is exact; only NaNs need reprocessing and denormal use
 * is recorded.
 */
static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
                                       float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;
    frac_widen(a, b);

    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
    if (a->cls == float_class_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }
}
2825 
/*
 * float16 -> float32.  'ieee' selects IEEE half-precision; when false
 * the alternative half-precision (AHP) source format is used.
 */
float32 float16_to_float32(float16 a, bool ieee, float_status *s)
{
    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
    FloatParts64 p;

    float16a_unpack_canonical(&p, a, s, fmt16);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

/* float16 -> float64; 'ieee' as in float16_to_float32. */
float64 float16_to_float64(float16 a, bool ieee, float_status *s)
{
    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
    FloatParts64 p;

    float16a_unpack_canonical(&p, a, s, fmt16);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

/*
 * float32 -> float16.  The AHP destination has no NaN/Inf, so the
 * parts are post-processed by parts_float_to_ahp in that case.
 */
float16 float32_to_float16(float32 a, bool ieee, float_status *s)
{
    FloatParts64 p;
    const FloatFmt *fmt;

    float32_unpack_canonical(&p, a, s);
    if (ieee) {
        parts_float_to_float(&p, s);
        fmt = &float16_params;
    } else {
        parts_float_to_ahp(&p, s);
        fmt = &float16_params_ahp;
    }
    return float16a_round_pack_canonical(&p, s, fmt);
}
2861 
/* Softfloat fallback for float32 -> float64 (NaN/Inf/denormal cases). */
static float64 QEMU_SOFTFLOAT_ATTR
soft_float32_to_float64(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

/*
 * float32 -> float64 with a hardfloat fast path: normals convert
 * exactly on the host FPU, zeros are rebuilt directly, everything
 * else (NaN, Inf, denormal) goes through softfloat.
 */
float64 float32_to_float64(float32 a, float_status *s)
{
    if (likely(float32_is_normal(a))) {
        /* Widening conversion can never produce inexact results.  */
        union_float32 uf;
        union_float64 ud;
        uf.s = a;
        ud.h = uf.h;
        return ud.s;
    } else if (float32_is_zero(a)) {
        return float64_set_sign(float64_zero, float32_is_neg(a));
    } else {
        return soft_float32_to_float64(a, s);
    }
}
2887 
/*
 * float64 -> float16.  'ieee' selects IEEE half-precision; when false
 * the alternative half-precision (AHP) destination format is used.
 */
float16 float64_to_float16(float64 a, bool ieee, float_status *s)
{
    FloatParts64 p;
    const FloatFmt *fmt;

    float64_unpack_canonical(&p, a, s);
    if (ieee) {
        parts_float_to_float(&p, s);
        fmt = &float16_params;
    } else {
        parts_float_to_ahp(&p, s);
        fmt = &float16_params_ahp;
    }
    return float16a_round_pack_canonical(&p, s, fmt);
}

/* Narrowing float64 -> float32 conversion. */
float32 float64_to_float32(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}
2912 
/* Conversions between bfloat16 and float32/float64. */
float32 bfloat16_to_float32(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

float64 bfloat16_to_float64(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 float32_to_bfloat16(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}

bfloat16 float64_to_bfloat16(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}
2948 
/*
 * Conversions between float128 and float32/float64, using the
 * narrow/widen helpers that cross the 64/128-bit parts boundary.
 */
float32 float128_to_float32(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float32_round_pack_canonical(&p64, s);
}

float64 float128_to_float64(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float64_round_pack_canonical(&p64, s);
}

float128 float32_to_float128(float32 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float32_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}

float128 float64_to_float128(float64 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float64_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}
2988 
/*
 * Conversions from floatx80.  Unpacking can reject invalid floatx80
 * encodings, in which case the default NaN of the destination format
 * is produced.
 */
float32 floatx80_to_float32(floatx80 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    if (floatx80_unpack_canonical(&p128, a, s)) {
        parts_float_to_float_narrow(&p64, &p128, s);
    } else {
        parts_default_nan(&p64, s);
    }
    return float32_round_pack_canonical(&p64, s);
}

float64 floatx80_to_float64(floatx80 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    if (floatx80_unpack_canonical(&p128, a, s)) {
        parts_float_to_float_narrow(&p64, &p128, s);
    } else {
        parts_default_nan(&p64, s);
    }
    return float64_round_pack_canonical(&p64, s);
}

float128 floatx80_to_float128(floatx80 a, float_status *s)
{
    FloatParts128 p;

    if (floatx80_unpack_canonical(&p, a, s)) {
        parts_float_to_float(&p, s);
    } else {
        parts_default_nan(&p, s);
    }
    return float128_round_pack_canonical(&p, s);
}
3026 
/* Conversions to floatx80 from the narrower binary formats. */
floatx80 float32_to_floatx80(float32 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float32_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return floatx80_round_pack_canonical(&p128, s);
}

floatx80 float64_to_floatx80(float64 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float64_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return floatx80_round_pack_canonical(&p128, s);
}

floatx80 float128_to_floatx80(float128 a, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return floatx80_round_pack_canonical(&p, s);
}
3055 
3056 /*
3057  * Round to integral value
3058  */
3059 
/*
 * Round to an integral value in the same floating-point format, using
 * the current rounding mode from *s and no scaling (scale = 0).
 */
float16 float16_round_to_int(float16 a, float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
    return float16_round_pack_canonical(&p, s);
}

float32 float32_round_to_int(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
    return float32_round_pack_canonical(&p, s);
}

float64 float64_round_to_int(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, s);
}

float128 float128_round_to_int(float128 a, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
    return float128_round_pack_canonical(&p, s);
}
3104 
/*
 * Round floatx80 to an integral value; invalid encodings yield the
 * default NaN.  The format parameters depend on the currently selected
 * floatx80 rounding precision.
 */
floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, status)) {
        return floatx80_default_nan(status);
    }

    parts_round_to_int(&p, status->float_rounding_mode, 0, status,
                       &floatx80_params[status->floatx80_rounding_precision]);
    return floatx80_round_pack_canonical(&p, status);
}
3117 
3118 /*
3119  * Floating-point to signed integer conversions
3120  */
3121 
/*
 * float16 -> signed integer, rounding per 'rmode' with a scalbn-style
 * 'scale' applied before conversion (presumably value * 2**scale, per
 * the scalbn naming — see parts_float_to_sint).  The destination
 * type's min/max are passed down for range handling.
 */
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                              float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
}

int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
3157 
/* float32 -> signed integer with rounding mode and scale; see above. */
int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
3184 
/* float64 -> signed integer with rounding mode and scale; see above. */
int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
3211 
/* bfloat16 -> signed integer with rounding mode and scale; see above. */
int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                               float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
}

int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
3247 
/* float128 -> signed integer with rounding mode and scale; see above. */
static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
3265 
/*
 * float128 -> Int128 with rounding mode and scale.  Int128 is too wide
 * for the generic parts_float_to_sint helper, so the conversion is
 * done by hand on the unpacked parts, accumulating exception flags in
 * 'flags' and raising them once at the end.
 */
static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    int flags = 0;
    Int128 r;
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);

    switch (p.cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        /* NaN converts to the all-ones bit pattern. */
        r = UINT128_MAX;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p.sign ? INT128_MIN : INT128_MAX;
        break;

    case float_class_zero:
        return int128_zero();

    case float_class_normal:
    case float_class_denormal:
        /* 128 - 2 guard bits available before the integer boundary. */
        if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
            flags = float_flag_inexact;
        }

        if (p.exp < 127) {
            /* Magnitude fits in 127 bits: shift the fraction down. */
            int shift = 127 - p.exp;
            r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
            if (p.sign) {
                r = int128_neg(r);
            }
        } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
                   p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
            /* Exactly -2**127, the one in-range value with exp == 127. */
            r = INT128_MIN;
        } else {
            /* Out of range: saturate and raise invalid. */
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = p.sign ? INT128_MIN : INT128_MAX;
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}
3320 
/*
 * floatx80 -> signed integer with rounding mode and scale.  Invalid
 * floatx80 encodings are replaced by the default NaN before the
 * conversion proceeds.
 */
static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, s)) {
        parts_default_nan(&p, s);
    }
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, s)) {
        parts_default_nan(&p, s);
    }
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
3342 
/*
 * Convenience wrappers: convert to signed integer using the current
 * rounding mode from float_status and no scaling.
 */
int8_t float16_to_int8(float16 a, float_status *s)
{
    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float16_to_int32(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float16_to_int64(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float32_to_int16(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float32_to_int32(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float32_to_int64(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float64_to_int16(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float64_to_int32(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float64_to_int64(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float128_to_int32(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float128_to_int64(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

Int128 float128_to_int128(float128 a, float_status *s)
{
    return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t floatx80_to_int32(floatx80 a, float_status *s)
{
    return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t floatx80_to_int64(floatx80 a, float_status *s)
{
    return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}
3417 
/*
 * Floating-point to signed integer conversions that always truncate
 * (round toward zero), ignoring the rounding mode in the float_status.
 */

int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
}

int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
{
    return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
{
    return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
3487 
/*
 * bfloat16 to signed integer conversions: first group uses the rounding
 * mode from the float_status, second group always truncates.
 */

int8_t bfloat16_to_int8(bfloat16 a, float_status *s)
{
    return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s);
}

int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
3527 
float64_to_int32_modulo(float64 a,FloatRoundMode rmode,float_status * s)3528 int32_t float64_to_int32_modulo(float64 a, FloatRoundMode rmode,
3529                                 float_status *s)
3530 {
3531     FloatParts64 p;
3532 
3533     float64_unpack_canonical(&p, a, s);
3534     return parts_float_to_sint_modulo(&p, rmode, 31, s);
3535 }
3536 
float64_to_int64_modulo(float64 a,FloatRoundMode rmode,float_status * s)3537 int64_t float64_to_int64_modulo(float64 a, FloatRoundMode rmode,
3538                                 float_status *s)
3539 {
3540     FloatParts64 p;
3541 
3542     float64_unpack_canonical(&p, a, s);
3543     return parts_float_to_sint_modulo(&p, rmode, 63, s);
3544 }
3545 
3546 /*
3547  * Floating-point to unsigned integer conversions
3548  */
3549 
/*
 * Float to unsigned integer *_scalbn helpers: unpack the input to
 * canonical parts, then convert with the given rounding mode and
 * exponent adjustment (scale), saturating at the named UINT*_MAX.
 */

uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
}

uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode,
                                 int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
}

uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

/* float128 variants use 128-bit parts; only used within this file. */

static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}
3693 
/*
 * Convert a float128 to an unsigned 128-bit integer with the given
 * rounding mode and exponent adjustment (scale).  NaNs and positive
 * overflow saturate to UINT128_MAX; negative inputs (after rounding)
 * and negative infinity saturate to zero.  Exception flags are
 * accumulated locally and raised once at the end; an out-of-range
 * result reports invalid (replacing any inexact from rounding).
 */
static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
                                         int scale, float_status *s)
{
    int flags = 0;
    Int128 r;
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);

    switch (p.cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = UINT128_MAX;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p.sign ? int128_zero() : UINT128_MAX;
        break;

    case float_class_zero:
        /* Exact zero: no flags to raise. */
        return int128_zero();

    case float_class_normal:
    case float_class_denormal:
        /*
         * Round the significand to an integer.  A true return means
         * the value changed (inexact); rounding may also flush the
         * value to zero.  NOTE(review): 128 - 2 appears to bound the
         * usable fraction width for parts_round_to_int_normal --
         * confirm against its definition.
         */
        if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
            flags = float_flag_inexact;
            if (p.cls == float_class_zero) {
                r = int128_zero();
                break;
            }
        }

        if (p.sign) {
            /* Negative after rounding: out of range for unsigned. */
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = int128_zero();
        } else if (p.exp <= 127) {
            /* In range: shift the 128-bit fraction down into place. */
            int shift = 127 - p.exp;
            r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
        } else {
            /* Too large: saturate. */
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = UINT128_MAX;
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}
3749 
/*
 * Floating-point to unsigned integer conversions using the rounding
 * mode currently selected in the float_status (scale of zero).
 */

uint8_t float16_to_uint8(float16 a, float_status *s)
{
    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float16_to_uint32(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float16_to_uint64(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float32_to_uint16(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float32_to_uint32(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float32_to_uint64(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float64_to_uint16(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float64_to_uint32(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float64_to_uint64(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float128_to_uint32(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float128_to_uint64(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

Int128 float128_to_uint128(float128 a, float_status *s)
{
    return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
}
3814 
/*
 * Floating-point to unsigned integer conversions that always truncate
 * (round toward zero), ignoring the rounding mode in the float_status.
 */

uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
}
3874 
/*
 * bfloat16 to unsigned integer conversions: first group uses the
 * rounding mode from the float_status, second group always truncates.
 */

uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
3914 
3915 /*
3916  * Signed integer to floating-point conversions
3917  */
3918 
/*
 * Signed integer to float16 conversions.  The int64 scalbn form is
 * the workhorse; narrower widths sign-extend through it, and the
 * plain conversions use a scale (exponent adjustment) of zero.
 */

float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return float16_round_pack_canonical(&p, status);
}

float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int64_to_float16(int64_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int32_to_float16(int32_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int16_to_float16(int16_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int8_to_float16(int8_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}
3956 
/*
 * Signed integer to float32 conversions.
 */

float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    /* Without scaling, there are no overflow concerns. */
    if (likely(scale == 0) && can_use_fpu(status)) {
        /* Fast path: let the host FPU perform the int->float cast. */
        union_float32 ur;
        ur.h = a;
        return ur.s;
    }

    parts64_sint_to_float(&p, a, scale, status);
    return float32_round_pack_canonical(&p, status);
}

float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int64_to_float32(int64_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int32_to_float32(int32_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int16_to_float32(int16_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}
3996 
/*
 * Signed integer to float64 conversions.
 */

float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    /* Without scaling, there are no overflow concerns. */
    if (likely(scale == 0) && can_use_fpu(status)) {
        /* Fast path: let the host FPU perform the int->float cast. */
        union_float64 ur;
        ur.h = a;
        return ur.s;
    }

    parts_sint_to_float(&p, a, scale, status);
    return float64_round_pack_canonical(&p, status);
}

float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int64_to_float64(int64_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int32_to_float64(int32_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int16_to_float64(int16_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}
4036 
/*
 * Signed integer to bfloat16 conversions.
 */

bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return bfloat16_round_pack_canonical(&p, status);
}

bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int8_to_bfloat16(int8_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}
4079 
/*
 * Convert a signed 128-bit integer to float128.  The magnitude is
 * normalized so the leading one lands at the top of the 128-bit
 * fraction; rounding happens in float128_round_pack_canonical.
 */
float128 int128_to_float128(Int128 a, float_status *status)
{
    FloatParts128 p = { };
    int shift;

    if (int128_nz(a)) {
        p.cls = float_class_normal;
        if (!int128_nonneg(a)) {
            /* Record the sign and work with the absolute value. */
            p.sign = true;
            a = int128_neg(a);
        }

        /* Count leading zeros across both 64-bit halves. */
        shift = clz64(int128_gethi(a));
        if (shift == 64) {
            shift += clz64(int128_getlo(a));
        }

        /* 127 - clz is the position of the most significant bit. */
        p.exp = 127 - shift;
        a = int128_lshift(a, shift);

        p.frac_hi = int128_gethi(a);
        p.frac_lo = int128_getlo(a);
    } else {
        p.cls = float_class_zero;
    }

    return float128_round_pack_canonical(&p, status);
}
4108 
/*
 * Signed integer to float128 / floatx80 conversions.  int64 fits both
 * formats exactly (no scale parameter needed); int32 sign-extends.
 */

float128 int64_to_float128(int64_t a, float_status *status)
{
    FloatParts128 p;

    parts_sint_to_float(&p, a, 0, status);
    return float128_round_pack_canonical(&p, status);
}

float128 int32_to_float128(int32_t a, float_status *status)
{
    return int64_to_float128(a, status);
}

floatx80 int64_to_floatx80(int64_t a, float_status *status)
{
    FloatParts128 p;

    parts_sint_to_float(&p, a, 0, status);
    return floatx80_round_pack_canonical(&p, status);
}

floatx80 int32_to_floatx80(int32_t a, float_status *status)
{
    return int64_to_floatx80(a, status);
}
4134 
/*
 * Unsigned integer to floating-point conversions
 */
4138 
/*
 * Unsigned integer to float16 conversions; narrower widths
 * zero-extend through the uint64 scalbn form.
 */

float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return float16_round_pack_canonical(&p, status);
}

float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint64_to_float16(uint64_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint32_to_float16(uint32_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint16_to_float16(uint16_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint8_to_float16(uint8_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}
4176 
/*
 * Unsigned integer to float32 conversions.
 */

float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    /* Without scaling, there are no overflow concerns. */
    if (likely(scale == 0) && can_use_fpu(status)) {
        /* Fast path: let the host FPU perform the uint->float cast. */
        union_float32 ur;
        ur.h = a;
        return ur.s;
    }

    parts_uint_to_float(&p, a, scale, status);
    return float32_round_pack_canonical(&p, status);
}

float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint64_to_float32(uint64_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint32_to_float32(uint32_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint16_to_float32(uint16_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}
4216 
/*
 * Convert the unsigned integer A, scaled by 2**SCALE, to float64.
 */
float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    /* Without scaling, there are no overflow concerns. */
    if (likely(scale == 0) && can_use_fpu(status)) {
        /*
         * Fast path: let the host FPU do the conversion.
         * NOTE(review): presumably can_use_fpu() guarantees the host
         * rounding mode matches the guest's — confirm against its
         * definition earlier in this file.
         */
        union_float64 ur;
        ur.h = a;
        return ur.s;
    }

    parts_uint_to_float(&p, a, scale, status);
    return float64_round_pack_canonical(&p, status);
}
4231 
uint32_to_float64_scalbn(uint32_t a,int scale,float_status * status)4232 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4233 {
4234     return uint64_to_float64_scalbn(a, scale, status);
4235 }
4236 
uint16_to_float64_scalbn(uint16_t a,int scale,float_status * status)4237 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4238 {
4239     return uint64_to_float64_scalbn(a, scale, status);
4240 }
4241 
uint64_to_float64(uint64_t a,float_status * status)4242 float64 uint64_to_float64(uint64_t a, float_status *status)
4243 {
4244     return uint64_to_float64_scalbn(a, 0, status);
4245 }
4246 
uint32_to_float64(uint32_t a,float_status * status)4247 float64 uint32_to_float64(uint32_t a, float_status *status)
4248 {
4249     return uint64_to_float64_scalbn(a, 0, status);
4250 }
4251 
uint16_to_float64(uint16_t a,float_status * status)4252 float64 uint16_to_float64(uint16_t a, float_status *status)
4253 {
4254     return uint64_to_float64_scalbn(a, 0, status);
4255 }
4256 
uint64_to_bfloat16_scalbn(uint64_t a,int scale,float_status * status)4257 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4258 {
4259     FloatParts64 p;
4260 
4261     parts_uint_to_float(&p, a, scale, status);
4262     return bfloat16_round_pack_canonical(&p, status);
4263 }
4264 
uint32_to_bfloat16_scalbn(uint32_t a,int scale,float_status * status)4265 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4266 {
4267     return uint64_to_bfloat16_scalbn(a, scale, status);
4268 }
4269 
uint16_to_bfloat16_scalbn(uint16_t a,int scale,float_status * status)4270 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4271 {
4272     return uint64_to_bfloat16_scalbn(a, scale, status);
4273 }
4274 
uint8_to_bfloat16_scalbn(uint8_t a,int scale,float_status * status)4275 bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status)
4276 {
4277     return uint64_to_bfloat16_scalbn(a, scale, status);
4278 }
4279 
uint64_to_bfloat16(uint64_t a,float_status * status)4280 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4281 {
4282     return uint64_to_bfloat16_scalbn(a, 0, status);
4283 }
4284 
uint32_to_bfloat16(uint32_t a,float_status * status)4285 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4286 {
4287     return uint64_to_bfloat16_scalbn(a, 0, status);
4288 }
4289 
uint16_to_bfloat16(uint16_t a,float_status * status)4290 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4291 {
4292     return uint64_to_bfloat16_scalbn(a, 0, status);
4293 }
4294 
uint8_to_bfloat16(uint8_t a,float_status * status)4295 bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status)
4296 {
4297     return uint64_to_bfloat16_scalbn(a, 0, status);
4298 }
4299 
uint64_to_float128(uint64_t a,float_status * status)4300 float128 uint64_to_float128(uint64_t a, float_status *status)
4301 {
4302     FloatParts128 p;
4303 
4304     parts_uint_to_float(&p, a, 0, status);
4305     return float128_round_pack_canonical(&p, status);
4306 }
4307 
/*
 * Convert the unsigned 128-bit integer A to float128.
 */
float128 uint128_to_float128(Int128 a, float_status *status)
{
    FloatParts128 p = { };
    int shift;

    if (int128_nz(a)) {
        p.cls = float_class_normal;

        /* Count leading zeros across the full 128-bit value. */
        shift = clz64(int128_gethi(a));
        if (shift == 64) {
            shift += clz64(int128_getlo(a));
        }

        /*
         * Normalize so the most significant set bit lands in bit 127;
         * the unbiased exponent is then 127 minus the shift applied.
         */
        p.exp = 127 - shift;
        a = int128_lshift(a, shift);

        p.frac_hi = int128_gethi(a);
        p.frac_lo = int128_getlo(a);
    } else {
        p.cls = float_class_zero;
    }

    return float128_round_pack_canonical(&p, status);
}
4332 
4333 /*
4334  * Minimum and maximum
4335  */
4336 
float16_minmax(float16 a,float16 b,float_status * s,int flags)4337 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4338 {
4339     FloatParts64 pa, pb, *pr;
4340 
4341     float16_unpack_canonical(&pa, a, s);
4342     float16_unpack_canonical(&pb, b, s);
4343     pr = parts_minmax(&pa, &pb, s, flags);
4344 
4345     return float16_round_pack_canonical(pr, s);
4346 }
4347 
bfloat16_minmax(bfloat16 a,bfloat16 b,float_status * s,int flags)4348 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4349                                 float_status *s, int flags)
4350 {
4351     FloatParts64 pa, pb, *pr;
4352 
4353     bfloat16_unpack_canonical(&pa, a, s);
4354     bfloat16_unpack_canonical(&pb, b, s);
4355     pr = parts_minmax(&pa, &pb, s, flags);
4356 
4357     return bfloat16_round_pack_canonical(pr, s);
4358 }
4359 
float32_minmax(float32 a,float32 b,float_status * s,int flags)4360 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4361 {
4362     FloatParts64 pa, pb, *pr;
4363 
4364     float32_unpack_canonical(&pa, a, s);
4365     float32_unpack_canonical(&pb, b, s);
4366     pr = parts_minmax(&pa, &pb, s, flags);
4367 
4368     return float32_round_pack_canonical(pr, s);
4369 }
4370 
float64_minmax(float64 a,float64 b,float_status * s,int flags)4371 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4372 {
4373     FloatParts64 pa, pb, *pr;
4374 
4375     float64_unpack_canonical(&pa, a, s);
4376     float64_unpack_canonical(&pb, b, s);
4377     pr = parts_minmax(&pa, &pb, s, flags);
4378 
4379     return float64_round_pack_canonical(pr, s);
4380 }
4381 
float128_minmax(float128 a,float128 b,float_status * s,int flags)4382 static float128 float128_minmax(float128 a, float128 b,
4383                                 float_status *s, int flags)
4384 {
4385     FloatParts128 pa, pb, *pr;
4386 
4387     float128_unpack_canonical(&pa, a, s);
4388     float128_unpack_canonical(&pb, b, s);
4389     pr = parts_minmax(&pa, &pb, s, flags);
4390 
4391     return float128_round_pack_canonical(pr, s);
4392 }
4393 
/*
 * MINMAX_1 expands to one public wrapper around <type>_minmax() for a
 * single operation; MINMAX_2 instantiates the full set of eight min/max
 * variants (max/min, *num, *nummag, *imum_number) for a float type.
 */
#define MINMAX_1(type, name, flags) \
    type type##_##name(type a, type b, float_status *s) \
    { return type##_minmax(a, b, s, flags); }

#define MINMAX_2(type) \
    MINMAX_1(type, max, 0)                                                \
    MINMAX_1(type, maxnum, minmax_isnum)                                  \
    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
    MINMAX_1(type, maximum_number, minmax_isnumber)                       \
    MINMAX_1(type, min, minmax_ismin)                                     \
    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
    MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \

MINMAX_2(float16)
MINMAX_2(bfloat16)
MINMAX_2(float32)
MINMAX_2(float64)
MINMAX_2(float128)

#undef MINMAX_1
#undef MINMAX_2
4416 
4417 /*
4418  * Floating point compare
4419  */
4420 
4421 static FloatRelation QEMU_FLATTEN
4422 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4423 {
4424     FloatParts64 pa, pb;
4425 
4426     float16_unpack_canonical(&pa, a, s);
4427     float16_unpack_canonical(&pb, b, s);
4428     return parts_compare(&pa, &pb, s, is_quiet);
4429 }
4430 
float16_compare(float16 a,float16 b,float_status * s)4431 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4432 {
4433     return float16_do_compare(a, b, s, false);
4434 }
4435 
float16_compare_quiet(float16 a,float16 b,float_status * s)4436 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4437 {
4438     return float16_do_compare(a, b, s, true);
4439 }
4440 
4441 static FloatRelation QEMU_SOFTFLOAT_ATTR
float32_do_compare(float32 a,float32 b,float_status * s,bool is_quiet)4442 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4443 {
4444     FloatParts64 pa, pb;
4445 
4446     float32_unpack_canonical(&pa, a, s);
4447     float32_unpack_canonical(&pb, b, s);
4448     return parts_compare(&pa, &pb, s, is_quiet);
4449 }
4450 
/*
 * Hardfloat comparison wrapper: classify and compare with the host FPU
 * when that is safe, otherwise fall back to the softfloat path (which
 * also takes care of raising the appropriate flags).
 */
static FloatRelation QEMU_FLATTEN
float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    if (unlikely(float32_is_denormal(ua.s) || float32_is_denormal(ub.s))) {
        /* We may need to set the input_denormal_used flag */
        goto soft;
    }

    /*
     * isgreater()/isgreaterequal()/isless() are the C99 quiet comparison
     * macros; all three are false when either operand is a NaN, so an
     * unordered pair falls through every branch below.
     */
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float32_do_compare(ua.s, ub.s, s, is_quiet);
}
4484 
float32_compare(float32 a,float32 b,float_status * s)4485 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4486 {
4487     return float32_hs_compare(a, b, s, false);
4488 }
4489 
float32_compare_quiet(float32 a,float32 b,float_status * s)4490 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4491 {
4492     return float32_hs_compare(a, b, s, true);
4493 }
4494 
4495 static FloatRelation QEMU_SOFTFLOAT_ATTR
float64_do_compare(float64 a,float64 b,float_status * s,bool is_quiet)4496 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4497 {
4498     FloatParts64 pa, pb;
4499 
4500     float64_unpack_canonical(&pa, a, s);
4501     float64_unpack_canonical(&pb, b, s);
4502     return parts_compare(&pa, &pb, s, is_quiet);
4503 }
4504 
/*
 * Hardfloat comparison wrapper: classify and compare with the host FPU
 * when that is safe, otherwise fall back to the softfloat path (which
 * also takes care of raising the appropriate flags).
 */
static FloatRelation QEMU_FLATTEN
float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    if (unlikely(float64_is_denormal(ua.s) || float64_is_denormal(ub.s))) {
        /* We may need to set the input_denormal_used flag */
        goto soft;
    }

    /*
     * isgreater()/isgreaterequal()/isless() are the C99 quiet comparison
     * macros; all three are false when either operand is a NaN, so an
     * unordered pair falls through every branch below.
     */
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float64_do_compare(ua.s, ub.s, s, is_quiet);
}
4538 
float64_compare(float64 a,float64 b,float_status * s)4539 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4540 {
4541     return float64_hs_compare(a, b, s, false);
4542 }
4543 
float64_compare_quiet(float64 a,float64 b,float_status * s)4544 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4545 {
4546     return float64_hs_compare(a, b, s, true);
4547 }
4548 
4549 static FloatRelation QEMU_FLATTEN
bfloat16_do_compare(bfloat16 a,bfloat16 b,float_status * s,bool is_quiet)4550 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4551 {
4552     FloatParts64 pa, pb;
4553 
4554     bfloat16_unpack_canonical(&pa, a, s);
4555     bfloat16_unpack_canonical(&pb, b, s);
4556     return parts_compare(&pa, &pb, s, is_quiet);
4557 }
4558 
bfloat16_compare(bfloat16 a,bfloat16 b,float_status * s)4559 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4560 {
4561     return bfloat16_do_compare(a, b, s, false);
4562 }
4563 
bfloat16_compare_quiet(bfloat16 a,bfloat16 b,float_status * s)4564 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4565 {
4566     return bfloat16_do_compare(a, b, s, true);
4567 }
4568 
4569 static FloatRelation QEMU_FLATTEN
float128_do_compare(float128 a,float128 b,float_status * s,bool is_quiet)4570 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4571 {
4572     FloatParts128 pa, pb;
4573 
4574     float128_unpack_canonical(&pa, a, s);
4575     float128_unpack_canonical(&pb, b, s);
4576     return parts_compare(&pa, &pb, s, is_quiet);
4577 }
4578 
float128_compare(float128 a,float128 b,float_status * s)4579 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4580 {
4581     return float128_do_compare(a, b, s, false);
4582 }
4583 
float128_compare_quiet(float128 a,float128 b,float_status * s)4584 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4585 {
4586     return float128_do_compare(a, b, s, true);
4587 }
4588 
static FloatRelation QEMU_FLATTEN
floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
{
    FloatParts128 pa, pb;

    /*
     * floatx80_unpack_canonical() returns false for invalid encodings;
     * treat those as unordered.  NOTE(review): presumably the unpack
     * helper raises the invalid flag itself — confirm its definition.
     */
    if (!floatx80_unpack_canonical(&pa, a, s) ||
        !floatx80_unpack_canonical(&pb, b, s)) {
        return float_relation_unordered;
    }
    return parts_compare(&pa, &pb, s, is_quiet);
}
4600 
floatx80_compare(floatx80 a,floatx80 b,float_status * s)4601 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4602 {
4603     return floatx80_do_compare(a, b, s, false);
4604 }
4605 
floatx80_compare_quiet(floatx80 a,floatx80 b,float_status * s)4606 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4607 {
4608     return floatx80_do_compare(a, b, s, true);
4609 }
4610 
4611 /*
4612  * Scale by 2**N
4613  */
4614 
float16 float16_scalbn(float16 a, int n, float_status *status)
{
    /* Scale A by 2**N: unpack, adjust the exponent, repack. */
    FloatParts64 parts;

    float16_unpack_canonical(&parts, a, status);
    parts_scalbn(&parts, n, status);
    return float16_round_pack_canonical(&parts, status);
}
4623 
float32 float32_scalbn(float32 a, int n, float_status *status)
{
    /* Scale A by 2**N: unpack, adjust the exponent, repack. */
    FloatParts64 parts;

    float32_unpack_canonical(&parts, a, status);
    parts_scalbn(&parts, n, status);
    return float32_round_pack_canonical(&parts, status);
}
4632 
float64 float64_scalbn(float64 a, int n, float_status *status)
{
    /* Scale A by 2**N: unpack, adjust the exponent, repack. */
    FloatParts64 parts;

    float64_unpack_canonical(&parts, a, status);
    parts_scalbn(&parts, n, status);
    return float64_round_pack_canonical(&parts, status);
}
4641 
bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
    /* Scale A by 2**N: unpack, adjust the exponent, repack. */
    FloatParts64 parts;

    bfloat16_unpack_canonical(&parts, a, status);
    parts_scalbn(&parts, n, status);
    return bfloat16_round_pack_canonical(&parts, status);
}
4650 
float128 float128_scalbn(float128 a, int n, float_status *status)
{
    /* Scale A by 2**N: unpack, adjust the exponent, repack. */
    FloatParts128 parts;

    float128_unpack_canonical(&parts, a, status);
    parts_scalbn(&parts, n, status);
    return float128_round_pack_canonical(&parts, status);
}
4659 
/*
 * Scale the floatx80 value A by 2**N.
 */
floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
{
    FloatParts128 p;

    /* Invalid floatx80 encodings yield the default NaN. */
    if (!floatx80_unpack_canonical(&p, a, status)) {
        return floatx80_default_nan(status);
    }
    parts_scalbn(&p, n, status);
    return floatx80_round_pack_canonical(&p, status);
}
4670 
4671 /*
4672  * Square Root
4673  */
4674 
float16_sqrt(float16 a,float_status * status)4675 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4676 {
4677     FloatParts64 p;
4678 
4679     float16_unpack_canonical(&p, a, status);
4680     parts_sqrt(&p, status, &float16_params);
4681     return float16_round_pack_canonical(&p, status);
4682 }
4683 
4684 static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_sqrt(float32 a,float_status * status)4685 soft_f32_sqrt(float32 a, float_status *status)
4686 {
4687     FloatParts64 p;
4688 
4689     float32_unpack_canonical(&p, a, status);
4690     parts_sqrt(&p, status, &float32_params);
4691     return float32_round_pack_canonical(&p, status);
4692 }
4693 
4694 static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_sqrt(float64 a,float_status * status)4695 soft_f64_sqrt(float64 a, float_status *status)
4696 {
4697     FloatParts64 p;
4698 
4699     float64_unpack_canonical(&p, a, status);
4700     parts_sqrt(&p, status, &float64_params);
4701     return float64_round_pack_canonical(&p, status);
4702 }
4703 
/*
 * Hardfloat square root: use the host sqrtf() for non-negative
 * normal/zero inputs, otherwise fall back to the softfloat path.
 */
float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        /* Classify with the host FPU; reject NaN/inf/denormal/negative. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        /* Same rejection test, done on the raw bits instead. */
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}
4730 
/*
 * Hardfloat square root: use the host sqrt() for non-negative
 * normal/zero inputs, otherwise fall back to the softfloat path.
 */
float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        /* Classify with the host FPU; reject NaN/inf/denormal/negative. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        /* Same rejection test, done on the raw bits instead. */
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}
4757 
float64 float64r32_sqrt(float64 a, float_status *status)
{
    /* float64 sqrt rounded with float32 precision (float64r32 form). */
    FloatParts64 parts;

    float64_unpack_canonical(&parts, a, status);
    parts_sqrt(&parts, status, &float64_params);
    return float64r32_round_pack_canonical(&parts, status);
}
4766 
bfloat16_sqrt(bfloat16 a,float_status * status)4767 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4768 {
4769     FloatParts64 p;
4770 
4771     bfloat16_unpack_canonical(&p, a, status);
4772     parts_sqrt(&p, status, &bfloat16_params);
4773     return bfloat16_round_pack_canonical(&p, status);
4774 }
4775 
float128_sqrt(float128 a,float_status * status)4776 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4777 {
4778     FloatParts128 p;
4779 
4780     float128_unpack_canonical(&p, a, status);
4781     parts_sqrt(&p, status, &float128_params);
4782     return float128_round_pack_canonical(&p, status);
4783 }
4784 
/*
 * floatx80 square root, honouring the current x80 rounding precision.
 */
floatx80 floatx80_sqrt(floatx80 a, float_status *s)
{
    FloatParts128 p;

    /* Invalid floatx80 encodings yield the default NaN. */
    if (!floatx80_unpack_canonical(&p, a, s)) {
        return floatx80_default_nan(s);
    }
    /* Round per the dynamically selected x80 precision (s/d/x). */
    parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
    return floatx80_round_pack_canonical(&p, s);
}
4795 
4796 /*
4797  * log2
4798  */
float32 float32_log2(float32 a, float_status *status)
{
    /* Base-2 logarithm via the canonical parts representation. */
    FloatParts64 parts;

    float32_unpack_canonical(&parts, a, status);
    parts_log2(&parts, status, &float32_params);
    return float32_round_pack_canonical(&parts, status);
}
4807 
float64 float64_log2(float64 a, float_status *status)
{
    /* Base-2 logarithm via the canonical parts representation. */
    FloatParts64 parts;

    float64_unpack_canonical(&parts, a, status);
    parts_log2(&parts, status, &float64_params);
    return float64_round_pack_canonical(&parts, status);
}
4816 
4817 /*----------------------------------------------------------------------------
4818 | The pattern for a default generated NaN.
4819 *----------------------------------------------------------------------------*/
4820 
float16_default_nan(float_status * status)4821 float16 float16_default_nan(float_status *status)
4822 {
4823     FloatParts64 p;
4824 
4825     parts_default_nan(&p, status);
4826     p.frac >>= float16_params.frac_shift;
4827     return float16_pack_raw(&p);
4828 }
4829 
float32_default_nan(float_status * status)4830 float32 float32_default_nan(float_status *status)
4831 {
4832     FloatParts64 p;
4833 
4834     parts_default_nan(&p, status);
4835     p.frac >>= float32_params.frac_shift;
4836     return float32_pack_raw(&p);
4837 }
4838 
float64_default_nan(float_status * status)4839 float64 float64_default_nan(float_status *status)
4840 {
4841     FloatParts64 p;
4842 
4843     parts_default_nan(&p, status);
4844     p.frac >>= float64_params.frac_shift;
4845     return float64_pack_raw(&p);
4846 }
4847 
float128_default_nan(float_status * status)4848 float128 float128_default_nan(float_status *status)
4849 {
4850     FloatParts128 p;
4851 
4852     parts_default_nan(&p, status);
4853     frac_shr(&p, float128_params.frac_shift);
4854     return float128_pack_raw(&p);
4855 }
4856 
bfloat16_default_nan(float_status * status)4857 bfloat16 bfloat16_default_nan(float_status *status)
4858 {
4859     FloatParts64 p;
4860 
4861     parts_default_nan(&p, status);
4862     p.frac >>= bfloat16_params.frac_shift;
4863     return bfloat16_pack_raw(&p);
4864 }
4865 
4866 /*----------------------------------------------------------------------------
4867 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4868 *----------------------------------------------------------------------------*/
4869 
float16 float16_silence_nan(float16 a, float_status *status)
{
    /*
     * Align the fraction to the canonical position, let the target
     * silence the NaN, then shift back and repack.
     */
    FloatParts64 parts;

    float16_unpack_raw(&parts, a);
    parts.frac <<= float16_params.frac_shift;
    parts_silence_nan(&parts, status);
    parts.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&parts);
}
4880 
float32 float32_silence_nan(float32 a, float_status *status)
{
    /*
     * Align the fraction to the canonical position, let the target
     * silence the NaN, then shift back and repack.
     */
    FloatParts64 parts;

    float32_unpack_raw(&parts, a);
    parts.frac <<= float32_params.frac_shift;
    parts_silence_nan(&parts, status);
    parts.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&parts);
}
4891 
float64 float64_silence_nan(float64 a, float_status *status)
{
    /*
     * Align the fraction to the canonical position, let the target
     * silence the NaN, then shift back and repack.
     */
    FloatParts64 parts;

    float64_unpack_raw(&parts, a);
    parts.frac <<= float64_params.frac_shift;
    parts_silence_nan(&parts, status);
    parts.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&parts);
}
4902 
bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
    /*
     * Align the fraction to the canonical position, let the target
     * silence the NaN, then shift back and repack.
     */
    FloatParts64 parts;

    bfloat16_unpack_raw(&parts, a);
    parts.frac <<= bfloat16_params.frac_shift;
    parts_silence_nan(&parts, status);
    parts.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&parts);
}
4913 
float128 float128_silence_nan(float128 a, float_status *status)
{
    /*
     * Align the fraction to the canonical position, let the target
     * silence the NaN, then shift back and repack.
     */
    FloatParts128 parts;

    float128_unpack_raw(&parts, a);
    frac_shl(&parts, float128_params.frac_shift);
    parts_silence_nan(&parts, status);
    frac_shr(&parts, float128_params.frac_shift);
    return float128_pack_raw(&parts);
}
4924 
4925 /*----------------------------------------------------------------------------
4926 | If `a' is denormal and we are in flush-to-zero mode then set the
4927 | input-denormal exception and return zero. Otherwise just return the value.
4928 *----------------------------------------------------------------------------*/
4929 
static bool parts_squash_denormal(FloatParts64 p, float_status *status)
{
    /* A denormal has a zero biased exponent and a nonzero fraction. */
    bool is_denormal = p.exp == 0 && p.frac != 0;

    if (is_denormal) {
        float_raise(float_flag_input_denormal_flushed, status);
    }
    return is_denormal;
}
4939 
float16 float16_squash_input_denormal(float16 a, float_status *status)
{
    FloatParts64 p;

    /* Only active in flush-inputs-to-zero mode. */
    if (!status->flush_inputs_to_zero) {
        return a;
    }

    float16_unpack_raw(&p, a);
    if (parts_squash_denormal(p, status)) {
        /* Replace the denormal with a zero of the same sign. */
        return float16_set_sign(float16_zero, p.sign);
    }
    return a;
}
4952 
float32 float32_squash_input_denormal(float32 a, float_status *status)
{
    FloatParts64 p;

    /* Only active in flush-inputs-to-zero mode. */
    if (!status->flush_inputs_to_zero) {
        return a;
    }

    float32_unpack_raw(&p, a);
    if (parts_squash_denormal(p, status)) {
        /* Replace the denormal with a zero of the same sign. */
        return float32_set_sign(float32_zero, p.sign);
    }
    return a;
}
4965 
float64 float64_squash_input_denormal(float64 a, float_status *status)
{
    FloatParts64 p;

    /* Only active in flush-inputs-to-zero mode. */
    if (!status->flush_inputs_to_zero) {
        return a;
    }

    float64_unpack_raw(&p, a);
    if (parts_squash_denormal(p, status)) {
        /* Replace the denormal with a zero of the same sign. */
        return float64_set_sign(float64_zero, p.sign);
    }
    return a;
}
4978 
bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    /* Only active in flush-inputs-to-zero mode. */
    if (!status->flush_inputs_to_zero) {
        return a;
    }

    bfloat16_unpack_raw(&p, a);
    if (parts_squash_denormal(p, status)) {
        /* Replace the denormal with a zero of the same sign. */
        return bfloat16_set_sign(bfloat16_zero, p.sign);
    }
    return a;
}
4991 
4992 /*----------------------------------------------------------------------------
4993 | Normalizes the subnormal extended double-precision floating-point value
4994 | represented by the denormalized significand `aSig'.  The normalized exponent
4995 | and significand are stored at the locations pointed to by `zExpPtr' and
4996 | `zSigPtr', respectively.
4997 *----------------------------------------------------------------------------*/
4998 
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t shiftCount;

    /*
     * NOTE(review): a zero aSig would give clz64() == 64 and an
     * undefined 64-bit shift — callers presumably guarantee aSig != 0;
     * confirm at the call sites.
     */
    shiftCount = clz64(aSig);
    *zSigPtr = aSig<<shiftCount;
    /* Subnormals have biased exponent 0, i.e. true exponent 1 - bias. */
    *zExpPtr = 1 - shiftCount;
}
5008 
5009 /*----------------------------------------------------------------------------
5010 | Takes two extended double-precision floating-point values `a' and `b', one
5011 | of which is a NaN, and returns the appropriate NaN result.  If either `a' or
5012 | `b' is a signaling NaN, the invalid exception is raised.
5013 *----------------------------------------------------------------------------*/
5014 
floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    /* Invalid floatx80 encodings yield the default NaN. */
    if (!floatx80_unpack_canonical(&pa, a, status) ||
        !floatx80_unpack_canonical(&pb, b, status)) {
        return floatx80_default_nan(status);
    }

    /* Target-specific NaN selection between the two operands. */
    pr = parts_pick_nan(&pa, &pb, status);
    return floatx80_round_pack_canonical(pr, status);
}
5027 
5028 /*----------------------------------------------------------------------------
5029 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
5030 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
5031 | and returns the proper extended double-precision floating-point value
5032 | corresponding to the abstract input.  Ordinarily, the abstract value is
5033 | rounded and packed into the extended double-precision format, with the
5034 | inexact exception raised if the abstract input cannot be represented
5035 | exactly.  However, if the abstract value is too large, the overflow and
5036 | inexact exceptions are raised and an infinity or maximal finite value is
5037 | returned.  If the abstract value is too small, the input value is rounded to
5038 | a subnormal number, and the underflow and inexact exceptions are raised if
5039 | the abstract input cannot be represented exactly as a subnormal extended
5040 | double-precision floating-point number.
5041 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
5042 | the result is rounded to the same number of bits as single or double
5043 | precision, respectively.  Otherwise, the result is rounded to the full
5044 | precision of the extended double-precision format.
5045 |     The input significand must be normalized or smaller.  If the input
5046 | significand is not normalized, `zExp' must be 0; in that case, the result
5047 | returned is a subnormal number, and it must not require rounding.  The
5048 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
5049 | Floating-Point Arithmetic.
5050 *----------------------------------------------------------------------------*/
5051 
floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    FloatRoundMode roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Select the rounding increment and the mask of significand bits to
     * be rounded away for the requested target precision.  Full 80-bit
     * precision is handled by the separate code at `precision80' below.
     */
    switch (roundingPrecision) {
    case floatx80_precision_x:
        goto precision80;
    case floatx80_precision_d:
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
        break;
    case floatx80_precision_s:
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
        break;
    default:
        g_assert_not_reached();
    }
    /* Fold any nonzero low bits into zSig0's sticky (lowest) bit. */
    zSig0 |= ( zSig1 != 0 );
    /* Directed rounding modes pick the increment based on the sign. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    /* Exponent out of normal range: handle overflow or subnormals. */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal_flushed, status);
                return packFloatx80(zSign, 0, 0);
            }
            /*
             * Tininess may be detected before or after rounding,
             * depending on status->tininess_before_rounding.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < 0 )
                  || (zSig0 <= zSig0 + roundIncrement);
            /* Denormalize, jamming shifted-out bits into the sticky bit. */
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            /*
             * Rounding may carry into the explicit integer bit, making
             * the result normal with the minimum exponent.
             */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            /* Ties-to-even: on an exact tie, also clear the kept LSB. */
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    /* Carry out of the significand: renormalize, bumping the exponent. */
    if ( zSig0 < roundIncrement ) {
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    roundIncrement = roundMask + 1;
    /* Ties-to-even: on an exact tie, also clear the kept LSB. */
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    /*
     * Full 64-bit significand: all of zSig0 is kept, and zSig1 holds
     * the round and sticky bits.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || (    ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            roundMask = 0;
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /*
             * Modes that round toward zero for this sign saturate to
             * the largest finite value instead of infinity.
             */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return floatx80_default_inf(zSign, status);
        }
        if ( zExp <= 0 ) {
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            /* Denormalize, keeping round/sticky information in zSig1. */
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            /* Re-derive the increment after the denormalizing shift. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                /* Ties-to-even: on an exact tie, clear the kept LSB. */
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                /* Carry into the integer bit makes the result normal. */
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        /* Carry out of the significand: renormalize. */
        if ( zSig0 == 0 ) {
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            /* Ties-to-even: on an exact tie, clear the kept LSB. */
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );

}
5239 
5240 /*----------------------------------------------------------------------------
5241 | Takes an abstract floating-point value having sign `zSign', exponent
5242 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5243 | and returns the proper extended double-precision floating-point value
5244 | corresponding to the abstract input.  This routine is just like
5245 | `roundAndPackFloatx80' except that the input significand does not have to be
5246 | normalized.
5247 *----------------------------------------------------------------------------*/
5248 
normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,bool zSign,int32_t zExp,uint64_t zSig0,uint64_t zSig1,float_status * status)5249 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
5250                                        bool zSign, int32_t zExp,
5251                                        uint64_t zSig0, uint64_t zSig1,
5252                                        float_status *status)
5253 {
5254     int8_t shiftCount;
5255 
5256     if ( zSig0 == 0 ) {
5257         zSig0 = zSig1;
5258         zSig1 = 0;
5259         zExp -= 64;
5260     }
5261     shiftCount = clz64(zSig0);
5262     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5263     zExp -= shiftCount;
5264     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
5265                                 zSig0, zSig1, status);
5266 
5267 }
5268 
5269 /*----------------------------------------------------------------------------
5270 | Returns the binary exponential of the single-precision floating-point value
5271 | `a'. The operation is performed according to the IEC/IEEE Standard for
5272 | Binary Floating-Point Arithmetic.
5273 |
5274 | Uses the following identities:
5275 |
5276 | 1. -------------------------------------------------------------------------
5277 |      x    x*ln(2)
5278 |     2  = e
5279 |
5280 | 2. -------------------------------------------------------------------------
5281 |                      2     3     4     5           n
5282 |      x        x     x     x     x     x           x
5283 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5284 |               1!    2!    3!    4!    5!          n!
5285 *----------------------------------------------------------------------------*/
5286 
/*
 * Taylor-series coefficients 1/n! for e**x, n = 1..15, as float64 bit
 * patterns (0x3ff0... = 1/1! = 1.0, 0x3fe0... = 1/2! = 0.5, ...).  The
 * trailing comment on each entry gives n.
 */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /*  1 */
    const_float64( 0x3fe0000000000000ll ), /*  2 */
    const_float64( 0x3fc5555555555555ll ), /*  3 */
    const_float64( 0x3fa5555555555555ll ), /*  4 */
    const_float64( 0x3f81111111111111ll ), /*  5 */
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};
5305 
float32 float32_exp2(float32 a, float_status *status)
{
    FloatParts64 x, pow, coef, acc;

    float32_unpack_canonical(&x, a, status);

    /* Dispatch on the operand class; only (de)normals need the series. */
    switch (x.cls) {
    case float_class_normal:
    case float_class_denormal:
        break;
    case float_class_snan:
    case float_class_qnan:
        parts_return_nan(&x, status);
        return float32_round_pack_canonical(&x, status);
    case float_class_inf:
        /* 2**-inf is zero, 2**+inf is +inf. */
        return x.sign ? float32_zero : a;
    case float_class_zero:
        /* 2**0 is exactly one. */
        return float32_one;
    default:
        g_assert_not_reached();
    }

    float_raise(float_flag_inexact, status);

    /* Rewrite 2**x as e**(x * ln 2), then evaluate the Taylor series. */
    float64_unpack_canonical(&coef, float64_ln2, status);
    x = *parts_mul(&x, &coef, status);
    pow = x;

    /* acc accumulates the series; pow tracks the running power x**(i+1). */
    float64_unpack_canonical(&acc, float64_one, status);
    for (int i = 0; i < 15; i++) {
        float64_unpack_canonical(&coef, float32_exp2_coefficients[i], status);
        acc = *parts_muladd_scalbn(&coef, &pow, &acc, 0, 0, status);
        pow = *parts_mul(&pow, &x, status);
    }

    return float32_round_pack_canonical(&acc, status);
}
5345 
5346 /*----------------------------------------------------------------------------
5347 | Rounds the extended double-precision floating-point value `a'
5348 | to the precision provided by floatx80_rounding_precision and returns the
5349 | result as an extended double-precision floating-point value.
5350 | The operation is performed according to the IEC/IEEE Standard for Binary
5351 | Floating-Point Arithmetic.
5352 *----------------------------------------------------------------------------*/
5353 
floatx80 floatx80_round(floatx80 a, float_status *status)
{
    FloatParts128 parts;

    /*
     * Unpack, then immediately repack at the precision selected by
     * floatx80_rounding_precision.  Invalid encodings fail to unpack
     * and yield the default NaN.
     */
    return floatx80_unpack_canonical(&parts, a, status)
         ? floatx80_round_pack_canonical(&parts, status)
         : floatx80_default_nan(status);
}
5363 
softfloat_init(void)5364 static void __attribute__((constructor)) softfloat_init(void)
5365 {
5366     union_float64 ua, ub, uc, ur;
5367 
5368     if (QEMU_NO_HARDFLOAT) {
5369         return;
5370     }
5371     /*
5372      * Test that the host's FMA is not obviously broken. For example,
5373      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5374      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5375      */
5376     ua.s = 0x0020000000000001ULL;
5377     ub.s = 0x3ca0000000000000ULL;
5378     uc.s = 0x0020000000000000ULL;
5379     ur.h = fma(ua.h, ub.h, uc.h);
5380     if (ur.s != 0x0020000000000001ULL) {
5381         force_soft_fma = true;
5382     }
5383 }
5384