/* xref: /openbmc/qemu/fpu/softfloat.c (revision d7754940d78a7d5bfb13531afa9a67f8c57e987e) */
/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */

/*
===============================================================================
This C source file is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/

/* BSD licensing:
 * Copyright (c) 2006, Fabrice Bellard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

/* softfloat (and in particular the code in softfloat-specialize.h) is
 * target-dependent and needs the TARGET_* macros.
 */
#include "qemu/osdep.h"
#include <math.h>
#include "qemu/bitops.h"
#include "fpu/softfloat.h"

/* We only need stdlib for abort() */

/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations.  (Can be specialized to target if
| desired.)
*----------------------------------------------------------------------------*/
#include "fpu/softfloat-macros.h"

/*
 * Hardfloat
 *
 * Fast emulation of guest FP instructions is challenging for two reasons.
 * First, FP instruction semantics are similar but not identical, particularly
 * when handling NaNs. Second, emulating at reasonable speed the guest FP
 * exception flags is not trivial: reading the host's flags register with a
 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 * and trapping on every FP exception is neither fast nor pleasant to work
 * with.
 *
 * We address these challenges by leveraging the host FPU for a subset of the
 * operations. To do this we expand on the idea presented in this paper:
 *
 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 *
 * The idea is thus to leverage the host FPU to (1) compute FP operations
 * and (2) identify whether FP exceptions occurred while avoiding
 * expensive exception flag register accesses.
 *
 * An important optimization shown in the paper is that given that exception
 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 * This is particularly useful for the inexact flag, which is very frequently
 * raised in floating-point workloads.
 *
 * We optimize the code further by deferring to soft-fp whenever FP exception
 * detection might get hairy. Two examples: (1) when at least one operand is
 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 * and the result is < the minimum normal.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK

#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1

#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2

#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3

/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif

static inline bool can_use_fpu(const float_status *s)
{
    if (QEMU_NO_HARDFLOAT) {
        return false;
    }
    return likely(s->float_exception_flags & float_flag_inexact &&
                  s->float_rounding_mode == float_round_nearest_even);
}

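/*
 * Example (added for clarity): once a guest raises float_flag_inexact and
 * does not clear it, can_use_fpu() keeps returning true under
 * round-to-nearest-even, so subsequent operations stay on the host-FPU
 * fast path without re-deriving the inexact flag each time.
 */
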
/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */

typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);

/* 2-input is-zero-or-normal */
static inline bool f32_is_zon2(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        /*
         * Not using a temp variable for consecutive fpclassify calls ends up
         * generating faster code.
         */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s);
}

static inline bool f64_is_zon2(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s);
}

/* 3-input is-zero-or-normal */
static inline
bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
{
    if (QEMU_HARDFLOAT_3F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s) &&
           float32_is_zero_or_normal(c.s);
}

static inline
bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
{
    if (QEMU_HARDFLOAT_3F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s) &&
           float64_is_zero_or_normal(c.s);
}

static inline bool f32_is_inf(union_float32 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float32_is_infinity(a.s);
}

static inline bool f64_is_inf(union_float64 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float64_is_infinity(a.s);
}

static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

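/*
 * Illustrative sketch (not part of the original file): a caller would
 * pair a native host operation with its softfloat fallback through
 * float32_gen2. All *_sketch names below are hypothetical.
 *
 *   static float hard_f32_add_sketch(float a, float b)
 *   {
 *       return a + b;
 *   }
 *
 *   static float32 float32_add_sketch(float32 a, float32 b, float_status *s)
 *   {
 *       return float32_gen2(a, b, s, hard_f32_add_sketch,
 *                           soft_f32_add_sketch, f32_is_zon2, f32_post_sketch);
 *   }
 *
 * where soft_f32_add_sketch is a pure softfloat addition and f32_post_sketch
 * decides whether a tiny host result must be recomputed in softfloat.
 */
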
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};

/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
    /*
     * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
     * operations.
     */
    minmax_isnumber = 8,
};

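/*
 * Example (added for clarity): the flags combine, so IEEE 754-2008
 * minNum() would be requested as minmax_ismin | minmax_isnum, and
 * minNumMag() as minmax_ismin | minmax_isnum | minmax_ismag; see
 * parts_minmax() below for how the flags are consumed.
 */
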
/* Simple helpers for checking if, or what kind of, NaN we have */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}

/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;

/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)

/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction
 *   round_mask: bits below lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 *   m68k_denormal: explicit integer bit for extended precision may be 1
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_re_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    bool m68k_denormal;
    uint64_t round_mask;
} FloatFmt;

/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E)                                \
    .exp_size       = E,                                \
    .exp_bias       = ((1 << E) - 1) >> 1,              \
    .exp_re_bias    = (1 << (E - 1)) + (1 << (E - 2)),  \
    .exp_max        = (1 << E) - 1

#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E),                                   \
    .frac_size      = F,                                \
    .frac_shift     = (-F - 1) & 63,                    \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1

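/*
 * Worked example (added for clarity): FLOAT_PARAMS(8, 23) for float32
 * yields exp_bias = 127, exp_max = 255, frac_shift = (-23 - 1) & 63 = 40,
 * and round_mask = (1ull << 40) - 1: the 23-bit fraction is left-aligned
 * just below DECOMPOSED_IMPLICIT_BIT, and the 40 bits beneath it must be
 * rounded away when repacking.
 */
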
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};

#define FLOATX80_PARAMS(R)              \
    FLOAT_PARAMS_(15),                  \
    .frac_size = R == 64 ? 63 : R,      \
    .frac_shift = 0,                    \
    .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1

static const FloatFmt floatx80_params[3] = {
    [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
    [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
    [floatx80_precision_x] = {
        FLOATX80_PARAMS(64),
#ifdef TARGET_M68K
        .m68k_denormal = true,
#endif
    },
};

/* Unpack a float to parts, but do not canonicalize.  */
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;

    *r = (FloatParts64) {
        .cls = float_class_unclassified,
        .sign = extract64(raw, f_size + e_size, 1),
        .exp = extract64(raw, f_size, e_size),
        .frac = extract64(raw, 0, f_size)
    };
}

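/*
 * Worked example (added for clarity): for float32_params, raw 0x3fc00000
 * (1.5f) unpacks to sign = 0, exp = 0x7f (still biased) and frac = 0x400000;
 * no normalization happens here, so denormals and NaNs pass through with
 * their raw field values.
 */
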
static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}

static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
{
    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract32(f.high, 15, 1),
        .exp = extract32(f.high, 0, 15),
        .frac_hi = f.low
    };
}

static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;

    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract64(f.high, f_size + e_size, 1),
        .exp = extract64(f.high, f_size, e_size),
        .frac_hi = extract64(f.high, 0, f_size),
        .frac_lo = f.low,
    };
}

/* Pack a float from parts, but do not canonicalize.  */
static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;
    uint64_t ret;

    ret = (uint64_t)p->sign << (f_size + e_size);
    ret = deposit64(ret, f_size, e_size, p->exp);
    ret = deposit64(ret, 0, f_size, p->frac);
    return ret;
}

static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}

static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;
    uint64_t hi;

    hi = (uint64_t)p->sign << (f_size + e_size);
    hi = deposit64(hi, f_size, e_size, p->exp);
    hi = deposit64(hi, 0, f_size, p->frac_hi);
    return make_float128(hi, p->frac_lo);
}

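/*
 * Note (added for clarity): since neither direction canonicalizes,
 * unpack_raw followed by pack_raw round-trips the raw bit pattern
 * exactly, including NaN payloads and denormal fractions.
 */
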
/*----------------------------------------------------------------------------
| Functions and definitions to determine:  (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output.  These details are target-
| specific.
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.c.inc"

#define PARTS_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME, \
                  FloatParts256 *: parts256_##NAME)

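/*
 * Example (added for clarity): with FloatParts128 p, a call such as
 * parts_return_nan(&p, s) resolves via _Generic on the pointer type to
 * parts128_return_nan(&p, s) at compile time, so these size-generic
 * macros cost nothing at runtime.
 */
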
#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
                                   const FloatFmt *fmt);
static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
                                    const FloatFmt *fmt);

#define parts_uncanon_normal(A, S, F) \
    PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
                                    uint64_t *mod_quot, float_status *s);
static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
                                      uint64_t *mod_quot, float_status *s);

#define parts_modrem(A, B, Q, S) \
    PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)

static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_sqrt(A, S, F) \
    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)

static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                      int scale, int64_t min, int64_t max,
                                      float_status *s);

#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)

static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)

static int64_t parts64_float_to_sint_modulo(FloatParts64 *p,
                                            FloatRoundMode rmode,
                                            int bitsm1, float_status *s);
static int64_t parts128_float_to_sint_modulo(FloatParts128 *p,
                                             FloatRoundMode rmode,
                                             int bitsm1, float_status *s);

#define parts_float_to_sint_modulo(P, R, M, S) \
    PARTS_GENERIC_64_128(float_to_sint_modulo, P)(P, R, M, S)

static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
                                  int scale, float_status *s);
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
                                   int scale, float_status *s);

#define parts_sint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)

static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)

static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)

static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
                                     float_status *s, bool q);
static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool q);

#define parts_compare(A, B, S, Q) \
    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)

static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);

#define parts_scalbn(A, N, S) \
    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)

static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_log2(A, S, F) \
    PARTS_GENERIC_64_128(log2, A)(A, S, F)

/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 */

#define FRAC_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME, \
                  FloatParts256 *: frac256_##NAME)

static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)

static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}

static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}

#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)

static void frac64_allones(FloatParts64 *a)
{
    a->frac = -1;
}

static void frac128_allones(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = -1;
}

#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)

static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
{
    return (a->frac == b->frac ? float_relation_equal
            : a->frac < b->frac ? float_relation_less
            : float_relation_greater);
}

static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t ta = a->frac_hi, tb = b->frac_hi;
    if (ta == tb) {
        ta = a->frac_lo, tb = b->frac_lo;
        if (ta == tb) {
            return float_relation_equal;
        }
    }
    return ta < tb ? float_relation_less : float_relation_greater;
}

#define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)

static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)

static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}

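/*
 * Worked example (added for clarity): when a->frac == b->frac, the
 * A >= B path divides the 128-bit value (a->frac >> 1 : a->frac << 63),
 * i.e. a->frac * 2^63, by b->frac, so udiv_qrnnd() returns exactly
 * 1ull << 63 with a zero remainder: the decomposed value 1.0 with the
 * implicit bit at DECOMPOSED_BINARY_POINT and no sticky lsb.
 */
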
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)

static bool frac64_eqz(FloatParts64 *a)
{
    return a->frac == 0;
}

static bool frac128_eqz(FloatParts128 *a)
{
    return (a->frac_hi | a->frac_lo) == 0;
}

#define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)

static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)

static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}

static void frac128_neg(FloatParts128 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)

static int frac64_normalize(FloatParts64 *a)
{
    if (a->frac) {
        int shift = clz64(a->frac);
        a->frac <<= shift;
        return shift;
    }
    return 64;
}

static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        int shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}

static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)

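/*
 * Example (added for clarity): a FloatParts64 with frac == 1 has
 * clz64(1) == 63, so frac64_normalize() shifts the fraction up to
 * DECOMPOSED_IMPLICIT_BIT and returns 63 for the caller to subtract
 * from the exponent; a zero fraction returns the full word width.
 */
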
static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
{
    uint64_t a0, a1, b0, t0, t1, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac;
    a1 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        a0 >>= 1;
        exp_diff = 0;
    }

    b0 = b->frac;
    quot = q = b0 <= a0;
    if (q) {
        a0 -= b0;
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? q - 2 : 0;
        mul64To128(b0, q, &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(a0, a1, 62, &a0, &a1);
        exp_diff -= 62;
        quot = (quot << 62) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
        mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
        while (le128(t0, t1, a0, a1)) {
            ++q;
            sub128(a0, a1, t0, t1, &a0, &a1);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub128(t0, t1, a0, a1, &t0, &t1);
        if (lt128(t0, t1, a0, a1) ||
            (eq128(t0, t1, a0, a1) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift128Left(a0, a1, shift, &a0, &a1);
    } else if (likely(a1)) {
        shift = clz64(a1);
        a0 = a1 << shift;
        a1 = 0;
        shift += 64;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac = a0 | (a1 != 0);
}

static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
                           uint64_t *mod_quot)
{
    uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac_hi;
    a1 = a->frac_lo;
    a2 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        shift128Right(a0, a1, 1, &a0, &a1);
        exp_diff = 0;
    }

    b0 = b->frac_hi;
    b1 = b->frac_lo;

    quot = q = le128(b0, b1, a0, a1);
    if (q) {
        sub128(a0, a1, b0, b1, &a0, &a1);
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? q - 4 : 0;
        mul128By64To192(b0, b1, q, &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
        exp_diff -= 61;
        quot = (quot << 61) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
        mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
        while (le192(t0, t1, t2, a0, a1, a2)) {
            ++q;
            sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = b1;
        t2 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
        if (lt192(t0, t1, t2, a0, a1, a2) ||
            (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a2 = t2;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
    } else if (likely(a1)) {
        shift = clz64(a1);
        shortShift128Left(a1, a2, shift, &a0, &a1);
        a2 = 0;
        shift += 64;
    } else if (likely(a2)) {
        shift = clz64(a2);
        a0 = a2 << shift;
        a1 = a2 = 0;
        shift += 128;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac_hi = a0;
    a->frac_lo = a1 | (a2 != 0);
}

#define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)

frac64_shl(FloatParts64 * a,int c)1399  static void frac64_shl(FloatParts64 *a, int c)
1400  {
1401      a->frac <<= c;
1402  }
1403  
frac128_shl(FloatParts128 * a,int c)1404  static void frac128_shl(FloatParts128 *a, int c)
1405  {
1406      uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1407  
1408      if (c & 64) {
1409          a0 = a1, a1 = 0;
1410      }
1411  
1412      c &= 63;
1413      if (c) {
1414          a0 = shl_double(a0, a1, c);
1415          a1 = a1 << c;
1416      }
1417  
1418      a->frac_hi = a0;
1419      a->frac_lo = a1;
1420  }
1421  
1422  #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1423  
frac64_shr(FloatParts64 * a,int c)1424  static void frac64_shr(FloatParts64 *a, int c)
1425  {
1426      a->frac >>= c;
1427  }
1428  
frac128_shr(FloatParts128 * a,int c)1429  static void frac128_shr(FloatParts128 *a, int c)
1430  {
1431      uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1432  
1433      if (c & 64) {
1434          a1 = a0, a0 = 0;
1435      }
1436  
1437      c &= 63;
1438      if (c) {
1439          a1 = shr_double(a0, a1, c);
1440          a0 = a0 >> c;
1441      }
1442  
1443      a->frac_hi = a0;
1444      a->frac_lo = a1;
1445  }
1446  
1447  #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1448  
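      /*
       * Shift right by C and "jam": any nonzero bits shifted out are OR-ed
       * back into the least significant bit of the result, preserving a
       * sticky bit so that later rounding can still see that the value
       * was inexact.
       */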
1449  static void frac64_shrjam(FloatParts64 *a, int c)
1450  {
1451      uint64_t a0 = a->frac;
1452  
1453      if (likely(c != 0)) {
1454          if (likely(c < 64)) {
1455              a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1456          } else {
1457              a0 = a0 != 0;
1458          }
1459          a->frac = a0;
1460      }
1461  }
1462  
1463  static void frac128_shrjam(FloatParts128 *a, int c)
1464  {
1465      uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1466      uint64_t sticky = 0;
1467  
1468      if (unlikely(c == 0)) {
1469          return;
1470      } else if (likely(c < 64)) {
1471          /* nothing */
1472      } else if (likely(c < 128)) {
1473          sticky = a1;
1474          a1 = a0;
1475          a0 = 0;
1476          c &= 63;
1477          if (c == 0) {
1478              goto done;
1479          }
1480      } else {
1481          sticky = a0 | a1;
1482          a0 = a1 = 0;
1483          goto done;
1484      }
1485  
1486      sticky |= shr_double(a1, 0, c);
1487      a1 = shr_double(a0, a1, c);
1488      a0 = a0 >> c;
1489  
1490   done:
1491      a->frac_lo = a1 | (sticky != 0);
1492      a->frac_hi = a0;
1493  }
1494  
1495  static void frac256_shrjam(FloatParts256 *a, int c)
1496  {
1497      uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1498      uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1499      uint64_t sticky = 0;
1500  
1501      if (unlikely(c == 0)) {
1502          return;
1503      } else if (likely(c < 64)) {
1504          /* nothing */
1505      } else if (likely(c < 256)) {
1506          if (unlikely(c & 128)) {
1507              sticky |= a2 | a3;
1508              a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1509          }
1510          if (unlikely(c & 64)) {
1511              sticky |= a3;
1512              a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1513          }
1514          c &= 63;
1515          if (c == 0) {
1516              goto done;
1517          }
1518      } else {
1519          sticky = a0 | a1 | a2 | a3;
1520          a0 = a1 = a2 = a3 = 0;
1521          goto done;
1522      }
1523  
1524      sticky |= shr_double(a3, 0, c);
1525      a3 = shr_double(a2, a3, c);
1526      a2 = shr_double(a1, a2, c);
1527      a1 = shr_double(a0, a1, c);
1528      a0 = a0 >> c;
1529  
1530   done:
1531      a->frac_lo = a3 | (sticky != 0);
1532      a->frac_lm = a2;
1533      a->frac_hm = a1;
1534      a->frac_hi = a0;
1535  }
1536  
1537  #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1538  
1539  static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1540  {
1541      return usub64_overflow(a->frac, b->frac, &r->frac);
1542  }
1543  
1544  static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1545  {
1546      bool c = 0;
1547      r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1548      r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1549      return c;
1550  }
1551  
1552  static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1553  {
1554      bool c = 0;
1555      r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1556      r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1557      r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1558      r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1559      return c;
1560  }
1561  
1562  #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1563  
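      /*
       * Truncate a double-width fraction to the narrower type, jamming any
       * discarded nonzero low bits into the sticky (least significant) bit.
       */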
1564  static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1565  {
1566      r->frac = a->frac_hi | (a->frac_lo != 0);
1567  }
1568  
1569  static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1570  {
1571      r->frac_hi = a->frac_hi;
1572      r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1573  }
1574  
1575  #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1576  
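      /*
       * Widen a fraction to the double-width type, placing the value in
       * the high part and zero-filling the new low bits.
       */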
1577  static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1578  {
1579      r->frac_hi = a->frac;
1580      r->frac_lo = 0;
1581  }
1582  
1583  static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1584  {
1585      r->frac_hi = a->frac_hi;
1586      r->frac_hm = a->frac_lo;
1587      r->frac_lm = 0;
1588      r->frac_lo = 0;
1589  }
1590  
1591  #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1592  
1593  /*
1594   * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
1595   * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1596   * and thus MIT licensed.
1597   */
1598  static const uint16_t rsqrt_tab[128] = {
1599      0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1600      0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1601      0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1602      0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1603      0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1604      0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1605      0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1606      0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1607      0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1608      0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1609      0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1610      0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1611      0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1612      0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1613      0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1614      0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1615  };
1616  
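      /*
       * For reference, a sketch of how such a table is indexed, after
       * musl's sqrt() (this is not code from this file; QEMU's actual
       * lookup lives in softfloat-parts.c.inc):
       *
       *     int i = (ix >> 46) % 128;    // low exponent bit + top 6
       *                                  // fraction bits of the raw
       *                                  // float64 bits in ix
       *     uint32_t r = (uint32_t)rsqrt_tab[i] << 16;
       *
       * r is a first approximation to 1/sqrt(m), subsequently refined by
       * Goldschmidt/Newton-Raphson iterations.
       */
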
1617  #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1618  #define FloatPartsN    glue(FloatParts,N)
1619  #define FloatPartsW    glue(FloatParts,W)
1620  
1621  #define N 64
1622  #define W 128
1623  
1624  #include "softfloat-parts-addsub.c.inc"
1625  #include "softfloat-parts.c.inc"
1626  
1627  #undef  N
1628  #undef  W
1629  #define N 128
1630  #define W 256
1631  
1632  #include "softfloat-parts-addsub.c.inc"
1633  #include "softfloat-parts.c.inc"
1634  
1635  #undef  N
1636  #undef  W
1637  #define N            256
1638  
1639  #include "softfloat-parts-addsub.c.inc"
1640  
1641  #undef  N
1642  #undef  W
1643  #undef  partsN
1644  #undef  FloatPartsN
1645  #undef  FloatPartsW
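
      /*
       * The include blocks above instantiate the generic algorithms once
       * per fraction width: N is the width of the FloatPartsN type being
       * compiled and W names the double-width type used for intermediate
       * results.  The glue() macros paste the width into each name, so
       * that, e.g., with N = 128 and W = 256, partsN(NAME) expands to
       * parts128_NAME and FloatPartsW to FloatParts256.
       */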
1646  
1647  /*
1648   * Pack/unpack routines with a specific FloatFmt.
1649   */
1650  
1651  static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1652                                        float_status *s, const FloatFmt *params)
1653  {
1654      float16_unpack_raw(p, f);
1655      parts_canonicalize(p, s, params);
1656  }
1657  
1658  static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1659                                       float_status *s)
1660  {
1661      float16a_unpack_canonical(p, f, s, &float16_params);
1662  }
1663  
1664  static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1665                                        float_status *s)
1666  {
1667      bfloat16_unpack_raw(p, f);
1668      parts_canonicalize(p, s, &bfloat16_params);
1669  }
1670  
1671  static float16 float16a_round_pack_canonical(FloatParts64 *p,
1672                                               float_status *s,
1673                                               const FloatFmt *params)
1674  {
1675      parts_uncanon(p, s, params);
1676      return float16_pack_raw(p);
1677  }
1678  
1679  static float16 float16_round_pack_canonical(FloatParts64 *p,
1680                                              float_status *s)
1681  {
1682      return float16a_round_pack_canonical(p, s, &float16_params);
1683  }
1684  
1685  static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1686                                                float_status *s)
1687  {
1688      parts_uncanon(p, s, &bfloat16_params);
1689      return bfloat16_pack_raw(p);
1690  }
1691  
1692  static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1693                                       float_status *s)
1694  {
1695      float32_unpack_raw(p, f);
1696      parts_canonicalize(p, s, &float32_params);
1697  }
1698  
1699  static float32 float32_round_pack_canonical(FloatParts64 *p,
1700                                              float_status *s)
1701  {
1702      parts_uncanon(p, s, &float32_params);
1703      return float32_pack_raw(p);
1704  }
1705  
1706  static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1707                                       float_status *s)
1708  {
1709      float64_unpack_raw(p, f);
1710      parts_canonicalize(p, s, &float64_params);
1711  }
1712  
1713  static float64 float64_round_pack_canonical(FloatParts64 *p,
1714                                              float_status *s)
1715  {
1716      parts_uncanon(p, s, &float64_params);
1717      return float64_pack_raw(p);
1718  }
1719  
1720  static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1721                                                 float_status *s)
1722  {
1723      parts_uncanon(p, s, &float32_params);
1724  
1725      /*
1726       * In parts_uncanon, we placed the fraction for float32 at the lsb.
1727       * We need to adjust the fraction higher so that the least N bits are
1728       * zero, and the fraction is adjacent to the float64 implicit bit.
1729       */
1730      switch (p->cls) {
1731      case float_class_normal:
1732          if (unlikely(p->exp == 0)) {
1733              /*
1734               * The result is denormal for float32, but can be represented
1735               * in normalized form for float64.  Adjust, per canonicalize.
1736               */
1737              int shift = frac_normalize(p);
1738              p->exp = (float32_params.frac_shift -
1739                        float32_params.exp_bias - shift + 1 +
1740                        float64_params.exp_bias);
1741              frac_shr(p, float64_params.frac_shift);
1742          } else {
1743              frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1744              p->exp += float64_params.exp_bias - float32_params.exp_bias;
1745          }
1746          break;
1747      case float_class_snan:
1748      case float_class_qnan:
1749          frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1750          p->exp = float64_params.exp_max;
1751          break;
1752      case float_class_inf:
1753          p->exp = float64_params.exp_max;
1754          break;
1755      case float_class_zero:
1756          break;
1757      default:
1758          g_assert_not_reached();
1759      }
1760  
1761      return float64_pack_raw(p);
1762  }
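
      /*
       * Note on the float64r32 family (a summary, inferred from the code
       * above): the result is a float64 whose value has been rounded to
       * float32 precision and range, as used by e.g. PowerPC
       * single-precision arithmetic performed in 64-bit registers.
       */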
1763  
1764  static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1765                                        float_status *s)
1766  {
1767      float128_unpack_raw(p, f);
1768      parts_canonicalize(p, s, &float128_params);
1769  }
1770  
1771  static float128 float128_round_pack_canonical(FloatParts128 *p,
1772                                                float_status *s)
1773  {
1774      parts_uncanon(p, s, &float128_params);
1775      return float128_pack_raw(p);
1776  }
1777  
1778  /* Returns false if the encoding is invalid. */
1779  static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1780                                        float_status *s)
1781  {
1782      /* Ensure rounding precision is set before beginning. */
1783      switch (s->floatx80_rounding_precision) {
1784      case floatx80_precision_x:
1785      case floatx80_precision_d:
1786      case floatx80_precision_s:
1787          break;
1788      default:
1789          g_assert_not_reached();
1790      }
1791  
1792      if (unlikely(floatx80_invalid_encoding(f))) {
1793          float_raise(float_flag_invalid, s);
1794          return false;
1795      }
1796  
1797      floatx80_unpack_raw(p, f);
1798  
1799      if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1800          parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1801      } else {
1802          /* The explicit integer bit is ignored, after invalid checks. */
1803          p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1804          p->cls = (p->frac_hi == 0 ? float_class_inf
1805                    : parts_is_snan_frac(p->frac_hi, s)
1806                    ? float_class_snan : float_class_qnan);
1807      }
1808      return true;
1809  }
1810  
1811  static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1812                                                float_status *s)
1813  {
1814      const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1815      uint64_t frac;
1816      int exp;
1817  
1818      switch (p->cls) {
1819      case float_class_normal:
1820          if (s->floatx80_rounding_precision == floatx80_precision_x) {
1821              parts_uncanon_normal(p, s, fmt);
1822              frac = p->frac_hi;
1823              exp = p->exp;
1824          } else {
1825              FloatParts64 p64;
1826  
1827              p64.sign = p->sign;
1828              p64.exp = p->exp;
1829              frac_truncjam(&p64, p);
1830              parts_uncanon_normal(&p64, s, fmt);
1831              frac = p64.frac;
1832              exp = p64.exp;
1833          }
1834          if (exp != fmt->exp_max) {
1835              break;
1836          }
1837          /* rounded to inf -- fall through to set frac correctly */
1838  
1839      case float_class_inf:
1840          /* x86 and m68k differ in the setting of the integer bit. */
1841          frac = floatx80_infinity_low;
1842          exp = fmt->exp_max;
1843          break;
1844  
1845      case float_class_zero:
1846          frac = 0;
1847          exp = 0;
1848          break;
1849  
1850      case float_class_snan:
1851      case float_class_qnan:
1852          /* NaNs have the integer bit set. */
1853          frac = p->frac_hi | (1ull << 63);
1854          exp = fmt->exp_max;
1855          break;
1856  
1857      default:
1858          g_assert_not_reached();
1859      }
1860  
1861      return packFloatx80(p->sign, exp, frac);
1862  }
1863  
1864  /*
1865   * Addition and subtraction
1866   */
1867  
1868  static float16 QEMU_FLATTEN
1869  float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1870  {
1871      FloatParts64 pa, pb, *pr;
1872  
1873      float16_unpack_canonical(&pa, a, status);
1874      float16_unpack_canonical(&pb, b, status);
1875      pr = parts_addsub(&pa, &pb, status, subtract);
1876  
1877      return float16_round_pack_canonical(pr, status);
1878  }
1879  
1880  float16 float16_add(float16 a, float16 b, float_status *status)
1881  {
1882      return float16_addsub(a, b, status, false);
1883  }
1884  
1885  float16 float16_sub(float16 a, float16 b, float_status *status)
1886  {
1887      return float16_addsub(a, b, status, true);
1888  }
1889  
1890  static float32 QEMU_SOFTFLOAT_ATTR
1891  soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1892  {
1893      FloatParts64 pa, pb, *pr;
1894  
1895      float32_unpack_canonical(&pa, a, status);
1896      float32_unpack_canonical(&pb, b, status);
1897      pr = parts_addsub(&pa, &pb, status, subtract);
1898  
1899      return float32_round_pack_canonical(pr, status);
1900  }
1901  
1902  static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1903  {
1904      return soft_f32_addsub(a, b, status, false);
1905  }
1906  
1907  static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1908  {
1909      return soft_f32_addsub(a, b, status, true);
1910  }
1911  
1912  static float64 QEMU_SOFTFLOAT_ATTR
1913  soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1914  {
1915      FloatParts64 pa, pb, *pr;
1916  
1917      float64_unpack_canonical(&pa, a, status);
1918      float64_unpack_canonical(&pb, b, status);
1919      pr = parts_addsub(&pa, &pb, status, subtract);
1920  
1921      return float64_round_pack_canonical(pr, status);
1922  }
1923  
1924  static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1925  {
1926      return soft_f64_addsub(a, b, status, false);
1927  }
1928  
1929  static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1930  {
1931      return soft_f64_addsub(a, b, status, true);
1932  }
1933  
1934  static float hard_f32_add(float a, float b)
1935  {
1936      return a + b;
1937  }
1938  
1939  static float hard_f32_sub(float a, float b)
1940  {
1941      return a - b;
1942  }
1943  
1944  static double hard_f64_add(double a, double b)
1945  {
1946      return a + b;
1947  }
1948  
1949  static double hard_f64_sub(double a, double b)
1950  {
1951      return a - b;
1952  }
1953  
1954  static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1955  {
1956      if (QEMU_HARDFLOAT_2F32_USE_FP) {
1957          return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1958      }
1959      return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1960  }
1961  
1962  static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1963  {
1964      if (QEMU_HARDFLOAT_2F64_USE_FP) {
1965          return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1966      } else {
1967          return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1968      }
1969  }
1970  
1971  static float32 float32_addsub(float32 a, float32 b, float_status *s,
1972                                hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1973  {
1974      return float32_gen2(a, b, s, hard, soft,
1975                          f32_is_zon2, f32_addsubmul_post);
1976  }
1977  
1978  static float64 float64_addsub(float64 a, float64 b, float_status *s,
1979                                hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1980  {
1981      return float64_gen2(a, b, s, hard, soft,
1982                          f64_is_zon2, f64_addsubmul_post);
1983  }
1984  
1985  float32 QEMU_FLATTEN
1986  float32_add(float32 a, float32 b, float_status *s)
1987  {
1988      return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1989  }
1990  
1991  float32 QEMU_FLATTEN
1992  float32_sub(float32 a, float32 b, float_status *s)
1993  {
1994      return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1995  }
1996  
1997  float64 QEMU_FLATTEN
1998  float64_add(float64 a, float64 b, float_status *s)
1999  {
2000      return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
2001  }
2002  
2003  float64 QEMU_FLATTEN
2004  float64_sub(float64 a, float64 b, float_status *s)
2005  {
2006      return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
2007  }
2008  
2009  static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
2010                                   bool subtract)
2011  {
2012      FloatParts64 pa, pb, *pr;
2013  
2014      float64_unpack_canonical(&pa, a, status);
2015      float64_unpack_canonical(&pb, b, status);
2016      pr = parts_addsub(&pa, &pb, status, subtract);
2017  
2018      return float64r32_round_pack_canonical(pr, status);
2019  }
2020  
2021  float64 float64r32_add(float64 a, float64 b, float_status *status)
2022  {
2023      return float64r32_addsub(a, b, status, false);
2024  }
2025  
2026  float64 float64r32_sub(float64 a, float64 b, float_status *status)
2027  {
2028      return float64r32_addsub(a, b, status, true);
2029  }
2030  
2031  static bfloat16 QEMU_FLATTEN
2032  bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2033  {
2034      FloatParts64 pa, pb, *pr;
2035  
2036      bfloat16_unpack_canonical(&pa, a, status);
2037      bfloat16_unpack_canonical(&pb, b, status);
2038      pr = parts_addsub(&pa, &pb, status, subtract);
2039  
2040      return bfloat16_round_pack_canonical(pr, status);
2041  }
2042  
2043  bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
2044  {
2045      return bfloat16_addsub(a, b, status, false);
2046  }
2047  
2048  bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2049  {
2050      return bfloat16_addsub(a, b, status, true);
2051  }
2052  
2053  static float128 QEMU_FLATTEN
2054  float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2055  {
2056      FloatParts128 pa, pb, *pr;
2057  
2058      float128_unpack_canonical(&pa, a, status);
2059      float128_unpack_canonical(&pb, b, status);
2060      pr = parts_addsub(&pa, &pb, status, subtract);
2061  
2062      return float128_round_pack_canonical(pr, status);
2063  }
2064  
2065  float128 float128_add(float128 a, float128 b, float_status *status)
2066  {
2067      return float128_addsub(a, b, status, false);
2068  }
2069  
2070  float128 float128_sub(float128 a, float128 b, float_status *status)
2071  {
2072      return float128_addsub(a, b, status, true);
2073  }
2074  
2075  static floatx80 QEMU_FLATTEN
2076  floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2077  {
2078      FloatParts128 pa, pb, *pr;
2079  
2080      if (!floatx80_unpack_canonical(&pa, a, status) ||
2081          !floatx80_unpack_canonical(&pb, b, status)) {
2082          return floatx80_default_nan(status);
2083      }
2084  
2085      pr = parts_addsub(&pa, &pb, status, subtract);
2086      return floatx80_round_pack_canonical(pr, status);
2087  }
2088  
2089  floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2090  {
2091      return floatx80_addsub(a, b, status, false);
2092  }
2093  
2094  floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2095  {
2096      return floatx80_addsub(a, b, status, true);
2097  }
2098  
2099  /*
2100   * Multiplication
2101   */
2102  
2103  float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2104  {
2105      FloatParts64 pa, pb, *pr;
2106  
2107      float16_unpack_canonical(&pa, a, status);
2108      float16_unpack_canonical(&pb, b, status);
2109      pr = parts_mul(&pa, &pb, status);
2110  
2111      return float16_round_pack_canonical(pr, status);
2112  }
2113  
2114  static float32 QEMU_SOFTFLOAT_ATTR
2115  soft_f32_mul(float32 a, float32 b, float_status *status)
2116  {
2117      FloatParts64 pa, pb, *pr;
2118  
2119      float32_unpack_canonical(&pa, a, status);
2120      float32_unpack_canonical(&pb, b, status);
2121      pr = parts_mul(&pa, &pb, status);
2122  
2123      return float32_round_pack_canonical(pr, status);
2124  }
2125  
2126  static float64 QEMU_SOFTFLOAT_ATTR
2127  soft_f64_mul(float64 a, float64 b, float_status *status)
2128  {
2129      FloatParts64 pa, pb, *pr;
2130  
2131      float64_unpack_canonical(&pa, a, status);
2132      float64_unpack_canonical(&pb, b, status);
2133      pr = parts_mul(&pa, &pb, status);
2134  
2135      return float64_round_pack_canonical(pr, status);
2136  }
2137  
2138  static float hard_f32_mul(float a, float b)
2139  {
2140      return a * b;
2141  }
2142  
2143  static double hard_f64_mul(double a, double b)
2144  {
2145      return a * b;
2146  }
2147  
2148  float32 QEMU_FLATTEN
2149  float32_mul(float32 a, float32 b, float_status *s)
2150  {
2151      return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2152                          f32_is_zon2, f32_addsubmul_post);
2153  }
2154  
2155  float64 QEMU_FLATTEN
2156  float64_mul(float64 a, float64 b, float_status *s)
2157  {
2158      return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2159                          f64_is_zon2, f64_addsubmul_post);
2160  }
2161  
2162  float64 float64r32_mul(float64 a, float64 b, float_status *status)
2163  {
2164      FloatParts64 pa, pb, *pr;
2165  
2166      float64_unpack_canonical(&pa, a, status);
2167      float64_unpack_canonical(&pb, b, status);
2168      pr = parts_mul(&pa, &pb, status);
2169  
2170      return float64r32_round_pack_canonical(pr, status);
2171  }
2172  
2173  bfloat16 QEMU_FLATTEN
2174  bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2175  {
2176      FloatParts64 pa, pb, *pr;
2177  
2178      bfloat16_unpack_canonical(&pa, a, status);
2179      bfloat16_unpack_canonical(&pb, b, status);
2180      pr = parts_mul(&pa, &pb, status);
2181  
2182      return bfloat16_round_pack_canonical(pr, status);
2183  }
2184  
2185  float128 QEMU_FLATTEN
2186  float128_mul(float128 a, float128 b, float_status *status)
2187  {
2188      FloatParts128 pa, pb, *pr;
2189  
2190      float128_unpack_canonical(&pa, a, status);
2191      float128_unpack_canonical(&pb, b, status);
2192      pr = parts_mul(&pa, &pb, status);
2193  
2194      return float128_round_pack_canonical(pr, status);
2195  }
2196  
2197  floatx80 QEMU_FLATTEN
2198  floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2199  {
2200      FloatParts128 pa, pb, *pr;
2201  
2202      if (!floatx80_unpack_canonical(&pa, a, status) ||
2203          !floatx80_unpack_canonical(&pb, b, status)) {
2204          return floatx80_default_nan(status);
2205      }
2206  
2207      pr = parts_mul(&pa, &pb, status);
2208      return floatx80_round_pack_canonical(pr, status);
2209  }
2210  
2211  /*
2212   * Fused multiply-add
2213   */
2214  
2215  float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
2216                                      int flags, float_status *status)
2217  {
2218      FloatParts64 pa, pb, pc, *pr;
2219  
2220      float16_unpack_canonical(&pa, a, status);
2221      float16_unpack_canonical(&pb, b, status);
2222      float16_unpack_canonical(&pc, c, status);
2223      pr = parts_muladd(&pa, &pb, &pc, flags, status);
2224  
2225      return float16_round_pack_canonical(pr, status);
2226  }
2227  
2228  static float32 QEMU_SOFTFLOAT_ATTR
2229  soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2230                  float_status *status)
2231  {
2232      FloatParts64 pa, pb, pc, *pr;
2233  
2234      float32_unpack_canonical(&pa, a, status);
2235      float32_unpack_canonical(&pb, b, status);
2236      float32_unpack_canonical(&pc, c, status);
2237      pr = parts_muladd(&pa, &pb, &pc, flags, status);
2238  
2239      return float32_round_pack_canonical(pr, status);
2240  }
2241  
2242  static float64 QEMU_SOFTFLOAT_ATTR
2243  soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2244                  float_status *status)
2245  {
2246      FloatParts64 pa, pb, pc, *pr;
2247  
2248      float64_unpack_canonical(&pa, a, status);
2249      float64_unpack_canonical(&pb, b, status);
2250      float64_unpack_canonical(&pc, c, status);
2251      pr = parts_muladd(&pa, &pb, &pc, flags, status);
2252  
2253      return float64_round_pack_canonical(pr, status);
2254  }
2255  
2256  static bool force_soft_fma;
2257  
2258  float32 QEMU_FLATTEN
2259  float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2260  {
2261      union_float32 ua, ub, uc, ur;
2262  
2263      ua.s = xa;
2264      ub.s = xb;
2265      uc.s = xc;
2266  
2267      if (unlikely(!can_use_fpu(s))) {
2268          goto soft;
2269      }
2270      if (unlikely(flags & float_muladd_halve_result)) {
2271          goto soft;
2272      }
2273  
2274      float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2275      if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2276          goto soft;
2277      }
2278  
2279      if (unlikely(force_soft_fma)) {
2280          goto soft;
2281      }
2282  
2283      /*
2284       * When (a || b) == 0, there's no need to check for under/over flow,
2285       * since we know the addend is (normal || 0) and the product is 0.
2286       */
2287      if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2288          union_float32 up;
2289          bool prod_sign;
2290  
2291          prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2292          prod_sign ^= !!(flags & float_muladd_negate_product);
2293          up.s = float32_set_sign(float32_zero, prod_sign);
2294  
2295          if (flags & float_muladd_negate_c) {
2296              uc.h = -uc.h;
2297          }
2298          ur.h = up.h + uc.h;
2299      } else {
2300          union_float32 ua_orig = ua;
2301          union_float32 uc_orig = uc;
2302  
2303          if (flags & float_muladd_negate_product) {
2304              ua.h = -ua.h;
2305          }
2306          if (flags & float_muladd_negate_c) {
2307              uc.h = -uc.h;
2308          }
2309  
2310          ur.h = fmaf(ua.h, ub.h, uc.h);
2311  
2312          if (unlikely(f32_is_inf(ur))) {
2313              float_raise(float_flag_overflow, s);
2314          } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2315              ua = ua_orig;
2316              uc = uc_orig;
2317              goto soft;
2318          }
2319      }
2320      if (flags & float_muladd_negate_result) {
2321          return float32_chs(ur.s);
2322      }
2323      return ur.s;
2324  
2325   soft:
2326      return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2327  }
2328  
2329  float64 QEMU_FLATTEN
2330  float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2331  {
2332      union_float64 ua, ub, uc, ur;
2333  
2334      ua.s = xa;
2335      ub.s = xb;
2336      uc.s = xc;
2337  
2338      if (unlikely(!can_use_fpu(s))) {
2339          goto soft;
2340      }
2341      if (unlikely(flags & float_muladd_halve_result)) {
2342          goto soft;
2343      }
2344  
2345      float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2346      if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2347          goto soft;
2348      }
2349  
2350      if (unlikely(force_soft_fma)) {
2351          goto soft;
2352      }
2353  
2354      /*
2355       * When (a || b) == 0, there's no need to check for under/over flow,
2356       * since we know the addend is (normal || 0) and the product is 0.
2357       */
2358      if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2359          union_float64 up;
2360          bool prod_sign;
2361  
2362          prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2363          prod_sign ^= !!(flags & float_muladd_negate_product);
2364          up.s = float64_set_sign(float64_zero, prod_sign);
2365  
2366          if (flags & float_muladd_negate_c) {
2367              uc.h = -uc.h;
2368          }
2369          ur.h = up.h + uc.h;
2370      } else {
2371          union_float64 ua_orig = ua;
2372          union_float64 uc_orig = uc;
2373  
2374          if (flags & float_muladd_negate_product) {
2375              ua.h = -ua.h;
2376          }
2377          if (flags & float_muladd_negate_c) {
2378              uc.h = -uc.h;
2379          }
2380  
2381          ur.h = fma(ua.h, ub.h, uc.h);
2382  
2383          if (unlikely(f64_is_inf(ur))) {
2384              float_raise(float_flag_overflow, s);
2385      } else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
2386              ua = ua_orig;
2387              uc = uc_orig;
2388              goto soft;
2389          }
2390      }
2391      if (flags & float_muladd_negate_result) {
2392          return float64_chs(ur.s);
2393      }
2394      return ur.s;
2395  
2396   soft:
2397      return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2398  }
2399  
2400  float64 float64r32_muladd(float64 a, float64 b, float64 c,
2401                            int flags, float_status *status)
2402  {
2403      FloatParts64 pa, pb, pc, *pr;
2404  
2405      float64_unpack_canonical(&pa, a, status);
2406      float64_unpack_canonical(&pb, b, status);
2407      float64_unpack_canonical(&pc, c, status);
2408      pr = parts_muladd(&pa, &pb, &pc, flags, status);
2409  
2410      return float64r32_round_pack_canonical(pr, status);
2411  }
2412  
2413  bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2414                                        int flags, float_status *status)
2415  {
2416      FloatParts64 pa, pb, pc, *pr;
2417  
2418      bfloat16_unpack_canonical(&pa, a, status);
2419      bfloat16_unpack_canonical(&pb, b, status);
2420      bfloat16_unpack_canonical(&pc, c, status);
2421      pr = parts_muladd(&pa, &pb, &pc, flags, status);
2422  
2423      return bfloat16_round_pack_canonical(pr, status);
2424  }
2425  
2426  float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2427                                        int flags, float_status *status)
2428  {
2429      FloatParts128 pa, pb, pc, *pr;
2430  
2431      float128_unpack_canonical(&pa, a, status);
2432      float128_unpack_canonical(&pb, b, status);
2433      float128_unpack_canonical(&pc, c, status);
2434      pr = parts_muladd(&pa, &pb, &pc, flags, status);
2435  
2436      return float128_round_pack_canonical(pr, status);
2437  }
2438  
2439  /*
2440   * Division
2441   */
2442  
2443  float16 float16_div(float16 a, float16 b, float_status *status)
2444  {
2445      FloatParts64 pa, pb, *pr;
2446  
2447      float16_unpack_canonical(&pa, a, status);
2448      float16_unpack_canonical(&pb, b, status);
2449      pr = parts_div(&pa, &pb, status);
2450  
2451      return float16_round_pack_canonical(pr, status);
2452  }
2453  
2454  static float32 QEMU_SOFTFLOAT_ATTR
2455  soft_f32_div(float32 a, float32 b, float_status *status)
2456  {
2457      FloatParts64 pa, pb, *pr;
2458  
2459      float32_unpack_canonical(&pa, a, status);
2460      float32_unpack_canonical(&pb, b, status);
2461      pr = parts_div(&pa, &pb, status);
2462  
2463      return float32_round_pack_canonical(pr, status);
2464  }
2465  
2466  static float64 QEMU_SOFTFLOAT_ATTR
2467  soft_f64_div(float64 a, float64 b, float_status *status)
2468  {
2469      FloatParts64 pa, pb, *pr;
2470  
2471      float64_unpack_canonical(&pa, a, status);
2472      float64_unpack_canonical(&pb, b, status);
2473      pr = parts_div(&pa, &pb, status);
2474  
2475      return float64_round_pack_canonical(pr, status);
2476  }
2477  
2478  static float hard_f32_div(float a, float b)
2479  {
2480      return a / b;
2481  }
2482  
2483  static double hard_f64_div(double a, double b)
2484  {
2485      return a / b;
2486  }
2487  
2488  static bool f32_div_pre(union_float32 a, union_float32 b)
2489  {
2490      if (QEMU_HARDFLOAT_2F32_USE_FP) {
2491          return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2492                 fpclassify(b.h) == FP_NORMAL;
2493      }
2494      return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2495  }
2496  
2497  static bool f64_div_pre(union_float64 a, union_float64 b)
2498  {
2499      if (QEMU_HARDFLOAT_2F64_USE_FP) {
2500          return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2501                 fpclassify(b.h) == FP_NORMAL;
2502      }
2503      return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2504  }
2505  
2506  static bool f32_div_post(union_float32 a, union_float32 b)
2507  {
2508      if (QEMU_HARDFLOAT_2F32_USE_FP) {
2509          return fpclassify(a.h) != FP_ZERO;
2510      }
2511      return !float32_is_zero(a.s);
2512  }
2513  
2514  static bool f64_div_post(union_float64 a, union_float64 b)
2515  {
2516      if (QEMU_HARDFLOAT_2F64_USE_FP) {
2517          return fpclassify(a.h) != FP_ZERO;
2518      }
2519      return !float64_is_zero(a.s);
2520  }
2521  
2522  float32 QEMU_FLATTEN
2523  float32_div(float32 a, float32 b, float_status *s)
2524  {
2525      return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2526                          f32_div_pre, f32_div_post);
2527  }
2528  
2529  float64 QEMU_FLATTEN
2530  float64_div(float64 a, float64 b, float_status *s)
2531  {
2532      return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2533                          f64_div_pre, f64_div_post);
2534  }
2535  
2536  float64 float64r32_div(float64 a, float64 b, float_status *status)
2537  {
2538      FloatParts64 pa, pb, *pr;
2539  
2540      float64_unpack_canonical(&pa, a, status);
2541      float64_unpack_canonical(&pb, b, status);
2542      pr = parts_div(&pa, &pb, status);
2543  
2544      return float64r32_round_pack_canonical(pr, status);
2545  }
2546  
2547  bfloat16 QEMU_FLATTEN
2548  bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2549  {
2550      FloatParts64 pa, pb, *pr;
2551  
2552      bfloat16_unpack_canonical(&pa, a, status);
2553      bfloat16_unpack_canonical(&pb, b, status);
2554      pr = parts_div(&pa, &pb, status);
2555  
2556      return bfloat16_round_pack_canonical(pr, status);
2557  }
2558  
2559  float128 QEMU_FLATTEN
2560  float128_div(float128 a, float128 b, float_status *status)
2561  {
2562      FloatParts128 pa, pb, *pr;
2563  
2564      float128_unpack_canonical(&pa, a, status);
2565      float128_unpack_canonical(&pb, b, status);
2566      pr = parts_div(&pa, &pb, status);
2567  
2568      return float128_round_pack_canonical(pr, status);
2569  }
2570  
2571  floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2572  {
2573      FloatParts128 pa, pb, *pr;
2574  
2575      if (!floatx80_unpack_canonical(&pa, a, status) ||
2576          !floatx80_unpack_canonical(&pb, b, status)) {
2577          return floatx80_default_nan(status);
2578      }
2579  
2580      pr = parts_div(&pa, &pb, status);
2581      return floatx80_round_pack_canonical(pr, status);
2582  }
2583  
2584  /*
2585   * Remainder
2586   */
2587  
2588  float32 float32_rem(float32 a, float32 b, float_status *status)
2589  {
2590      FloatParts64 pa, pb, *pr;
2591  
2592      float32_unpack_canonical(&pa, a, status);
2593      float32_unpack_canonical(&pb, b, status);
2594      pr = parts_modrem(&pa, &pb, NULL, status);
2595  
2596      return float32_round_pack_canonical(pr, status);
2597  }
2598  
2599  float64 float64_rem(float64 a, float64 b, float_status *status)
2600  {
2601      FloatParts64 pa, pb, *pr;
2602  
2603      float64_unpack_canonical(&pa, a, status);
2604      float64_unpack_canonical(&pb, b, status);
2605      pr = parts_modrem(&pa, &pb, NULL, status);
2606  
2607      return float64_round_pack_canonical(pr, status);
2608  }
2609  
2610  float128 float128_rem(float128 a, float128 b, float_status *status)
2611  {
2612      FloatParts128 pa, pb, *pr;
2613  
2614      float128_unpack_canonical(&pa, a, status);
2615      float128_unpack_canonical(&pb, b, status);
2616      pr = parts_modrem(&pa, &pb, NULL, status);
2617  
2618      return float128_round_pack_canonical(pr, status);
2619  }
2620  
2621  /*
2622   * Returns the remainder of the extended double-precision floating-point value
2623   * `a' with respect to the corresponding value `b'.
2624   * If 'mod' is false, the operation is performed according to the IEC/IEEE
2625   * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, the
2626   * remainder is instead computed by truncating the quotient toward zero,
2627   * and *quotient is set to the low 64 bits of the absolute value of the
2628   * integer quotient.
2629   */
2630  floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2631                           uint64_t *quotient, float_status *status)
2632  {
2633      FloatParts128 pa, pb, *pr;
2634  
2635      *quotient = 0;
2636      if (!floatx80_unpack_canonical(&pa, a, status) ||
2637          !floatx80_unpack_canonical(&pb, b, status)) {
2638          return floatx80_default_nan(status);
2639      }
2640      pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2641  
2642      return floatx80_round_pack_canonical(pr, status);
2643  }
2644  
2645  floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2646  {
2647      uint64_t quotient;
2648      return floatx80_modrem(a, b, false, &quotient, status);
2649  }
2650  
2651  floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2652  {
2653      uint64_t quotient;
2654      return floatx80_modrem(a, b, true, &quotient, status);
2655  }
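
      /*
       * Callers that need the quotient bits use floatx80_modrem() directly.
       * A sketch of such a caller (hypothetical, not code from this file):
       *
       *     uint64_t quot;
       *     floatx80 rem = floatx80_modrem(st0, st1, true, &quot, status);
       *     // the low bits of quot can then feed e.g. x87 FPREM-style
       *     // condition flags
       */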
2656  
2657  /*
2658   * Float to Float conversions
2659   *
2660   * Returns the result of converting one float format to another. The
2661   * conversion is performed according to the IEC/IEEE Standard for
2662   * Binary Floating-Point Arithmetic.
2663   *
2664   * Usually this only needs to take care of raising invalid exceptions
2665   * and handling the conversion on NaNs.
2666   */
2667  
2668  static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2669  {
2670      switch (a->cls) {
2671      case float_class_snan:
2672          float_raise(float_flag_invalid_snan, s);
2673          /* fall through */
2674      case float_class_qnan:
2675          /*
2676           * There is no NaN in the destination format.  Raise Invalid
2677           * and return a zero with the sign of the input NaN.
2678           */
2679          float_raise(float_flag_invalid, s);
2680          a->cls = float_class_zero;
2681          break;
2682  
2683      case float_class_inf:
2684          /*
2685           * There is no Inf in the destination format.  Raise Invalid
2686           * and return the maximum normal with the correct sign.
2687           */
2688          float_raise(float_flag_invalid, s);
2689          a->cls = float_class_normal;
2690          a->exp = float16_params_ahp.exp_max;
2691          a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2692                                    float16_params_ahp.frac_size + 1);
2693          break;
2694  
2695      case float_class_normal:
2696      case float_class_zero:
2697          break;
2698  
2699      default:
2700          g_assert_not_reached();
2701      }
2702  }
2703  
2704  static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2705  {
2706      if (is_nan(a->cls)) {
2707          parts_return_nan(a, s);
2708      }
2709  }
2710  
2711  static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2712  {
2713      if (is_nan(a->cls)) {
2714          parts_return_nan(a, s);
2715      }
2716  }
2717  
2718  #define parts_float_to_float(P, S) \
2719      PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2720  
2721  static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2722                                          float_status *s)
2723  {
2724      a->cls = b->cls;
2725      a->sign = b->sign;
2726      a->exp = b->exp;
2727  
2728      if (a->cls == float_class_normal) {
2729          frac_truncjam(a, b);
2730      } else if (is_nan(a->cls)) {
2731          /* Discard the low bits of the NaN. */
2732          a->frac = b->frac_hi;
2733          parts_return_nan(a, s);
2734      }
2735  }
2736  
2737  static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2738                                         float_status *s)
2739  {
2740      a->cls = b->cls;
2741      a->sign = b->sign;
2742      a->exp = b->exp;
2743      frac_widen(a, b);
2744  
2745      if (is_nan(a->cls)) {
2746          parts_return_nan(a, s);
2747      }
2748  }
2749  
2750  float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2751  {
2752      const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2753      FloatParts64 p;
2754  
2755      float16a_unpack_canonical(&p, a, s, fmt16);
2756      parts_float_to_float(&p, s);
2757      return float32_round_pack_canonical(&p, s);
2758  }
2759  
2760  float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2761  {
2762      const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2763      FloatParts64 p;
2764  
2765      float16a_unpack_canonical(&p, a, s, fmt16);
2766      parts_float_to_float(&p, s);
2767      return float64_round_pack_canonical(&p, s);
2768  }
2769  
2770  float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2771  {
2772      FloatParts64 p;
2773      const FloatFmt *fmt;
2774  
2775      float32_unpack_canonical(&p, a, s);
2776      if (ieee) {
2777          parts_float_to_float(&p, s);
2778          fmt = &float16_params;
2779      } else {
2780          parts_float_to_ahp(&p, s);
2781          fmt = &float16_params_ahp;
2782      }
2783      return float16a_round_pack_canonical(&p, s, fmt);
2784  }
2785  
2786  static float64 QEMU_SOFTFLOAT_ATTR
2787  soft_float32_to_float64(float32 a, float_status *s)
2788  {
2789      FloatParts64 p;
2790  
2791      float32_unpack_canonical(&p, a, s);
2792      parts_float_to_float(&p, s);
2793      return float64_round_pack_canonical(&p, s);
2794  }
2795  
2796  float64 float32_to_float64(float32 a, float_status *s)
2797  {
2798      if (likely(float32_is_normal(a))) {
2799          /* Widening conversion can never produce inexact results.  */
2800          union_float32 uf;
2801          union_float64 ud;
2802          uf.s = a;
2803          ud.h = uf.h;
2804          return ud.s;
2805      } else if (float32_is_zero(a)) {
2806          return float64_set_sign(float64_zero, float32_is_neg(a));
2807      } else {
2808          return soft_float32_to_float64(a, s);
2809      }
2810  }
2811  
2812  float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2813  {
2814      FloatParts64 p;
2815      const FloatFmt *fmt;
2816  
2817      float64_unpack_canonical(&p, a, s);
2818      if (ieee) {
2819          parts_float_to_float(&p, s);
2820          fmt = &float16_params;
2821      } else {
2822          parts_float_to_ahp(&p, s);
2823          fmt = &float16_params_ahp;
2824      }
2825      return float16a_round_pack_canonical(&p, s, fmt);
2826  }
2827  
2828  float32 float64_to_float32(float64 a, float_status *s)
2829  {
2830      FloatParts64 p;
2831  
2832      float64_unpack_canonical(&p, a, s);
2833      parts_float_to_float(&p, s);
2834      return float32_round_pack_canonical(&p, s);
2835  }
2836  
2837  float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2838  {
2839      FloatParts64 p;
2840  
2841      bfloat16_unpack_canonical(&p, a, s);
2842      parts_float_to_float(&p, s);
2843      return float32_round_pack_canonical(&p, s);
2844  }
2845  
2846  float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2847  {
2848      FloatParts64 p;
2849  
2850      bfloat16_unpack_canonical(&p, a, s);
2851      parts_float_to_float(&p, s);
2852      return float64_round_pack_canonical(&p, s);
2853  }
2854  
2855  bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2856  {
2857      FloatParts64 p;
2858  
2859      float32_unpack_canonical(&p, a, s);
2860      parts_float_to_float(&p, s);
2861      return bfloat16_round_pack_canonical(&p, s);
2862  }
2863  
2864  bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2865  {
2866      FloatParts64 p;
2867  
2868      float64_unpack_canonical(&p, a, s);
2869      parts_float_to_float(&p, s);
2870      return bfloat16_round_pack_canonical(&p, s);
2871  }
2872  
2873  float32 float128_to_float32(float128 a, float_status *s)
2874  {
2875      FloatParts64 p64;
2876      FloatParts128 p128;
2877  
2878      float128_unpack_canonical(&p128, a, s);
2879      parts_float_to_float_narrow(&p64, &p128, s);
2880      return float32_round_pack_canonical(&p64, s);
2881  }
2882  
2883  float64 float128_to_float64(float128 a, float_status *s)
2884  {
2885      FloatParts64 p64;
2886      FloatParts128 p128;
2887  
2888      float128_unpack_canonical(&p128, a, s);
2889      parts_float_to_float_narrow(&p64, &p128, s);
2890      return float64_round_pack_canonical(&p64, s);
2891  }
2892  
2893  float128 float32_to_float128(float32 a, float_status *s)
2894  {
2895      FloatParts64 p64;
2896      FloatParts128 p128;
2897  
2898      float32_unpack_canonical(&p64, a, s);
2899      parts_float_to_float_widen(&p128, &p64, s);
2900      return float128_round_pack_canonical(&p128, s);
2901  }
2902  
2903  float128 float64_to_float128(float64 a, float_status *s)
2904  {
2905      FloatParts64 p64;
2906      FloatParts128 p128;
2907  
2908      float64_unpack_canonical(&p64, a, s);
2909      parts_float_to_float_widen(&p128, &p64, s);
2910      return float128_round_pack_canonical(&p128, s);
2911  }
2912  
2913  float32 floatx80_to_float32(floatx80 a, float_status *s)
2914  {
2915      FloatParts64 p64;
2916      FloatParts128 p128;
2917  
2918      if (floatx80_unpack_canonical(&p128, a, s)) {
2919          parts_float_to_float_narrow(&p64, &p128, s);
2920      } else {
2921          parts_default_nan(&p64, s);
2922      }
2923      return float32_round_pack_canonical(&p64, s);
2924  }
2925  
2926  float64 floatx80_to_float64(floatx80 a, float_status *s)
2927  {
2928      FloatParts64 p64;
2929      FloatParts128 p128;
2930  
2931      if (floatx80_unpack_canonical(&p128, a, s)) {
2932          parts_float_to_float_narrow(&p64, &p128, s);
2933      } else {
2934          parts_default_nan(&p64, s);
2935      }
2936      return float64_round_pack_canonical(&p64, s);
2937  }
2938  
2939  float128 floatx80_to_float128(floatx80 a, float_status *s)
2940  {
2941      FloatParts128 p;
2942  
2943      if (floatx80_unpack_canonical(&p, a, s)) {
2944          parts_float_to_float(&p, s);
2945      } else {
2946          parts_default_nan(&p, s);
2947      }
2948      return float128_round_pack_canonical(&p, s);
2949  }
2950  
2951  floatx80 float32_to_floatx80(float32 a, float_status *s)
2952  {
2953      FloatParts64 p64;
2954      FloatParts128 p128;
2955  
2956      float32_unpack_canonical(&p64, a, s);
2957      parts_float_to_float_widen(&p128, &p64, s);
2958      return floatx80_round_pack_canonical(&p128, s);
2959  }
2960  
2961  floatx80 float64_to_floatx80(float64 a, float_status *s)
2962  {
2963      FloatParts64 p64;
2964      FloatParts128 p128;
2965  
2966      float64_unpack_canonical(&p64, a, s);
2967      parts_float_to_float_widen(&p128, &p64, s);
2968      return floatx80_round_pack_canonical(&p128, s);
2969  }
2970  
2971  floatx80 float128_to_floatx80(float128 a, float_status *s)
2972  {
2973      FloatParts128 p;
2974  
2975      float128_unpack_canonical(&p, a, s);
2976      parts_float_to_float(&p, s);
2977      return floatx80_round_pack_canonical(&p, s);
2978  }
2979  
2980  /*
2981   * Round to integral value
2982   */
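      /*
       * Each helper below rounds its argument to an integral value in the
       * same floating-point format, applying s->float_rounding_mode and
       * raising status flags through the shared parts_round_to_int() worker.
       */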
2983  
2984  float16 float16_round_to_int(float16 a, float_status *s)
2985  {
2986      FloatParts64 p;
2987  
2988      float16_unpack_canonical(&p, a, s);
2989      parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2990      return float16_round_pack_canonical(&p, s);
2991  }
2992  
2993  float32 float32_round_to_int(float32 a, float_status *s)
2994  {
2995      FloatParts64 p;
2996  
2997      float32_unpack_canonical(&p, a, s);
2998      parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2999      return float32_round_pack_canonical(&p, s);
3000  }
3001  
3002  float64 float64_round_to_int(float64 a, float_status *s)
3003  {
3004      FloatParts64 p;
3005  
3006      float64_unpack_canonical(&p, a, s);
3007      parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
3008      return float64_round_pack_canonical(&p, s);
3009  }
3010  
3011  bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
3012  {
3013      FloatParts64 p;
3014  
3015      bfloat16_unpack_canonical(&p, a, s);
3016      parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
3017      return bfloat16_round_pack_canonical(&p, s);
3018  }
3019  
3020  float128 float128_round_to_int(float128 a, float_status *s)
3021  {
3022      FloatParts128 p;
3023  
3024      float128_unpack_canonical(&p, a, s);
3025      parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3026      return float128_round_pack_canonical(&p, s);
3027  }
3028  
3029  floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3030  {
3031      FloatParts128 p;
3032  
3033      if (!floatx80_unpack_canonical(&p, a, status)) {
3034          return floatx80_default_nan(status);
3035      }
3036  
3037      parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3038                         &floatx80_params[status->floatx80_rounding_precision]);
3039      return floatx80_round_pack_canonical(&p, status);
3040  }
3041  
3042  /*
3043   * Floating-point to signed integer conversions
3044   */
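      /*
       * The *_scalbn variants convert a * 2**scale: the result is rounded
       * with 'rmode' and, on overflow, saturated to the [MIN, MAX] bounds
       * passed to parts_float_to_sint(), raising float_flag_invalid.
       * The plain and _round_to_zero wrappers below all pass scale == 0.
       */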
3045  
3046  int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3047                                float_status *s)
3048  {
3049      FloatParts64 p;
3050  
3051      float16_unpack_canonical(&p, a, s);
3052      return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3053  }
3054  
3055  int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3056                                  float_status *s)
3057  {
3058      FloatParts64 p;
3059  
3060      float16_unpack_canonical(&p, a, s);
3061      return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3062  }
3063  
3064  int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3065                                  float_status *s)
3066  {
3067      FloatParts64 p;
3068  
3069      float16_unpack_canonical(&p, a, s);
3070      return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3071  }
3072  
3073  int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3074                                  float_status *s)
3075  {
3076      FloatParts64 p;
3077  
3078      float16_unpack_canonical(&p, a, s);
3079      return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3080  }
3081  
3082  int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3083                                  float_status *s)
3084  {
3085      FloatParts64 p;
3086  
3087      float32_unpack_canonical(&p, a, s);
3088      return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3089  }
3090  
3091  int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3092                                  float_status *s)
3093  {
3094      FloatParts64 p;
3095  
3096      float32_unpack_canonical(&p, a, s);
3097      return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3098  }
3099  
3100  int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3101                                  float_status *s)
3102  {
3103      FloatParts64 p;
3104  
3105      float32_unpack_canonical(&p, a, s);
3106      return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3107  }
3108  
3109  int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3110                                  float_status *s)
3111  {
3112      FloatParts64 p;
3113  
3114      float64_unpack_canonical(&p, a, s);
3115      return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3116  }
3117  
3118  int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3119                                  float_status *s)
3120  {
3121      FloatParts64 p;
3122  
3123      float64_unpack_canonical(&p, a, s);
3124      return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3125  }
3126  
3127  int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3128                                  float_status *s)
3129  {
3130      FloatParts64 p;
3131  
3132      float64_unpack_canonical(&p, a, s);
3133      return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3134  }
3135  
3136  int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3137                                 float_status *s)
3138  {
3139      FloatParts64 p;
3140  
3141      bfloat16_unpack_canonical(&p, a, s);
3142      return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3143  }
3144  
3145  int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3146                                   float_status *s)
3147  {
3148      FloatParts64 p;
3149  
3150      bfloat16_unpack_canonical(&p, a, s);
3151      return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3152  }
3153  
3154  int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3155                                   float_status *s)
3156  {
3157      FloatParts64 p;
3158  
3159      bfloat16_unpack_canonical(&p, a, s);
3160      return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3161  }
3162  
3163  int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3164                                   float_status *s)
3165  {
3166      FloatParts64 p;
3167  
3168      bfloat16_unpack_canonical(&p, a, s);
3169      return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3170  }
3171  
3172  static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3173                                          int scale, float_status *s)
3174  {
3175      FloatParts128 p;
3176  
3177      float128_unpack_canonical(&p, a, s);
3178      return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3179  }
3180  
3181  static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3182                                          int scale, float_status *s)
3183  {
3184      FloatParts128 p;
3185  
3186      float128_unpack_canonical(&p, a, s);
3187      return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3188  }
3189  
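      /*
       * parts_float_to_sint() only produces results up to 64 bits, so the
       * Int128 case is open-coded below: NaNs return UINT128_MAX, while
       * infinities and out-of-range values saturate to INT128_MIN or
       * INT128_MAX, raising float_flag_invalid in each case.
       */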
3190  static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3191                                          int scale, float_status *s)
3192  {
3193      int flags = 0;
3194      Int128 r;
3195      FloatParts128 p;
3196  
3197      float128_unpack_canonical(&p, a, s);
3198  
3199      switch (p.cls) {
3200      case float_class_snan:
3201          flags |= float_flag_invalid_snan;
3202          /* fall through */
3203      case float_class_qnan:
3204          flags |= float_flag_invalid;
3205          r = UINT128_MAX;
3206          break;
3207  
3208      case float_class_inf:
3209          flags = float_flag_invalid | float_flag_invalid_cvti;
3210          r = p.sign ? INT128_MIN : INT128_MAX;
3211          break;
3212  
3213      case float_class_zero:
3214          return int128_zero();
3215  
3216      case float_class_normal:
3217          if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3218              flags = float_flag_inexact;
3219          }
3220  
3221          if (p.exp < 127) {
3222              int shift = 127 - p.exp;
3223              r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3224              if (p.sign) {
3225                  r = int128_neg(r);
3226              }
3227          } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3228                     p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
3229              r = INT128_MIN;
3230          } else {
3231              flags = float_flag_invalid | float_flag_invalid_cvti;
3232              r = p.sign ? INT128_MIN : INT128_MAX;
3233          }
3234          break;
3235  
3236      default:
3237          g_assert_not_reached();
3238      }
3239  
3240      float_raise(flags, s);
3241      return r;
3242  }
3243  
3244  static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3245                                          int scale, float_status *s)
3246  {
3247      FloatParts128 p;
3248  
3249      if (!floatx80_unpack_canonical(&p, a, s)) {
3250          parts_default_nan(&p, s);
3251      }
3252      return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3253  }
3254  
3255  static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3256                                          int scale, float_status *s)
3257  {
3258      FloatParts128 p;
3259  
3260      if (!floatx80_unpack_canonical(&p, a, s)) {
3261          parts_default_nan(&p, s);
3262      }
3263      return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3264  }
3265  
3266  int8_t float16_to_int8(float16 a, float_status *s)
3267  {
3268      return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3269  }
3270  
3271  int16_t float16_to_int16(float16 a, float_status *s)
3272  {
3273      return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3274  }
3275  
3276  int32_t float16_to_int32(float16 a, float_status *s)
3277  {
3278      return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3279  }
3280  
3281  int64_t float16_to_int64(float16 a, float_status *s)
3282  {
3283      return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3284  }
3285  
3286  int16_t float32_to_int16(float32 a, float_status *s)
3287  {
3288      return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3289  }
3290  
3291  int32_t float32_to_int32(float32 a, float_status *s)
3292  {
3293      return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3294  }
3295  
3296  int64_t float32_to_int64(float32 a, float_status *s)
3297  {
3298      return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3299  }
3300  
3301  int16_t float64_to_int16(float64 a, float_status *s)
3302  {
3303      return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3304  }
3305  
3306  int32_t float64_to_int32(float64 a, float_status *s)
3307  {
3308      return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3309  }
3310  
3311  int64_t float64_to_int64(float64 a, float_status *s)
3312  {
3313      return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3314  }
3315  
3316  int32_t float128_to_int32(float128 a, float_status *s)
3317  {
3318      return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3319  }
3320  
3321  int64_t float128_to_int64(float128 a, float_status *s)
3322  {
3323      return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3324  }
3325  
3326  Int128 float128_to_int128(float128 a, float_status *s)
3327  {
3328      return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
3329  }
3330  
3331  int32_t floatx80_to_int32(floatx80 a, float_status *s)
3332  {
3333      return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3334  }
3335  
3336  int64_t floatx80_to_int64(floatx80 a, float_status *s)
3337  {
3338      return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3339  }
3340  
3341  int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3342  {
3343      return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3344  }
3345  
3346  int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3347  {
3348      return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3349  }
3350  
3351  int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3352  {
3353      return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3354  }
3355  
3356  int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3357  {
3358      return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3359  }
3360  
3361  int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3362  {
3363      return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3364  }
3365  
3366  int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3367  {
3368      return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3369  }
3370  
3371  int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3372  {
3373      return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3374  }
3375  
3376  int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3377  {
3378      return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3379  }
3380  
3381  int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3382  {
3383      return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3384  }
3385  
3386  int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3387  {
3388      return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3389  }
3390  
3391  int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3392  {
3393      return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3394  }
3395  
3396  Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3397  {
3398      return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3399  }
3400  
3401  int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3402  {
3403      return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3404  }
3405  
3406  int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3407  {
3408      return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3409  }
3410  
3411  int8_t bfloat16_to_int8(bfloat16 a, float_status *s)
3412  {
3413      return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3414  }
3415  
3416  int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3417  {
3418      return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3419  }
3420  
3421  int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3422  {
3423      return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3424  }
3425  
3426  int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3427  {
3428      return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3429  }
3430  
3431  int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s)
3432  {
3433      return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s);
3434  }
3435  
3436  int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3437  {
3438      return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3439  }
3440  
3441  int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3442  {
3443      return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3444  }
3445  
3446  int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3447  {
3448      return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3449  }
3450  
3451  int32_t float64_to_int32_modulo(float64 a, FloatRoundMode rmode,
3452                                  float_status *s)
3453  {
3454      FloatParts64 p;
3455  
3456      float64_unpack_canonical(&p, a, s);
3457      return parts_float_to_sint_modulo(&p, rmode, 31, s);
3458  }
3459  
3460  int64_t float64_to_int64_modulo(float64 a, FloatRoundMode rmode,
3461                                  float_status *s)
3462  {
3463      FloatParts64 p;
3464  
3465      float64_unpack_canonical(&p, a, s);
3466      return parts_float_to_sint_modulo(&p, rmode, 63, s);
3467  }
3468  
3469  /*
3470   * Floating-point to unsigned integer conversions
3471   */
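      /*
       * As for the signed conversions, but via parts_float_to_uint():
       * results are clamped to [0, MAX], and an input that is a NaN or
       * would round to a negative value raises float_flag_invalid.
       */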
3472  
3473  uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3474                                  float_status *s)
3475  {
3476      FloatParts64 p;
3477  
3478      float16_unpack_canonical(&p, a, s);
3479      return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3480  }
3481  
3482  uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3483                                    float_status *s)
3484  {
3485      FloatParts64 p;
3486  
3487      float16_unpack_canonical(&p, a, s);
3488      return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3489  }
3490  
3491  uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3492                                    float_status *s)
3493  {
3494      FloatParts64 p;
3495  
3496      float16_unpack_canonical(&p, a, s);
3497      return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3498  }
3499  
3500  uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3501                                    float_status *s)
3502  {
3503      FloatParts64 p;
3504  
3505      float16_unpack_canonical(&p, a, s);
3506      return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3507  }
3508  
3509  uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3510                                    float_status *s)
3511  {
3512      FloatParts64 p;
3513  
3514      float32_unpack_canonical(&p, a, s);
3515      return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3516  }
3517  
3518  uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3519                                    float_status *s)
3520  {
3521      FloatParts64 p;
3522  
3523      float32_unpack_canonical(&p, a, s);
3524      return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3525  }
3526  
3527  uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3528                                    float_status *s)
3529  {
3530      FloatParts64 p;
3531  
3532      float32_unpack_canonical(&p, a, s);
3533      return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3534  }
3535  
3536  uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3537                                    float_status *s)
3538  {
3539      FloatParts64 p;
3540  
3541      float64_unpack_canonical(&p, a, s);
3542      return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3543  }
3544  
3545  uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3546                                    float_status *s)
3547  {
3548      FloatParts64 p;
3549  
3550      float64_unpack_canonical(&p, a, s);
3551      return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3552  }
3553  
3554  uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3555                                    float_status *s)
3556  {
3557      FloatParts64 p;
3558  
3559      float64_unpack_canonical(&p, a, s);
3560      return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3561  }
3562  
3563  uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode,
3564                                   int scale, float_status *s)
3565  {
3566      FloatParts64 p;
3567  
3568      bfloat16_unpack_canonical(&p, a, s);
3569      return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3570  }
3571  
3572  uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3573                                     int scale, float_status *s)
3574  {
3575      FloatParts64 p;
3576  
3577      bfloat16_unpack_canonical(&p, a, s);
3578      return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3579  }
3580  
3581  uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3582                                     int scale, float_status *s)
3583  {
3584      FloatParts64 p;
3585  
3586      bfloat16_unpack_canonical(&p, a, s);
3587      return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3588  }
3589  
3590  uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3591                                     int scale, float_status *s)
3592  {
3593      FloatParts64 p;
3594  
3595      bfloat16_unpack_canonical(&p, a, s);
3596      return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3597  }
3598  
3599  static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3600                                            int scale, float_status *s)
3601  {
3602      FloatParts128 p;
3603  
3604      float128_unpack_canonical(&p, a, s);
3605      return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3606  }
3607  
3608  static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3609                                            int scale, float_status *s)
3610  {
3611      FloatParts128 p;
3612  
3613      float128_unpack_canonical(&p, a, s);
3614      return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3615  }
3616  
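      /*
       * Open-coded like float128_to_int128_scalbn() above, but clamping
       * to [0, UINT128_MAX], with any input that rounds to a negative
       * value treated as invalid.
       */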
3617  static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3618                                           int scale, float_status *s)
3619  {
3620      int flags = 0;
3621      Int128 r;
3622      FloatParts128 p;
3623  
3624      float128_unpack_canonical(&p, a, s);
3625  
3626      switch (p.cls) {
3627      case float_class_snan:
3628          flags |= float_flag_invalid_snan;
3629          /* fall through */
3630      case float_class_qnan:
3631          flags |= float_flag_invalid;
3632          r = UINT128_MAX;
3633          break;
3634  
3635      case float_class_inf:
3636          flags = float_flag_invalid | float_flag_invalid_cvti;
3637          r = p.sign ? int128_zero() : UINT128_MAX;
3638          break;
3639  
3640      case float_class_zero:
3641          return int128_zero();
3642  
3643      case float_class_normal:
3644          if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3645              flags = float_flag_inexact;
3646              if (p.cls == float_class_zero) {
3647                  r = int128_zero();
3648                  break;
3649              }
3650          }
3651  
3652          if (p.sign) {
3653              flags = float_flag_invalid | float_flag_invalid_cvti;
3654              r = int128_zero();
3655          } else if (p.exp <= 127) {
3656              int shift = 127 - p.exp;
3657              r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3658          } else {
3659              flags = float_flag_invalid | float_flag_invalid_cvti;
3660              r = UINT128_MAX;
3661          }
3662          break;
3663  
3664      default:
3665          g_assert_not_reached();
3666      }
3667  
3668      float_raise(flags, s);
3669      return r;
3670  }
3671  
3672  uint8_t float16_to_uint8(float16 a, float_status *s)
3673  {
3674      return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3675  }
3676  
3677  uint16_t float16_to_uint16(float16 a, float_status *s)
3678  {
3679      return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3680  }
3681  
3682  uint32_t float16_to_uint32(float16 a, float_status *s)
3683  {
3684      return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3685  }
3686  
3687  uint64_t float16_to_uint64(float16 a, float_status *s)
3688  {
3689      return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3690  }
3691  
3692  uint16_t float32_to_uint16(float32 a, float_status *s)
3693  {
3694      return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3695  }
3696  
3697  uint32_t float32_to_uint32(float32 a, float_status *s)
3698  {
3699      return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3700  }
3701  
3702  uint64_t float32_to_uint64(float32 a, float_status *s)
3703  {
3704      return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3705  }
3706  
3707  uint16_t float64_to_uint16(float64 a, float_status *s)
3708  {
3709      return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3710  }
3711  
3712  uint32_t float64_to_uint32(float64 a, float_status *s)
3713  {
3714      return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3715  }
3716  
3717  uint64_t float64_to_uint64(float64 a, float_status *s)
3718  {
3719      return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3720  }
3721  
3722  uint32_t float128_to_uint32(float128 a, float_status *s)
3723  {
3724      return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3725  }
3726  
3727  uint64_t float128_to_uint64(float128 a, float_status *s)
3728  {
3729      return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3730  }
3731  
3732  Int128 float128_to_uint128(float128 a, float_status *s)
3733  {
3734      return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3735  }
3736  
3737  uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3738  {
3739      return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3740  }
3741  
3742  uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3743  {
3744      return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3745  }
3746  
3747  uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3748  {
3749      return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3750  }
3751  
3752  uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3753  {
3754      return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3755  }
3756  
3757  uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3758  {
3759      return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3760  }
3761  
3762  uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3763  {
3764      return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3765  }
3766  
3767  uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3768  {
3769      return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3770  }
3771  
3772  uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3773  {
3774      return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3775  }
3776  
3777  uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3778  {
3779      return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3780  }
3781  
3782  uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3783  {
3784      return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3785  }
3786  
3787  uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3788  {
3789      return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3790  }
3791  
3792  Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3793  {
3794      return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3795  }
3796  
3797  uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s)
3798  {
3799      return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3800  }
3801  
3802  uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3803  {
3804      return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3805  }
3806  
3807  uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3808  {
3809      return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3810  }
3811  
3812  uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3813  {
3814      return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3815  }
3816  
3817  uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s)
3818  {
3819      return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s);
3820  }
3821  
3822  uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3823  {
3824      return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3825  }
3826  
3827  uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3828  {
3829      return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3830  }
3831  
3832  uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3833  {
3834      return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3835  }
3836  
3837  /*
3838   * Signed integer to floating-point conversions
3839   */
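      /*
       * These conversions are exact whenever the magnitude fits in the
       * destination fraction; otherwise the result is rounded according
       * to status->float_rounding_mode. The *_scalbn variants convert
       * a * 2**scale.
       */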
3840  
3841  float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3842  {
3843      FloatParts64 p;
3844  
3845      parts_sint_to_float(&p, a, scale, status);
3846      return float16_round_pack_canonical(&p, status);
3847  }
3848  
3849  float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3850  {
3851      return int64_to_float16_scalbn(a, scale, status);
3852  }
3853  
3854  float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3855  {
3856      return int64_to_float16_scalbn(a, scale, status);
3857  }
3858  
3859  float16 int64_to_float16(int64_t a, float_status *status)
3860  {
3861      return int64_to_float16_scalbn(a, 0, status);
3862  }
3863  
3864  float16 int32_to_float16(int32_t a, float_status *status)
3865  {
3866      return int64_to_float16_scalbn(a, 0, status);
3867  }
3868  
3869  float16 int16_to_float16(int16_t a, float_status *status)
3870  {
3871      return int64_to_float16_scalbn(a, 0, status);
3872  }
3873  
3874  float16 int8_to_float16(int8_t a, float_status *status)
3875  {
3876      return int64_to_float16_scalbn(a, 0, status);
3877  }
3878  
3879  float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3880  {
3881      FloatParts64 p;
3882  
3883      /* Without scaling, there are no overflow concerns. */
3884      if (likely(scale == 0) && can_use_fpu(status)) {
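              /*
               * Hardfloat fast path: the host's implicit int64_t -> float
               * conversion does the rounding, which is valid because
               * can_use_fpu() requires round-to-nearest-even with the
               * inexact flag already set.
               */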
3885          union_float32 ur;
3886          ur.h = a;
3887          return ur.s;
3888      }
3889  
3890      parts64_sint_to_float(&p, a, scale, status);
3891      return float32_round_pack_canonical(&p, status);
3892  }
3893  
3894  float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3895  {
3896      return int64_to_float32_scalbn(a, scale, status);
3897  }
3898  
3899  float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3900  {
3901      return int64_to_float32_scalbn(a, scale, status);
3902  }
3903  
3904  float32 int64_to_float32(int64_t a, float_status *status)
3905  {
3906      return int64_to_float32_scalbn(a, 0, status);
3907  }
3908  
3909  float32 int32_to_float32(int32_t a, float_status *status)
3910  {
3911      return int64_to_float32_scalbn(a, 0, status);
3912  }
3913  
3914  float32 int16_to_float32(int16_t a, float_status *status)
3915  {
3916      return int64_to_float32_scalbn(a, 0, status);
3917  }
3918  
3919  float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3920  {
3921      FloatParts64 p;
3922  
3923      /* Without scaling, there are no overflow concerns. */
3924      if (likely(scale == 0) && can_use_fpu(status)) {
3925          union_float64 ur;
3926          ur.h = a;
3927          return ur.s;
3928      }
3929  
3930      parts_sint_to_float(&p, a, scale, status);
3931      return float64_round_pack_canonical(&p, status);
3932  }
3933  
3934  float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3935  {
3936      return int64_to_float64_scalbn(a, scale, status);
3937  }
3938  
3939  float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3940  {
3941      return int64_to_float64_scalbn(a, scale, status);
3942  }
3943  
3944  float64 int64_to_float64(int64_t a, float_status *status)
3945  {
3946      return int64_to_float64_scalbn(a, 0, status);
3947  }
3948  
3949  float64 int32_to_float64(int32_t a, float_status *status)
3950  {
3951      return int64_to_float64_scalbn(a, 0, status);
3952  }
3953  
3954  float64 int16_to_float64(int16_t a, float_status *status)
3955  {
3956      return int64_to_float64_scalbn(a, 0, status);
3957  }
3958  
3959  bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3960  {
3961      FloatParts64 p;
3962  
3963      parts_sint_to_float(&p, a, scale, status);
3964      return bfloat16_round_pack_canonical(&p, status);
3965  }
3966  
3967  bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3968  {
3969      return int64_to_bfloat16_scalbn(a, scale, status);
3970  }
3971  
3972  bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3973  {
3974      return int64_to_bfloat16_scalbn(a, scale, status);
3975  }
3976  
3977  bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status)
3978  {
3979      return int64_to_bfloat16_scalbn(a, scale, status);
3980  }
3981  
3982  bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3983  {
3984      return int64_to_bfloat16_scalbn(a, 0, status);
3985  }
3986  
3987  bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3988  {
3989      return int64_to_bfloat16_scalbn(a, 0, status);
3990  }
3991  
3992  bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3993  {
3994      return int64_to_bfloat16_scalbn(a, 0, status);
3995  }
3996  
3997  bfloat16 int8_to_bfloat16(int8_t a, float_status *status)
3998  {
3999      return int64_to_bfloat16_scalbn(a, 0, status);
4000  }
4001  
4002  float128 int128_to_float128(Int128 a, float_status *status)
4003  {
4004      FloatParts128 p = { };
4005      int shift;
4006  
4007      if (int128_nz(a)) {
4008          p.cls = float_class_normal;
4009          if (!int128_nonneg(a)) {
4010              p.sign = true;
4011              a = int128_neg(a);
4012          }
4013  
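              /*
               * Normalize the magnitude: count leading zeros across the
               * full 128 bits, shift the most significant set bit up to
               * frac_hi bit 63, and set the exponent to match.
               */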
4014          shift = clz64(int128_gethi(a));
4015          if (shift == 64) {
4016              shift += clz64(int128_getlo(a));
4017          }
4018  
4019          p.exp = 127 - shift;
4020          a = int128_lshift(a, shift);
4021  
4022          p.frac_hi = int128_gethi(a);
4023          p.frac_lo = int128_getlo(a);
4024      } else {
4025          p.cls = float_class_zero;
4026      }
4027  
4028      return float128_round_pack_canonical(&p, status);
4029  }
4030  
4031  float128 int64_to_float128(int64_t a, float_status *status)
4032  {
4033      FloatParts128 p;
4034  
4035      parts_sint_to_float(&p, a, 0, status);
4036      return float128_round_pack_canonical(&p, status);
4037  }
4038  
4039  float128 int32_to_float128(int32_t a, float_status *status)
4040  {
4041      return int64_to_float128(a, status);
4042  }
4043  
4044  floatx80 int64_to_floatx80(int64_t a, float_status *status)
4045  {
4046      FloatParts128 p;
4047  
4048      parts_sint_to_float(&p, a, 0, status);
4049      return floatx80_round_pack_canonical(&p, status);
4050  }
4051  
4052  floatx80 int32_to_floatx80(int32_t a, float_status *status)
4053  {
4054      return int64_to_floatx80(a, status);
4055  }
4056  
4057  /*
4058   * Unsigned integer to floating-point conversions
4059   */
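      /*
       * These mirror the signed conversions above; with no sign to
       * handle, only rounding of over-wide values can occur.
       */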
4060  
4061  float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
4062  {
4063      FloatParts64 p;
4064  
4065      parts_uint_to_float(&p, a, scale, status);
4066      return float16_round_pack_canonical(&p, status);
4067  }
4068  
4069  float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
4070  {
4071      return uint64_to_float16_scalbn(a, scale, status);
4072  }
4073  
4074  float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
4075  {
4076      return uint64_to_float16_scalbn(a, scale, status);
4077  }
4078  
4079  float16 uint64_to_float16(uint64_t a, float_status *status)
4080  {
4081      return uint64_to_float16_scalbn(a, 0, status);
4082  }
4083  
4084  float16 uint32_to_float16(uint32_t a, float_status *status)
4085  {
4086      return uint64_to_float16_scalbn(a, 0, status);
4087  }
4088  
4089  float16 uint16_to_float16(uint16_t a, float_status *status)
4090  {
4091      return uint64_to_float16_scalbn(a, 0, status);
4092  }
4093  
4094  float16 uint8_to_float16(uint8_t a, float_status *status)
4095  {
4096      return uint64_to_float16_scalbn(a, 0, status);
4097  }
4098  
4099  float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
4100  {
4101      FloatParts64 p;
4102  
4103      /* Without scaling, there are no overflow concerns. */
4104      if (likely(scale == 0) && can_use_fpu(status)) {
4105          union_float32 ur;
4106          ur.h = a;
4107          return ur.s;
4108      }
4109  
4110      parts_uint_to_float(&p, a, scale, status);
4111      return float32_round_pack_canonical(&p, status);
4112  }
4113  
4114  float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4115  {
4116      return uint64_to_float32_scalbn(a, scale, status);
4117  }
4118  
4119  float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4120  {
4121      return uint64_to_float32_scalbn(a, scale, status);
4122  }
4123  
4124  float32 uint64_to_float32(uint64_t a, float_status *status)
4125  {
4126      return uint64_to_float32_scalbn(a, 0, status);
4127  }
4128  
4129  float32 uint32_to_float32(uint32_t a, float_status *status)
4130  {
4131      return uint64_to_float32_scalbn(a, 0, status);
4132  }
4133  
4134  float32 uint16_to_float32(uint16_t a, float_status *status)
4135  {
4136      return uint64_to_float32_scalbn(a, 0, status);
4137  }
4138  
4139  float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
4140  {
4141      FloatParts64 p;
4142  
4143      /* Without scaling, there are no overflow concerns. */
4144      if (likely(scale == 0) && can_use_fpu(status)) {
4145          union_float64 ur;
4146          ur.h = a;
4147          return ur.s;
4148      }
4149  
4150      parts_uint_to_float(&p, a, scale, status);
4151      return float64_round_pack_canonical(&p, status);
4152  }
4153  
4154  float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4155  {
4156      return uint64_to_float64_scalbn(a, scale, status);
4157  }
4158  
4159  float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4160  {
4161      return uint64_to_float64_scalbn(a, scale, status);
4162  }
4163  
4164  float64 uint64_to_float64(uint64_t a, float_status *status)
4165  {
4166      return uint64_to_float64_scalbn(a, 0, status);
4167  }
4168  
4169  float64 uint32_to_float64(uint32_t a, float_status *status)
4170  {
4171      return uint64_to_float64_scalbn(a, 0, status);
4172  }
4173  
4174  float64 uint16_to_float64(uint16_t a, float_status *status)
4175  {
4176      return uint64_to_float64_scalbn(a, 0, status);
4177  }
4178  
4179  bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4180  {
4181      FloatParts64 p;
4182  
4183      parts_uint_to_float(&p, a, scale, status);
4184      return bfloat16_round_pack_canonical(&p, status);
4185  }
4186  
4187  bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4188  {
4189      return uint64_to_bfloat16_scalbn(a, scale, status);
4190  }
4191  
4192  bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4193  {
4194      return uint64_to_bfloat16_scalbn(a, scale, status);
4195  }
4196  
4197  bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status)
4198  {
4199      return uint64_to_bfloat16_scalbn(a, scale, status);
4200  }
4201  
4202  bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4203  {
4204      return uint64_to_bfloat16_scalbn(a, 0, status);
4205  }
4206  
4207  bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4208  {
4209      return uint64_to_bfloat16_scalbn(a, 0, status);
4210  }
4211  
4212  bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4213  {
4214      return uint64_to_bfloat16_scalbn(a, 0, status);
4215  }
4216  
4217  bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status)
4218  {
4219      return uint64_to_bfloat16_scalbn(a, 0, status);
4220  }
4221  
4222  float128 uint64_to_float128(uint64_t a, float_status *status)
4223  {
4224      FloatParts128 p;
4225  
4226      parts_uint_to_float(&p, a, 0, status);
4227      return float128_round_pack_canonical(&p, status);
4228  }
4229  
4230  float128 uint128_to_float128(Int128 a, float_status *status)
4231  {
4232      FloatParts128 p = { };
4233      int shift;
4234  
4235      if (int128_nz(a)) {
4236          p.cls = float_class_normal;
4237  
4238          shift = clz64(int128_gethi(a));
4239          if (shift == 64) {
4240              shift += clz64(int128_getlo(a));
4241          }
4242  
4243          p.exp = 127 - shift;
4244          a = int128_lshift(a, shift);
4245  
4246          p.frac_hi = int128_gethi(a);
4247          p.frac_lo = int128_getlo(a);
4248      } else {
4249          p.cls = float_class_zero;
4250      }
4251  
4252      return float128_round_pack_canonical(&p, status);
4253  }
4254  
4255  /*
4256   * Minimum and maximum
4257   */
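      /*
       * The 'flags' argument selects the variant: minmax_ismin computes a
       * minimum rather than a maximum, minmax_isnum requests the IEEE
       * 754-2008 minNum/maxNum treatment of quiet NaNs, minmax_ismag
       * compares magnitudes, and minmax_isnumber requests the 754-2019
       * minimumNumber/maximumNumber behaviour (see MINMAX_2 below).
       */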
4258  
4259  static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4260  {
4261      FloatParts64 pa, pb, *pr;
4262  
4263      float16_unpack_canonical(&pa, a, s);
4264      float16_unpack_canonical(&pb, b, s);
4265      pr = parts_minmax(&pa, &pb, s, flags);
4266  
4267      return float16_round_pack_canonical(pr, s);
4268  }
4269  
4270  static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4271                                  float_status *s, int flags)
4272  {
4273      FloatParts64 pa, pb, *pr;
4274  
4275      bfloat16_unpack_canonical(&pa, a, s);
4276      bfloat16_unpack_canonical(&pb, b, s);
4277      pr = parts_minmax(&pa, &pb, s, flags);
4278  
4279      return bfloat16_round_pack_canonical(pr, s);
4280  }
4281  
4282  static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4283  {
4284      FloatParts64 pa, pb, *pr;
4285  
4286      float32_unpack_canonical(&pa, a, s);
4287      float32_unpack_canonical(&pb, b, s);
4288      pr = parts_minmax(&pa, &pb, s, flags);
4289  
4290      return float32_round_pack_canonical(pr, s);
4291  }
4292  
4293  static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4294  {
4295      FloatParts64 pa, pb, *pr;
4296  
4297      float64_unpack_canonical(&pa, a, s);
4298      float64_unpack_canonical(&pb, b, s);
4299      pr = parts_minmax(&pa, &pb, s, flags);
4300  
4301      return float64_round_pack_canonical(pr, s);
4302  }
4303  
4304  static float128 float128_minmax(float128 a, float128 b,
4305                                  float_status *s, int flags)
4306  {
4307      FloatParts128 pa, pb, *pr;
4308  
4309      float128_unpack_canonical(&pa, a, s);
4310      float128_unpack_canonical(&pb, b, s);
4311      pr = parts_minmax(&pa, &pb, s, flags);
4312  
4313      return float128_round_pack_canonical(pr, s);
4314  }
4315  
4316  #define MINMAX_1(type, name, flags) \
4317      type type##_##name(type a, type b, float_status *s) \
4318      { return type##_minmax(a, b, s, flags); }
4319  
4320  #define MINMAX_2(type) \
4321      MINMAX_1(type, max, 0)                                                \
4322      MINMAX_1(type, maxnum, minmax_isnum)                                  \
4323      MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
4324      MINMAX_1(type, maximum_number, minmax_isnumber)                       \
4325      MINMAX_1(type, min, minmax_ismin)                                     \
4326      MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
4327      MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4328      MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \
4329  
4330  MINMAX_2(float16)
4331  MINMAX_2(bfloat16)
4332  MINMAX_2(float32)
4333  MINMAX_2(float64)
4334  MINMAX_2(float128)
4335  
4336  #undef MINMAX_1
4337  #undef MINMAX_2
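      /*
       * Thus MINMAX_2(float32) defines float32_max(), float32_maxnum(),
       * float32_maxnummag(), float32_maximum_number(), float32_min(),
       * float32_minnum(), float32_minnummag() and float32_minimum_number(),
       * each taking (a, b, status).
       */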
4338  
4339  /*
4340   * Floating point compare
4341   */
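      /*
       * Comparisons return a FloatRelation: less, equal, greater or
       * unordered. The plain versions signal float_flag_invalid for any
       * NaN operand; the _quiet versions do so only for signaling NaNs.
       */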
4342  
4343  static FloatRelation QEMU_FLATTEN
4344  float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4345  {
4346      FloatParts64 pa, pb;
4347  
4348      float16_unpack_canonical(&pa, a, s);
4349      float16_unpack_canonical(&pb, b, s);
4350      return parts_compare(&pa, &pb, s, is_quiet);
4351  }
4352  
4353  FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4354  {
4355      return float16_do_compare(a, b, s, false);
4356  }
4357  
4358  FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4359  {
4360      return float16_do_compare(a, b, s, true);
4361  }
4362  
4363  static FloatRelation QEMU_SOFTFLOAT_ATTR
4364  float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4365  {
4366      FloatParts64 pa, pb;
4367  
4368      float32_unpack_canonical(&pa, a, s);
4369      float32_unpack_canonical(&pb, b, s);
4370      return parts_compare(&pa, &pb, s, is_quiet);
4371  }
4372  
4373  static FloatRelation QEMU_FLATTEN
float32_hs_compare(float32 xa,float32 xb,float_status * s,bool is_quiet)4374  float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
4375  {
4376      union_float32 ua, ub;
4377  
4378      ua.s = xa;
4379      ub.s = xb;
4380  
4381      if (QEMU_NO_HARDFLOAT) {
4382          goto soft;
4383      }
4384  
4385      float32_input_flush2(&ua.s, &ub.s, s);
4386      if (isgreaterequal(ua.h, ub.h)) {
4387          if (isgreater(ua.h, ub.h)) {
4388              return float_relation_greater;
4389          }
4390          return float_relation_equal;
4391      }
4392      if (likely(isless(ua.h, ub.h))) {
4393          return float_relation_less;
4394      }
4395      /*
4396       * The only condition remaining is unordered.
4397       * Fall through to set flags.
4398       */
4399   soft:
4400      return float32_do_compare(ua.s, ub.s, s, is_quiet);
4401  }
4402  
float32_compare(float32 a,float32 b,float_status * s)4403  FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4404  {
4405      return float32_hs_compare(a, b, s, false);
4406  }
4407  
float32_compare_quiet(float32 a,float32 b,float_status * s)4408  FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4409  {
4410      return float32_hs_compare(a, b, s, true);
4411  }
4412  
4413  static FloatRelation QEMU_SOFTFLOAT_ATTR
float64_do_compare(float64 a,float64 b,float_status * s,bool is_quiet)4414  float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4415  {
4416      FloatParts64 pa, pb;
4417  
4418      float64_unpack_canonical(&pa, a, s);
4419      float64_unpack_canonical(&pb, b, s);
4420      return parts_compare(&pa, &pb, s, is_quiet);
4421  }
4422  
4423  static FloatRelation QEMU_FLATTEN
float64_hs_compare(float64 xa,float64 xb,float_status * s,bool is_quiet)4424  float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4425  {
4426      union_float64 ua, ub;
4427  
4428      ua.s = xa;
4429      ub.s = xb;
4430  
4431      if (QEMU_NO_HARDFLOAT) {
4432          goto soft;
4433      }
4434  
4435      float64_input_flush2(&ua.s, &ub.s, s);
4436      if (isgreaterequal(ua.h, ub.h)) {
4437          if (isgreater(ua.h, ub.h)) {
4438              return float_relation_greater;
4439          }
4440          return float_relation_equal;
4441      }
4442      if (likely(isless(ua.h, ub.h))) {
4443          return float_relation_less;
4444      }
4445      /*
4446       * The only condition remaining is unordered.
4447       * Fall through to set flags.
4448       */
4449   soft:
4450      return float64_do_compare(ua.s, ub.s, s, is_quiet);
4451  }
4452  
float64_compare(float64 a,float64 b,float_status * s)4453  FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4454  {
4455      return float64_hs_compare(a, b, s, false);
4456  }
4457  
float64_compare_quiet(float64 a,float64 b,float_status * s)4458  FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4459  {
4460      return float64_hs_compare(a, b, s, true);
4461  }
4462  
4463  static FloatRelation QEMU_FLATTEN
bfloat16_do_compare(bfloat16 a,bfloat16 b,float_status * s,bool is_quiet)4464  bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4465  {
4466      FloatParts64 pa, pb;
4467  
4468      bfloat16_unpack_canonical(&pa, a, s);
4469      bfloat16_unpack_canonical(&pb, b, s);
4470      return parts_compare(&pa, &pb, s, is_quiet);
4471  }
4472  
bfloat16_compare(bfloat16 a,bfloat16 b,float_status * s)4473  FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4474  {
4475      return bfloat16_do_compare(a, b, s, false);
4476  }
4477  
bfloat16_compare_quiet(bfloat16 a,bfloat16 b,float_status * s)4478  FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4479  {
4480      return bfloat16_do_compare(a, b, s, true);
4481  }
4482  
4483  static FloatRelation QEMU_FLATTEN
float128_do_compare(float128 a,float128 b,float_status * s,bool is_quiet)4484  float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4485  {
4486      FloatParts128 pa, pb;
4487  
4488      float128_unpack_canonical(&pa, a, s);
4489      float128_unpack_canonical(&pb, b, s);
4490      return parts_compare(&pa, &pb, s, is_quiet);
4491  }
4492  
float128_compare(float128 a,float128 b,float_status * s)4493  FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4494  {
4495      return float128_do_compare(a, b, s, false);
4496  }
4497  
float128_compare_quiet(float128 a,float128 b,float_status * s)4498  FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4499  {
4500      return float128_do_compare(a, b, s, true);
4501  }
4502  
4503  static FloatRelation QEMU_FLATTEN
floatx80_do_compare(floatx80 a,floatx80 b,float_status * s,bool is_quiet)4504  floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4505  {
4506      FloatParts128 pa, pb;
4507  
4508      if (!floatx80_unpack_canonical(&pa, a, s) ||
4509          !floatx80_unpack_canonical(&pb, b, s)) {
4510          return float_relation_unordered;
4511      }
4512      return parts_compare(&pa, &pb, s, is_quiet);
4513  }
4514  
floatx80_compare(floatx80 a,floatx80 b,float_status * s)4515  FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4516  {
4517      return floatx80_do_compare(a, b, s, false);
4518  }
4519  
floatx80_compare_quiet(floatx80 a,floatx80 b,float_status * s)4520  FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4521  {
4522      return floatx80_do_compare(a, b, s, true);
4523  }
4524  
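/*
 * Illustrative sketch (not part of the original source): the "_quiet"
 * entry points raise float_flag_invalid only for signaling NaN inputs,
 * while the plain compare raises it for any NaN operand; both report
 * NaN comparisons as unordered.
 */
#if 0 /* documentation-only example */
static void example_compare_quiet(float_status *s)
{
    float64 qnan = float64_default_nan(s);

    /* Unordered; no invalid-operation flag raised for a quiet NaN. */
    FloatRelation r1 = float64_compare_quiet(qnan, float64_one, s);
    /* Also unordered, but float_flag_invalid is raised. */
    FloatRelation r2 = float64_compare(qnan, float64_one, s);

    g_assert(r1 == float_relation_unordered);
    g_assert(r2 == float_relation_unordered);
}
#endif
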
/*
 * Scale by 2**N
 */

float16 float16_scalbn(float16 a, int n, float_status *status)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float16_round_pack_canonical(&p, status);
}

float32 float32_scalbn(float32 a, int n, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float32_round_pack_canonical(&p, status);
}

float64 float64_scalbn(float64 a, int n, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float64_round_pack_canonical(&p, status);
}

bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return bfloat16_round_pack_canonical(&p, status);
}

float128 float128_scalbn(float128 a, int n, float_status *status)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float128_round_pack_canonical(&p, status);
}

floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, status)) {
        return floatx80_default_nan(status);
    }
    parts_scalbn(&p, n, status);
    return floatx80_round_pack_canonical(&p, status);
}

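/*
 * Illustrative sketch (not part of the original source): parts_scalbn()
 * adjusts only the unpacked exponent, so the scaling is exact unless the
 * result overflows or underflows the destination format.
 */
#if 0 /* documentation-only example */
static float64 example_scalbn(float_status *s)
{
    /* 1.0 * 2**10 == 1024.0, computed without touching the fraction. */
    return float64_scalbn(float64_one, 10, s);
}
#endif
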
/*
 * Square Root
 */

float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float16_params);
    return float16_round_pack_canonical(&p, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_sqrt(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float32_params);
    return float32_round_pack_canonical(&p, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_sqrt(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float64_params);
    return float64_round_pack_canonical(&p, status);
}

float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}

float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}

float64 float64r32_sqrt(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float64_params);
    return float64r32_round_pack_canonical(&p, status);
}

bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, status);
}

float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float128_params);
    return float128_round_pack_canonical(&p, status);
}

floatx80 floatx80_sqrt(floatx80 a, float_status *s)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, s)) {
        return floatx80_default_nan(s);
    }
    parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
    return floatx80_round_pack_canonical(&p, s);
}

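/*
 * Illustrative sketch (not part of the original source): the hardfloat
 * fast paths above only accept non-negative zero or normal inputs;
 * everything else falls back to the soft implementation, where the
 * square root of a negative number raises invalid and returns a NaN.
 */
#if 0 /* documentation-only example */
static void example_sqrt(float_status *s)
{
    float32 minus_one = float32_set_sign(float32_one, true);
    float32 r = float32_sqrt(minus_one, s);

    /* Soft path: float_flag_invalid is raised, result is a quiet NaN. */
    g_assert(float32_is_any_nan(r));
}
#endif
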
/*
 * log2
 */
float32 float32_log2(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_log2(&p, status, &float32_params);
    return float32_round_pack_canonical(&p, status);
}

float64 float64_log2(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_log2(&p, status, &float64_params);
    return float64_round_pack_canonical(&p, status);
}

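/*
 * Illustrative sketch (not part of the original source): log2 of an
 * exact power of two yields a small integer, e.g. log2(8.0) == 3.0.
 */
#if 0 /* documentation-only example */
static float64 example_log2(float_status *s)
{
    return float64_log2(float64_scalbn(float64_one, 3, s), s);
}
#endif
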
/*----------------------------------------------------------------------------
| The pattern for a default generated NaN.
*----------------------------------------------------------------------------*/

float16 float16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

float128 float128_default_nan(float_status *status)
{
    FloatParts128 p;

    parts_default_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}

bfloat16 bfloat16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}

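/*
 * Illustrative sketch (not part of the original source): the canonical
 * fraction lives left-justified in its 64-bit (or 128-bit) container, so
 * each format shifts it right by frac_shift before packing.  On many
 * targets the float32 default NaN packs to 0x7fc00000, but the exact
 * pattern is target- and status-dependent.
 */
#if 0 /* documentation-only example */
static void example_default_nan(float_status *s)
{
    float32 dnan = float32_default_nan(s);

    g_assert(float32_is_quiet_nan(dnan, s));
}
#endif
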
/*----------------------------------------------------------------------------
| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
*----------------------------------------------------------------------------*/

float16 float16_silence_nan(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_raw(&p, a);
    p.frac <<= float16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_silence_nan(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_raw(&p, a);
    p.frac <<= float32_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_silence_nan(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_raw(&p, a);
    p.frac <<= float64_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_raw(&p, a);
    p.frac <<= bfloat16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}

float128 float128_silence_nan(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_raw(&p, a);
    frac_shl(&p, float128_params.frac_shift);
    parts_silence_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}

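/*
 * Illustrative sketch (not part of the original source): silencing works
 * on the raw representation, shifting the fraction up to the canonical
 * position, letting parts_silence_nan() flip the quiet bit, and shifting
 * back down before repacking.
 */
#if 0 /* documentation-only example */
static void example_silence_nan(float_status *s)
{
    /* Hypothetical signaling NaN pattern for a typical target. */
    float32 snan = make_float32(0x7f800001);
    float32 qnan = float32_silence_nan(snan, s);

    g_assert(float32_is_quiet_nan(qnan, s));
}
#endif
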
/*----------------------------------------------------------------------------
| If `a' is denormal and we are in flush-to-zero mode then set the
| input-denormal exception and return zero. Otherwise just return the value.
*----------------------------------------------------------------------------*/

static bool parts_squash_denormal(FloatParts64 p, float_status *status)
{
    if (p.exp == 0 && p.frac != 0) {
        float_raise(float_flag_input_denormal, status);
        return true;
    }

    return false;
}

float16 float16_squash_input_denormal(float16 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float16_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float16_set_sign(float16_zero, p.sign);
        }
    }
    return a;
}

float32 float32_squash_input_denormal(float32 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float32_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float32_set_sign(float32_zero, p.sign);
        }
    }
    return a;
}

float64 float64_squash_input_denormal(float64 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float64_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float64_set_sign(float64_zero, p.sign);
        }
    }
    return a;
}

bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        bfloat16_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return bfloat16_set_sign(bfloat16_zero, p.sign);
        }
    }
    return a;
}

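/*
 * Illustrative sketch (not part of the original source): with
 * flush_inputs_to_zero set, a denormal input is replaced by a zero of
 * the same sign and float_flag_input_denormal is raised.
 */
#if 0 /* documentation-only example */
static float32 example_squash(float_status *s)
{
    s->flush_inputs_to_zero = true;
    /* Smallest positive denormal: squashes to +0.0. */
    return float32_squash_input_denormal(make_float32(0x00000001), s);
}
#endif
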
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision floating-point value
| represented by the denormalized significand `aSig'.  The normalized exponent
| and significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t shiftCount;

    shiftCount = clz64(aSig);
    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}

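/*
 * Worked example (not part of the original source): clz64() counts the
 * leading zero bits, so the shift places the most-significant set bit of
 * aSig at bit 63 (the explicit integer bit of floatx80).  For
 * aSig == 0x0000000000000001: clz64 == 63, so *zSigPtr becomes
 * 0x8000000000000000 and *zExpPtr becomes 1 - 63 == -62.
 */
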
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input.  Ordinarily, the abstract value is
| rounded and packed into the extended double-precision format, with the
| inexact exception raised if the abstract input cannot be represented
| exactly.  However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned.  If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal extended
| double-precision floating-point number.
|     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
| the result is rounded to the same number of bits as single or double
| precision, respectively.  Otherwise, the result is rounded to the full
| precision of the extended double-precision format.
|     The input significand must be normalized or smaller.  If the input
| significand is not normalized, `zExp' must be 0; in that case, the result
| returned is a subnormal number, and it must not require rounding.  The
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    FloatRoundMode roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    switch (roundingPrecision) {
    case floatx80_precision_x:
        goto precision80;
    case floatx80_precision_d:
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
        break;
    case floatx80_precision_s:
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
        break;
    default:
        g_assert_not_reached();
    }
    zSig0 |= ( zSig1 != 0 );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloatx80(zSign, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < 0 )
                  || (zSig0 <= zSig0 + roundIncrement);
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    if ( zSig0 < roundIncrement ) {
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    roundIncrement = roundMask + 1;
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || (    ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            roundMask = 0;
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( zExp <= 0 ) {
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        if ( zSig0 == 0 ) {
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );
}

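/*
 * Worked example (not part of the original source): for
 * floatx80_precision_d, roundMask == 0x7FF marks the 11 round/sticky
 * bits below the 53-bit result and roundIncrement == 0x400 is half a
 * unit in the last place, so adding it rounds to nearest.  The later
 * "roundBits << 1 == roundMask + 1" test detects an exact tie and widens
 * the clearing mask by one bit, forcing the result even.
 */
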
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input.  This routine is just like
| `roundAndPackFloatx80' except that the input significand does not have to be
| normalized.
*----------------------------------------------------------------------------*/

floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
                                       bool zSign, int32_t zExp,
                                       uint64_t zSig0, uint64_t zSig1,
                                       float_status *status)
{
    int8_t shiftCount;

    if ( zSig0 == 0 ) {
        zSig0 = zSig1;
        zSig1 = 0;
        zExp -= 64;
    }
    shiftCount = clz64(zSig0);
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    zExp -= shiftCount;
    return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
                                zSig0, zSig1, status);
}

/*----------------------------------------------------------------------------
| Returns the binary exponential of the single-precision floating-point value
| `a'. The operation is performed according to the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
|
| Uses the following identities:
|
| 1. -------------------------------------------------------------------------
|      x    x*ln(2)
|     2  = e
|
| 2. -------------------------------------------------------------------------
|                      2     3     4     5           n
|      x        x     x     x     x     x           x
|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
|               1!    2!    3!    4!    5!          n!
*----------------------------------------------------------------------------*/

static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /*  1 */
    const_float64( 0x3fe0000000000000ll ), /*  2 */
    const_float64( 0x3fc5555555555555ll ), /*  3 */
    const_float64( 0x3fa5555555555555ll ), /*  4 */
    const_float64( 0x3f81111111111111ll ), /*  5 */
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};

float32 float32_exp2(float32 a, float_status *status)
{
    FloatParts64 xp, xnp, tp, rp;
    int i;

    float32_unpack_canonical(&xp, a, status);
    if (unlikely(xp.cls != float_class_normal)) {
        switch (xp.cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(&xp, status);
            return float32_round_pack_canonical(&xp, status);
        case float_class_inf:
            return xp.sign ? float32_zero : a;
        case float_class_zero:
            return float32_one;
        default:
            break;
        }
        g_assert_not_reached();
    }

    float_raise(float_flag_inexact, status);

    float64_unpack_canonical(&tp, float64_ln2, status);
    xp = *parts_mul(&xp, &tp, status);
    xnp = xp;

    float64_unpack_canonical(&rp, float64_one, status);
    for (i = 0 ; i < 15 ; i++) {
        float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
        rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
        xnp = *parts_mul(&xnp, &xp, status);
    }

    return float32_round_pack_canonical(&rp, status);
}

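/*
 * Illustrative note (not part of the original source): the table entry
 * whose trailing comment reads n holds 1/n! rounded to float64 (e.g.
 * entry 3 is 0x3fc5555555555555 ~= 1/6), so the loop sums the Maclaurin
 * series for e**(x*ln2) with a running power xnp == x**(i+1) and one
 * fused multiply-add per term.
 */
#if 0 /* documentation-only example */
static float32 example_exp2(float_status *s)
{
    /* 2**1.0; the 15-term series is accurate to float32 precision. */
    return float32_exp2(float32_one, s);
}
#endif
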
/*----------------------------------------------------------------------------
| Rounds the extended double-precision floating-point value `a'
| to the precision provided by floatx80_rounding_precision and returns the
| result as an extended double-precision floating-point value.
| The operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_round(floatx80 a, float_status *status)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, status)) {
        return floatx80_default_nan(status);
    }
    return floatx80_round_pack_canonical(&p, status);
}

static void __attribute__((constructor)) softfloat_init(void)
{
    union_float64 ua, ub, uc, ur;

    if (QEMU_NO_HARDFLOAT) {
        return;
    }
    /*
     * Test that the host's FMA is not obviously broken. For example,
     * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
     *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
     */
    ua.s = 0x0020000000000001ULL;
    ub.s = 0x3ca0000000000000ULL;
    uc.s = 0x0020000000000000ULL;
    ur.h = fma(ua.h, ub.h, uc.h);
    if (ur.s != 0x0020000000000001ULL) {
        force_soft_fma = true;
    }
}