xref: /openbmc/qemu/target/arm/vfp_helper.c (revision 64552b6b)
1 /*
2  * ARM VFP floating-point operations
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/helper-proto.h"
23 #include "internals.h"
24 #ifdef CONFIG_TCG
25 #include "qemu/log.h"
26 #include "fpu/softfloat.h"
27 #endif
28 
29 /* VFP support.  We follow the convention used for VFP instructions:
30    Single precision routines have a "s" suffix, double precision a
31    "d" suffix.  */
32 
33 #ifdef CONFIG_TCG
34 
35 /* Convert host exception flags to vfp form.  */
36 static inline int vfp_exceptbits_from_host(int host_bits)
37 {
38     int target_bits = 0;
39 
40     if (host_bits & float_flag_invalid) {
41         target_bits |= 1;
42     }
43     if (host_bits & float_flag_divbyzero) {
44         target_bits |= 2;
45     }
46     if (host_bits & float_flag_overflow) {
47         target_bits |= 4;
48     }
49     if (host_bits & (float_flag_underflow | float_flag_output_denormal)) {
50         target_bits |= 8;
51     }
52     if (host_bits & float_flag_inexact) {
53         target_bits |= 0x10;
54     }
55     if (host_bits & float_flag_input_denormal) {
56         target_bits |= 0x80;
57     }
58     return target_bits;
59 }
60 
61 /* Convert vfp exception flags to target form.  */
62 static inline int vfp_exceptbits_to_host(int target_bits)
63 {
64     int host_bits = 0;
65 
66     if (target_bits & 1) {
67         host_bits |= float_flag_invalid;
68     }
69     if (target_bits & 2) {
70         host_bits |= float_flag_divbyzero;
71     }
72     if (target_bits & 4) {
73         host_bits |= float_flag_overflow;
74     }
75     if (target_bits & 8) {
76         host_bits |= float_flag_underflow;
77     }
78     if (target_bits & 0x10) {
79         host_bits |= float_flag_inexact;
80     }
81     if (target_bits & 0x80) {
82         host_bits |= float_flag_input_denormal;
83     }
84     return host_bits;
85 }
86 
87 static uint32_t vfp_get_fpscr_from_host(CPUARMState *env)
88 {
89     uint32_t i;
90 
91     i = get_float_exception_flags(&env->vfp.fp_status);
92     i |= get_float_exception_flags(&env->vfp.standard_fp_status);
93     /* FZ16 does not generate an input denormal exception.  */
94     i |= (get_float_exception_flags(&env->vfp.fp_status_f16)
95           & ~float_flag_input_denormal);
96     return vfp_exceptbits_from_host(i);
97 }
98 
99 static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val)
100 {
101     int i;
102     uint32_t changed = env->vfp.xregs[ARM_VFP_FPSCR];
103 
104     changed ^= val;
105     if (changed & (3 << 22)) {
106         i = (val >> 22) & 3;
107         switch (i) {
108         case FPROUNDING_TIEEVEN:
109             i = float_round_nearest_even;
110             break;
111         case FPROUNDING_POSINF:
112             i = float_round_up;
113             break;
114         case FPROUNDING_NEGINF:
115             i = float_round_down;
116             break;
117         case FPROUNDING_ZERO:
118             i = float_round_to_zero;
119             break;
120         }
121         set_float_rounding_mode(i, &env->vfp.fp_status);
122         set_float_rounding_mode(i, &env->vfp.fp_status_f16);
123     }
124     if (changed & FPCR_FZ16) {
125         bool ftz_enabled = val & FPCR_FZ16;
126         set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
127         set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
128     }
129     if (changed & FPCR_FZ) {
130         bool ftz_enabled = val & FPCR_FZ;
131         set_flush_to_zero(ftz_enabled, &env->vfp.fp_status);
132         set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status);
133     }
134     if (changed & FPCR_DN) {
135         bool dnan_enabled = val & FPCR_DN;
136         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status);
137         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16);
138     }
139 
140     /*
141      * The exception flags are ORed together when we read fpscr so we
142      * only need to preserve the current state in one of our
143      * float_status values.
144      */
145     i = vfp_exceptbits_to_host(val);
146     set_float_exception_flags(i, &env->vfp.fp_status);
147     set_float_exception_flags(0, &env->vfp.fp_status_f16);
148     set_float_exception_flags(0, &env->vfp.standard_fp_status);
149 }
150 
151 #else
152 
153 static uint32_t vfp_get_fpscr_from_host(CPUARMState *env)
154 {
155     return 0;
156 }
157 
158 static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val)
159 {
160 }
161 
162 #endif
163 
164 uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
165 {
166     uint32_t i, fpscr;
167 
168     fpscr = env->vfp.xregs[ARM_VFP_FPSCR]
169             | (env->vfp.vec_len << 16)
170             | (env->vfp.vec_stride << 20);
171 
172     fpscr |= vfp_get_fpscr_from_host(env);
173 
174     i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3];
175     fpscr |= i ? FPCR_QC : 0;
176 
177     return fpscr;
178 }
179 
180 uint32_t vfp_get_fpscr(CPUARMState *env)
181 {
182     return HELPER(vfp_get_fpscr)(env);
183 }
184 
185 void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
186 {
187     /* When ARMv8.2-FP16 is not supported, FZ16 is RES0.  */
188     if (!cpu_isar_feature(aa64_fp16, env_archcpu(env))) {
189         val &= ~FPCR_FZ16;
190     }
191 
192     if (arm_feature(env, ARM_FEATURE_M)) {
193         /*
194          * M profile FPSCR is RES0 for the QC, STRIDE, FZ16, LEN bits
195          * and also for the trapped-exception-handling bits IxE.
196          */
197         val &= 0xf7c0009f;
198     }
199 
200     vfp_set_fpscr_to_host(env, val);
201 
202     /*
203      * We don't implement trapped exception handling, so the
204      * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
205      *
206      * If we exclude the exception flags, IOC|DZC|OFC|UFC|IXC|IDC
207      * (which are stored in fp_status), and the other RES0 bits
208      * in between, then we clear all of the low 16 bits.
209      */
210     env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xf7c80000;
211     env->vfp.vec_len = (val >> 16) & 7;
212     env->vfp.vec_stride = (val >> 20) & 3;
213 
214     /*
215      * The bit we set within fpscr_q is arbitrary; the register as a
216      * whole being zero/non-zero is what counts.
217      */
218     env->vfp.qc[0] = val & FPCR_QC;
219     env->vfp.qc[1] = 0;
220     env->vfp.qc[2] = 0;
221     env->vfp.qc[3] = 0;
222 }
223 
224 void vfp_set_fpscr(CPUARMState *env, uint32_t val)
225 {
226     HELPER(vfp_set_fpscr)(env, val);
227 }
228 
229 #ifdef CONFIG_TCG
230 
231 #define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
232 
233 #define VFP_BINOP(name) \
234 float32 VFP_HELPER(name, s)(float32 a, float32 b, void *fpstp) \
235 { \
236     float_status *fpst = fpstp; \
237     return float32_ ## name(a, b, fpst); \
238 } \
239 float64 VFP_HELPER(name, d)(float64 a, float64 b, void *fpstp) \
240 { \
241     float_status *fpst = fpstp; \
242     return float64_ ## name(a, b, fpst); \
243 }
244 VFP_BINOP(add)
245 VFP_BINOP(sub)
246 VFP_BINOP(mul)
247 VFP_BINOP(div)
248 VFP_BINOP(min)
249 VFP_BINOP(max)
250 VFP_BINOP(minnum)
251 VFP_BINOP(maxnum)
252 #undef VFP_BINOP
253 
254 float32 VFP_HELPER(neg, s)(float32 a)
255 {
256     return float32_chs(a);
257 }
258 
259 float64 VFP_HELPER(neg, d)(float64 a)
260 {
261     return float64_chs(a);
262 }
263 
264 float32 VFP_HELPER(abs, s)(float32 a)
265 {
266     return float32_abs(a);
267 }
268 
269 float64 VFP_HELPER(abs, d)(float64 a)
270 {
271     return float64_abs(a);
272 }
273 
274 float32 VFP_HELPER(sqrt, s)(float32 a, CPUARMState *env)
275 {
276     return float32_sqrt(a, &env->vfp.fp_status);
277 }
278 
279 float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env)
280 {
281     return float64_sqrt(a, &env->vfp.fp_status);
282 }
283 
284 static void softfloat_to_vfp_compare(CPUARMState *env, int cmp)
285 {
286     uint32_t flags;
287     switch (cmp) {
288     case float_relation_equal:
289         flags = 0x6;
290         break;
291     case float_relation_less:
292         flags = 0x8;
293         break;
294     case float_relation_greater:
295         flags = 0x2;
296         break;
297     case float_relation_unordered:
298         flags = 0x3;
299         break;
300     default:
301         g_assert_not_reached();
302     }
303     env->vfp.xregs[ARM_VFP_FPSCR] =
304         deposit32(env->vfp.xregs[ARM_VFP_FPSCR], 28, 4, flags);
305 }
306 
307 /* XXX: check quiet/signaling case */
308 #define DO_VFP_cmp(p, type) \
309 void VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env)  \
310 { \
311     softfloat_to_vfp_compare(env, \
312         type ## _compare_quiet(a, b, &env->vfp.fp_status)); \
313 } \
314 void VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \
315 { \
316     softfloat_to_vfp_compare(env, \
317         type ## _compare(a, b, &env->vfp.fp_status)); \
318 }
319 DO_VFP_cmp(s, float32)
320 DO_VFP_cmp(d, float64)
321 #undef DO_VFP_cmp
322 
323 /* Integer to float and float to integer conversions */
324 
325 #define CONV_ITOF(name, ftype, fsz, sign)                           \
326 ftype HELPER(name)(uint32_t x, void *fpstp)                         \
327 {                                                                   \
328     float_status *fpst = fpstp;                                     \
329     return sign##int32_to_##float##fsz((sign##int32_t)x, fpst);     \
330 }
331 
332 #define CONV_FTOI(name, ftype, fsz, sign, round)                \
333 sign##int32_t HELPER(name)(ftype x, void *fpstp)                \
334 {                                                               \
335     float_status *fpst = fpstp;                                 \
336     if (float##fsz##_is_any_nan(x)) {                           \
337         float_raise(float_flag_invalid, fpst);                  \
338         return 0;                                               \
339     }                                                           \
340     return float##fsz##_to_##sign##int32##round(x, fpst);       \
341 }
342 
343 #define FLOAT_CONVS(name, p, ftype, fsz, sign)            \
344     CONV_ITOF(vfp_##name##to##p, ftype, fsz, sign)        \
345     CONV_FTOI(vfp_to##name##p, ftype, fsz, sign, )        \
346     CONV_FTOI(vfp_to##name##z##p, ftype, fsz, sign, _round_to_zero)
347 
348 FLOAT_CONVS(si, h, uint32_t, 16, )
349 FLOAT_CONVS(si, s, float32, 32, )
350 FLOAT_CONVS(si, d, float64, 64, )
351 FLOAT_CONVS(ui, h, uint32_t, 16, u)
352 FLOAT_CONVS(ui, s, float32, 32, u)
353 FLOAT_CONVS(ui, d, float64, 64, u)
354 
355 #undef CONV_ITOF
356 #undef CONV_FTOI
357 #undef FLOAT_CONVS
358 
359 /* floating point conversion */
360 float64 VFP_HELPER(fcvtd, s)(float32 x, CPUARMState *env)
361 {
362     return float32_to_float64(x, &env->vfp.fp_status);
363 }
364 
365 float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env)
366 {
367     return float64_to_float32(x, &env->vfp.fp_status);
368 }
369 
370 /* VFP3 fixed point conversion.  */
371 #define VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \
372 float##fsz HELPER(vfp_##name##to##p)(uint##isz##_t  x, uint32_t shift, \
373                                      void *fpstp) \
374 { return itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); }
375 
376 #define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, ROUND, suff)   \
377 uint##isz##_t HELPER(vfp_to##name##p##suff)(float##fsz x, uint32_t shift, \
378                                             void *fpst)                   \
379 {                                                                         \
380     if (unlikely(float##fsz##_is_any_nan(x))) {                           \
381         float_raise(float_flag_invalid, fpst);                            \
382         return 0;                                                         \
383     }                                                                     \
384     return float##fsz##_to_##itype##_scalbn(x, ROUND, shift, fpst);       \
385 }
386 
387 #define VFP_CONV_FIX(name, p, fsz, isz, itype)                   \
388 VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype)                     \
389 VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype,               \
390                          float_round_to_zero, _round_to_zero)    \
391 VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype,               \
392                          get_float_rounding_mode(fpst), )
393 
394 #define VFP_CONV_FIX_A64(name, p, fsz, isz, itype)               \
395 VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype)                     \
396 VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype,               \
397                          get_float_rounding_mode(fpst), )
398 
399 VFP_CONV_FIX(sh, d, 64, 64, int16)
400 VFP_CONV_FIX(sl, d, 64, 64, int32)
401 VFP_CONV_FIX_A64(sq, d, 64, 64, int64)
402 VFP_CONV_FIX(uh, d, 64, 64, uint16)
403 VFP_CONV_FIX(ul, d, 64, 64, uint32)
404 VFP_CONV_FIX_A64(uq, d, 64, 64, uint64)
405 VFP_CONV_FIX(sh, s, 32, 32, int16)
406 VFP_CONV_FIX(sl, s, 32, 32, int32)
407 VFP_CONV_FIX_A64(sq, s, 32, 64, int64)
408 VFP_CONV_FIX(uh, s, 32, 32, uint16)
409 VFP_CONV_FIX(ul, s, 32, 32, uint32)
410 VFP_CONV_FIX_A64(uq, s, 32, 64, uint64)
411 
412 #undef VFP_CONV_FIX
413 #undef VFP_CONV_FIX_FLOAT
414 #undef VFP_CONV_FLOAT_FIX_ROUND
415 #undef VFP_CONV_FIX_A64
416 
417 uint32_t HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst)
418 {
419     return int32_to_float16_scalbn(x, -shift, fpst);
420 }
421 
422 uint32_t HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst)
423 {
424     return uint32_to_float16_scalbn(x, -shift, fpst);
425 }
426 
427 uint32_t HELPER(vfp_sqtoh)(uint64_t x, uint32_t shift, void *fpst)
428 {
429     return int64_to_float16_scalbn(x, -shift, fpst);
430 }
431 
432 uint32_t HELPER(vfp_uqtoh)(uint64_t x, uint32_t shift, void *fpst)
433 {
434     return uint64_to_float16_scalbn(x, -shift, fpst);
435 }
436 
437 uint32_t HELPER(vfp_toshh)(uint32_t x, uint32_t shift, void *fpst)
438 {
439     if (unlikely(float16_is_any_nan(x))) {
440         float_raise(float_flag_invalid, fpst);
441         return 0;
442     }
443     return float16_to_int16_scalbn(x, get_float_rounding_mode(fpst),
444                                    shift, fpst);
445 }
446 
447 uint32_t HELPER(vfp_touhh)(uint32_t x, uint32_t shift, void *fpst)
448 {
449     if (unlikely(float16_is_any_nan(x))) {
450         float_raise(float_flag_invalid, fpst);
451         return 0;
452     }
453     return float16_to_uint16_scalbn(x, get_float_rounding_mode(fpst),
454                                     shift, fpst);
455 }
456 
457 uint32_t HELPER(vfp_toslh)(uint32_t x, uint32_t shift, void *fpst)
458 {
459     if (unlikely(float16_is_any_nan(x))) {
460         float_raise(float_flag_invalid, fpst);
461         return 0;
462     }
463     return float16_to_int32_scalbn(x, get_float_rounding_mode(fpst),
464                                    shift, fpst);
465 }
466 
467 uint32_t HELPER(vfp_toulh)(uint32_t x, uint32_t shift, void *fpst)
468 {
469     if (unlikely(float16_is_any_nan(x))) {
470         float_raise(float_flag_invalid, fpst);
471         return 0;
472     }
473     return float16_to_uint32_scalbn(x, get_float_rounding_mode(fpst),
474                                     shift, fpst);
475 }
476 
477 uint64_t HELPER(vfp_tosqh)(uint32_t x, uint32_t shift, void *fpst)
478 {
479     if (unlikely(float16_is_any_nan(x))) {
480         float_raise(float_flag_invalid, fpst);
481         return 0;
482     }
483     return float16_to_int64_scalbn(x, get_float_rounding_mode(fpst),
484                                    shift, fpst);
485 }
486 
487 uint64_t HELPER(vfp_touqh)(uint32_t x, uint32_t shift, void *fpst)
488 {
489     if (unlikely(float16_is_any_nan(x))) {
490         float_raise(float_flag_invalid, fpst);
491         return 0;
492     }
493     return float16_to_uint64_scalbn(x, get_float_rounding_mode(fpst),
494                                     shift, fpst);
495 }
496 
497 /* Set the current fp rounding mode and return the old one.
498  * The argument is a softfloat float_round_ value.
499  */
500 uint32_t HELPER(set_rmode)(uint32_t rmode, void *fpstp)
501 {
502     float_status *fp_status = fpstp;
503 
504     uint32_t prev_rmode = get_float_rounding_mode(fp_status);
505     set_float_rounding_mode(rmode, fp_status);
506 
507     return prev_rmode;
508 }
509 
510 /* Set the current fp rounding mode in the standard fp status and return
511  * the old one. This is for NEON instructions that need to change the
512  * rounding mode but wish to use the standard FPSCR values for everything
513  * else. Always set the rounding mode back to the correct value after
514  * modifying it.
515  * The argument is a softfloat float_round_ value.
516  */
517 uint32_t HELPER(set_neon_rmode)(uint32_t rmode, CPUARMState *env)
518 {
519     float_status *fp_status = &env->vfp.standard_fp_status;
520 
521     uint32_t prev_rmode = get_float_rounding_mode(fp_status);
522     set_float_rounding_mode(rmode, fp_status);
523 
524     return prev_rmode;
525 }
526 
527 /* Half precision conversions.  */
528 float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t ahp_mode)
529 {
530     /* Squash FZ16 to 0 for the duration of conversion.  In this case,
531      * it would affect flushing input denormals.
532      */
533     float_status *fpst = fpstp;
534     flag save = get_flush_inputs_to_zero(fpst);
535     set_flush_inputs_to_zero(false, fpst);
536     float32 r = float16_to_float32(a, !ahp_mode, fpst);
537     set_flush_inputs_to_zero(save, fpst);
538     return r;
539 }
540 
541 uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode)
542 {
543     /* Squash FZ16 to 0 for the duration of conversion.  In this case,
544      * it would affect flushing output denormals.
545      */
546     float_status *fpst = fpstp;
547     flag save = get_flush_to_zero(fpst);
548     set_flush_to_zero(false, fpst);
549     float16 r = float32_to_float16(a, !ahp_mode, fpst);
550     set_flush_to_zero(save, fpst);
551     return r;
552 }
553 
554 float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, void *fpstp, uint32_t ahp_mode)
555 {
556     /* Squash FZ16 to 0 for the duration of conversion.  In this case,
557      * it would affect flushing input denormals.
558      */
559     float_status *fpst = fpstp;
560     flag save = get_flush_inputs_to_zero(fpst);
561     set_flush_inputs_to_zero(false, fpst);
562     float64 r = float16_to_float64(a, !ahp_mode, fpst);
563     set_flush_inputs_to_zero(save, fpst);
564     return r;
565 }
566 
567 uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode)
568 {
569     /* Squash FZ16 to 0 for the duration of conversion.  In this case,
570      * it would affect flushing output denormals.
571      */
572     float_status *fpst = fpstp;
573     flag save = get_flush_to_zero(fpst);
574     set_flush_to_zero(false, fpst);
575     float16 r = float64_to_float16(a, !ahp_mode, fpst);
576     set_flush_to_zero(save, fpst);
577     return r;
578 }
579 
580 #define float32_two make_float32(0x40000000)
581 #define float32_three make_float32(0x40400000)
582 #define float32_one_point_five make_float32(0x3fc00000)
583 
584 float32 HELPER(recps_f32)(float32 a, float32 b, CPUARMState *env)
585 {
586     float_status *s = &env->vfp.standard_fp_status;
587     if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
588         (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) {
589         if (!(float32_is_zero(a) || float32_is_zero(b))) {
590             float_raise(float_flag_input_denormal, s);
591         }
592         return float32_two;
593     }
594     return float32_sub(float32_two, float32_mul(a, b, s), s);
595 }
596 
597 float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUARMState *env)
598 {
599     float_status *s = &env->vfp.standard_fp_status;
600     float32 product;
601     if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
602         (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) {
603         if (!(float32_is_zero(a) || float32_is_zero(b))) {
604             float_raise(float_flag_input_denormal, s);
605         }
606         return float32_one_point_five;
607     }
608     product = float32_mul(a, b, s);
609     return float32_div(float32_sub(float32_three, product, s), float32_two, s);
610 }
611 
612 /* NEON helpers.  */
613 
/*
 * Constants 256 and 512 are used in some helpers; they are spelled as
 * raw bit patterns so that no int->float conversion is needed at
 * run-time.
 */
#define float64_256     make_float64(0x4070000000000000LL)  /* 256.0 */
#define float64_512     make_float64(0x4080000000000000LL)  /* 512.0 */

/* Largest finite value of each format: maximum exponent, all-ones fraction. */
#define float16_maxnorm make_float16(0x7bff)
#define float32_maxnorm make_float32(0x7f7fffff)
#define float64_maxnorm make_float64(0x7fefffffffffffffLL)
621 
622 /* Reciprocal functions
623  *
624  * The algorithm that must be used to calculate the estimate
625  * is specified by the ARM ARM, see FPRecipEstimate()/RecipEstimate
626  */
627 
/*
 * See RecipEstimate()
 *
 * @input is a 9 bit fixed point number in the range 256 .. 511,
 * standing for a value 0.5 <= x < 1.0.  The return value is likewise in
 * the range 256 .. 511, standing for a value from 1.0 to 511/256.
 *
 * Both ranges are enforced with assert().
 */
static int recip_estimate(int input)
{
    int divisor, quotient, estimate;

    assert(input >= 256 && input <= 511);

    divisor = 2 * input + 1;
    quotient = (1 << 19) / divisor;
    /* Add one and halve: rounds the quotient to the nearest half-unit. */
    estimate = (quotient + 1) >> 1;

    assert(estimate >= 256 && estimate <= 511);
    return estimate;
}
645 
/*
 * Common wrapper to call recip_estimate
 *
 * @exp is the biased input exponent on entry and receives the biased
 * result exponent on return.  @exp_off is the format-specific constant
 * the input exponent is subtracted from.  @frac is the 64 bit fraction
 * (without implicit bit) with the binary point nominally at bit 52.
 *
 * Returns the result fraction in the same layout; the caller extracts
 * as many top bits as its format needs.
 */
static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac)
{
    int in_exp = *exp;
    int out_exp;
    uint32_t table_index;
    uint64_t out_frac;

    /* Handle sub-normals */
    if (in_exp == 0) {
        if (extract64(frac, 51, 1) != 0) {
            frac <<= 1;
        } else {
            in_exp = -1;
            frac <<= 2;
        }
    }

    /* table_index = UInt('1':fraction<51:44>) */
    table_index = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
    out_frac = deposit64(0, 44, 8, recip_estimate(table_index));

    out_exp = exp_off - in_exp;
    switch (out_exp) {
    case 0:
        /* Shift right by one and set bit 51. */
        out_frac = deposit64(out_frac >> 1, 51, 1, 1);
        break;
    case -1:
        /* Shift right by two, set bits [51:50] to 01, clamp exponent to 0. */
        out_frac = deposit64(out_frac >> 2, 50, 2, 1);
        out_exp = 0;
        break;
    default:
        break;
    }

    *exp = out_exp;
    return out_frac;
}
688 
689 static bool round_to_inf(float_status *fpst, bool sign_bit)
690 {
691     switch (fpst->float_rounding_mode) {
692     case float_round_nearest_even: /* Round to Nearest */
693         return true;
694     case float_round_up: /* Round to +Inf */
695         return !sign_bit;
696     case float_round_down: /* Round to -Inf */
697         return sign_bit;
698     case float_round_to_zero: /* Round to Zero */
699         return false;
700     }
701 
702     g_assert_not_reached();
703 }
704 
705 uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp)
706 {
707     float_status *fpst = fpstp;
708     float16 f16 = float16_squash_input_denormal(input, fpst);
709     uint32_t f16_val = float16_val(f16);
710     uint32_t f16_sign = float16_is_neg(f16);
711     int f16_exp = extract32(f16_val, 10, 5);
712     uint32_t f16_frac = extract32(f16_val, 0, 10);
713     uint64_t f64_frac;
714 
715     if (float16_is_any_nan(f16)) {
716         float16 nan = f16;
717         if (float16_is_signaling_nan(f16, fpst)) {
718             float_raise(float_flag_invalid, fpst);
719             nan = float16_silence_nan(f16, fpst);
720         }
721         if (fpst->default_nan_mode) {
722             nan =  float16_default_nan(fpst);
723         }
724         return nan;
725     } else if (float16_is_infinity(f16)) {
726         return float16_set_sign(float16_zero, float16_is_neg(f16));
727     } else if (float16_is_zero(f16)) {
728         float_raise(float_flag_divbyzero, fpst);
729         return float16_set_sign(float16_infinity, float16_is_neg(f16));
730     } else if (float16_abs(f16) < (1 << 8)) {
731         /* Abs(value) < 2.0^-16 */
732         float_raise(float_flag_overflow | float_flag_inexact, fpst);
733         if (round_to_inf(fpst, f16_sign)) {
734             return float16_set_sign(float16_infinity, f16_sign);
735         } else {
736             return float16_set_sign(float16_maxnorm, f16_sign);
737         }
738     } else if (f16_exp >= 29 && fpst->flush_to_zero) {
739         float_raise(float_flag_underflow, fpst);
740         return float16_set_sign(float16_zero, float16_is_neg(f16));
741     }
742 
743     f64_frac = call_recip_estimate(&f16_exp, 29,
744                                    ((uint64_t) f16_frac) << (52 - 10));
745 
746     /* result = sign : result_exp<4:0> : fraction<51:42> */
747     f16_val = deposit32(0, 15, 1, f16_sign);
748     f16_val = deposit32(f16_val, 10, 5, f16_exp);
749     f16_val = deposit32(f16_val, 0, 10, extract64(f64_frac, 52 - 10, 10));
750     return make_float16(f16_val);
751 }
752 
753 float32 HELPER(recpe_f32)(float32 input, void *fpstp)
754 {
755     float_status *fpst = fpstp;
756     float32 f32 = float32_squash_input_denormal(input, fpst);
757     uint32_t f32_val = float32_val(f32);
758     bool f32_sign = float32_is_neg(f32);
759     int f32_exp = extract32(f32_val, 23, 8);
760     uint32_t f32_frac = extract32(f32_val, 0, 23);
761     uint64_t f64_frac;
762 
763     if (float32_is_any_nan(f32)) {
764         float32 nan = f32;
765         if (float32_is_signaling_nan(f32, fpst)) {
766             float_raise(float_flag_invalid, fpst);
767             nan = float32_silence_nan(f32, fpst);
768         }
769         if (fpst->default_nan_mode) {
770             nan =  float32_default_nan(fpst);
771         }
772         return nan;
773     } else if (float32_is_infinity(f32)) {
774         return float32_set_sign(float32_zero, float32_is_neg(f32));
775     } else if (float32_is_zero(f32)) {
776         float_raise(float_flag_divbyzero, fpst);
777         return float32_set_sign(float32_infinity, float32_is_neg(f32));
778     } else if (float32_abs(f32) < (1ULL << 21)) {
779         /* Abs(value) < 2.0^-128 */
780         float_raise(float_flag_overflow | float_flag_inexact, fpst);
781         if (round_to_inf(fpst, f32_sign)) {
782             return float32_set_sign(float32_infinity, f32_sign);
783         } else {
784             return float32_set_sign(float32_maxnorm, f32_sign);
785         }
786     } else if (f32_exp >= 253 && fpst->flush_to_zero) {
787         float_raise(float_flag_underflow, fpst);
788         return float32_set_sign(float32_zero, float32_is_neg(f32));
789     }
790 
791     f64_frac = call_recip_estimate(&f32_exp, 253,
792                                    ((uint64_t) f32_frac) << (52 - 23));
793 
794     /* result = sign : result_exp<7:0> : fraction<51:29> */
795     f32_val = deposit32(0, 31, 1, f32_sign);
796     f32_val = deposit32(f32_val, 23, 8, f32_exp);
797     f32_val = deposit32(f32_val, 0, 23, extract64(f64_frac, 52 - 23, 23));
798     return make_float32(f32_val);
799 }
800 
801 float64 HELPER(recpe_f64)(float64 input, void *fpstp)
802 {
803     float_status *fpst = fpstp;
804     float64 f64 = float64_squash_input_denormal(input, fpst);
805     uint64_t f64_val = float64_val(f64);
806     bool f64_sign = float64_is_neg(f64);
807     int f64_exp = extract64(f64_val, 52, 11);
808     uint64_t f64_frac = extract64(f64_val, 0, 52);
809 
810     /* Deal with any special cases */
811     if (float64_is_any_nan(f64)) {
812         float64 nan = f64;
813         if (float64_is_signaling_nan(f64, fpst)) {
814             float_raise(float_flag_invalid, fpst);
815             nan = float64_silence_nan(f64, fpst);
816         }
817         if (fpst->default_nan_mode) {
818             nan =  float64_default_nan(fpst);
819         }
820         return nan;
821     } else if (float64_is_infinity(f64)) {
822         return float64_set_sign(float64_zero, float64_is_neg(f64));
823     } else if (float64_is_zero(f64)) {
824         float_raise(float_flag_divbyzero, fpst);
825         return float64_set_sign(float64_infinity, float64_is_neg(f64));
826     } else if ((f64_val & ~(1ULL << 63)) < (1ULL << 50)) {
827         /* Abs(value) < 2.0^-1024 */
828         float_raise(float_flag_overflow | float_flag_inexact, fpst);
829         if (round_to_inf(fpst, f64_sign)) {
830             return float64_set_sign(float64_infinity, f64_sign);
831         } else {
832             return float64_set_sign(float64_maxnorm, f64_sign);
833         }
834     } else if (f64_exp >= 2045 && fpst->flush_to_zero) {
835         float_raise(float_flag_underflow, fpst);
836         return float64_set_sign(float64_zero, float64_is_neg(f64));
837     }
838 
839     f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac);
840 
841     /* result = sign : result_exp<10:0> : fraction<51:0>; */
842     f64_val = deposit64(0, 63, 1, f64_sign);
843     f64_val = deposit64(f64_val, 52, 11, f64_exp);
844     f64_val = deposit64(f64_val, 0, 52, f64_frac);
845     return make_float64(f64_val);
846 }
847 
/*
 * Reciprocal square root estimate on a scaled integer.  The algorithm
 * that must be used to calculate the estimate is specified by the
 * ARM ARM.
 *
 * @a must be in the range 128 .. 511 and the result is always in the
 * range 256 .. 511; both are enforced with assert().
 */
static int do_recip_sqrt_estimate(int a)
{
    int b;
    int estimate;

    assert(a >= 128 && a <= 511);

    if (a >= 256) {
        /* Clear the low bit, then form 2 * (a + 1). */
        a = ((a & ~1) + 1) * 2;
    } else {
        a = 2 * a + 1;
    }

    /* Largest b >= 512 for which a * b * b is still below 2^28. */
    for (b = 512; a * (b + 1) * (b + 1) < (1 << 28); b++) {
        /* nothing */
    }

    estimate = (b + 1) / 2;
    assert(estimate >= 256 && estimate <= 511);

    return estimate;
}
872 
873 
/*
 * Common wrapper around do_recip_sqrt_estimate().
 *
 * @exp is the biased input exponent on entry and receives the biased
 * result exponent, (exp_off - exponent) / 2, on return.  @frac is the
 * fraction with its top bit at bit 51.  A zero exponent is treated as
 * a sub-normal and normalised first; callers must not pass a zero
 * fraction in that case or the normalisation loop never terminates.
 *
 * Returns the low 8 bits of the estimate placed at bits [51:44].
 */
static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
{
    int exponent = *exp;
    int estimate;
    uint32_t table_index;

    if (exponent == 0) {
        /* Shift left until bit 51 is set, then shift that bit out too. */
        for (; extract64(frac, 51, 1) == 0; exponent--) {
            frac <<= 1;
        }
        frac = extract64(frac, 0, 51) << 1;
    }

    if (exponent & 1) {
        /* table_index = UInt('01':fraction<51:45>) */
        table_index = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
    } else {
        /* table_index = UInt('1':fraction<51:44>) */
        table_index = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
    }
    estimate = do_recip_sqrt_estimate(table_index);

    *exp = (exp_off - exponent) / 2;
    return extract64(estimate, 0, 8) << 44;
}
899 
900 uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp)
901 {
902     float_status *s = fpstp;
903     float16 f16 = float16_squash_input_denormal(input, s);
904     uint16_t val = float16_val(f16);
905     bool f16_sign = float16_is_neg(f16);
906     int f16_exp = extract32(val, 10, 5);
907     uint16_t f16_frac = extract32(val, 0, 10);
908     uint64_t f64_frac;
909 
910     if (float16_is_any_nan(f16)) {
911         float16 nan = f16;
912         if (float16_is_signaling_nan(f16, s)) {
913             float_raise(float_flag_invalid, s);
914             nan = float16_silence_nan(f16, s);
915         }
916         if (s->default_nan_mode) {
917             nan =  float16_default_nan(s);
918         }
919         return nan;
920     } else if (float16_is_zero(f16)) {
921         float_raise(float_flag_divbyzero, s);
922         return float16_set_sign(float16_infinity, f16_sign);
923     } else if (f16_sign) {
924         float_raise(float_flag_invalid, s);
925         return float16_default_nan(s);
926     } else if (float16_is_infinity(f16)) {
927         return float16_zero;
928     }
929 
930     /* Scale and normalize to a double-precision value between 0.25 and 1.0,
931      * preserving the parity of the exponent.  */
932 
933     f64_frac = ((uint64_t) f16_frac) << (52 - 10);
934 
935     f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac);
936 
937     /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
938     val = deposit32(0, 15, 1, f16_sign);
939     val = deposit32(val, 10, 5, f16_exp);
940     val = deposit32(val, 2, 8, extract64(f64_frac, 52 - 8, 8));
941     return make_float16(val);
942 }
943 
944 float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
945 {
946     float_status *s = fpstp;
947     float32 f32 = float32_squash_input_denormal(input, s);
948     uint32_t val = float32_val(f32);
949     uint32_t f32_sign = float32_is_neg(f32);
950     int f32_exp = extract32(val, 23, 8);
951     uint32_t f32_frac = extract32(val, 0, 23);
952     uint64_t f64_frac;
953 
954     if (float32_is_any_nan(f32)) {
955         float32 nan = f32;
956         if (float32_is_signaling_nan(f32, s)) {
957             float_raise(float_flag_invalid, s);
958             nan = float32_silence_nan(f32, s);
959         }
960         if (s->default_nan_mode) {
961             nan =  float32_default_nan(s);
962         }
963         return nan;
964     } else if (float32_is_zero(f32)) {
965         float_raise(float_flag_divbyzero, s);
966         return float32_set_sign(float32_infinity, float32_is_neg(f32));
967     } else if (float32_is_neg(f32)) {
968         float_raise(float_flag_invalid, s);
969         return float32_default_nan(s);
970     } else if (float32_is_infinity(f32)) {
971         return float32_zero;
972     }
973 
974     /* Scale and normalize to a double-precision value between 0.25 and 1.0,
975      * preserving the parity of the exponent.  */
976 
977     f64_frac = ((uint64_t) f32_frac) << 29;
978 
979     f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac);
980 
981     /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */
982     val = deposit32(0, 31, 1, f32_sign);
983     val = deposit32(val, 23, 8, f32_exp);
984     val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
985     return make_float32(val);
986 }
987 
988 float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
989 {
990     float_status *s = fpstp;
991     float64 f64 = float64_squash_input_denormal(input, s);
992     uint64_t val = float64_val(f64);
993     bool f64_sign = float64_is_neg(f64);
994     int f64_exp = extract64(val, 52, 11);
995     uint64_t f64_frac = extract64(val, 0, 52);
996 
997     if (float64_is_any_nan(f64)) {
998         float64 nan = f64;
999         if (float64_is_signaling_nan(f64, s)) {
1000             float_raise(float_flag_invalid, s);
1001             nan = float64_silence_nan(f64, s);
1002         }
1003         if (s->default_nan_mode) {
1004             nan =  float64_default_nan(s);
1005         }
1006         return nan;
1007     } else if (float64_is_zero(f64)) {
1008         float_raise(float_flag_divbyzero, s);
1009         return float64_set_sign(float64_infinity, float64_is_neg(f64));
1010     } else if (float64_is_neg(f64)) {
1011         float_raise(float_flag_invalid, s);
1012         return float64_default_nan(s);
1013     } else if (float64_is_infinity(f64)) {
1014         return float64_zero;
1015     }
1016 
1017     f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac);
1018 
1019     /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(44) */
1020     val = deposit64(0, 61, 1, f64_sign);
1021     val = deposit64(val, 52, 11, f64_exp);
1022     val = deposit64(val, 44, 8, extract64(f64_frac, 52 - 8, 8));
1023     return make_float64(val);
1024 }
1025 
1026 uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
1027 {
1028     /* float_status *s = fpstp; */
1029     int input, estimate;
1030 
1031     if ((a & 0x80000000) == 0) {
1032         return 0xffffffff;
1033     }
1034 
1035     input = extract32(a, 23, 9);
1036     estimate = recip_estimate(input);
1037 
1038     return deposit32(0, (32 - 9), 9, estimate);
1039 }
1040 
1041 uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp)
1042 {
1043     int estimate;
1044 
1045     if ((a & 0xc0000000) == 0) {
1046         return 0xffffffff;
1047     }
1048 
1049     estimate = do_recip_sqrt_estimate(extract32(a, 23, 9));
1050 
1051     return deposit32(0, 23, 9, estimate);
1052 }
1053 
1054 /* VFPv4 fused multiply-accumulate */
1055 float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c, void *fpstp)
1056 {
1057     float_status *fpst = fpstp;
1058     return float32_muladd(a, b, c, 0, fpst);
1059 }
1060 
1061 float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp)
1062 {
1063     float_status *fpst = fpstp;
1064     return float64_muladd(a, b, c, 0, fpst);
1065 }
1066 
1067 /* ARMv8 round to integral */
1068 float32 HELPER(rints_exact)(float32 x, void *fp_status)
1069 {
1070     return float32_round_to_int(x, fp_status);
1071 }
1072 
1073 float64 HELPER(rintd_exact)(float64 x, void *fp_status)
1074 {
1075     return float64_round_to_int(x, fp_status);
1076 }
1077 
1078 float32 HELPER(rints)(float32 x, void *fp_status)
1079 {
1080     int old_flags = get_float_exception_flags(fp_status), new_flags;
1081     float32 ret;
1082 
1083     ret = float32_round_to_int(x, fp_status);
1084 
1085     /* Suppress any inexact exceptions the conversion produced */
1086     if (!(old_flags & float_flag_inexact)) {
1087         new_flags = get_float_exception_flags(fp_status);
1088         set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
1089     }
1090 
1091     return ret;
1092 }
1093 
1094 float64 HELPER(rintd)(float64 x, void *fp_status)
1095 {
1096     int old_flags = get_float_exception_flags(fp_status), new_flags;
1097     float64 ret;
1098 
1099     ret = float64_round_to_int(x, fp_status);
1100 
1101     new_flags = get_float_exception_flags(fp_status);
1102 
1103     /* Suppress any inexact exceptions the conversion produced */
1104     if (!(old_flags & float_flag_inexact)) {
1105         new_flags = get_float_exception_flags(fp_status);
1106         set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
1107     }
1108 
1109     return ret;
1110 }
1111 
1112 /* Convert ARM rounding mode to softfloat */
1113 int arm_rmode_to_sf(int rmode)
1114 {
1115     switch (rmode) {
1116     case FPROUNDING_TIEAWAY:
1117         rmode = float_round_ties_away;
1118         break;
1119     case FPROUNDING_ODD:
1120         /* FIXME: add support for TIEAWAY and ODD */
1121         qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n",
1122                       rmode);
1123         /* fall through for now */
1124     case FPROUNDING_TIEEVEN:
1125     default:
1126         rmode = float_round_nearest_even;
1127         break;
1128     case FPROUNDING_POSINF:
1129         rmode = float_round_up;
1130         break;
1131     case FPROUNDING_NEGINF:
1132         rmode = float_round_down;
1133         break;
1134     case FPROUNDING_ZERO:
1135         rmode = float_round_to_zero;
1136         break;
1137     }
1138     return rmode;
1139 }
1140 
1141 /*
1142  * Implement float64 to int32_t conversion without saturation;
1143  * the result is supplied modulo 2^32.
1144  */
1145 uint64_t HELPER(fjcvtzs)(float64 value, void *vstatus)
1146 {
1147     float_status *status = vstatus;
1148     uint32_t exp, sign;
1149     uint64_t frac;
1150     uint32_t inexact = 1; /* !Z */
1151 
1152     sign = extract64(value, 63, 1);
1153     exp = extract64(value, 52, 11);
1154     frac = extract64(value, 0, 52);
1155 
1156     if (exp == 0) {
1157         /* While not inexact for IEEE FP, -0.0 is inexact for JavaScript.  */
1158         inexact = sign;
1159         if (frac != 0) {
1160             if (status->flush_inputs_to_zero) {
1161                 float_raise(float_flag_input_denormal, status);
1162             } else {
1163                 float_raise(float_flag_inexact, status);
1164                 inexact = 1;
1165             }
1166         }
1167         frac = 0;
1168     } else if (exp == 0x7ff) {
1169         /* This operation raises Invalid for both NaN and overflow (Inf).  */
1170         float_raise(float_flag_invalid, status);
1171         frac = 0;
1172     } else {
1173         int true_exp = exp - 1023;
1174         int shift = true_exp - 52;
1175 
1176         /* Restore implicit bit.  */
1177         frac |= 1ull << 52;
1178 
1179         /* Shift the fraction into place.  */
1180         if (shift >= 0) {
1181             /* The number is so large we must shift the fraction left.  */
1182             if (shift >= 64) {
1183                 /* The fraction is shifted out entirely.  */
1184                 frac = 0;
1185             } else {
1186                 frac <<= shift;
1187             }
1188         } else if (shift > -64) {
1189             /* Normal case -- shift right and notice if bits shift out.  */
1190             inexact = (frac << (64 + shift)) != 0;
1191             frac >>= -shift;
1192         } else {
1193             /* The fraction is shifted out entirely.  */
1194             frac = 0;
1195         }
1196 
1197         /* Notice overflow or inexact exceptions.  */
1198         if (true_exp > 31 || frac > (sign ? 0x80000000ull : 0x7fffffff)) {
1199             /* Overflow, for which this operation raises invalid.  */
1200             float_raise(float_flag_invalid, status);
1201             inexact = 1;
1202         } else if (inexact) {
1203             float_raise(float_flag_inexact, status);
1204         }
1205 
1206         /* Honor the sign.  */
1207         if (sign) {
1208             frac = -frac;
1209         }
1210     }
1211 
1212     /* Pack the result and the env->ZF representation of Z together.  */
1213     return deposit64(frac, 32, 32, inexact);
1214 }
1215 
1216 uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env)
1217 {
1218     uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status);
1219     uint32_t result = pair;
1220     uint32_t z = (pair >> 32) == 0;
1221 
1222     /* Store Z, clear NCV, in FPSCR.NZCV.  */
1223     env->vfp.xregs[ARM_VFP_FPSCR]
1224         = (env->vfp.xregs[ARM_VFP_FPSCR] & ~CPSR_NZCV) | (z * CPSR_Z);
1225 
1226     return result;
1227 }
1228 
1229 /* Round a float32 to an integer that fits in int32_t or int64_t.  */
1230 static float32 frint_s(float32 f, float_status *fpst, int intsize)
1231 {
1232     int old_flags = get_float_exception_flags(fpst);
1233     uint32_t exp = extract32(f, 23, 8);
1234 
1235     if (unlikely(exp == 0xff)) {
1236         /* NaN or Inf.  */
1237         goto overflow;
1238     }
1239 
1240     /* Round and re-extract the exponent.  */
1241     f = float32_round_to_int(f, fpst);
1242     exp = extract32(f, 23, 8);
1243 
1244     /* Validate the range of the result.  */
1245     if (exp < 126 + intsize) {
1246         /* abs(F) <= INT{N}_MAX */
1247         return f;
1248     }
1249     if (exp == 126 + intsize) {
1250         uint32_t sign = extract32(f, 31, 1);
1251         uint32_t frac = extract32(f, 0, 23);
1252         if (sign && frac == 0) {
1253             /* F == INT{N}_MIN */
1254             return f;
1255         }
1256     }
1257 
1258  overflow:
1259     /*
1260      * Raise Invalid and return INT{N}_MIN as a float.  Revert any
1261      * inexact exception float32_round_to_int may have raised.
1262      */
1263     set_float_exception_flags(old_flags | float_flag_invalid, fpst);
1264     return (0x100u + 126u + intsize) << 23;
1265 }
1266 
1267 float32 HELPER(frint32_s)(float32 f, void *fpst)
1268 {
1269     return frint_s(f, fpst, 32);
1270 }
1271 
1272 float32 HELPER(frint64_s)(float32 f, void *fpst)
1273 {
1274     return frint_s(f, fpst, 64);
1275 }
1276 
1277 /* Round a float64 to an integer that fits in int32_t or int64_t.  */
1278 static float64 frint_d(float64 f, float_status *fpst, int intsize)
1279 {
1280     int old_flags = get_float_exception_flags(fpst);
1281     uint32_t exp = extract64(f, 52, 11);
1282 
1283     if (unlikely(exp == 0x7ff)) {
1284         /* NaN or Inf.  */
1285         goto overflow;
1286     }
1287 
1288     /* Round and re-extract the exponent.  */
1289     f = float64_round_to_int(f, fpst);
1290     exp = extract64(f, 52, 11);
1291 
1292     /* Validate the range of the result.  */
1293     if (exp < 1022 + intsize) {
1294         /* abs(F) <= INT{N}_MAX */
1295         return f;
1296     }
1297     if (exp == 1022 + intsize) {
1298         uint64_t sign = extract64(f, 63, 1);
1299         uint64_t frac = extract64(f, 0, 52);
1300         if (sign && frac == 0) {
1301             /* F == INT{N}_MIN */
1302             return f;
1303         }
1304     }
1305 
1306  overflow:
1307     /*
1308      * Raise Invalid and return INT{N}_MIN as a float.  Revert any
1309      * inexact exception float64_round_to_int may have raised.
1310      */
1311     set_float_exception_flags(old_flags | float_flag_invalid, fpst);
1312     return (uint64_t)(0x800 + 1022 + intsize) << 52;
1313 }
1314 
1315 float64 HELPER(frint32_d)(float64 f, void *fpst)
1316 {
1317     return frint_d(f, fpst, 32);
1318 }
1319 
1320 float64 HELPER(frint64_d)(float64 f, void *fpst)
1321 {
1322     return frint_d(f, fpst, 64);
1323 }
1324 
1325 #endif
1326