/*
 * ARM VFP floating-point operations
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
1937356079SRichard Henderson
2037356079SRichard Henderson #include "qemu/osdep.h"
2137356079SRichard Henderson #include "cpu.h"
2237356079SRichard Henderson #include "exec/helper-proto.h"
2337356079SRichard Henderson #include "internals.h"
245a534314SPeter Maydell #include "cpu-features.h"
254a15527cSPhilippe Mathieu-Daudé #ifdef CONFIG_TCG
264a15527cSPhilippe Mathieu-Daudé #include "qemu/log.h"
274a15527cSPhilippe Mathieu-Daudé #include "fpu/softfloat.h"
284a15527cSPhilippe Mathieu-Daudé #endif
2937356079SRichard Henderson
/*
 * VFP support.  We follow the convention used for VFP instructions:
 * single precision routines have an "s" suffix, double precision a
 * "d" suffix.
 */
3337356079SRichard Henderson
344a15527cSPhilippe Mathieu-Daudé #ifdef CONFIG_TCG
354a15527cSPhilippe Mathieu-Daudé
3637356079SRichard Henderson /* Convert host exception flags to vfp form. */
vfp_exceptbits_from_host(int host_bits)3737356079SRichard Henderson static inline int vfp_exceptbits_from_host(int host_bits)
3837356079SRichard Henderson {
3937356079SRichard Henderson int target_bits = 0;
4037356079SRichard Henderson
419798ac71SPhilippe Mathieu-Daudé if (host_bits & float_flag_invalid) {
4237356079SRichard Henderson target_bits |= 1;
439798ac71SPhilippe Mathieu-Daudé }
449798ac71SPhilippe Mathieu-Daudé if (host_bits & float_flag_divbyzero) {
4537356079SRichard Henderson target_bits |= 2;
469798ac71SPhilippe Mathieu-Daudé }
479798ac71SPhilippe Mathieu-Daudé if (host_bits & float_flag_overflow) {
4837356079SRichard Henderson target_bits |= 4;
499798ac71SPhilippe Mathieu-Daudé }
509798ac71SPhilippe Mathieu-Daudé if (host_bits & (float_flag_underflow | float_flag_output_denormal)) {
5137356079SRichard Henderson target_bits |= 8;
529798ac71SPhilippe Mathieu-Daudé }
539798ac71SPhilippe Mathieu-Daudé if (host_bits & float_flag_inexact) {
5437356079SRichard Henderson target_bits |= 0x10;
559798ac71SPhilippe Mathieu-Daudé }
569798ac71SPhilippe Mathieu-Daudé if (host_bits & float_flag_input_denormal) {
5737356079SRichard Henderson target_bits |= 0x80;
589798ac71SPhilippe Mathieu-Daudé }
5937356079SRichard Henderson return target_bits;
6037356079SRichard Henderson }
6137356079SRichard Henderson
/*
 * Collect the exception flags currently held in all four float_status
 * structures and return them in VFP exception-bit form.
 */
static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
{
    uint32_t host_flags;
    uint32_t f16_flags;

    host_flags = get_float_exception_flags(&env->vfp.fp_status)
               | get_float_exception_flags(&env->vfp.standard_fp_status);

    f16_flags = get_float_exception_flags(&env->vfp.fp_status_f16)
              | get_float_exception_flags(&env->vfp.standard_fp_status_f16);

    /* FZ16 does not generate an input denormal exception. */
    host_flags |= f16_flags & ~float_flag_input_denormal;

    return vfp_exceptbits_from_host(host_flags);
}
750c6ad948SPhilippe Mathieu-Daudé
/*
 * Clear out all the exception-flag information in the float_status
 * values.  The caller should have arranged for env->vfp.fpsr to
 * be the architecturally up-to-date exception flag information first.
 */
static void vfp_clear_float_status_exc_flags(CPUARMState *env)
{
    float_status *const every_status[] = {
        &env->vfp.fp_status,
        &env->vfp.fp_status_f16,
        &env->vfp.standard_fp_status,
        &env->vfp.standard_fp_status_f16,
    };
    size_t idx;

    for (idx = 0; idx < sizeof(every_status) / sizeof(every_status[0]); idx++) {
        set_float_exception_flags(0, every_status[idx]);
    }
}
88b167325eSPeter Maydell
/*
 * Propagate a pending FPCR write into the softfloat float_status
 * structures.
 *
 * @val:  the new FPCR value being written
 * @mask: which FPCR bits the write is allowed to affect
 *
 * Only fields that are both covered by @mask and differ from the value
 * currently held in env->vfp.fpcr are pushed to the host state.  This
 * function does not update env->vfp.fpcr itself.
 */
static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
{
    const uint64_t changed = (env->vfp.fpcr ^ val) & mask;

    /* Rounding mode field, bits [23:22]. */
    if (changed & (3 << 22)) {
        int rmode = (val >> 22) & 3;

        switch (rmode) {
        case FPROUNDING_TIEEVEN:
            rmode = float_round_nearest_even;
            break;
        case FPROUNDING_POSINF:
            rmode = float_round_up;
            break;
        case FPROUNDING_NEGINF:
            rmode = float_round_down;
            break;
        case FPROUNDING_ZERO:
            rmode = float_round_to_zero;
            break;
        }
        set_float_rounding_mode(rmode, &env->vfp.fp_status);
        set_float_rounding_mode(rmode, &env->vfp.fp_status_f16);
    }

    /* FZ16 controls flushing for both half-precision status words. */
    if (changed & FPCR_FZ16) {
        const bool flush_f16 = (val & FPCR_FZ16) != 0;

        set_flush_to_zero(flush_f16, &env->vfp.fp_status_f16);
        set_flush_to_zero(flush_f16, &env->vfp.standard_fp_status_f16);
        set_flush_inputs_to_zero(flush_f16, &env->vfp.fp_status_f16);
        set_flush_inputs_to_zero(flush_f16, &env->vfp.standard_fp_status_f16);
    }

    /* FZ controls flushing for the regular (non-f16) status word. */
    if (changed & FPCR_FZ) {
        const bool flush = (val & FPCR_FZ) != 0;

        set_flush_to_zero(flush, &env->vfp.fp_status);
        set_flush_inputs_to_zero(flush, &env->vfp.fp_status);
    }

    /* DN selects default-NaN mode. */
    if (changed & FPCR_DN) {
        const bool default_nan = (val & FPCR_DN) != 0;

        set_default_nan_mode(default_nan, &env->vfp.fp_status);
        set_default_nan_mode(default_nan, &env->vfp.fp_status_f16);
    }
}
13237356079SRichard Henderson
1334a15527cSPhilippe Mathieu-Daudé #else
1344a15527cSPhilippe Mathieu-Daudé
/* Non-TCG build: always reports no pending exception bits. */
static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
{
    const uint32_t no_host_flags = 0;

    return no_host_flags;
}
1394a15527cSPhilippe Mathieu-Daudé
/* Non-TCG build: nothing to clear. */
static void vfp_clear_float_status_exc_flags(CPUARMState *env)
{
    /* Intentionally empty. */
}
143b167325eSPeter Maydell
/* Non-TCG build: nothing to propagate. */
static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
{
    /* Intentionally empty. */
}
1474a15527cSPhilippe Mathieu-Daudé
1484a15527cSPhilippe Mathieu-Daudé #endif
1494a15527cSPhilippe Mathieu-Daudé
/*
 * Assemble the guest-visible FPCR value from the stored FPCR bits plus
 * the fields that are kept in separate CPU state members.
 */
uint32_t vfp_get_fpcr(CPUARMState *env)
{
    uint32_t fpcr = env->vfp.fpcr;

    /* Short-vector Len and Stride are stored outside vfp.fpcr. */
    fpcr |= env->vfp.vec_len << 16;
    fpcr |= env->vfp.vec_stride << 20;

    /*
     * M-profile LTPSIZE is the same bits [18:16] as A-profile Len; whichever
     * of the two is not applicable to this CPU will always be zero.
     */
    fpcr |= env->v7m.ltpsize << 16;

    return fpcr;
}
1642de7cf9eSPeter Maydell
/*
 * Return the guest-visible FPSR: the stored value, plus any exception
 * bits still pending in the float_status structures, plus QC when any
 * element of vfp.qc[] is non-zero.
 */
uint32_t vfp_get_fpsr(CPUARMState *env)
{
    uint32_t fpsr = env->vfp.fpsr;
    uint32_t qc_any;

    fpsr |= vfp_get_fpsr_from_host(env);

    qc_any = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3];
    if (qc_any) {
        fpsr |= FPSR_QC;
    }

    return fpsr;
}
176e9d65282SPhilippe Mathieu-Daudé
HELPER(vfp_get_fpscr)1772de7cf9eSPeter Maydell uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
1782de7cf9eSPeter Maydell {
179db397a81SPeter Maydell return (vfp_get_fpcr(env) & FPSCR_FPCR_MASK) |
180db397a81SPeter Maydell (vfp_get_fpsr(env) & FPSCR_FPSR_MASK);
181e9d65282SPhilippe Mathieu-Daudé }
182e9d65282SPhilippe Mathieu-Daudé
/* Plain C entry point that forwards to the vfp_get_fpscr helper. */
uint32_t vfp_get_fpscr(CPUARMState *env)
{
    uint32_t fpscr = HELPER(vfp_get_fpscr)(env);

    return fpscr;
}
187e9d65282SPhilippe Mathieu-Daudé
/*
 * Handle a guest write of @val to the FPSR.
 *
 * Updates the QC tracking array (when Neon or MVE is present), stores
 * the NZCV and cumulative exception bits into env->vfp.fpsr and clears
 * the exception flags held in the float_status structures.
 */
void vfp_set_fpsr(CPUARMState *env, uint32_t val)
{
    ARMCPU *cpu = env_archcpu(env);
    const bool tracks_qc = arm_feature(env, ARM_FEATURE_NEON) ||
                           cpu_isar_feature(aa32_mve, cpu);

    if (tracks_qc) {
        /*
         * The bit we set within vfp.qc[] is arbitrary; the array as a
         * whole being zero/non-zero is what counts.
         */
        env->vfp.qc[0] = val & FPSR_QC;
        env->vfp.qc[1] = 0;
        env->vfp.qc[2] = 0;
        env->vfp.qc[3] = 0;
    }

    /*
     * NZCV lives only in env->vfp.fpsr.  The cumulative exception flags
     * IOC|DZC|OFC|UFC|IXC|IDC also live in env->vfp.fpsr, with possible
     * extra pending exception information that hasn't yet been folded in
     * living in the float_status values (for TCG).
     * Since this FPSR write gives us the up to date values of the exception
     * flags, we want to store into vfp.fpsr the NZCV and CEXC bits, zeroing
     * anything else.  We also need to clear out the float_status exception
     * information so that the next vfp_get_fpsr does not fold in stale data.
     */
    env->vfp.fpsr = val & (FPSR_NZCV_MASK | FPSR_CEXC_MASK);
    vfp_clear_float_status_exc_flags(env);
}
218b167325eSPeter Maydell
/*
 * Write @val to the FPCR, touching only the bits selected by @mask.
 *
 * @env:  CPU state to update
 * @val:  new FPCR value (bits outside @mask are ignored)
 * @mask: FPCR bits this write is allowed to change
 *
 * We assume the mask is sensible (e.g. doesn't try to set only
 * part of a field).
 */
static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask)
{
    ARMCPU *cpu = env_archcpu(env);

    /* When ARMv8.2-FP16 is not supported, FZ16 is RES0.  */
    if (!cpu_isar_feature(any_fp16, cpu)) {
        val &= ~FPCR_FZ16;
    }

    /* Likewise drop EBF when the CPU lacks the aa64_ebf16 feature. */
    if (!cpu_isar_feature(aa64_ebf16, cpu)) {
        val &= ~FPCR_EBF;
    }

    /* Push rounding mode / flush / default-NaN changes to softfloat. */
    vfp_set_fpcr_to_host(env, val, mask);

    if (mask & (FPCR_LEN_MASK | FPCR_STRIDE_MASK)) {
        if (!arm_feature(env, ARM_FEATURE_M)) {
            /*
             * Short-vector length and stride; on M-profile these bits
             * are used for different purposes.
             * We can't make this conditional be "if MVFR0.FPShVec != 0",
             * because in v7A no-short-vector-support cores still had to
             * allow Stride/Len to be written with the only effect that
             * some insns are required to UNDEF if the guest sets them.
             */
            env->vfp.vec_len = extract32(val, 16, 3);
            env->vfp.vec_stride = extract32(val, 20, 2);
        } else if (cpu_isar_feature(aa32_mve, cpu)) {
            env->v7m.ltpsize = extract32(val, FPCR_LTPSIZE_SHIFT,
                                         FPCR_LTPSIZE_LENGTH);
        }
    }

    /*
     * We don't implement trapped exception handling, so the
     * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
     *
     * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF
     * and FZ16.  Len, Stride and LTPSIZE we just handled.  Store those bits
     * there, and zero any of the other FPCR bits and the RES0 and RAZ/WI
     * bits.
     */
    val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 | FPCR_EBF;
    env->vfp.fpcr &= ~mask;
    /*
     * Only merge in bits covered by the mask: without this, a bit that
     * is set in val but excluded from mask (e.g. EBF on a write made
     * through the FPSCR view) would leak into vfp.fpcr even though the
     * caller asked for it to be left alone.
     */
    env->vfp.fpcr |= val & mask;
}
270b167325eSPeter Maydell
/* Write @val to the FPCR with every one of the 32 bits writable. */
void vfp_set_fpcr(CPUARMState *env, uint32_t val)
{
    const uint32_t all_bits = MAKE_64BIT_MASK(0, 32);

    vfp_set_fpcr_masked(env, val, all_bits);
}
275db397a81SPeter Maydell
HELPER(vfp_set_fpscr)276d31e2ce6SPeter Maydell void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
277d31e2ce6SPeter Maydell {
27837356079SRichard Henderson vfp_set_fpcr_masked(env, val, FPSCR_FPCR_MASK);
27937356079SRichard Henderson vfp_set_fpsr(env, val & FPSCR_FPSR_MASK);
28037356079SRichard Henderson }
28137356079SRichard Henderson
/* Plain C entry point that forwards to the vfp_set_fpscr helper. */
void vfp_set_fpscr(CPUARMState *env, uint32_t val)
{
    HELPER(vfp_set_fpscr)(env, val);
    return;
}
28637356079SRichard Henderson
28737356079SRichard Henderson #ifdef CONFIG_TCG
288120a0eb3SPeter Maydell
289120a0eb3SPeter Maydell #define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
290120a0eb3SPeter Maydell
291120a0eb3SPeter Maydell #define VFP_BINOP(name) \
292120a0eb3SPeter Maydell dh_ctype_f16 VFP_HELPER(name, h)(dh_ctype_f16 a, dh_ctype_f16 b, void *fpstp) \
29337356079SRichard Henderson { \
29437356079SRichard Henderson float_status *fpst = fpstp; \
29537356079SRichard Henderson return float16_ ## name(a, b, fpst); \
29637356079SRichard Henderson } \
29737356079SRichard Henderson float32 VFP_HELPER(name, s)(float32 a, float32 b, void *fpstp) \
29837356079SRichard Henderson { \
29937356079SRichard Henderson float_status *fpst = fpstp; \
30037356079SRichard Henderson return float32_ ## name(a, b, fpst); \
30137356079SRichard Henderson } \
30237356079SRichard Henderson float64 VFP_HELPER(name, d)(float64 a, float64 b, void *fpstp) \
30337356079SRichard Henderson { \
30437356079SRichard Henderson float_status *fpst = fpstp; \
30537356079SRichard Henderson return float64_ ## name(a, b, fpst); \
30637356079SRichard Henderson }
30737356079SRichard Henderson VFP_BINOP(add)
VFP_BINOP(sub)30837356079SRichard Henderson VFP_BINOP(sub)
30937356079SRichard Henderson VFP_BINOP(mul)
31037356079SRichard Henderson VFP_BINOP(div)
31137356079SRichard Henderson VFP_BINOP(min)
31237356079SRichard Henderson VFP_BINOP(max)
313ce2d65a5SPeter Maydell VFP_BINOP(minnum)
314ce2d65a5SPeter Maydell VFP_BINOP(maxnum)
315ce2d65a5SPeter Maydell #undef VFP_BINOP
316ce2d65a5SPeter Maydell
317ce2d65a5SPeter Maydell dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, CPUARMState *env)
31837356079SRichard Henderson {
31937356079SRichard Henderson return float16_sqrt(a, &env->vfp.fp_status_f16);
32037356079SRichard Henderson }
32137356079SRichard Henderson
VFP_HELPER(sqrt,s)32237356079SRichard Henderson float32 VFP_HELPER(sqrt, s)(float32 a, CPUARMState *env)
32337356079SRichard Henderson {
32437356079SRichard Henderson return float32_sqrt(a, &env->vfp.fp_status);
32537356079SRichard Henderson }
32637356079SRichard Henderson
VFP_HELPER(sqrt,d)32737356079SRichard Henderson float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env)
32871bfd65cSRichard Henderson {
32937356079SRichard Henderson return float64_sqrt(a, &env->vfp.fp_status);
33037356079SRichard Henderson }
33137356079SRichard Henderson
/*
 * Record the outcome of a softfloat comparison in the 4-bit NZCV field
 * (bits [31:28]) of env->vfp.fpsr.  Any value of @cmp other than the
 * four FloatRelation results handled below is a programming error.
 */
static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp)
{
    uint32_t nzcv;

    switch (cmp) {
    case float_relation_less:
        nzcv = 0x8;
        break;
    case float_relation_equal:
        nzcv = 0x6;
        break;
    case float_relation_greater:
        nzcv = 0x2;
        break;
    case float_relation_unordered:
        nzcv = 0x3;
        break;
    default:
        g_assert_not_reached();
    }

    env->vfp.fpsr = deposit64(env->vfp.fpsr, 28, 4, nzcv); /* NZCV */
}
35337356079SRichard Henderson
35437356079SRichard Henderson /* XXX: check quiet/signaling case */
3551b88b054SPeter Maydell #define DO_VFP_cmp(P, FLOATTYPE, ARGTYPE, FPST) \
35637356079SRichard Henderson void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
3571b88b054SPeter Maydell { \
35837356079SRichard Henderson softfloat_to_vfp_compare(env, \
35937356079SRichard Henderson FLOATTYPE ## _compare_quiet(a, b, &env->vfp.FPST)); \
3601b88b054SPeter Maydell } \
36137356079SRichard Henderson void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
3621b88b054SPeter Maydell { \
3631b88b054SPeter Maydell softfloat_to_vfp_compare(env, \
3641b88b054SPeter Maydell FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \
36537356079SRichard Henderson }
DO_VFP_cmp(h,float16,dh_ctype_f16,fp_status_f16)36637356079SRichard Henderson DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16)
36737356079SRichard Henderson DO_VFP_cmp(s, float32, float32, fp_status)
36837356079SRichard Henderson DO_VFP_cmp(d, float64, float64, fp_status)
36937356079SRichard Henderson #undef DO_VFP_cmp
37037356079SRichard Henderson
37137356079SRichard Henderson /* Integer to float and float to integer conversions */
37237356079SRichard Henderson
37337356079SRichard Henderson #define CONV_ITOF(name, ftype, fsz, sign) \
37437356079SRichard Henderson ftype HELPER(name)(uint32_t x, void *fpstp) \
37537356079SRichard Henderson { \
37637356079SRichard Henderson float_status *fpst = fpstp; \
37737356079SRichard Henderson return sign##int32_to_##float##fsz((sign##int32_t)x, fpst); \
37837356079SRichard Henderson }
37937356079SRichard Henderson
38037356079SRichard Henderson #define CONV_FTOI(name, ftype, fsz, sign, round) \
38137356079SRichard Henderson sign##int32_t HELPER(name)(ftype x, void *fpstp) \
38237356079SRichard Henderson { \
38337356079SRichard Henderson float_status *fpst = fpstp; \
38437356079SRichard Henderson if (float##fsz##_is_any_nan(x)) { \
38537356079SRichard Henderson float_raise(float_flag_invalid, fpst); \
38637356079SRichard Henderson return 0; \
38737356079SRichard Henderson } \
38837356079SRichard Henderson return float##fsz##_to_##sign##int32##round(x, fpst); \
38937356079SRichard Henderson }
39037356079SRichard Henderson
39137356079SRichard Henderson #define FLOAT_CONVS(name, p, ftype, fsz, sign) \
39237356079SRichard Henderson CONV_ITOF(vfp_##name##to##p, ftype, fsz, sign) \
39337356079SRichard Henderson CONV_FTOI(vfp_to##name##p, ftype, fsz, sign, ) \
39437356079SRichard Henderson CONV_FTOI(vfp_to##name##z##p, ftype, fsz, sign, _round_to_zero)
39537356079SRichard Henderson
39637356079SRichard Henderson FLOAT_CONVS(si, h, uint32_t, 16, )
39737356079SRichard Henderson FLOAT_CONVS(si, s, float32, 32, )
39837356079SRichard Henderson FLOAT_CONVS(si, d, float64, 64, )
39937356079SRichard Henderson FLOAT_CONVS(ui, h, uint32_t, 16, u)
40037356079SRichard Henderson FLOAT_CONVS(ui, s, float32, 32, u)
40137356079SRichard Henderson FLOAT_CONVS(ui, d, float64, 64, u)
40237356079SRichard Henderson
40337356079SRichard Henderson #undef CONV_ITOF
40437356079SRichard Henderson #undef CONV_FTOI
40537356079SRichard Henderson #undef FLOAT_CONVS
40637356079SRichard Henderson
40737356079SRichard Henderson /* floating point conversion */
40837356079SRichard Henderson float64 VFP_HELPER(fcvtd, s)(float32 x, CPUARMState *env)
40937356079SRichard Henderson {
41037356079SRichard Henderson return float32_to_float64(x, &env->vfp.fp_status);
41137356079SRichard Henderson }
41237356079SRichard Henderson
VFP_HELPER(fcvts,d)41337356079SRichard Henderson float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env)
4143a98ac40SRichard Henderson {
4153a98ac40SRichard Henderson return float64_to_float32(x, &env->vfp.fp_status);
4163a98ac40SRichard Henderson }
4173a98ac40SRichard Henderson
HELPER(bfcvt)4183a98ac40SRichard Henderson uint32_t HELPER(bfcvt)(float32 x, void *status)
419d29b17caSRichard Henderson {
420d29b17caSRichard Henderson return float32_to_bfloat16(x, status);
421d29b17caSRichard Henderson }
422d29b17caSRichard Henderson
HELPER(bfcvt_pair)423d29b17caSRichard Henderson uint32_t HELPER(bfcvt_pair)(uint64_t pair, void *status)
424d29b17caSRichard Henderson {
425d29b17caSRichard Henderson bfloat16 lo = float32_to_bfloat16(extract64(pair, 0, 32), status);
42661db12d9SPeter Maydell bfloat16 hi = float32_to_bfloat16(extract64(pair, 32, 32), status);
42761db12d9SPeter Maydell return deposit32(lo, 16, 16, hi);
42861db12d9SPeter Maydell }
42961db12d9SPeter Maydell
43061db12d9SPeter Maydell /*
43161db12d9SPeter Maydell * VFP3 fixed point conversion. The AArch32 versions of fix-to-float
43261db12d9SPeter Maydell * must always round-to-nearest; the AArch64 ones honour the FPSCR
4335366f6adSPeter Maydell * rounding mode. (For AArch32 Neon the standard-FPSCR is set to
4345366f6adSPeter Maydell * round-to-nearest so either helper will work.) AArch32 float-to-fix
43537356079SRichard Henderson * must round-to-zero.
43637356079SRichard Henderson */
43737356079SRichard Henderson #define VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \
43861db12d9SPeter Maydell ftype HELPER(vfp_##name##to##p)(uint##isz##_t x, uint32_t shift, \
43961db12d9SPeter Maydell void *fpstp) \
44061db12d9SPeter Maydell { return itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); }
44161db12d9SPeter Maydell
44261db12d9SPeter Maydell #define VFP_CONV_FIX_FLOAT_ROUND(name, p, fsz, ftype, isz, itype) \
44361db12d9SPeter Maydell ftype HELPER(vfp_##name##to##p##_round_to_nearest)(uint##isz##_t x, \
44461db12d9SPeter Maydell uint32_t shift, \
44561db12d9SPeter Maydell void *fpstp) \
44661db12d9SPeter Maydell { \
44761db12d9SPeter Maydell ftype ret; \
44861db12d9SPeter Maydell float_status *fpst = fpstp; \
44961db12d9SPeter Maydell FloatRoundMode oldmode = fpst->float_rounding_mode; \
45061db12d9SPeter Maydell fpst->float_rounding_mode = float_round_nearest_even; \
45161db12d9SPeter Maydell ret = itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); \
4525366f6adSPeter Maydell fpst->float_rounding_mode = oldmode; \
4535366f6adSPeter Maydell return ret; \
45437356079SRichard Henderson }
45537356079SRichard Henderson
45637356079SRichard Henderson #define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, ROUND, suff) \
45737356079SRichard Henderson uint##isz##_t HELPER(vfp_to##name##p##suff)(ftype x, uint32_t shift, \
45837356079SRichard Henderson void *fpst) \
45937356079SRichard Henderson { \
46037356079SRichard Henderson if (unlikely(float##fsz##_is_any_nan(x))) { \
46137356079SRichard Henderson float_raise(float_flag_invalid, fpst); \
46237356079SRichard Henderson return 0; \
4635366f6adSPeter Maydell } \
4645366f6adSPeter Maydell return float##fsz##_to_##itype##_scalbn(x, ROUND, shift, fpst); \
46561db12d9SPeter Maydell }
4665366f6adSPeter Maydell
46737356079SRichard Henderson #define VFP_CONV_FIX(name, p, fsz, ftype, isz, itype) \
4685366f6adSPeter Maydell VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \
46937356079SRichard Henderson VFP_CONV_FIX_FLOAT_ROUND(name, p, fsz, ftype, isz, itype) \
47037356079SRichard Henderson VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \
4715366f6adSPeter Maydell float_round_to_zero, _round_to_zero) \
4725366f6adSPeter Maydell VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \
4735366f6adSPeter Maydell get_float_rounding_mode(fpst), )
47437356079SRichard Henderson
47537356079SRichard Henderson #define VFP_CONV_FIX_A64(name, p, fsz, ftype, isz, itype) \
4765366f6adSPeter Maydell VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \
4775366f6adSPeter Maydell VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \
4785366f6adSPeter Maydell get_float_rounding_mode(fpst), )
4795366f6adSPeter Maydell
4805366f6adSPeter Maydell VFP_CONV_FIX(sh, d, 64, float64, 64, int16)
4815366f6adSPeter Maydell VFP_CONV_FIX(sl, d, 64, float64, 64, int32)
4825366f6adSPeter Maydell VFP_CONV_FIX_A64(sq, d, 64, float64, 64, int64)
4835366f6adSPeter Maydell VFP_CONV_FIX(uh, d, 64, float64, 64, uint16)
4845366f6adSPeter Maydell VFP_CONV_FIX(ul, d, 64, float64, 64, uint32)
4855366f6adSPeter Maydell VFP_CONV_FIX_A64(uq, d, 64, float64, 64, uint64)
4865366f6adSPeter Maydell VFP_CONV_FIX(sh, s, 32, float32, 32, int16)
4875366f6adSPeter Maydell VFP_CONV_FIX(sl, s, 32, float32, 32, int32)
488414ba270SPeter Maydell VFP_CONV_FIX_A64(sq, s, 32, float32, 64, int64)
489414ba270SPeter Maydell VFP_CONV_FIX(uh, s, 32, float32, 32, uint16)
490414ba270SPeter Maydell VFP_CONV_FIX(ul, s, 32, float32, 32, uint32)
491414ba270SPeter Maydell VFP_CONV_FIX_A64(uq, s, 32, float32, 64, uint64)
492414ba270SPeter Maydell VFP_CONV_FIX(sh, h, 16, dh_ctype_f16, 32, int16)
493414ba270SPeter Maydell VFP_CONV_FIX(sl, h, 16, dh_ctype_f16, 32, int32)
49437356079SRichard Henderson VFP_CONV_FIX_A64(sq, h, 16, dh_ctype_f16, 64, int64)
49537356079SRichard Henderson VFP_CONV_FIX(uh, h, 16, dh_ctype_f16, 32, uint16)
49637356079SRichard Henderson VFP_CONV_FIX(ul, h, 16, dh_ctype_f16, 32, uint32)
49737356079SRichard Henderson VFP_CONV_FIX_A64(uq, h, 16, dh_ctype_f16, 64, uint64)
49837356079SRichard Henderson
49937356079SRichard Henderson #undef VFP_CONV_FIX
50037356079SRichard Henderson #undef VFP_CONV_FIX_FLOAT
50137356079SRichard Henderson #undef VFP_CONV_FLOAT_FIX_ROUND
50237356079SRichard Henderson #undef VFP_CONV_FIX_A64
50337356079SRichard Henderson
50437356079SRichard Henderson /* Set the current fp rounding mode and return the old one.
50537356079SRichard Henderson * The argument is a softfloat float_round_ value.
50637356079SRichard Henderson */
HELPER(set_rmode)50737356079SRichard Henderson uint32_t HELPER(set_rmode)(uint32_t rmode, void *fpstp)
50837356079SRichard Henderson {
50937356079SRichard Henderson float_status *fp_status = fpstp;
51037356079SRichard Henderson
51137356079SRichard Henderson uint32_t prev_rmode = get_float_rounding_mode(fp_status);
51237356079SRichard Henderson set_float_rounding_mode(rmode, fp_status);
51337356079SRichard Henderson
51437356079SRichard Henderson return prev_rmode;
51537356079SRichard Henderson }
51637356079SRichard Henderson
51737356079SRichard Henderson /* Half precision conversions. */
HELPER(vfp_fcvt_f16_to_f32)51837356079SRichard Henderson float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t ahp_mode)
51937356079SRichard Henderson {
520c120391cSRichard Henderson /* Squash FZ16 to 0 for the duration of conversion. In this case,
52137356079SRichard Henderson * it would affect flushing input denormals.
52237356079SRichard Henderson */
52337356079SRichard Henderson float_status *fpst = fpstp;
52437356079SRichard Henderson bool save = get_flush_inputs_to_zero(fpst);
52537356079SRichard Henderson set_flush_inputs_to_zero(false, fpst);
52637356079SRichard Henderson float32 r = float16_to_float32(a, !ahp_mode, fpst);
52737356079SRichard Henderson set_flush_inputs_to_zero(save, fpst);
52837356079SRichard Henderson return r;
52937356079SRichard Henderson }
53037356079SRichard Henderson
HELPER(vfp_fcvt_f32_to_f16)53137356079SRichard Henderson uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode)
53237356079SRichard Henderson {
533c120391cSRichard Henderson /* Squash FZ16 to 0 for the duration of conversion. In this case,
53437356079SRichard Henderson * it would affect flushing output denormals.
53537356079SRichard Henderson */
53637356079SRichard Henderson float_status *fpst = fpstp;
53737356079SRichard Henderson bool save = get_flush_to_zero(fpst);
53837356079SRichard Henderson set_flush_to_zero(false, fpst);
53937356079SRichard Henderson float16 r = float32_to_float16(a, !ahp_mode, fpst);
54037356079SRichard Henderson set_flush_to_zero(save, fpst);
54137356079SRichard Henderson return r;
54237356079SRichard Henderson }
54337356079SRichard Henderson
HELPER(vfp_fcvt_f16_to_f64)54437356079SRichard Henderson float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, void *fpstp, uint32_t ahp_mode)
54537356079SRichard Henderson {
546c120391cSRichard Henderson /* Squash FZ16 to 0 for the duration of conversion. In this case,
54737356079SRichard Henderson * it would affect flushing input denormals.
54837356079SRichard Henderson */
54937356079SRichard Henderson float_status *fpst = fpstp;
55037356079SRichard Henderson bool save = get_flush_inputs_to_zero(fpst);
55137356079SRichard Henderson set_flush_inputs_to_zero(false, fpst);
55237356079SRichard Henderson float64 r = float16_to_float64(a, !ahp_mode, fpst);
55337356079SRichard Henderson set_flush_inputs_to_zero(save, fpst);
55437356079SRichard Henderson return r;
55537356079SRichard Henderson }
55637356079SRichard Henderson
HELPER(vfp_fcvt_f64_to_f16)55737356079SRichard Henderson uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode)
55837356079SRichard Henderson {
559c120391cSRichard Henderson /* Squash FZ16 to 0 for the duration of conversion. In this case,
56037356079SRichard Henderson * it would affect flushing output denormals.
56137356079SRichard Henderson */
56237356079SRichard Henderson float_status *fpst = fpstp;
56337356079SRichard Henderson bool save = get_flush_to_zero(fpst);
56437356079SRichard Henderson set_flush_to_zero(false, fpst);
56537356079SRichard Henderson float16 r = float64_to_float16(a, !ahp_mode, fpst);
56637356079SRichard Henderson set_flush_to_zero(save, fpst);
56737356079SRichard Henderson return r;
56837356079SRichard Henderson }
56937356079SRichard Henderson
/* NEON helpers.  */

/* Constants 256 and 512 are used in some helpers; we avoid relying on
 * int->float conversions at run-time.  */
#define float64_256 make_float64(0x4070000000000000LL)
#define float64_512 make_float64(0x4080000000000000LL)
/* Largest finite magnitude in each format: maximum non-infinity exponent
 * with an all-ones fraction, sign bit clear. */
#define float16_maxnorm make_float16(0x7bff)
#define float32_maxnorm make_float32(0x7f7fffff)
#define float64_maxnorm make_float64(0x7fefffffffffffffLL)
57937356079SRichard Henderson
/* Reciprocal functions
 *
 * The algorithm that must be used to calculate the estimate
 * is specified by the ARM ARM, see FPRecipEstimate()/RecipEstimate
 */
58537356079SRichard Henderson
/* See RecipEstimate()
 *
 * input is a 9 bit fixed point number
 * input range 256 .. 511 for a number from 0.5 <= x < 1.0.
 * result range 256 .. 511 for a number from 1.0 to 511/256.
 *
 * Both ranges are enforced with assert().
 */

static int recip_estimate(int input)
{
    int a, b, r;

    assert(256 <= input && input < 512);
    /* Use the midpoint of the input's interval: 2 * input + 1. */
    a = (input * 2) + 1;
    b = (1 << 19) / a;
    /* Round b / 2 to nearest (halves round up). */
    r = (b + 1) >> 1;
    assert(256 <= r && r < 512);
    return r;
}
60337356079SRichard Henderson
/*
 * Common wrapper to call recip_estimate
 *
 * The parameters are exponent and 64 bit fraction (without implicit
 * bit) where the binary point is nominally at bit 52.
 *
 * @exp:     in: biased exponent of the input; out: biased exponent of
 *           the result (computed as @exp_off minus the input exponent).
 * @exp_off: format-specific constant the input exponent is subtracted from.
 * @frac:    input fraction, left-aligned so its top bit is bit 51.
 *
 * Returns the result fraction, positioned the same way; the caller
 * extracts however many top bits its format needs.
 */

static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac)
{
    uint32_t scaled, estimate;
    uint64_t result_frac;
    int result_exp;

    /* Handle sub-normals */
    if (*exp == 0) {
        if (extract64(frac, 51, 1) == 0) {
            /* Top fraction bit clear: shift by two and use exponent -1. */
            *exp = -1;
            frac <<= 2;
        } else {
            frac <<= 1;
        }
    }

    /* scaled = UInt('1':fraction<51:44>) */
    scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
    estimate = recip_estimate(scaled);

    result_exp = exp_off - *exp;
    /* Only the low 8 bits of the 9-bit estimate are kept as fraction. */
    result_frac = deposit64(0, 44, 8, estimate);
    if (result_exp == 0) {
        /* Shift right by one and set bit 51. */
        result_frac = deposit64(result_frac >> 1, 51, 1, 1);
    } else if (result_exp == -1) {
        /* Shift right by two, set bits <51:50> to 0b01, exponent 0. */
        result_frac = deposit64(result_frac >> 2, 50, 2, 1);
        result_exp = 0;
    }

    *exp = result_exp;

    return result_frac;
}
64637356079SRichard Henderson
/*
 * Decide, from the rounding mode stored in @fpst and the sign of the
 * value, whether the recpe_* helpers should return infinity (true) or
 * the largest normal number (false) on their overflow path.
 * Any rounding mode other than the four listed aborts.
 */
static bool round_to_inf(float_status *fpst, bool sign_bit)
{
    switch (fpst->float_rounding_mode) {
    case float_round_nearest_even: /* Round to Nearest */
        return true;
    case float_round_up: /* Round to +Inf */
        return !sign_bit;
    case float_round_down: /* Round to -Inf */
        return sign_bit;
    case float_round_to_zero: /* Round to Zero */
        return false;
    default:
        g_assert_not_reached();
    }
}
66237356079SRichard Henderson
HELPER(recpe_f16)66337356079SRichard Henderson uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp)
66437356079SRichard Henderson {
66537356079SRichard Henderson float_status *fpst = fpstp;
66637356079SRichard Henderson float16 f16 = float16_squash_input_denormal(input, fpst);
66737356079SRichard Henderson uint32_t f16_val = float16_val(f16);
66837356079SRichard Henderson uint32_t f16_sign = float16_is_neg(f16);
66937356079SRichard Henderson int f16_exp = extract32(f16_val, 10, 5);
67037356079SRichard Henderson uint32_t f16_frac = extract32(f16_val, 0, 10);
67137356079SRichard Henderson uint64_t f64_frac;
67237356079SRichard Henderson
673103e7579SJoe Komlodi if (float16_is_any_nan(f16)) {
67437356079SRichard Henderson float16 nan = f16;
67537356079SRichard Henderson if (float16_is_signaling_nan(f16, fpst)) {
676103e7579SJoe Komlodi float_raise(float_flag_invalid, fpst);
67737356079SRichard Henderson if (!fpst->default_nan_mode) {
67837356079SRichard Henderson nan = float16_silence_nan(f16, fpst);
67937356079SRichard Henderson }
68037356079SRichard Henderson }
68137356079SRichard Henderson if (fpst->default_nan_mode) {
68237356079SRichard Henderson nan = float16_default_nan(fpst);
68337356079SRichard Henderson }
68437356079SRichard Henderson return nan;
68537356079SRichard Henderson } else if (float16_is_infinity(f16)) {
68637356079SRichard Henderson return float16_set_sign(float16_zero, float16_is_neg(f16));
68737356079SRichard Henderson } else if (float16_is_zero(f16)) {
68837356079SRichard Henderson float_raise(float_flag_divbyzero, fpst);
68937356079SRichard Henderson return float16_set_sign(float16_infinity, float16_is_neg(f16));
69037356079SRichard Henderson } else if (float16_abs(f16) < (1 << 8)) {
69137356079SRichard Henderson /* Abs(value) < 2.0^-16 */
69237356079SRichard Henderson float_raise(float_flag_overflow | float_flag_inexact, fpst);
69337356079SRichard Henderson if (round_to_inf(fpst, f16_sign)) {
69437356079SRichard Henderson return float16_set_sign(float16_infinity, f16_sign);
69537356079SRichard Henderson } else {
69637356079SRichard Henderson return float16_set_sign(float16_maxnorm, f16_sign);
69737356079SRichard Henderson }
69837356079SRichard Henderson } else if (f16_exp >= 29 && fpst->flush_to_zero) {
69937356079SRichard Henderson float_raise(float_flag_underflow, fpst);
70037356079SRichard Henderson return float16_set_sign(float16_zero, float16_is_neg(f16));
70137356079SRichard Henderson }
70237356079SRichard Henderson
70337356079SRichard Henderson f64_frac = call_recip_estimate(&f16_exp, 29,
70437356079SRichard Henderson ((uint64_t) f16_frac) << (52 - 10));
70537356079SRichard Henderson
70637356079SRichard Henderson /* result = sign : result_exp<4:0> : fraction<51:42> */
70737356079SRichard Henderson f16_val = deposit32(0, 15, 1, f16_sign);
70837356079SRichard Henderson f16_val = deposit32(f16_val, 10, 5, f16_exp);
70937356079SRichard Henderson f16_val = deposit32(f16_val, 0, 10, extract64(f64_frac, 52 - 10, 10));
71037356079SRichard Henderson return make_float16(f16_val);
71137356079SRichard Henderson }
71237356079SRichard Henderson
HELPER(recpe_f32)71337356079SRichard Henderson float32 HELPER(recpe_f32)(float32 input, void *fpstp)
71437356079SRichard Henderson {
71537356079SRichard Henderson float_status *fpst = fpstp;
71637356079SRichard Henderson float32 f32 = float32_squash_input_denormal(input, fpst);
71737356079SRichard Henderson uint32_t f32_val = float32_val(f32);
71837356079SRichard Henderson bool f32_sign = float32_is_neg(f32);
71937356079SRichard Henderson int f32_exp = extract32(f32_val, 23, 8);
72037356079SRichard Henderson uint32_t f32_frac = extract32(f32_val, 0, 23);
72137356079SRichard Henderson uint64_t f64_frac;
72237356079SRichard Henderson
723103e7579SJoe Komlodi if (float32_is_any_nan(f32)) {
72437356079SRichard Henderson float32 nan = f32;
72537356079SRichard Henderson if (float32_is_signaling_nan(f32, fpst)) {
726103e7579SJoe Komlodi float_raise(float_flag_invalid, fpst);
72737356079SRichard Henderson if (!fpst->default_nan_mode) {
72837356079SRichard Henderson nan = float32_silence_nan(f32, fpst);
72937356079SRichard Henderson }
73037356079SRichard Henderson }
73137356079SRichard Henderson if (fpst->default_nan_mode) {
73237356079SRichard Henderson nan = float32_default_nan(fpst);
73337356079SRichard Henderson }
73437356079SRichard Henderson return nan;
73537356079SRichard Henderson } else if (float32_is_infinity(f32)) {
73637356079SRichard Henderson return float32_set_sign(float32_zero, float32_is_neg(f32));
73737356079SRichard Henderson } else if (float32_is_zero(f32)) {
73837356079SRichard Henderson float_raise(float_flag_divbyzero, fpst);
73937356079SRichard Henderson return float32_set_sign(float32_infinity, float32_is_neg(f32));
74037356079SRichard Henderson } else if (float32_abs(f32) < (1ULL << 21)) {
74137356079SRichard Henderson /* Abs(value) < 2.0^-128 */
74237356079SRichard Henderson float_raise(float_flag_overflow | float_flag_inexact, fpst);
74337356079SRichard Henderson if (round_to_inf(fpst, f32_sign)) {
74437356079SRichard Henderson return float32_set_sign(float32_infinity, f32_sign);
74537356079SRichard Henderson } else {
74637356079SRichard Henderson return float32_set_sign(float32_maxnorm, f32_sign);
74737356079SRichard Henderson }
74837356079SRichard Henderson } else if (f32_exp >= 253 && fpst->flush_to_zero) {
74937356079SRichard Henderson float_raise(float_flag_underflow, fpst);
75037356079SRichard Henderson return float32_set_sign(float32_zero, float32_is_neg(f32));
75137356079SRichard Henderson }
75237356079SRichard Henderson
75337356079SRichard Henderson f64_frac = call_recip_estimate(&f32_exp, 253,
75437356079SRichard Henderson ((uint64_t) f32_frac) << (52 - 23));
75537356079SRichard Henderson
75637356079SRichard Henderson /* result = sign : result_exp<7:0> : fraction<51:29> */
75737356079SRichard Henderson f32_val = deposit32(0, 31, 1, f32_sign);
75837356079SRichard Henderson f32_val = deposit32(f32_val, 23, 8, f32_exp);
75937356079SRichard Henderson f32_val = deposit32(f32_val, 0, 23, extract64(f64_frac, 52 - 23, 23));
76037356079SRichard Henderson return make_float32(f32_val);
76137356079SRichard Henderson }
76237356079SRichard Henderson
HELPER(recpe_f64)76337356079SRichard Henderson float64 HELPER(recpe_f64)(float64 input, void *fpstp)
76437356079SRichard Henderson {
76537356079SRichard Henderson float_status *fpst = fpstp;
76637356079SRichard Henderson float64 f64 = float64_squash_input_denormal(input, fpst);
76737356079SRichard Henderson uint64_t f64_val = float64_val(f64);
76837356079SRichard Henderson bool f64_sign = float64_is_neg(f64);
76937356079SRichard Henderson int f64_exp = extract64(f64_val, 52, 11);
77037356079SRichard Henderson uint64_t f64_frac = extract64(f64_val, 0, 52);
77137356079SRichard Henderson
77237356079SRichard Henderson /* Deal with any special cases */
773103e7579SJoe Komlodi if (float64_is_any_nan(f64)) {
77437356079SRichard Henderson float64 nan = f64;
77537356079SRichard Henderson if (float64_is_signaling_nan(f64, fpst)) {
776103e7579SJoe Komlodi float_raise(float_flag_invalid, fpst);
77737356079SRichard Henderson if (!fpst->default_nan_mode) {
77837356079SRichard Henderson nan = float64_silence_nan(f64, fpst);
77937356079SRichard Henderson }
78037356079SRichard Henderson }
78137356079SRichard Henderson if (fpst->default_nan_mode) {
78237356079SRichard Henderson nan = float64_default_nan(fpst);
78337356079SRichard Henderson }
78437356079SRichard Henderson return nan;
78537356079SRichard Henderson } else if (float64_is_infinity(f64)) {
78637356079SRichard Henderson return float64_set_sign(float64_zero, float64_is_neg(f64));
78737356079SRichard Henderson } else if (float64_is_zero(f64)) {
78837356079SRichard Henderson float_raise(float_flag_divbyzero, fpst);
78937356079SRichard Henderson return float64_set_sign(float64_infinity, float64_is_neg(f64));
79037356079SRichard Henderson } else if ((f64_val & ~(1ULL << 63)) < (1ULL << 50)) {
79137356079SRichard Henderson /* Abs(value) < 2.0^-1024 */
79237356079SRichard Henderson float_raise(float_flag_overflow | float_flag_inexact, fpst);
79337356079SRichard Henderson if (round_to_inf(fpst, f64_sign)) {
79437356079SRichard Henderson return float64_set_sign(float64_infinity, f64_sign);
79537356079SRichard Henderson } else {
79637356079SRichard Henderson return float64_set_sign(float64_maxnorm, f64_sign);
79737356079SRichard Henderson }
79837356079SRichard Henderson } else if (f64_exp >= 2045 && fpst->flush_to_zero) {
79937356079SRichard Henderson float_raise(float_flag_underflow, fpst);
80037356079SRichard Henderson return float64_set_sign(float64_zero, float64_is_neg(f64));
80137356079SRichard Henderson }
80237356079SRichard Henderson
80337356079SRichard Henderson f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac);
80437356079SRichard Henderson
80537356079SRichard Henderson /* result = sign : result_exp<10:0> : fraction<51:0>; */
80637356079SRichard Henderson f64_val = deposit64(0, 63, 1, f64_sign);
80737356079SRichard Henderson f64_val = deposit64(f64_val, 52, 11, f64_exp);
80837356079SRichard Henderson f64_val = deposit64(f64_val, 0, 52, f64_frac);
80937356079SRichard Henderson return make_float64(f64_val);
81037356079SRichard Henderson }
81137356079SRichard Henderson
/* The algorithm that must be used to calculate the estimate
 * is specified by the ARM ARM.
 *
 * a must be in 128 .. 511; the returned estimate is in 256 .. 511.
 * Both ranges are enforced with assert().
 */

static int do_recip_sqrt_estimate(int a)
{
    int b, estimate;

    assert(128 <= a && a < 512);
    if (a < 256) {
        a = a * 2 + 1;
    } else {
        /* Clear the lowest bit before scaling. */
        a = (a >> 1) << 1;
        a = (a + 1) * 2;
    }
    /* Find the largest b (starting from 512) with a * b * b < 2^28. */
    b = 512;
    while (a * (b + 1) * (b + 1) < (1 << 28)) {
        b += 1;
    }
    estimate = (b + 1) / 2;
    assert(256 <= estimate && estimate < 512);

    return estimate;
}
83637356079SRichard Henderson
83737356079SRichard Henderson
/*
 * Common wrapper to call do_recip_sqrt_estimate.
 *
 * @exp:     in: biased exponent of the input; out: result exponent,
 *           computed as (@exp_off - normalised input exponent) / 2.
 * @exp_off: format-specific constant the input exponent is subtracted from.
 * @frac:    input fraction, left-aligned so its top bit is bit 51.
 *
 * Returns the low 8 bits of the estimate placed at bits <51:44>.
 *
 * When *exp is 0 the fraction must be non-zero, otherwise the
 * normalisation loop never terminates; the rsqrte_* helpers return
 * early for zero inputs before calling this.
 */
static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
{
    int estimate;
    uint32_t scaled;

    if (*exp == 0) {
        /* Shift left until bit 51 is set, decrementing *exp per shift. */
        while (extract64(frac, 51, 1) == 0) {
            frac = frac << 1;
            *exp -= 1;
        }
        /* Drop that leading one bit and realign the remainder. */
        frac = extract64(frac, 0, 51) << 1;
    }

    if (*exp & 1) {
        /* scaled = UInt('01':fraction<51:45>) */
        scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
    } else {
        /* scaled = UInt('1':fraction<51:44>) */
        scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
    }
    estimate = do_recip_sqrt_estimate(scaled);

    *exp = (exp_off - *exp) / 2;
    return extract64(estimate, 0, 8) << 44;
}
86337356079SRichard Henderson
HELPER(rsqrte_f16)86437356079SRichard Henderson uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp)
86537356079SRichard Henderson {
86637356079SRichard Henderson float_status *s = fpstp;
86737356079SRichard Henderson float16 f16 = float16_squash_input_denormal(input, s);
86837356079SRichard Henderson uint16_t val = float16_val(f16);
86937356079SRichard Henderson bool f16_sign = float16_is_neg(f16);
87037356079SRichard Henderson int f16_exp = extract32(val, 10, 5);
87137356079SRichard Henderson uint16_t f16_frac = extract32(val, 0, 10);
87237356079SRichard Henderson uint64_t f64_frac;
87337356079SRichard Henderson
874103e7579SJoe Komlodi if (float16_is_any_nan(f16)) {
875103e7579SJoe Komlodi float16 nan = f16;
876103e7579SJoe Komlodi if (float16_is_signaling_nan(f16, s)) {
87737356079SRichard Henderson float_raise(float_flag_invalid, s);
87837356079SRichard Henderson if (!s->default_nan_mode) {
87937356079SRichard Henderson nan = float16_silence_nan(f16, fpstp);
88037356079SRichard Henderson }
88137356079SRichard Henderson }
88237356079SRichard Henderson if (s->default_nan_mode) {
88337356079SRichard Henderson nan = float16_default_nan(s);
88437356079SRichard Henderson }
88537356079SRichard Henderson return nan;
88637356079SRichard Henderson } else if (float16_is_zero(f16)) {
88737356079SRichard Henderson float_raise(float_flag_divbyzero, s);
88837356079SRichard Henderson return float16_set_sign(float16_infinity, f16_sign);
88937356079SRichard Henderson } else if (f16_sign) {
89037356079SRichard Henderson float_raise(float_flag_invalid, s);
89137356079SRichard Henderson return float16_default_nan(s);
89237356079SRichard Henderson } else if (float16_is_infinity(f16)) {
89337356079SRichard Henderson return float16_zero;
89437356079SRichard Henderson }
89537356079SRichard Henderson
89637356079SRichard Henderson /* Scale and normalize to a double-precision value between 0.25 and 1.0,
89737356079SRichard Henderson * preserving the parity of the exponent. */
89837356079SRichard Henderson
89937356079SRichard Henderson f64_frac = ((uint64_t) f16_frac) << (52 - 10);
90037356079SRichard Henderson
90137356079SRichard Henderson f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac);
90237356079SRichard Henderson
90337356079SRichard Henderson /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
90437356079SRichard Henderson val = deposit32(0, 15, 1, f16_sign);
90537356079SRichard Henderson val = deposit32(val, 10, 5, f16_exp);
90637356079SRichard Henderson val = deposit32(val, 2, 8, extract64(f64_frac, 52 - 8, 8));
90737356079SRichard Henderson return make_float16(val);
90837356079SRichard Henderson }
90937356079SRichard Henderson
HELPER(rsqrte_f32)91037356079SRichard Henderson float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
91137356079SRichard Henderson {
91237356079SRichard Henderson float_status *s = fpstp;
91337356079SRichard Henderson float32 f32 = float32_squash_input_denormal(input, s);
91437356079SRichard Henderson uint32_t val = float32_val(f32);
91537356079SRichard Henderson uint32_t f32_sign = float32_is_neg(f32);
91637356079SRichard Henderson int f32_exp = extract32(val, 23, 8);
91737356079SRichard Henderson uint32_t f32_frac = extract32(val, 0, 23);
91837356079SRichard Henderson uint64_t f64_frac;
91937356079SRichard Henderson
920103e7579SJoe Komlodi if (float32_is_any_nan(f32)) {
921103e7579SJoe Komlodi float32 nan = f32;
922103e7579SJoe Komlodi if (float32_is_signaling_nan(f32, s)) {
92337356079SRichard Henderson float_raise(float_flag_invalid, s);
92437356079SRichard Henderson if (!s->default_nan_mode) {
92537356079SRichard Henderson nan = float32_silence_nan(f32, fpstp);
92637356079SRichard Henderson }
92737356079SRichard Henderson }
92837356079SRichard Henderson if (s->default_nan_mode) {
92937356079SRichard Henderson nan = float32_default_nan(s);
93037356079SRichard Henderson }
93137356079SRichard Henderson return nan;
93237356079SRichard Henderson } else if (float32_is_zero(f32)) {
93337356079SRichard Henderson float_raise(float_flag_divbyzero, s);
93437356079SRichard Henderson return float32_set_sign(float32_infinity, float32_is_neg(f32));
93537356079SRichard Henderson } else if (float32_is_neg(f32)) {
93637356079SRichard Henderson float_raise(float_flag_invalid, s);
93737356079SRichard Henderson return float32_default_nan(s);
93837356079SRichard Henderson } else if (float32_is_infinity(f32)) {
93937356079SRichard Henderson return float32_zero;
94037356079SRichard Henderson }
94137356079SRichard Henderson
94237356079SRichard Henderson /* Scale and normalize to a double-precision value between 0.25 and 1.0,
94337356079SRichard Henderson * preserving the parity of the exponent. */
94437356079SRichard Henderson
94537356079SRichard Henderson f64_frac = ((uint64_t) f32_frac) << 29;
94637356079SRichard Henderson
94737356079SRichard Henderson f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac);
94837356079SRichard Henderson
94937356079SRichard Henderson /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */
95037356079SRichard Henderson val = deposit32(0, 31, 1, f32_sign);
95137356079SRichard Henderson val = deposit32(val, 23, 8, f32_exp);
95237356079SRichard Henderson val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
95337356079SRichard Henderson return make_float32(val);
95437356079SRichard Henderson }
95537356079SRichard Henderson
HELPER(rsqrte_f64)95637356079SRichard Henderson float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
95737356079SRichard Henderson {
95837356079SRichard Henderson float_status *s = fpstp;
95937356079SRichard Henderson float64 f64 = float64_squash_input_denormal(input, s);
96037356079SRichard Henderson uint64_t val = float64_val(f64);
96137356079SRichard Henderson bool f64_sign = float64_is_neg(f64);
96237356079SRichard Henderson int f64_exp = extract64(val, 52, 11);
96337356079SRichard Henderson uint64_t f64_frac = extract64(val, 0, 52);
96437356079SRichard Henderson
965103e7579SJoe Komlodi if (float64_is_any_nan(f64)) {
966103e7579SJoe Komlodi float64 nan = f64;
967103e7579SJoe Komlodi if (float64_is_signaling_nan(f64, s)) {
96837356079SRichard Henderson float_raise(float_flag_invalid, s);
96937356079SRichard Henderson if (!s->default_nan_mode) {
97037356079SRichard Henderson nan = float64_silence_nan(f64, fpstp);
97137356079SRichard Henderson }
97237356079SRichard Henderson }
97337356079SRichard Henderson if (s->default_nan_mode) {
97437356079SRichard Henderson nan = float64_default_nan(s);
97537356079SRichard Henderson }
97637356079SRichard Henderson return nan;
97737356079SRichard Henderson } else if (float64_is_zero(f64)) {
97837356079SRichard Henderson float_raise(float_flag_divbyzero, s);
97937356079SRichard Henderson return float64_set_sign(float64_infinity, float64_is_neg(f64));
98037356079SRichard Henderson } else if (float64_is_neg(f64)) {
98137356079SRichard Henderson float_raise(float_flag_invalid, s);
98237356079SRichard Henderson return float64_default_nan(s);
98337356079SRichard Henderson } else if (float64_is_infinity(f64)) {
98437356079SRichard Henderson return float64_zero;
98537356079SRichard Henderson }
98637356079SRichard Henderson
98737356079SRichard Henderson f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac);
98837356079SRichard Henderson
98937356079SRichard Henderson /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(44) */
99037356079SRichard Henderson val = deposit64(0, 61, 1, f64_sign);
99137356079SRichard Henderson val = deposit64(val, 52, 11, f64_exp);
992fe6fb4beSRichard Henderson val = deposit64(val, 44, 8, extract64(f64_frac, 52 - 8, 8));
99337356079SRichard Henderson return make_float64(val);
99437356079SRichard Henderson }
99537356079SRichard Henderson
HELPER(recpe_u32)99637356079SRichard Henderson uint32_t HELPER(recpe_u32)(uint32_t a)
99737356079SRichard Henderson {
99837356079SRichard Henderson int input, estimate;
99937356079SRichard Henderson
100037356079SRichard Henderson if ((a & 0x80000000) == 0) {
100137356079SRichard Henderson return 0xffffffff;
100237356079SRichard Henderson }
100337356079SRichard Henderson
100437356079SRichard Henderson input = extract32(a, 23, 9);
100537356079SRichard Henderson estimate = recip_estimate(input);
1006fe6fb4beSRichard Henderson
100737356079SRichard Henderson return deposit32(0, (32 - 9), 9, estimate);
100837356079SRichard Henderson }
100937356079SRichard Henderson
HELPER(rsqrte_u32)101037356079SRichard Henderson uint32_t HELPER(rsqrte_u32)(uint32_t a)
101137356079SRichard Henderson {
101237356079SRichard Henderson int estimate;
101337356079SRichard Henderson
101437356079SRichard Henderson if ((a & 0xc0000000) == 0) {
101537356079SRichard Henderson return 0xffffffff;
101637356079SRichard Henderson }
101737356079SRichard Henderson
101837356079SRichard Henderson estimate = do_recip_sqrt_estimate(extract32(a, 23, 9));
101937356079SRichard Henderson
10209886fe28SPeter Maydell return deposit32(0, 23, 9, estimate);
10219886fe28SPeter Maydell }
10229886fe28SPeter Maydell
10239886fe28SPeter Maydell /* VFPv4 fused multiply-accumulate */
VFP_HELPER(muladd,h)10249886fe28SPeter Maydell dh_ctype_f16 VFP_HELPER(muladd, h)(dh_ctype_f16 a, dh_ctype_f16 b,
10259886fe28SPeter Maydell dh_ctype_f16 c, void *fpstp)
10269886fe28SPeter Maydell {
102737356079SRichard Henderson float_status *fpst = fpstp;
102837356079SRichard Henderson return float16_muladd(a, b, c, 0, fpst);
102937356079SRichard Henderson }
103037356079SRichard Henderson
VFP_HELPER(muladd,s)103137356079SRichard Henderson float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c, void *fpstp)
103237356079SRichard Henderson {
103337356079SRichard Henderson float_status *fpst = fpstp;
103437356079SRichard Henderson return float32_muladd(a, b, c, 0, fpst);
103537356079SRichard Henderson }
103637356079SRichard Henderson
VFP_HELPER(muladd,d)103737356079SRichard Henderson float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp)
103837356079SRichard Henderson {
103937356079SRichard Henderson float_status *fpst = fpstp;
10400a6f4b4cSPeter Maydell return float64_muladd(a, b, c, 0, fpst);
10410a6f4b4cSPeter Maydell }
10420a6f4b4cSPeter Maydell
10430a6f4b4cSPeter Maydell /* ARMv8 round to integral */
HELPER(rinth_exact)10440a6f4b4cSPeter Maydell dh_ctype_f16 HELPER(rinth_exact)(dh_ctype_f16 x, void *fp_status)
104537356079SRichard Henderson {
104637356079SRichard Henderson return float16_round_to_int(x, fp_status);
104737356079SRichard Henderson }
104837356079SRichard Henderson
HELPER(rints_exact)104937356079SRichard Henderson float32 HELPER(rints_exact)(float32 x, void *fp_status)
105037356079SRichard Henderson {
105137356079SRichard Henderson return float32_round_to_int(x, fp_status);
105237356079SRichard Henderson }
105337356079SRichard Henderson
HELPER(rintd_exact)105437356079SRichard Henderson float64 HELPER(rintd_exact)(float64 x, void *fp_status)
10550a6f4b4cSPeter Maydell {
10560a6f4b4cSPeter Maydell return float64_round_to_int(x, fp_status);
10570a6f4b4cSPeter Maydell }
10580a6f4b4cSPeter Maydell
HELPER(rinth)10590a6f4b4cSPeter Maydell dh_ctype_f16 HELPER(rinth)(dh_ctype_f16 x, void *fp_status)
10600a6f4b4cSPeter Maydell {
10610a6f4b4cSPeter Maydell int old_flags = get_float_exception_flags(fp_status), new_flags;
10620a6f4b4cSPeter Maydell float16 ret;
10630a6f4b4cSPeter Maydell
10640a6f4b4cSPeter Maydell ret = float16_round_to_int(x, fp_status);
10650a6f4b4cSPeter Maydell
10660a6f4b4cSPeter Maydell /* Suppress any inexact exceptions the conversion produced */
10670a6f4b4cSPeter Maydell if (!(old_flags & float_flag_inexact)) {
10680a6f4b4cSPeter Maydell new_flags = get_float_exception_flags(fp_status);
10690a6f4b4cSPeter Maydell set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
10700a6f4b4cSPeter Maydell }
107137356079SRichard Henderson
107237356079SRichard Henderson return ret;
107337356079SRichard Henderson }
107437356079SRichard Henderson
HELPER(rints)107537356079SRichard Henderson float32 HELPER(rints)(float32 x, void *fp_status)
107637356079SRichard Henderson {
107737356079SRichard Henderson int old_flags = get_float_exception_flags(fp_status), new_flags;
107837356079SRichard Henderson float32 ret;
107937356079SRichard Henderson
108037356079SRichard Henderson ret = float32_round_to_int(x, fp_status);
108137356079SRichard Henderson
108237356079SRichard Henderson /* Suppress any inexact exceptions the conversion produced */
108337356079SRichard Henderson if (!(old_flags & float_flag_inexact)) {
108437356079SRichard Henderson new_flags = get_float_exception_flags(fp_status);
108537356079SRichard Henderson set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
108637356079SRichard Henderson }
108737356079SRichard Henderson
108837356079SRichard Henderson return ret;
108937356079SRichard Henderson }
109037356079SRichard Henderson
HELPER(rintd)109137356079SRichard Henderson float64 HELPER(rintd)(float64 x, void *fp_status)
109237356079SRichard Henderson {
109337356079SRichard Henderson int old_flags = get_float_exception_flags(fp_status), new_flags;
109437356079SRichard Henderson float64 ret;
109537356079SRichard Henderson
109637356079SRichard Henderson ret = float64_round_to_int(x, fp_status);
109737356079SRichard Henderson
109837356079SRichard Henderson new_flags = get_float_exception_flags(fp_status);
109937356079SRichard Henderson
110037356079SRichard Henderson /* Suppress any inexact exceptions the conversion produced */
110137356079SRichard Henderson if (!(old_flags & float_flag_inexact)) {
110237356079SRichard Henderson new_flags = get_float_exception_flags(fp_status);
110337356079SRichard Henderson set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
110437356079SRichard Henderson }
110537356079SRichard Henderson
11066ce21abdSRichard Henderson return ret;
11076ce21abdSRichard Henderson }
11086ce21abdSRichard Henderson
11096ce21abdSRichard Henderson /* Convert ARM rounding mode to softfloat */
11106ce21abdSRichard Henderson const FloatRoundMode arm_rmode_to_sf_map[] = {
11116ce21abdSRichard Henderson [FPROUNDING_TIEEVEN] = float_round_nearest_even,
11126ce21abdSRichard Henderson [FPROUNDING_POSINF] = float_round_up,
11136ce21abdSRichard Henderson [FPROUNDING_NEGINF] = float_round_down,
11146c1f6f27SRichard Henderson [FPROUNDING_ZERO] = float_round_to_zero,
11156c1f6f27SRichard Henderson [FPROUNDING_TIEAWAY] = float_round_ties_away,
11166c1f6f27SRichard Henderson [FPROUNDING_ODD] = float_round_to_odd,
11176c1f6f27SRichard Henderson };
11186c1f6f27SRichard Henderson
11196c1f6f27SRichard Henderson /*
11206c1f6f27SRichard Henderson * Implement float64 to int32_t conversion without saturation;
11216c1f6f27SRichard Henderson * the result is supplied modulo 2^32.
11227619129fSRichard Henderson */
HELPER(fjcvtzs)11237619129fSRichard Henderson uint64_t HELPER(fjcvtzs)(float64 value, void *vstatus)
11246c1f6f27SRichard Henderson {
112534d03ad9SRichard Henderson float_status *status = vstatus;
112634d03ad9SRichard Henderson uint32_t frac, e_old, e_new;
112734d03ad9SRichard Henderson bool inexact;
112834d03ad9SRichard Henderson
112934d03ad9SRichard Henderson e_old = get_float_exception_flags(status);
11306c1f6f27SRichard Henderson set_float_exception_flags(0, status);
11317619129fSRichard Henderson frac = float64_to_int32_modulo(value, float_round_to_zero, status);
11327619129fSRichard Henderson e_new = get_float_exception_flags(status);
11337619129fSRichard Henderson set_float_exception_flags(e_old | e_new, status);
11347619129fSRichard Henderson
11357619129fSRichard Henderson /* Normal inexact, denormal with flush-to-zero, or overflow or NaN */
11366c1f6f27SRichard Henderson inexact = e_new & (float_flag_inexact |
11377619129fSRichard Henderson float_flag_input_denormal |
11386c1f6f27SRichard Henderson float_flag_invalid);
11396c1f6f27SRichard Henderson
11406c1f6f27SRichard Henderson /* While not inexact for IEEE FP, -0.0 is inexact for JavaScript. */
11416c1f6f27SRichard Henderson inexact |= value == float64_chs(float64_zero);
11426c1f6f27SRichard Henderson
11436c1f6f27SRichard Henderson /* Pack the result and the env->ZF representation of Z together. */
11446c1f6f27SRichard Henderson return deposit64(frac, 32, 32, inexact);
11456c1f6f27SRichard Henderson }
11466c1f6f27SRichard Henderson
HELPER(vjcvt)11476c1f6f27SRichard Henderson uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env)
11486c1f6f27SRichard Henderson {
11496c1f6f27SRichard Henderson uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status);
1150a26db547SPeter Maydell uint32_t result = pair;
11516c1f6f27SRichard Henderson uint32_t z = (pair >> 32) == 0;
11526c1f6f27SRichard Henderson
11536c1f6f27SRichard Henderson /* Store Z, clear NCV, in FPSCR.NZCV. */
11546bea2563SRichard Henderson env->vfp.fpsr = (env->vfp.fpsr & ~FPSR_NZCV_MASK) | (z * FPSR_Z);
11556bea2563SRichard Henderson
11566bea2563SRichard Henderson return result;
11576bea2563SRichard Henderson }
11586bea2563SRichard Henderson
11596bea2563SRichard Henderson /* Round a float32 to an integer that fits in int32_t or int64_t. */
frint_s(float32 f,float_status * fpst,int intsize)11606bea2563SRichard Henderson static float32 frint_s(float32 f, float_status *fpst, int intsize)
11616bea2563SRichard Henderson {
11626bea2563SRichard Henderson int old_flags = get_float_exception_flags(fpst);
11636bea2563SRichard Henderson uint32_t exp = extract32(f, 23, 8);
11646bea2563SRichard Henderson
11656bea2563SRichard Henderson if (unlikely(exp == 0xff)) {
11666bea2563SRichard Henderson /* NaN or Inf. */
11676bea2563SRichard Henderson goto overflow;
11686bea2563SRichard Henderson }
11696bea2563SRichard Henderson
11706bea2563SRichard Henderson /* Round and re-extract the exponent. */
11716bea2563SRichard Henderson f = float32_round_to_int(f, fpst);
11726bea2563SRichard Henderson exp = extract32(f, 23, 8);
11736bea2563SRichard Henderson
11746bea2563SRichard Henderson /* Validate the range of the result. */
11756bea2563SRichard Henderson if (exp < 126 + intsize) {
11766bea2563SRichard Henderson /* abs(F) <= INT{N}_MAX */
11776bea2563SRichard Henderson return f;
11786bea2563SRichard Henderson }
11796bea2563SRichard Henderson if (exp == 126 + intsize) {
11806bea2563SRichard Henderson uint32_t sign = extract32(f, 31, 1);
11816bea2563SRichard Henderson uint32_t frac = extract32(f, 0, 23);
11826bea2563SRichard Henderson if (sign && frac == 0) {
11836bea2563SRichard Henderson /* F == INT{N}_MIN */
11846bea2563SRichard Henderson return f;
11856bea2563SRichard Henderson }
11866bea2563SRichard Henderson }
11876bea2563SRichard Henderson
11886bea2563SRichard Henderson overflow:
11896bea2563SRichard Henderson /*
11906bea2563SRichard Henderson * Raise Invalid and return INT{N}_MIN as a float. Revert any
11916bea2563SRichard Henderson * inexact exception float32_round_to_int may have raised.
11926bea2563SRichard Henderson */
11936bea2563SRichard Henderson set_float_exception_flags(old_flags | float_flag_invalid, fpst);
11946bea2563SRichard Henderson return (0x100u + 126u + intsize) << 23;
11956bea2563SRichard Henderson }
11966bea2563SRichard Henderson
HELPER(frint32_s)11976bea2563SRichard Henderson float32 HELPER(frint32_s)(float32 f, void *fpst)
11986bea2563SRichard Henderson {
11996bea2563SRichard Henderson return frint_s(f, fpst, 32);
12006bea2563SRichard Henderson }
12016bea2563SRichard Henderson
HELPER(frint64_s)12026bea2563SRichard Henderson float32 HELPER(frint64_s)(float32 f, void *fpst)
12036bea2563SRichard Henderson {
12046bea2563SRichard Henderson return frint_s(f, fpst, 64);
12056bea2563SRichard Henderson }
12066bea2563SRichard Henderson
12076bea2563SRichard Henderson /* Round a float64 to an integer that fits in int32_t or int64_t. */
frint_d(float64 f,float_status * fpst,int intsize)12086bea2563SRichard Henderson static float64 frint_d(float64 f, float_status *fpst, int intsize)
12096bea2563SRichard Henderson {
12106bea2563SRichard Henderson int old_flags = get_float_exception_flags(fpst);
12116bea2563SRichard Henderson uint32_t exp = extract64(f, 52, 11);
12126bea2563SRichard Henderson
12136bea2563SRichard Henderson if (unlikely(exp == 0x7ff)) {
12146bea2563SRichard Henderson /* NaN or Inf. */
12156bea2563SRichard Henderson goto overflow;
12166bea2563SRichard Henderson }
12176bea2563SRichard Henderson
12186bea2563SRichard Henderson /* Round and re-extract the exponent. */
12196bea2563SRichard Henderson f = float64_round_to_int(f, fpst);
12206bea2563SRichard Henderson exp = extract64(f, 52, 11);
12216bea2563SRichard Henderson
12226bea2563SRichard Henderson /* Validate the range of the result. */
12236bea2563SRichard Henderson if (exp < 1022 + intsize) {
12246bea2563SRichard Henderson /* abs(F) <= INT{N}_MAX */
12256bea2563SRichard Henderson return f;
12266bea2563SRichard Henderson }
12276bea2563SRichard Henderson if (exp == 1022 + intsize) {
12286bea2563SRichard Henderson uint64_t sign = extract64(f, 63, 1);
12296bea2563SRichard Henderson uint64_t frac = extract64(f, 0, 52);
12306bea2563SRichard Henderson if (sign && frac == 0) {
12316bea2563SRichard Henderson /* F == INT{N}_MIN */
12326bea2563SRichard Henderson return f;
12336bea2563SRichard Henderson }
12346bea2563SRichard Henderson }
12356bea2563SRichard Henderson
12366bea2563SRichard Henderson overflow:
12376bea2563SRichard Henderson /*
12386bea2563SRichard Henderson * Raise Invalid and return INT{N}_MIN as a float. Revert any
12396bea2563SRichard Henderson * inexact exception float64_round_to_int may have raised.
12406bea2563SRichard Henderson */
12416bea2563SRichard Henderson set_float_exception_flags(old_flags | float_flag_invalid, fpst);
12426bea2563SRichard Henderson return (uint64_t)(0x800 + 1022 + intsize) << 52;
12436bea2563SRichard Henderson }
12446bea2563SRichard Henderson
HELPER(frint32_d)12456bea2563SRichard Henderson float64 HELPER(frint32_d)(float64 f, void *fpst)
12466bea2563SRichard Henderson {
12476bea2563SRichard Henderson return frint_d(f, fpst, 32);
12486bea2563SRichard Henderson }
12496bea2563SRichard Henderson
HELPER(frint64_d)12504a15527cSPhilippe Mathieu-Daudé float64 HELPER(frint64_d)(float64 f, void *fpst)
12519ca1d776SMarc Zyngier {
12529ca1d776SMarc Zyngier return frint_d(f, fpst, 64);
12539ca1d776SMarc Zyngier }
12549ca1d776SMarc Zyngier
HELPER(check_hcr_el2_trap)12559ca1d776SMarc Zyngier void HELPER(check_hcr_el2_trap)(CPUARMState *env, uint32_t rt, uint32_t reg)
12569ca1d776SMarc Zyngier {
12579ca1d776SMarc Zyngier uint32_t syndrome;
12589ca1d776SMarc Zyngier
12599ca1d776SMarc Zyngier switch (reg) {
12609ca1d776SMarc Zyngier case ARM_VFP_MVFR0:
12619ca1d776SMarc Zyngier case ARM_VFP_MVFR1:
12629ca1d776SMarc Zyngier case ARM_VFP_MVFR2:
12639ca1d776SMarc Zyngier if (!(arm_hcr_el2_eff(env) & HCR_TID3)) {
12649ca1d776SMarc Zyngier return;
12659ca1d776SMarc Zyngier }
12669ca1d776SMarc Zyngier break;
12679ca1d776SMarc Zyngier case ARM_VFP_FPSID:
12689ca1d776SMarc Zyngier if (!(arm_hcr_el2_eff(env) & HCR_TID0)) {
12699ca1d776SMarc Zyngier return;
12709ca1d776SMarc Zyngier }
12719ca1d776SMarc Zyngier break;
12729ca1d776SMarc Zyngier default:
12739ca1d776SMarc Zyngier g_assert_not_reached();
12749ca1d776SMarc Zyngier }
12759ca1d776SMarc Zyngier
12769ca1d776SMarc Zyngier syndrome = ((EC_FPIDTRAP << ARM_EL_EC_SHIFT)
12779ca1d776SMarc Zyngier | ARM_EL_IL
12789ca1d776SMarc Zyngier | (1 << 24) | (0xe << 20) | (7 << 14)
12799ca1d776SMarc Zyngier | (reg << 10) | (rt << 5) | 1);
12804a15527cSPhilippe Mathieu-Daudé
1281 raise_exception(env, EXCP_HYP_TRAP, syndrome, 2);
1282 }
1283
1284 #endif
1285