xref: /openbmc/qemu/target/arm/tcg/translate-vfp.c (revision ba379542)
1 /*
2  *  ARM translation: AArch32 VFP instructions
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *  Copyright (c) 2005-2007 CodeSourcery
6  *  Copyright (c) 2007 OpenedHand, Ltd.
7  *  Copyright (c) 2019 Linaro, Ltd.
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21  */
22 
23 #include "qemu/osdep.h"
24 #include "translate.h"
25 #include "translate-a32.h"
26 
27 /* Include the generated VFP decoder */
28 #include "decode-vfp.c.inc"
29 #include "decode-vfp-uncond.c.inc"
30 
31 static inline void vfp_load_reg64(TCGv_i64 var, int reg)
32 {
33     tcg_gen_ld_i64(var, tcg_env, vfp_reg_offset(true, reg));
34 }
35 
36 static inline void vfp_store_reg64(TCGv_i64 var, int reg)
37 {
38     tcg_gen_st_i64(var, tcg_env, vfp_reg_offset(true, reg));
39 }
40 
41 static inline void vfp_load_reg32(TCGv_i32 var, int reg)
42 {
43     tcg_gen_ld_i32(var, tcg_env, vfp_reg_offset(false, reg));
44 }
45 
46 static inline void vfp_store_reg32(TCGv_i32 var, int reg)
47 {
48     tcg_gen_st_i32(var, tcg_env, vfp_reg_offset(false, reg));
49 }
50 
51 static inline void vfp_load_reg16(TCGv_i32 var, int reg)
52 {
53     tcg_gen_ld16u_i32(var, tcg_env,
54                       vfp_reg_offset(false, reg) + HOST_BIG_ENDIAN * 2);
55 }
56 
57 /*
58  * The imm8 encodes the sign bit, enough bits to represent an exponent in
59  * the range 01....1xx to 10....0xx, and the most significant 4 bits of
60  * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
61  */
62 uint64_t vfp_expand_imm(int size, uint8_t imm8)
63 {
64     uint64_t imm;
65 
66     switch (size) {
67     case MO_64:
68         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
69             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
70             extract32(imm8, 0, 6);
71         imm <<= 48;
72         break;
73     case MO_32:
74         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
75             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
76             (extract32(imm8, 0, 6) << 3);
77         imm <<= 16;
78         break;
79     case MO_16:
80         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
81             (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
82             (extract32(imm8, 0, 6) << 6);
83         break;
84     default:
85         g_assert_not_reached();
86     }
87     return imm;
88 }
89 
90 /*
91  * Return the offset of a 16-bit half of the specified VFP single-precision
92  * register. If top is true, returns the top 16 bits; otherwise the bottom
93  * 16 bits.
94  */
95 static inline long vfp_f16_offset(unsigned reg, bool top)
96 {
97     long offs = vfp_reg_offset(false, reg);
98 #if HOST_BIG_ENDIAN
99     if (!top) {
100         offs += 2;
101     }
102 #else
103     if (top) {
104         offs += 2;
105     }
106 #endif
107     return offs;
108 }
109 
110 /*
111  * Generate code for M-profile lazy FP state preservation if needed;
112  * this corresponds to the pseudocode PreserveFPState() function.
113  */
114 static void gen_preserve_fp_state(DisasContext *s, bool skip_context_update)
115 {
116     if (s->v7m_lspact) {
117         /*
118          * Lazy state saving affects external memory and also the NVIC,
119          * so we must mark it as an IO operation for icount (and cause
120          * this to be the last insn in the TB).
121          */
122         if (translator_io_start(&s->base)) {
123             s->base.is_jmp = DISAS_UPDATE_EXIT;
124         }
125         gen_helper_v7m_preserve_fp_state(tcg_env);
126         /*
127          * If the preserve_fp_state helper doesn't throw an exception
128          * then it will clear LSPACT; we don't need to repeat this for
129          * any further FP insns in this TB.
130          */
131         s->v7m_lspact = false;
132         /*
133          * The helper might have zeroed VPR, so we do not know the
134          * correct value for the MVE_NO_PRED TB flag any more.
135          * If we're about to create a new fp context then that
136          * will precisely determine the MVE_NO_PRED value (see
137          * gen_update_fp_context()). Otherwise, we must:
138          *  - set s->mve_no_pred to false, so this instruction
139          *    is generated to use helper functions
140          *  - end the TB now, without chaining to the next TB
141          */
142         if (skip_context_update || !s->v7m_new_fp_ctxt_needed) {
143             s->mve_no_pred = false;
144             s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
145         }
146     }
147 }
148 
149 /*
150  * Generate code for M-profile FP context handling: update the
151  * ownership of the FP context, and create a new context if
152  * necessary. This corresponds to the parts of the pseudocode
153  * ExecuteFPCheck() after the initial PreserveFPState() call.
154  */
155 static void gen_update_fp_context(DisasContext *s)
156 {
157     /* Update ownership of FP context: set FPCCR.S to match current state */
158     if (s->v8m_fpccr_s_wrong) {
159         TCGv_i32 tmp;
160 
161         tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
162         if (s->v8m_secure) {
163             tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
164         } else {
165             tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
166         }
167         store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
168         /* Don't need to do this for any further FP insns in this TB */
169         s->v8m_fpccr_s_wrong = false;
170     }
171 
172     if (s->v7m_new_fp_ctxt_needed) {
173         /*
174          * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA,
175          * the FPSCR, and VPR.
176          */
177         TCGv_i32 control, fpscr;
178         uint32_t bits = R_V7M_CONTROL_FPCA_MASK;
179 
180         fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
181         gen_helper_vfp_set_fpscr(tcg_env, fpscr);
182         if (dc_isar_feature(aa32_mve, s)) {
183             store_cpu_field(tcg_constant_i32(0), v7m.vpr);
184         }
185         /*
186          * We just updated the FPSCR and VPR. Some of this state is cached
187          * in the MVE_NO_PRED TB flag. We want to avoid having to end the
188          * TB here, which means we need the new value of the MVE_NO_PRED
189          * flag to be exactly known here and the same for all executions.
190          * Luckily FPDSCR.LTPSIZE is always constant 4 and the VPR is
191          * always set to 0, so the new MVE_NO_PRED flag is always 1
192          * if and only if we have MVE.
193          *
194          * (The other FPSCR state cached in TB flags is VECLEN and VECSTRIDE,
195          * but those do not exist for M-profile, so are not relevant here.)
196          */
197         s->mve_no_pred = dc_isar_feature(aa32_mve, s);
198 
199         if (s->v8m_secure) {
200             bits |= R_V7M_CONTROL_SFPA_MASK;
201         }
202         control = load_cpu_field(v7m.control[M_REG_S]);
203         tcg_gen_ori_i32(control, control, bits);
204         store_cpu_field(control, v7m.control[M_REG_S]);
205         /* Don't need to do this for any further FP insns in this TB */
206         s->v7m_new_fp_ctxt_needed = false;
207     }
208 }
209 
210 /*
211  * Check that VFP access is enabled, A-profile specific version.
212  *
213  * If VFP is enabled, return true. If not, emit code to generate an
214  * appropriate exception and return false.
215  * The ignore_vfp_enabled argument specifies that we should ignore
216  * whether VFP is enabled via FPEXC.EN: this should be true for FMXR/FMRX
217  * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
218  */
219 static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled)
220 {
221     if (s->fp_excp_el) {
222         /*
223          * The full syndrome is only used for HSR when HCPTR traps:
224          * For v8, when TA==0, coproc is RES0.
225          * For v7, any use of a Floating-point instruction or access
226          * to a Floating-point Extension register that is trapped to
227          * Hyp mode because of a trap configured in the HCPTR sets
228          * this field to 0xA.
229          */
230         int coproc = arm_dc_feature(s, ARM_FEATURE_V8) ? 0 : 0xa;
231         uint32_t syn = syn_fp_access_trap(1, 0xe, false, coproc);
232 
233         gen_exception_insn_el(s, 0, EXCP_UDEF, syn, s->fp_excp_el);
234         return false;
235     }
236 
237     /*
238      * Note that rebuild_hflags_a32 has already accounted for being in EL0
239      * and the higher EL in A64 mode, etc.  Unlike A64 mode, there do not
240      * appear to be any insns which touch VFP which are allowed.
241      */
242     if (s->sme_trap_nonstreaming) {
243         gen_exception_insn(s, 0, EXCP_UDEF,
244                            syn_smetrap(SME_ET_Streaming,
245                                        curr_insn_len(s) == 2));
246         return false;
247     }
248 
249     if (!s->vfp_enabled && !ignore_vfp_enabled) {
250         assert(!arm_dc_feature(s, ARM_FEATURE_M));
251         unallocated_encoding(s);
252         return false;
253     }
254     return true;
255 }
256 
257 /*
258  * Check that VFP access is enabled, M-profile specific version.
259  *
260  * If VFP is enabled, do the necessary M-profile lazy-FP handling and then
261  * return true. If not, emit code to generate an appropriate exception and
262  * return false.
263  * skip_context_update is true to skip the "update FP context" part of this.
264  */
265 bool vfp_access_check_m(DisasContext *s, bool skip_context_update)
266 {
267     if (s->fp_excp_el) {
268         /*
269          * M-profile mostly catches the "FPU disabled" case early, in
270          * disas_m_nocp(), but a few insns (eg LCTP, WLSTP, DLSTP)
271          * which do coprocessor-checks are outside the large ranges of
272          * the encoding space handled by the patterns in m-nocp.decode,
273          * and for them we may need to raise NOCP here.
274          */
275         gen_exception_insn_el(s, 0, EXCP_NOCP,
276                               syn_uncategorized(), s->fp_excp_el);
277         return false;
278     }
279 
280     /* Handle M-profile lazy FP state mechanics */
281 
282     /* Trigger lazy-state preservation if necessary */
283     gen_preserve_fp_state(s, skip_context_update);
284 
285     if (!skip_context_update) {
286         /* Update ownership of FP context and create new FP context if needed */
287         gen_update_fp_context(s);
288     }
289 
290     return true;
291 }
292 
293 /*
294  * The most usual kind of VFP access check, for everything except
295  * FMXR/FMRX to the always-available special registers.
296  */
297 bool vfp_access_check(DisasContext *s)
298 {
299     if (arm_dc_feature(s, ARM_FEATURE_M)) {
300         return vfp_access_check_m(s, false);
301     } else {
302         return vfp_access_check_a(s, false);
303     }
304 }
305 
306 static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
307 {
308     uint32_t rd, rn, rm;
309     int sz = a->sz;
310 
311     if (!dc_isar_feature(aa32_vsel, s)) {
312         return false;
313     }
314 
315     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
316         return false;
317     }
318 
319     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
320         return false;
321     }
322 
323     /* UNDEF accesses to D16-D31 if they don't exist */
324     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
325         ((a->vm | a->vn | a->vd) & 0x10)) {
326         return false;
327     }
328 
329     rd = a->vd;
330     rn = a->vn;
331     rm = a->vm;
332 
333     if (!vfp_access_check(s)) {
334         return true;
335     }
336 
337     if (sz == 3) {
338         TCGv_i64 frn, frm, dest;
339         TCGv_i64 tmp, zero, zf, nf, vf;
340 
341         zero = tcg_constant_i64(0);
342 
343         frn = tcg_temp_new_i64();
344         frm = tcg_temp_new_i64();
345         dest = tcg_temp_new_i64();
346 
347         zf = tcg_temp_new_i64();
348         nf = tcg_temp_new_i64();
349         vf = tcg_temp_new_i64();
350 
351         tcg_gen_extu_i32_i64(zf, cpu_ZF);
352         tcg_gen_ext_i32_i64(nf, cpu_NF);
353         tcg_gen_ext_i32_i64(vf, cpu_VF);
354 
355         vfp_load_reg64(frn, rn);
356         vfp_load_reg64(frm, rm);
357         switch (a->cc) {
358         case 0: /* eq: Z */
359             tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero, frn, frm);
360             break;
361         case 1: /* vs: V */
362             tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero, frn, frm);
363             break;
364         case 2: /* ge: N == V -> N ^ V == 0 */
365             tmp = tcg_temp_new_i64();
366             tcg_gen_xor_i64(tmp, vf, nf);
367             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, frn, frm);
368             break;
369         case 3: /* gt: !Z && N == V */
370             tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero, frn, frm);
371             tmp = tcg_temp_new_i64();
372             tcg_gen_xor_i64(tmp, vf, nf);
373             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, dest, frm);
374             break;
375         }
376         vfp_store_reg64(dest, rd);
377     } else {
378         TCGv_i32 frn, frm, dest;
379         TCGv_i32 tmp, zero;
380 
381         zero = tcg_constant_i32(0);
382 
383         frn = tcg_temp_new_i32();
384         frm = tcg_temp_new_i32();
385         dest = tcg_temp_new_i32();
386         vfp_load_reg32(frn, rn);
387         vfp_load_reg32(frm, rm);
388         switch (a->cc) {
389         case 0: /* eq: Z */
390             tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero, frn, frm);
391             break;
392         case 1: /* vs: V */
393             tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero, frn, frm);
394             break;
395         case 2: /* ge: N == V -> N ^ V == 0 */
396             tmp = tcg_temp_new_i32();
397             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
398             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, frn, frm);
399             break;
400         case 3: /* gt: !Z && N == V */
401             tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero, frn, frm);
402             tmp = tcg_temp_new_i32();
403             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
404             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, dest, frm);
405             break;
406         }
407         /* For fp16 the top half is always zeroes */
408         if (sz == 1) {
409             tcg_gen_andi_i32(dest, dest, 0xffff);
410         }
411         vfp_store_reg32(dest, rd);
412     }
413 
414     return true;
415 }
416 
417 /*
418  * Table for converting the most common AArch32 encoding of
419  * rounding mode to arm_fprounding order (which matches the
420  * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
421  */
422 static const uint8_t fp_decode_rm[] = {
423     FPROUNDING_TIEAWAY,
424     FPROUNDING_TIEEVEN,
425     FPROUNDING_POSINF,
426     FPROUNDING_NEGINF,
427 };
428 
429 static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
430 {
431     uint32_t rd, rm;
432     int sz = a->sz;
433     TCGv_ptr fpst;
434     TCGv_i32 tcg_rmode;
435     int rounding = fp_decode_rm[a->rm];
436 
437     if (!dc_isar_feature(aa32_vrint, s)) {
438         return false;
439     }
440 
441     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
442         return false;
443     }
444 
445     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
446         return false;
447     }
448 
449     /* UNDEF accesses to D16-D31 if they don't exist */
450     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
451         ((a->vm | a->vd) & 0x10)) {
452         return false;
453     }
454 
455     rd = a->vd;
456     rm = a->vm;
457 
458     if (!vfp_access_check(s)) {
459         return true;
460     }
461 
462     if (sz == 1) {
463         fpst = fpstatus_ptr(FPST_FPCR_F16);
464     } else {
465         fpst = fpstatus_ptr(FPST_FPCR);
466     }
467 
468     tcg_rmode = gen_set_rmode(rounding, fpst);
469 
470     if (sz == 3) {
471         TCGv_i64 tcg_op;
472         TCGv_i64 tcg_res;
473         tcg_op = tcg_temp_new_i64();
474         tcg_res = tcg_temp_new_i64();
475         vfp_load_reg64(tcg_op, rm);
476         gen_helper_rintd(tcg_res, tcg_op, fpst);
477         vfp_store_reg64(tcg_res, rd);
478     } else {
479         TCGv_i32 tcg_op;
480         TCGv_i32 tcg_res;
481         tcg_op = tcg_temp_new_i32();
482         tcg_res = tcg_temp_new_i32();
483         vfp_load_reg32(tcg_op, rm);
484         if (sz == 1) {
485             gen_helper_rinth(tcg_res, tcg_op, fpst);
486         } else {
487             gen_helper_rints(tcg_res, tcg_op, fpst);
488         }
489         vfp_store_reg32(tcg_res, rd);
490     }
491 
492     gen_restore_rmode(tcg_rmode, fpst);
493     return true;
494 }
495 
496 static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
497 {
498     uint32_t rd, rm;
499     int sz = a->sz;
500     TCGv_ptr fpst;
501     TCGv_i32 tcg_rmode, tcg_shift;
502     int rounding = fp_decode_rm[a->rm];
503     bool is_signed = a->op;
504 
505     if (!dc_isar_feature(aa32_vcvt_dr, s)) {
506         return false;
507     }
508 
509     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
510         return false;
511     }
512 
513     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
514         return false;
515     }
516 
517     /* UNDEF accesses to D16-D31 if they don't exist */
518     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
519         return false;
520     }
521 
522     rd = a->vd;
523     rm = a->vm;
524 
525     if (!vfp_access_check(s)) {
526         return true;
527     }
528 
529     if (sz == 1) {
530         fpst = fpstatus_ptr(FPST_FPCR_F16);
531     } else {
532         fpst = fpstatus_ptr(FPST_FPCR);
533     }
534 
535     tcg_shift = tcg_constant_i32(0);
536     tcg_rmode = gen_set_rmode(rounding, fpst);
537 
538     if (sz == 3) {
539         TCGv_i64 tcg_double, tcg_res;
540         TCGv_i32 tcg_tmp;
541         tcg_double = tcg_temp_new_i64();
542         tcg_res = tcg_temp_new_i64();
543         tcg_tmp = tcg_temp_new_i32();
544         vfp_load_reg64(tcg_double, rm);
545         if (is_signed) {
546             gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
547         } else {
548             gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
549         }
550         tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
551         vfp_store_reg32(tcg_tmp, rd);
552     } else {
553         TCGv_i32 tcg_single, tcg_res;
554         tcg_single = tcg_temp_new_i32();
555         tcg_res = tcg_temp_new_i32();
556         vfp_load_reg32(tcg_single, rm);
557         if (sz == 1) {
558             if (is_signed) {
559                 gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
560             } else {
561                 gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst);
562             }
563         } else {
564             if (is_signed) {
565                 gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
566             } else {
567                 gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
568             }
569         }
570         vfp_store_reg32(tcg_res, rd);
571     }
572 
573     gen_restore_rmode(tcg_rmode, fpst);
574     return true;
575 }
576 
577 bool mve_skip_vmov(DisasContext *s, int vn, int index, int size)
578 {
579     /*
580      * In a CPU with MVE, the VMOV (vector lane to general-purpose register)
581      * and VMOV (general-purpose register to vector lane) insns are not
582      * predicated, but they are subject to beatwise execution if they are
583      * not in an IT block.
584      *
585      * Since our implementation always executes all 4 beats in one tick,
586      * this means only that if PSR.ECI says we should not be executing
587      * the beat corresponding to the lane of the vector register being
588      * accessed then we should skip performing the move, and that we need
589      * to do the usual check for bad ECI state and advance of ECI state.
590      *
591      * Note that if PSR.ECI is non-zero then we cannot be in an IT block.
592      *
593      * Return true if this VMOV scalar <-> gpreg should be skipped because
594      * the MVE PSR.ECI state says we skip the beat where the store happens.
595      */
596 
597     /* Calculate the byte offset into Qn which we're going to access */
598     int ofs = (index << size) + ((vn & 1) * 8);
599 
600     if (!dc_isar_feature(aa32_mve, s)) {
601         return false;
602     }
603 
604     switch (s->eci) {
605     case ECI_NONE:
606         return false;
607     case ECI_A0:
608         return ofs < 4;
609     case ECI_A0A1:
610         return ofs < 8;
611     case ECI_A0A1A2:
612     case ECI_A0A1A2B0:
613         return ofs < 12;
614     default:
615         g_assert_not_reached();
616     }
617 }
618 
619 static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
620 {
621     /* VMOV scalar to general purpose register */
622     TCGv_i32 tmp;
623 
624     /*
625      * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
626      * all sizes, whether the CPU has fp or not.
627      */
628     if (!dc_isar_feature(aa32_mve, s)) {
629         if (a->size == MO_32
630             ? !dc_isar_feature(aa32_fpsp_v2, s)
631             : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
632             return false;
633         }
634     }
635 
636     /* UNDEF accesses to D16-D31 if they don't exist */
637     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
638         return false;
639     }
640 
641     if (dc_isar_feature(aa32_mve, s)) {
642         if (!mve_eci_check(s)) {
643             return true;
644         }
645     }
646 
647     if (!vfp_access_check(s)) {
648         return true;
649     }
650 
651     if (!mve_skip_vmov(s, a->vn, a->index, a->size)) {
652         tmp = tcg_temp_new_i32();
653         read_neon_element32(tmp, a->vn, a->index,
654                             a->size | (a->u ? 0 : MO_SIGN));
655         store_reg(s, a->rt, tmp);
656     }
657 
658     if (dc_isar_feature(aa32_mve, s)) {
659         mve_update_and_store_eci(s);
660     }
661     return true;
662 }
663 
664 static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
665 {
666     /* VMOV general purpose register to scalar */
667     TCGv_i32 tmp;
668 
669     /*
670      * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
671      * all sizes, whether the CPU has fp or not.
672      */
673     if (!dc_isar_feature(aa32_mve, s)) {
674         if (a->size == MO_32
675             ? !dc_isar_feature(aa32_fpsp_v2, s)
676             : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
677             return false;
678         }
679     }
680 
681     /* UNDEF accesses to D16-D31 if they don't exist */
682     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
683         return false;
684     }
685 
686     if (dc_isar_feature(aa32_mve, s)) {
687         if (!mve_eci_check(s)) {
688             return true;
689         }
690     }
691 
692     if (!vfp_access_check(s)) {
693         return true;
694     }
695 
696     if (!mve_skip_vmov(s, a->vn, a->index, a->size)) {
697         tmp = load_reg(s, a->rt);
698         write_neon_element32(tmp, a->vn, a->index, a->size);
699     }
700 
701     if (dc_isar_feature(aa32_mve, s)) {
702         mve_update_and_store_eci(s);
703     }
704     return true;
705 }
706 
707 static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
708 {
709     /* VDUP (general purpose register) */
710     TCGv_i32 tmp;
711     int size, vec_size;
712 
713     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
714         return false;
715     }
716 
717     /* UNDEF accesses to D16-D31 if they don't exist */
718     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
719         return false;
720     }
721 
722     if (a->b && a->e) {
723         return false;
724     }
725 
726     if (a->q && (a->vn & 1)) {
727         return false;
728     }
729 
730     vec_size = a->q ? 16 : 8;
731     if (a->b) {
732         size = 0;
733     } else if (a->e) {
734         size = 1;
735     } else {
736         size = 2;
737     }
738 
739     if (!vfp_access_check(s)) {
740         return true;
741     }
742 
743     tmp = load_reg(s, a->rt);
744     tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn),
745                          vec_size, vec_size, tmp);
746     return true;
747 }
748 
/*
 * VMSR/VMRS: move between a general-purpose register and a VFP special
 * register selected by a->reg. a->l set means VMRS (special register to
 * gp register); clear means VMSR (gp register to special register).
 *
 * Returns false to UNDEF (M-profile, missing feature, insufficient
 * privilege or unknown register); returns true once the insn has been
 * handled, including the case where the access check emitted an
 * exception.
 */
static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
{
    TCGv_i32 tmp;
    /* Set for the registers that stay accessible with FPEXC.EN clear */
    bool ignore_vfp_enabled = false;

    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        /* M profile version was already handled in m-nocp.decode */
        return false;
    }

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /*
     * First pass over the register number: reject accesses that are
     * not permitted and work out whether FPEXC.EN must be honoured.
     */
    switch (a->reg) {
    case ARM_VFP_FPSID:
        /*
         * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
         * all ID registers to privileged access only.
         */
        if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_MVFR0:
    case ARM_VFP_MVFR1:
        /* Privileged only, and only when the CPU has the MVFR feature */
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_MVFR2:
        /* Privileged only, and only on v8 */
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_FPSCR:
        /* No extra restriction here; FPEXC.EN is honoured */
        break;
    case ARM_VFP_FPEXC:
        /* Privileged only */
        if (IS_USER(s)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_FPINST:
    case ARM_VFP_FPINST2:
        /* Not present in VFPv3 */
        if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) {
            return false;
        }
        break;
    default:
        return false;
    }

    /*
     * Call vfp_access_check_a() directly, because we need to tell
     * it to ignore FPEXC.EN for some register accesses.
     */
    if (!vfp_access_check_a(s, ignore_vfp_enabled)) {
        return true;
    }

    if (a->l) {
        /* VMRS, move VFP special register to gp register */
        switch (a->reg) {
        case ARM_VFP_MVFR0:
        case ARM_VFP_MVFR1:
        case ARM_VFP_MVFR2:
        case ARM_VFP_FPSID:
            /*
             * At EL1 the ID register reads go through a runtime
             * HCR_EL2 trap check. The condexec state and PC are
             * written back first, presumably so that the helper can
             * raise an exception with correct state.
             */
            if (s->current_el == 1) {
                gen_set_condexec(s);
                gen_update_pc(s, 0);
                gen_helper_check_hcr_el2_trap(tcg_env,
                                              tcg_constant_i32(a->rt),
                                              tcg_constant_i32(a->reg));
            }
            /* fall through */
        case ARM_VFP_FPEXC:
        case ARM_VFP_FPINST:
        case ARM_VFP_FPINST2:
            /* These are read straight out of vfp.xregs[] */
            tmp = load_cpu_field(vfp.xregs[a->reg]);
            break;
        case ARM_VFP_FPSCR:
            if (a->rt == 15) {
                /* Only the NZCV bits are wanted when the target is r15 */
                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
                tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
            } else {
                /* The full FPSCR value is produced by the helper */
                tmp = tcg_temp_new_i32();
                gen_helper_vfp_get_fpscr(tmp, tcg_env);
            }
            break;
        default:
            g_assert_not_reached();
        }

        if (a->rt == 15) {
            /* Set the 4 flag bits in the CPSR.  */
            gen_set_nzcv(tmp);
        } else {
            store_reg(s, a->rt, tmp);
        }
    } else {
        /* VMSR, move gp register to VFP special register */
        switch (a->reg) {
        case ARM_VFP_FPSID:
        case ARM_VFP_MVFR0:
        case ARM_VFP_MVFR1:
        case ARM_VFP_MVFR2:
            /* Writes are ignored.  */
            break;
        case ARM_VFP_FPSCR:
            tmp = load_reg(s, a->rt);
            gen_helper_vfp_set_fpscr(tcg_env, tmp);
            /*
             * NOTE(review): presumably needed because FPSCR state is
             * cached in TB flags - confirm against gen_lookup_tb().
             */
            gen_lookup_tb(s);
            break;
        case ARM_VFP_FPEXC:
            /*
             * TODO: VFP subarchitecture support.
             * For now, keep the EN bit only
             */
            tmp = load_reg(s, a->rt);
            /* Bit 30 is the EN bit kept per the comment above */
            tcg_gen_andi_i32(tmp, tmp, 1 << 30);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
            gen_lookup_tb(s);
            break;
        case ARM_VFP_FPINST:
        case ARM_VFP_FPINST2:
            /* Stored unmodified into vfp.xregs[] */
            tmp = load_reg(s, a->rt);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
            break;
        default:
            g_assert_not_reached();
        }
    }

    return true;
}
889 
890 
891 static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
892 {
893     TCGv_i32 tmp;
894 
895     if (!dc_isar_feature(aa32_fp16_arith, s)) {
896         return false;
897     }
898 
899     if (a->rt == 15) {
900         /* UNPREDICTABLE; we choose to UNDEF */
901         return false;
902     }
903 
904     if (!vfp_access_check(s)) {
905         return true;
906     }
907 
908     if (a->l) {
909         /* VFP to general purpose register */
910         tmp = tcg_temp_new_i32();
911         vfp_load_reg16(tmp, a->vn);
912         store_reg(s, a->rt, tmp);
913     } else {
914         /* general purpose register to VFP */
915         tmp = load_reg(s, a->rt);
916         tcg_gen_andi_i32(tmp, tmp, 0xffff);
917         vfp_store_reg32(tmp, a->vn);
918     }
919 
920     return true;
921 }
922 
923 static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
924 {
925     TCGv_i32 tmp;
926 
927     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
928         return false;
929     }
930 
931     if (!vfp_access_check(s)) {
932         return true;
933     }
934 
935     if (a->l) {
936         /* VFP to general purpose register */
937         tmp = tcg_temp_new_i32();
938         vfp_load_reg32(tmp, a->vn);
939         if (a->rt == 15) {
940             /* Set the 4 flag bits in the CPSR.  */
941             gen_set_nzcv(tmp);
942         } else {
943             store_reg(s, a->rt, tmp);
944         }
945     } else {
946         /* general purpose register to VFP */
947         tmp = load_reg(s, a->rt);
948         vfp_store_reg32(tmp, a->vn);
949     }
950 
951     return true;
952 }
953 
954 static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
955 {
956     TCGv_i32 tmp;
957 
958     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
959         return false;
960     }
961 
962     /*
963      * VMOV between two general-purpose registers and two single precision
964      * floating point registers
965      */
966     if (!vfp_access_check(s)) {
967         return true;
968     }
969 
970     if (a->op) {
971         /* fpreg to gpreg */
972         tmp = tcg_temp_new_i32();
973         vfp_load_reg32(tmp, a->vm);
974         store_reg(s, a->rt, tmp);
975         tmp = tcg_temp_new_i32();
976         vfp_load_reg32(tmp, a->vm + 1);
977         store_reg(s, a->rt2, tmp);
978     } else {
979         /* gpreg to fpreg */
980         tmp = load_reg(s, a->rt);
981         vfp_store_reg32(tmp, a->vm);
982         tmp = load_reg(s, a->rt2);
983         vfp_store_reg32(tmp, a->vm + 1);
984     }
985 
986     return true;
987 }
988 
989 static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
990 {
991     TCGv_i32 tmp;
992 
993     /*
994      * VMOV between two general-purpose registers and one double precision
995      * floating point register.  Note that this does not require support
996      * for double precision arithmetic.
997      */
998     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
999         return false;
1000     }
1001 
1002     /* UNDEF accesses to D16-D31 if they don't exist */
1003     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
1004         return false;
1005     }
1006 
1007     if (!vfp_access_check(s)) {
1008         return true;
1009     }
1010 
1011     if (a->op) {
1012         /* fpreg to gpreg */
1013         tmp = tcg_temp_new_i32();
1014         vfp_load_reg32(tmp, a->vm * 2);
1015         store_reg(s, a->rt, tmp);
1016         tmp = tcg_temp_new_i32();
1017         vfp_load_reg32(tmp, a->vm * 2 + 1);
1018         store_reg(s, a->rt2, tmp);
1019     } else {
1020         /* gpreg to fpreg */
1021         tmp = load_reg(s, a->rt);
1022         vfp_store_reg32(tmp, a->vm * 2);
1023         tmp = load_reg(s, a->rt2);
1024         vfp_store_reg32(tmp, a->vm * 2 + 1);
1025     }
1026 
1027     return true;
1028 }
1029 
1030 static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
1031 {
1032     uint32_t offset;
1033     TCGv_i32 addr, tmp;
1034 
1035     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1036         return false;
1037     }
1038 
1039     if (!vfp_access_check(s)) {
1040         return true;
1041     }
1042 
1043     /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */
1044     offset = a->imm << 1;
1045     if (!a->u) {
1046         offset = -offset;
1047     }
1048 
1049     /* For thumb, use of PC is UNPREDICTABLE.  */
1050     addr = add_reg_for_lit(s, a->rn, offset);
1051     tmp = tcg_temp_new_i32();
1052     if (a->l) {
1053         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
1054         vfp_store_reg32(tmp, a->vd);
1055     } else {
1056         vfp_load_reg32(tmp, a->vd);
1057         gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
1058     }
1059     return true;
1060 }
1061 
1062 static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
1063 {
1064     uint32_t offset;
1065     TCGv_i32 addr, tmp;
1066 
1067     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1068         return false;
1069     }
1070 
1071     if (!vfp_access_check(s)) {
1072         return true;
1073     }
1074 
1075     offset = a->imm << 2;
1076     if (!a->u) {
1077         offset = -offset;
1078     }
1079 
1080     /* For thumb, use of PC is UNPREDICTABLE.  */
1081     addr = add_reg_for_lit(s, a->rn, offset);
1082     tmp = tcg_temp_new_i32();
1083     if (a->l) {
1084         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1085         vfp_store_reg32(tmp, a->vd);
1086     } else {
1087         vfp_load_reg32(tmp, a->vd);
1088         gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1089     }
1090     return true;
1091 }
1092 
1093 static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
1094 {
1095     uint32_t offset;
1096     TCGv_i32 addr;
1097     TCGv_i64 tmp;
1098 
1099     /* Note that this does not require support for double arithmetic.  */
1100     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1101         return false;
1102     }
1103 
1104     /* UNDEF accesses to D16-D31 if they don't exist */
1105     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
1106         return false;
1107     }
1108 
1109     if (!vfp_access_check(s)) {
1110         return true;
1111     }
1112 
1113     offset = a->imm << 2;
1114     if (!a->u) {
1115         offset = -offset;
1116     }
1117 
1118     /* For thumb, use of PC is UNPREDICTABLE.  */
1119     addr = add_reg_for_lit(s, a->rn, offset);
1120     tmp = tcg_temp_new_i64();
1121     if (a->l) {
1122         gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
1123         vfp_store_reg64(tmp, a->vd);
1124     } else {
1125         vfp_load_reg64(tmp, a->vd);
1126         gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
1127     }
1128     return true;
1129 }
1130 
1131 static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
1132 {
1133     uint32_t offset;
1134     TCGv_i32 addr, tmp;
1135     int i, n;
1136 
1137     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1138         return false;
1139     }
1140 
1141     n = a->imm;
1142 
1143     if (n == 0 || (a->vd + n) > 32) {
1144         /*
1145          * UNPREDICTABLE cases for bad immediates: we choose to
1146          * UNDEF to avoid generating huge numbers of TCG ops
1147          */
1148         return false;
1149     }
1150     if (a->rn == 15 && a->w) {
1151         /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1152         return false;
1153     }
1154 
1155     s->eci_handled = true;
1156 
1157     if (!vfp_access_check(s)) {
1158         return true;
1159     }
1160 
1161     /* For thumb, use of PC is UNPREDICTABLE.  */
1162     addr = add_reg_for_lit(s, a->rn, 0);
1163     if (a->p) {
1164         /* pre-decrement */
1165         tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1166     }
1167 
1168     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1169         /*
1170          * Here 'addr' is the lowest address we will store to,
1171          * and is either the old SP (if post-increment) or
1172          * the new SP (if pre-decrement). For post-increment
1173          * where the old value is below the limit and the new
1174          * value is above, it is UNKNOWN whether the limit check
1175          * triggers; we choose to trigger.
1176          */
1177         gen_helper_v8m_stackcheck(tcg_env, addr);
1178     }
1179 
1180     offset = 4;
1181     tmp = tcg_temp_new_i32();
1182     for (i = 0; i < n; i++) {
1183         if (a->l) {
1184             /* load */
1185             gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1186             vfp_store_reg32(tmp, a->vd + i);
1187         } else {
1188             /* store */
1189             vfp_load_reg32(tmp, a->vd + i);
1190             gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1191         }
1192         tcg_gen_addi_i32(addr, addr, offset);
1193     }
1194     if (a->w) {
1195         /* writeback */
1196         if (a->p) {
1197             offset = -offset * n;
1198             tcg_gen_addi_i32(addr, addr, offset);
1199         }
1200         store_reg(s, a->rn, addr);
1201     }
1202 
1203     clear_eci_state(s);
1204     return true;
1205 }
1206 
1207 static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
1208 {
1209     uint32_t offset;
1210     TCGv_i32 addr;
1211     TCGv_i64 tmp;
1212     int i, n;
1213 
1214     /* Note that this does not require support for double arithmetic.  */
1215     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1216         return false;
1217     }
1218 
1219     n = a->imm >> 1;
1220 
1221     if (n == 0 || (a->vd + n) > 32 || n > 16) {
1222         /*
1223          * UNPREDICTABLE cases for bad immediates: we choose to
1224          * UNDEF to avoid generating huge numbers of TCG ops
1225          */
1226         return false;
1227     }
1228     if (a->rn == 15 && a->w) {
1229         /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1230         return false;
1231     }
1232 
1233     /* UNDEF accesses to D16-D31 if they don't exist */
1234     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {
1235         return false;
1236     }
1237 
1238     s->eci_handled = true;
1239 
1240     if (!vfp_access_check(s)) {
1241         return true;
1242     }
1243 
1244     /* For thumb, use of PC is UNPREDICTABLE.  */
1245     addr = add_reg_for_lit(s, a->rn, 0);
1246     if (a->p) {
1247         /* pre-decrement */
1248         tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1249     }
1250 
1251     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1252         /*
1253          * Here 'addr' is the lowest address we will store to,
1254          * and is either the old SP (if post-increment) or
1255          * the new SP (if pre-decrement). For post-increment
1256          * where the old value is below the limit and the new
1257          * value is above, it is UNKNOWN whether the limit check
1258          * triggers; we choose to trigger.
1259          */
1260         gen_helper_v8m_stackcheck(tcg_env, addr);
1261     }
1262 
1263     offset = 8;
1264     tmp = tcg_temp_new_i64();
1265     for (i = 0; i < n; i++) {
1266         if (a->l) {
1267             /* load */
1268             gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
1269             vfp_store_reg64(tmp, a->vd + i);
1270         } else {
1271             /* store */
1272             vfp_load_reg64(tmp, a->vd + i);
1273             gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
1274         }
1275         tcg_gen_addi_i32(addr, addr, offset);
1276     }
1277     if (a->w) {
1278         /* writeback */
1279         if (a->p) {
1280             offset = -offset * n;
1281         } else if (a->imm & 1) {
1282             offset = 4;
1283         } else {
1284             offset = 0;
1285         }
1286 
1287         if (offset != 0) {
1288             tcg_gen_addi_i32(addr, addr, offset);
1289         }
1290         store_reg(s, a->rn, addr);
1291     }
1292 
1293     clear_eci_state(s);
1294     return true;
1295 }
1296 
1297 /*
1298  * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
1299  * The callback should emit code to write a value to vd. If
1300  * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
1301  * will contain the old value of the relevant VFP register;
1302  * otherwise it must be written to only.
1303  */
1304 typedef void VFPGen3OpSPFn(TCGv_i32 vd,
1305                            TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
1306 typedef void VFPGen3OpDPFn(TCGv_i64 vd,
1307                            TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);
1308 
1309 /*
1310  * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
1311  * The callback should emit code to write a value to vd (which
1312  * should be written to only).
1313  */
1314 typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
1315 typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);
1316 
/*
 * Report whether S register number reg lies in the scalar bank,
 * i.e. is one of s0..s7 (bits 3 and 4 of the number both clear).
 */
static inline bool vfp_sreg_is_scalar(int reg)
{
    return !(reg & 0x18);
}
1325 
/*
 * Report whether D register number reg lies in a scalar bank,
 * i.e. is one of d0..d3 or d16..d19 (bits 2 and 3 both clear).
 */
static inline bool vfp_dreg_is_scalar(int reg)
{
    return !(reg & 0xc);
}
1334 
/*
 * Step S register number reg forwards by delta, wrapping around
 * inside its bank of eight: only the low 3 bits change, the bank
 * bits above them are preserved.
 */
static inline int vfp_advance_sreg(int reg, int delta)
{
    int bank = reg & ~0x7;
    int slot = (reg + delta) & 0x7;

    return slot | bank;
}
1343 
/*
 * Step D register number reg forwards by delta, wrapping around
 * inside its bank of four: only the low 2 bits change, the bank
 * bits above them are preserved.
 */
static inline int vfp_advance_dreg(int reg, int delta)
{
    int bank = reg & ~0x3;
    int slot = (reg + delta) & 0x3;

    return slot | bank;
}
1352 
1353 /*
1354  * Perform a 3-operand VFP data processing instruction. fn is the
1355  * callback to do the actual operation; this function deals with the
1356  * code to handle looping around for VFP vector processing.
1357  */
1358 static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
1359                           int vd, int vn, int vm, bool reads_vd)
1360 {
1361     uint32_t delta_m = 0;
1362     uint32_t delta_d = 0;
1363     int veclen = s->vec_len;
1364     TCGv_i32 f0, f1, fd;
1365     TCGv_ptr fpst;
1366 
1367     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1368         return false;
1369     }
1370 
1371     if (!dc_isar_feature(aa32_fpshvec, s) &&
1372         (veclen != 0 || s->vec_stride != 0)) {
1373         return false;
1374     }
1375 
1376     if (!vfp_access_check(s)) {
1377         return true;
1378     }
1379 
1380     if (veclen > 0) {
1381         /* Figure out what type of vector operation this is.  */
1382         if (vfp_sreg_is_scalar(vd)) {
1383             /* scalar */
1384             veclen = 0;
1385         } else {
1386             delta_d = s->vec_stride + 1;
1387 
1388             if (vfp_sreg_is_scalar(vm)) {
1389                 /* mixed scalar/vector */
1390                 delta_m = 0;
1391             } else {
1392                 /* vector */
1393                 delta_m = delta_d;
1394             }
1395         }
1396     }
1397 
1398     f0 = tcg_temp_new_i32();
1399     f1 = tcg_temp_new_i32();
1400     fd = tcg_temp_new_i32();
1401     fpst = fpstatus_ptr(FPST_FPCR);
1402 
1403     vfp_load_reg32(f0, vn);
1404     vfp_load_reg32(f1, vm);
1405 
1406     for (;;) {
1407         if (reads_vd) {
1408             vfp_load_reg32(fd, vd);
1409         }
1410         fn(fd, f0, f1, fpst);
1411         vfp_store_reg32(fd, vd);
1412 
1413         if (veclen == 0) {
1414             break;
1415         }
1416 
1417         /* Set up the operands for the next iteration */
1418         veclen--;
1419         vd = vfp_advance_sreg(vd, delta_d);
1420         vn = vfp_advance_sreg(vn, delta_d);
1421         vfp_load_reg32(f0, vn);
1422         if (delta_m) {
1423             vm = vfp_advance_sreg(vm, delta_m);
1424             vfp_load_reg32(f1, vm);
1425         }
1426     }
1427     return true;
1428 }
1429 
1430 static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
1431                           int vd, int vn, int vm, bool reads_vd)
1432 {
1433     /*
1434      * Do a half-precision operation. Functionally this is
1435      * the same as do_vfp_3op_sp(), except:
1436      *  - it uses the FPST_FPCR_F16
1437      *  - it doesn't need the VFP vector handling (fp16 is a
1438      *    v8 feature, and in v8 VFP vectors don't exist)
1439      *  - it does the aa32_fp16_arith feature test
1440      */
1441     TCGv_i32 f0, f1, fd;
1442     TCGv_ptr fpst;
1443 
1444     if (!dc_isar_feature(aa32_fp16_arith, s)) {
1445         return false;
1446     }
1447 
1448     if (s->vec_len != 0 || s->vec_stride != 0) {
1449         return false;
1450     }
1451 
1452     if (!vfp_access_check(s)) {
1453         return true;
1454     }
1455 
1456     f0 = tcg_temp_new_i32();
1457     f1 = tcg_temp_new_i32();
1458     fd = tcg_temp_new_i32();
1459     fpst = fpstatus_ptr(FPST_FPCR_F16);
1460 
1461     vfp_load_reg16(f0, vn);
1462     vfp_load_reg16(f1, vm);
1463 
1464     if (reads_vd) {
1465         vfp_load_reg16(fd, vd);
1466     }
1467     fn(fd, f0, f1, fpst);
1468     vfp_store_reg32(fd, vd);
1469     return true;
1470 }
1471 
1472 static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
1473                           int vd, int vn, int vm, bool reads_vd)
1474 {
1475     uint32_t delta_m = 0;
1476     uint32_t delta_d = 0;
1477     int veclen = s->vec_len;
1478     TCGv_i64 f0, f1, fd;
1479     TCGv_ptr fpst;
1480 
1481     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
1482         return false;
1483     }
1484 
1485     /* UNDEF accesses to D16-D31 if they don't exist */
1486     if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {
1487         return false;
1488     }
1489 
1490     if (!dc_isar_feature(aa32_fpshvec, s) &&
1491         (veclen != 0 || s->vec_stride != 0)) {
1492         return false;
1493     }
1494 
1495     if (!vfp_access_check(s)) {
1496         return true;
1497     }
1498 
1499     if (veclen > 0) {
1500         /* Figure out what type of vector operation this is.  */
1501         if (vfp_dreg_is_scalar(vd)) {
1502             /* scalar */
1503             veclen = 0;
1504         } else {
1505             delta_d = (s->vec_stride >> 1) + 1;
1506 
1507             if (vfp_dreg_is_scalar(vm)) {
1508                 /* mixed scalar/vector */
1509                 delta_m = 0;
1510             } else {
1511                 /* vector */
1512                 delta_m = delta_d;
1513             }
1514         }
1515     }
1516 
1517     f0 = tcg_temp_new_i64();
1518     f1 = tcg_temp_new_i64();
1519     fd = tcg_temp_new_i64();
1520     fpst = fpstatus_ptr(FPST_FPCR);
1521 
1522     vfp_load_reg64(f0, vn);
1523     vfp_load_reg64(f1, vm);
1524 
1525     for (;;) {
1526         if (reads_vd) {
1527             vfp_load_reg64(fd, vd);
1528         }
1529         fn(fd, f0, f1, fpst);
1530         vfp_store_reg64(fd, vd);
1531 
1532         if (veclen == 0) {
1533             break;
1534         }
1535         /* Set up the operands for the next iteration */
1536         veclen--;
1537         vd = vfp_advance_dreg(vd, delta_d);
1538         vn = vfp_advance_dreg(vn, delta_d);
1539         vfp_load_reg64(f0, vn);
1540         if (delta_m) {
1541             vm = vfp_advance_dreg(vm, delta_m);
1542             vfp_load_reg64(f1, vm);
1543         }
1544     }
1545     return true;
1546 }
1547 
1548 static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
1549 {
1550     uint32_t delta_m = 0;
1551     uint32_t delta_d = 0;
1552     int veclen = s->vec_len;
1553     TCGv_i32 f0, fd;
1554 
1555     /* Note that the caller must check the aa32_fpsp_v2 feature. */
1556 
1557     if (!dc_isar_feature(aa32_fpshvec, s) &&
1558         (veclen != 0 || s->vec_stride != 0)) {
1559         return false;
1560     }
1561 
1562     if (!vfp_access_check(s)) {
1563         return true;
1564     }
1565 
1566     if (veclen > 0) {
1567         /* Figure out what type of vector operation this is.  */
1568         if (vfp_sreg_is_scalar(vd)) {
1569             /* scalar */
1570             veclen = 0;
1571         } else {
1572             delta_d = s->vec_stride + 1;
1573 
1574             if (vfp_sreg_is_scalar(vm)) {
1575                 /* mixed scalar/vector */
1576                 delta_m = 0;
1577             } else {
1578                 /* vector */
1579                 delta_m = delta_d;
1580             }
1581         }
1582     }
1583 
1584     f0 = tcg_temp_new_i32();
1585     fd = tcg_temp_new_i32();
1586 
1587     vfp_load_reg32(f0, vm);
1588 
1589     for (;;) {
1590         fn(fd, f0);
1591         vfp_store_reg32(fd, vd);
1592 
1593         if (veclen == 0) {
1594             break;
1595         }
1596 
1597         if (delta_m == 0) {
1598             /* single source one-many */
1599             while (veclen--) {
1600                 vd = vfp_advance_sreg(vd, delta_d);
1601                 vfp_store_reg32(fd, vd);
1602             }
1603             break;
1604         }
1605 
1606         /* Set up the operands for the next iteration */
1607         veclen--;
1608         vd = vfp_advance_sreg(vd, delta_d);
1609         vm = vfp_advance_sreg(vm, delta_m);
1610         vfp_load_reg32(f0, vm);
1611     }
1612     return true;
1613 }
1614 
1615 static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
1616 {
1617     /*
1618      * Do a half-precision operation. Functionally this is
1619      * the same as do_vfp_2op_sp(), except:
1620      *  - it doesn't need the VFP vector handling (fp16 is a
1621      *    v8 feature, and in v8 VFP vectors don't exist)
1622      *  - it does the aa32_fp16_arith feature test
1623      */
1624     TCGv_i32 f0;
1625 
1626     /* Note that the caller must check the aa32_fp16_arith feature */
1627 
1628     if (!dc_isar_feature(aa32_fp16_arith, s)) {
1629         return false;
1630     }
1631 
1632     if (s->vec_len != 0 || s->vec_stride != 0) {
1633         return false;
1634     }
1635 
1636     if (!vfp_access_check(s)) {
1637         return true;
1638     }
1639 
1640     f0 = tcg_temp_new_i32();
1641     vfp_load_reg16(f0, vm);
1642     fn(f0, f0);
1643     vfp_store_reg32(f0, vd);
1644 
1645     return true;
1646 }
1647 
1648 static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
1649 {
1650     uint32_t delta_m = 0;
1651     uint32_t delta_d = 0;
1652     int veclen = s->vec_len;
1653     TCGv_i64 f0, fd;
1654 
1655     /* Note that the caller must check the aa32_fpdp_v2 feature. */
1656 
1657     /* UNDEF accesses to D16-D31 if they don't exist */
1658     if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
1659         return false;
1660     }
1661 
1662     if (!dc_isar_feature(aa32_fpshvec, s) &&
1663         (veclen != 0 || s->vec_stride != 0)) {
1664         return false;
1665     }
1666 
1667     if (!vfp_access_check(s)) {
1668         return true;
1669     }
1670 
1671     if (veclen > 0) {
1672         /* Figure out what type of vector operation this is.  */
1673         if (vfp_dreg_is_scalar(vd)) {
1674             /* scalar */
1675             veclen = 0;
1676         } else {
1677             delta_d = (s->vec_stride >> 1) + 1;
1678 
1679             if (vfp_dreg_is_scalar(vm)) {
1680                 /* mixed scalar/vector */
1681                 delta_m = 0;
1682             } else {
1683                 /* vector */
1684                 delta_m = delta_d;
1685             }
1686         }
1687     }
1688 
1689     f0 = tcg_temp_new_i64();
1690     fd = tcg_temp_new_i64();
1691 
1692     vfp_load_reg64(f0, vm);
1693 
1694     for (;;) {
1695         fn(fd, f0);
1696         vfp_store_reg64(fd, vd);
1697 
1698         if (veclen == 0) {
1699             break;
1700         }
1701 
1702         if (delta_m == 0) {
1703             /* single source one-many */
1704             while (veclen--) {
1705                 vd = vfp_advance_dreg(vd, delta_d);
1706                 vfp_store_reg64(fd, vd);
1707             }
1708             break;
1709         }
1710 
1711         /* Set up the operands for the next iteration */
1712         veclen--;
1713         vd = vfp_advance_dreg(vd, delta_d);
1714         vd = vfp_advance_dreg(vm, delta_m);
1715         vfp_load_reg64(f0, vm);
1716     }
1717     return true;
1718 }
1719 
1720 static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1721 {
1722     /* Note that order of inputs to the add matters for NaNs */
1723     TCGv_i32 tmp = tcg_temp_new_i32();
1724 
1725     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
1726     gen_helper_vfp_addh(vd, vd, tmp, fpst);
1727 }
1728 
1729 static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
1730 {
1731     return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
1732 }
1733 
1734 static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1735 {
1736     /* Note that order of inputs to the add matters for NaNs */
1737     TCGv_i32 tmp = tcg_temp_new_i32();
1738 
1739     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1740     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1741 }
1742 
1743 static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
1744 {
1745     return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
1746 }
1747 
1748 static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1749 {
1750     /* Note that order of inputs to the add matters for NaNs */
1751     TCGv_i64 tmp = tcg_temp_new_i64();
1752 
1753     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1754     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1755 }
1756 
1757 static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
1758 {
1759     return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
1760 }
1761 
1762 static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1763 {
1764     /*
1765      * VMLS: vd = vd + -(vn * vm)
1766      * Note that order of inputs to the add matters for NaNs.
1767      */
1768     TCGv_i32 tmp = tcg_temp_new_i32();
1769 
1770     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
1771     gen_vfp_negh(tmp, tmp);
1772     gen_helper_vfp_addh(vd, vd, tmp, fpst);
1773 }
1774 
1775 static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
1776 {
1777     return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
1778 }
1779 
1780 static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1781 {
1782     /*
1783      * VMLS: vd = vd + -(vn * vm)
1784      * Note that order of inputs to the add matters for NaNs.
1785      */
1786     TCGv_i32 tmp = tcg_temp_new_i32();
1787 
1788     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1789     gen_vfp_negs(tmp, tmp);
1790     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1791 }
1792 
1793 static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
1794 {
1795     return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
1796 }
1797 
1798 static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1799 {
1800     /*
1801      * VMLS: vd = vd + -(vn * vm)
1802      * Note that order of inputs to the add matters for NaNs.
1803      */
1804     TCGv_i64 tmp = tcg_temp_new_i64();
1805 
1806     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1807     gen_vfp_negd(tmp, tmp);
1808     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1809 }
1810 
1811 static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
1812 {
1813     return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
1814 }
1815 
1816 static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1817 {
1818     /*
1819      * VNMLS: -fd + (fn * fm)
1820      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1821      * plausible looking simplifications because this will give wrong results
1822      * for NaNs.
1823      */
1824     TCGv_i32 tmp = tcg_temp_new_i32();
1825 
1826     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
1827     gen_vfp_negh(vd, vd);
1828     gen_helper_vfp_addh(vd, vd, tmp, fpst);
1829 }
1830 
1831 static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
1832 {
1833     return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
1834 }
1835 
1836 static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1837 {
1838     /*
1839      * VNMLS: -fd + (fn * fm)
1840      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1841      * plausible looking simplifications because this will give wrong results
1842      * for NaNs.
1843      */
1844     TCGv_i32 tmp = tcg_temp_new_i32();
1845 
1846     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1847     gen_vfp_negs(vd, vd);
1848     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1849 }
1850 
1851 static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
1852 {
1853     return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
1854 }
1855 
1856 static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1857 {
1858     /*
1859      * VNMLS: -fd + (fn * fm)
1860      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1861      * plausible looking simplifications because this will give wrong results
1862      * for NaNs.
1863      */
1864     TCGv_i64 tmp = tcg_temp_new_i64();
1865 
1866     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1867     gen_vfp_negd(vd, vd);
1868     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1869 }
1870 
1871 static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
1872 {
1873     return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
1874 }
1875 
1876 static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1877 {
1878     /* VNMLA: -fd + -(fn * fm) */
1879     TCGv_i32 tmp = tcg_temp_new_i32();
1880 
1881     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
1882     gen_vfp_negh(tmp, tmp);
1883     gen_vfp_negh(vd, vd);
1884     gen_helper_vfp_addh(vd, vd, tmp, fpst);
1885 }
1886 
1887 static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
1888 {
1889     return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
1890 }
1891 
1892 static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1893 {
1894     /* VNMLA: -fd + -(fn * fm) */
1895     TCGv_i32 tmp = tcg_temp_new_i32();
1896 
1897     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1898     gen_vfp_negs(tmp, tmp);
1899     gen_vfp_negs(vd, vd);
1900     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1901 }
1902 
1903 static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
1904 {
1905     return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
1906 }
1907 
1908 static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1909 {
1910     /* VNMLA: -fd + (fn * fm) */
1911     TCGv_i64 tmp = tcg_temp_new_i64();
1912 
1913     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1914     gen_vfp_negd(tmp, tmp);
1915     gen_vfp_negd(vd, vd);
1916     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1917 }
1918 
1919 static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
1920 {
1921     return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
1922 }
1923 
1924 static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
1925 {
1926     return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
1927 }
1928 
1929 static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
1930 {
1931     return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
1932 }
1933 
1934 static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
1935 {
1936     return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
1937 }
1938 
1939 static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1940 {
1941     /* VNMUL: -(fn * fm) */
1942     gen_helper_vfp_mulh(vd, vn, vm, fpst);
1943     gen_vfp_negh(vd, vd);
1944 }
1945 
1946 static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
1947 {
1948     return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
1949 }
1950 
1951 static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1952 {
1953     /* VNMUL: -(fn * fm) */
1954     gen_helper_vfp_muls(vd, vn, vm, fpst);
1955     gen_vfp_negs(vd, vd);
1956 }
1957 
1958 static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
1959 {
1960     return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
1961 }
1962 
1963 static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1964 {
1965     /* VNMUL: -(fn * fm) */
1966     gen_helper_vfp_muld(vd, vn, vm, fpst);
1967     gen_vfp_negd(vd, vd);
1968 }
1969 
1970 static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
1971 {
1972     return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
1973 }
1974 
1975 static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
1976 {
1977     return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);
1978 }
1979 
1980 static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
1981 {
1982     return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
1983 }
1984 
1985 static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
1986 {
1987     return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
1988 }
1989 
1990 static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
1991 {
1992     return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);
1993 }
1994 
1995 static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
1996 {
1997     return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
1998 }
1999 
2000 static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
2001 {
2002     return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
2003 }
2004 
2005 static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
2006 {
2007     return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);
2008 }
2009 
2010 static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
2011 {
2012     return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
2013 }
2014 
2015 static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
2016 {
2017     return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
2018 }
2019 
2020 static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
2021 {
2022     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2023         return false;
2024     }
2025     return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
2026                          a->vd, a->vn, a->vm, false);
2027 }
2028 
2029 static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
2030 {
2031     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2032         return false;
2033     }
2034     return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
2035                          a->vd, a->vn, a->vm, false);
2036 }
2037 
2038 static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
2039 {
2040     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2041         return false;
2042     }
2043     return do_vfp_3op_sp(s, gen_helper_vfp_minnums,
2044                          a->vd, a->vn, a->vm, false);
2045 }
2046 
2047 static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a)
2048 {
2049     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2050         return false;
2051     }
2052     return do_vfp_3op_sp(s, gen_helper_vfp_maxnums,
2053                          a->vd, a->vn, a->vm, false);
2054 }
2055 
2056 static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a)
2057 {
2058     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2059         return false;
2060     }
2061     return do_vfp_3op_dp(s, gen_helper_vfp_minnumd,
2062                          a->vd, a->vn, a->vm, false);
2063 }
2064 
2065 static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
2066 {
2067     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2068         return false;
2069     }
2070     return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd,
2071                          a->vd, a->vn, a->vm, false);
2072 }
2073 
2074 static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
2075 {
2076     /*
2077      * VFNMA : fd = muladd(-fd,  fn, fm)
2078      * VFNMS : fd = muladd(-fd, -fn, fm)
2079      * VFMA  : fd = muladd( fd,  fn, fm)
2080      * VFMS  : fd = muladd( fd, -fn, fm)
2081      *
2082      * These are fused multiply-add, and must be done as one floating
2083      * point operation with no rounding between the multiplication and
2084      * addition steps.  NB that doing the negations here as separate
2085      * steps is correct : an input NaN should come out with its sign
2086      * bit flipped if it is a negated-input.
2087      */
2088     TCGv_ptr fpst;
2089     TCGv_i32 vn, vm, vd;
2090 
2091     /*
2092      * Present in VFPv4 only, and only with the FP16 extension.
2093      * Note that we can't rely on the SIMDFMAC check alone, because
2094      * in a Neon-no-VFP core that ID register field will be non-zero.
2095      */
2096     if (!dc_isar_feature(aa32_fp16_arith, s) ||
2097         !dc_isar_feature(aa32_simdfmac, s) ||
2098         !dc_isar_feature(aa32_fpsp_v2, s)) {
2099         return false;
2100     }
2101 
2102     if (s->vec_len != 0 || s->vec_stride != 0) {
2103         return false;
2104     }
2105 
2106     if (!vfp_access_check(s)) {
2107         return true;
2108     }
2109 
2110     vn = tcg_temp_new_i32();
2111     vm = tcg_temp_new_i32();
2112     vd = tcg_temp_new_i32();
2113 
2114     vfp_load_reg16(vn, a->vn);
2115     vfp_load_reg16(vm, a->vm);
2116     if (neg_n) {
2117         /* VFNMS, VFMS */
2118         gen_vfp_negh(vn, vn);
2119     }
2120     vfp_load_reg16(vd, a->vd);
2121     if (neg_d) {
2122         /* VFNMA, VFNMS */
2123         gen_vfp_negh(vd, vd);
2124     }
2125     fpst = fpstatus_ptr(FPST_FPCR_F16);
2126     gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
2127     vfp_store_reg32(vd, a->vd);
2128     return true;
2129 }
2130 
2131 static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
2132 {
2133     /*
2134      * VFNMA : fd = muladd(-fd,  fn, fm)
2135      * VFNMS : fd = muladd(-fd, -fn, fm)
2136      * VFMA  : fd = muladd( fd,  fn, fm)
2137      * VFMS  : fd = muladd( fd, -fn, fm)
2138      *
2139      * These are fused multiply-add, and must be done as one floating
2140      * point operation with no rounding between the multiplication and
2141      * addition steps.  NB that doing the negations here as separate
2142      * steps is correct : an input NaN should come out with its sign
2143      * bit flipped if it is a negated-input.
2144      */
2145     TCGv_ptr fpst;
2146     TCGv_i32 vn, vm, vd;
2147 
2148     /*
2149      * Present in VFPv4 only.
2150      * Note that we can't rely on the SIMDFMAC check alone, because
2151      * in a Neon-no-VFP core that ID register field will be non-zero.
2152      */
2153     if (!dc_isar_feature(aa32_simdfmac, s) ||
2154         !dc_isar_feature(aa32_fpsp_v2, s)) {
2155         return false;
2156     }
2157     /*
2158      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
2159      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
2160      */
2161     if (s->vec_len != 0 || s->vec_stride != 0) {
2162         return false;
2163     }
2164 
2165     if (!vfp_access_check(s)) {
2166         return true;
2167     }
2168 
2169     vn = tcg_temp_new_i32();
2170     vm = tcg_temp_new_i32();
2171     vd = tcg_temp_new_i32();
2172 
2173     vfp_load_reg32(vn, a->vn);
2174     vfp_load_reg32(vm, a->vm);
2175     if (neg_n) {
2176         /* VFNMS, VFMS */
2177         gen_vfp_negs(vn, vn);
2178     }
2179     vfp_load_reg32(vd, a->vd);
2180     if (neg_d) {
2181         /* VFNMA, VFNMS */
2182         gen_vfp_negs(vd, vd);
2183     }
2184     fpst = fpstatus_ptr(FPST_FPCR);
2185     gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
2186     vfp_store_reg32(vd, a->vd);
2187     return true;
2188 }
2189 
2190 static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
2191 {
2192     /*
2193      * VFNMA : fd = muladd(-fd,  fn, fm)
2194      * VFNMS : fd = muladd(-fd, -fn, fm)
2195      * VFMA  : fd = muladd( fd,  fn, fm)
2196      * VFMS  : fd = muladd( fd, -fn, fm)
2197      *
2198      * These are fused multiply-add, and must be done as one floating
2199      * point operation with no rounding between the multiplication and
2200      * addition steps.  NB that doing the negations here as separate
2201      * steps is correct : an input NaN should come out with its sign
2202      * bit flipped if it is a negated-input.
2203      */
2204     TCGv_ptr fpst;
2205     TCGv_i64 vn, vm, vd;
2206 
2207     /*
2208      * Present in VFPv4 only.
2209      * Note that we can't rely on the SIMDFMAC check alone, because
2210      * in a Neon-no-VFP core that ID register field will be non-zero.
2211      */
2212     if (!dc_isar_feature(aa32_simdfmac, s) ||
2213         !dc_isar_feature(aa32_fpdp_v2, s)) {
2214         return false;
2215     }
2216     /*
2217      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
2218      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
2219      */
2220     if (s->vec_len != 0 || s->vec_stride != 0) {
2221         return false;
2222     }
2223 
2224     /* UNDEF accesses to D16-D31 if they don't exist. */
2225     if (!dc_isar_feature(aa32_simd_r32, s) &&
2226         ((a->vd | a->vn | a->vm) & 0x10)) {
2227         return false;
2228     }
2229 
2230     if (!vfp_access_check(s)) {
2231         return true;
2232     }
2233 
2234     vn = tcg_temp_new_i64();
2235     vm = tcg_temp_new_i64();
2236     vd = tcg_temp_new_i64();
2237 
2238     vfp_load_reg64(vn, a->vn);
2239     vfp_load_reg64(vm, a->vm);
2240     if (neg_n) {
2241         /* VFNMS, VFMS */
2242         gen_vfp_negd(vn, vn);
2243     }
2244     vfp_load_reg64(vd, a->vd);
2245     if (neg_d) {
2246         /* VFNMA, VFNMS */
2247         gen_vfp_negd(vd, vd);
2248     }
2249     fpst = fpstatus_ptr(FPST_FPCR);
2250     gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
2251     vfp_store_reg64(vd, a->vd);
2252     return true;
2253 }
2254 
2255 #define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD)                   \
2256     static bool trans_##INSN##_##PREC(DisasContext *s,                  \
2257                                       arg_##INSN##_##PREC *a)           \
2258     {                                                                   \
2259         return do_vfm_##PREC(s, a, NEGN, NEGD);                         \
2260     }
2261 
2262 #define MAKE_VFM_TRANS_FNS(PREC) \
2263     MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
2264     MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
2265     MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
2266     MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)
2267 
2268 MAKE_VFM_TRANS_FNS(hp)
2269 MAKE_VFM_TRANS_FNS(sp)
2270 MAKE_VFM_TRANS_FNS(dp)
2271 
2272 static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
2273 {
2274     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2275         return false;
2276     }
2277 
2278     if (s->vec_len != 0 || s->vec_stride != 0) {
2279         return false;
2280     }
2281 
2282     if (!vfp_access_check(s)) {
2283         return true;
2284     }
2285 
2286     vfp_store_reg32(tcg_constant_i32(vfp_expand_imm(MO_16, a->imm)), a->vd);
2287     return true;
2288 }
2289 
2290 static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
2291 {
2292     uint32_t delta_d = 0;
2293     int veclen = s->vec_len;
2294     TCGv_i32 fd;
2295     uint32_t vd;
2296 
2297     vd = a->vd;
2298 
2299     if (!dc_isar_feature(aa32_fpsp_v3, s)) {
2300         return false;
2301     }
2302 
2303     if (!dc_isar_feature(aa32_fpshvec, s) &&
2304         (veclen != 0 || s->vec_stride != 0)) {
2305         return false;
2306     }
2307 
2308     if (!vfp_access_check(s)) {
2309         return true;
2310     }
2311 
2312     if (veclen > 0) {
2313         /* Figure out what type of vector operation this is.  */
2314         if (vfp_sreg_is_scalar(vd)) {
2315             /* scalar */
2316             veclen = 0;
2317         } else {
2318             delta_d = s->vec_stride + 1;
2319         }
2320     }
2321 
2322     fd = tcg_constant_i32(vfp_expand_imm(MO_32, a->imm));
2323 
2324     for (;;) {
2325         vfp_store_reg32(fd, vd);
2326 
2327         if (veclen == 0) {
2328             break;
2329         }
2330 
2331         /* Set up the operands for the next iteration */
2332         veclen--;
2333         vd = vfp_advance_sreg(vd, delta_d);
2334     }
2335 
2336     return true;
2337 }
2338 
2339 static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
2340 {
2341     uint32_t delta_d = 0;
2342     int veclen = s->vec_len;
2343     TCGv_i64 fd;
2344     uint32_t vd;
2345 
2346     vd = a->vd;
2347 
2348     if (!dc_isar_feature(aa32_fpdp_v3, s)) {
2349         return false;
2350     }
2351 
2352     /* UNDEF accesses to D16-D31 if they don't exist. */
2353     if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {
2354         return false;
2355     }
2356 
2357     if (!dc_isar_feature(aa32_fpshvec, s) &&
2358         (veclen != 0 || s->vec_stride != 0)) {
2359         return false;
2360     }
2361 
2362     if (!vfp_access_check(s)) {
2363         return true;
2364     }
2365 
2366     if (veclen > 0) {
2367         /* Figure out what type of vector operation this is.  */
2368         if (vfp_dreg_is_scalar(vd)) {
2369             /* scalar */
2370             veclen = 0;
2371         } else {
2372             delta_d = (s->vec_stride >> 1) + 1;
2373         }
2374     }
2375 
2376     fd = tcg_constant_i64(vfp_expand_imm(MO_64, a->imm));
2377 
2378     for (;;) {
2379         vfp_store_reg64(fd, vd);
2380 
2381         if (veclen == 0) {
2382             break;
2383         }
2384 
2385         /* Set up the operands for the next iteration */
2386         veclen--;
2387         vd = vfp_advance_dreg(vd, delta_d);
2388     }
2389 
2390     return true;
2391 }
2392 
2393 #define DO_VFP_2OP(INSN, PREC, FN, CHECK)                       \
2394     static bool trans_##INSN##_##PREC(DisasContext *s,          \
2395                                       arg_##INSN##_##PREC *a)   \
2396     {                                                           \
2397         if (!dc_isar_feature(CHECK, s)) {                       \
2398             return false;                                       \
2399         }                                                       \
2400         return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
2401     }
2402 
2403 #define DO_VFP_VMOV(INSN, PREC, FN)                             \
2404     static bool trans_##INSN##_##PREC(DisasContext *s,          \
2405                                       arg_##INSN##_##PREC *a)   \
2406     {                                                           \
2407         if (!dc_isar_feature(aa32_fp##PREC##_v2, s) &&          \
2408             !dc_isar_feature(aa32_mve, s)) {                    \
2409             return false;                                       \
2410         }                                                       \
2411         return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
2412     }
2413 
2414 DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32)
2415 DO_VFP_VMOV(VMOV_reg, dp, tcg_gen_mov_i64)
2416 
2417 DO_VFP_2OP(VABS, hp, gen_vfp_absh, aa32_fp16_arith)
2418 DO_VFP_2OP(VABS, sp, gen_vfp_abss, aa32_fpsp_v2)
2419 DO_VFP_2OP(VABS, dp, gen_vfp_absd, aa32_fpdp_v2)
2420 
2421 DO_VFP_2OP(VNEG, hp, gen_vfp_negh, aa32_fp16_arith)
2422 DO_VFP_2OP(VNEG, sp, gen_vfp_negs, aa32_fpsp_v2)
2423 DO_VFP_2OP(VNEG, dp, gen_vfp_negd, aa32_fpdp_v2)
2424 
2425 static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
2426 {
2427     gen_helper_vfp_sqrth(vd, vm, tcg_env);
2428 }
2429 
2430 static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
2431 {
2432     gen_helper_vfp_sqrts(vd, vm, tcg_env);
2433 }
2434 
2435 static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
2436 {
2437     gen_helper_vfp_sqrtd(vd, vm, tcg_env);
2438 }
2439 
2440 DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith)
2441 DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp, aa32_fpsp_v2)
2442 DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp, aa32_fpdp_v2)
2443 
2444 static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
2445 {
2446     TCGv_i32 vd, vm;
2447 
2448     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2449         return false;
2450     }
2451 
2452     /* Vm/M bits must be zero for the Z variant */
2453     if (a->z && a->vm != 0) {
2454         return false;
2455     }
2456 
2457     if (!vfp_access_check(s)) {
2458         return true;
2459     }
2460 
2461     vd = tcg_temp_new_i32();
2462     vm = tcg_temp_new_i32();
2463 
2464     vfp_load_reg16(vd, a->vd);
2465     if (a->z) {
2466         tcg_gen_movi_i32(vm, 0);
2467     } else {
2468         vfp_load_reg16(vm, a->vm);
2469     }
2470 
2471     if (a->e) {
2472         gen_helper_vfp_cmpeh(vd, vm, tcg_env);
2473     } else {
2474         gen_helper_vfp_cmph(vd, vm, tcg_env);
2475     }
2476     return true;
2477 }
2478 
2479 static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
2480 {
2481     TCGv_i32 vd, vm;
2482 
2483     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
2484         return false;
2485     }
2486 
2487     /* Vm/M bits must be zero for the Z variant */
2488     if (a->z && a->vm != 0) {
2489         return false;
2490     }
2491 
2492     if (!vfp_access_check(s)) {
2493         return true;
2494     }
2495 
2496     vd = tcg_temp_new_i32();
2497     vm = tcg_temp_new_i32();
2498 
2499     vfp_load_reg32(vd, a->vd);
2500     if (a->z) {
2501         tcg_gen_movi_i32(vm, 0);
2502     } else {
2503         vfp_load_reg32(vm, a->vm);
2504     }
2505 
2506     if (a->e) {
2507         gen_helper_vfp_cmpes(vd, vm, tcg_env);
2508     } else {
2509         gen_helper_vfp_cmps(vd, vm, tcg_env);
2510     }
2511     return true;
2512 }
2513 
2514 static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
2515 {
2516     TCGv_i64 vd, vm;
2517 
2518     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2519         return false;
2520     }
2521 
2522     /* Vm/M bits must be zero for the Z variant */
2523     if (a->z && a->vm != 0) {
2524         return false;
2525     }
2526 
2527     /* UNDEF accesses to D16-D31 if they don't exist. */
2528     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2529         return false;
2530     }
2531 
2532     if (!vfp_access_check(s)) {
2533         return true;
2534     }
2535 
2536     vd = tcg_temp_new_i64();
2537     vm = tcg_temp_new_i64();
2538 
2539     vfp_load_reg64(vd, a->vd);
2540     if (a->z) {
2541         tcg_gen_movi_i64(vm, 0);
2542     } else {
2543         vfp_load_reg64(vm, a->vm);
2544     }
2545 
2546     if (a->e) {
2547         gen_helper_vfp_cmped(vd, vm, tcg_env);
2548     } else {
2549         gen_helper_vfp_cmpd(vd, vm, tcg_env);
2550     }
2551     return true;
2552 }
2553 
2554 static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
2555 {
2556     TCGv_ptr fpst;
2557     TCGv_i32 ahp_mode;
2558     TCGv_i32 tmp;
2559 
2560     if (!dc_isar_feature(aa32_fp16_spconv, s)) {
2561         return false;
2562     }
2563 
2564     if (!vfp_access_check(s)) {
2565         return true;
2566     }
2567 
2568     fpst = fpstatus_ptr(FPST_FPCR);
2569     ahp_mode = get_ahp_flag();
2570     tmp = tcg_temp_new_i32();
2571     /* The T bit tells us if we want the low or high 16 bits of Vm */
2572     tcg_gen_ld16u_i32(tmp, tcg_env, vfp_f16_offset(a->vm, a->t));
2573     gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
2574     vfp_store_reg32(tmp, a->vd);
2575     return true;
2576 }
2577 
2578 static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
2579 {
2580     TCGv_ptr fpst;
2581     TCGv_i32 ahp_mode;
2582     TCGv_i32 tmp;
2583     TCGv_i64 vd;
2584 
2585     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2586         return false;
2587     }
2588 
2589     if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
2590         return false;
2591     }
2592 
2593     /* UNDEF accesses to D16-D31 if they don't exist. */
2594     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd  & 0x10)) {
2595         return false;
2596     }
2597 
2598     if (!vfp_access_check(s)) {
2599         return true;
2600     }
2601 
2602     fpst = fpstatus_ptr(FPST_FPCR);
2603     ahp_mode = get_ahp_flag();
2604     tmp = tcg_temp_new_i32();
2605     /* The T bit tells us if we want the low or high 16 bits of Vm */
2606     tcg_gen_ld16u_i32(tmp, tcg_env, vfp_f16_offset(a->vm, a->t));
2607     vd = tcg_temp_new_i64();
2608     gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
2609     vfp_store_reg64(vd, a->vd);
2610     return true;
2611 }
2612 
2613 static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a)
2614 {
2615     TCGv_ptr fpst;
2616     TCGv_i32 tmp;
2617 
2618     if (!dc_isar_feature(aa32_bf16, s)) {
2619         return false;
2620     }
2621 
2622     if (!vfp_access_check(s)) {
2623         return true;
2624     }
2625 
2626     fpst = fpstatus_ptr(FPST_FPCR);
2627     tmp = tcg_temp_new_i32();
2628 
2629     vfp_load_reg32(tmp, a->vm);
2630     gen_helper_bfcvt(tmp, tmp, fpst);
2631     tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t));
2632     return true;
2633 }
2634 
2635 static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
2636 {
2637     TCGv_ptr fpst;
2638     TCGv_i32 ahp_mode;
2639     TCGv_i32 tmp;
2640 
2641     if (!dc_isar_feature(aa32_fp16_spconv, s)) {
2642         return false;
2643     }
2644 
2645     if (!vfp_access_check(s)) {
2646         return true;
2647     }
2648 
2649     fpst = fpstatus_ptr(FPST_FPCR);
2650     ahp_mode = get_ahp_flag();
2651     tmp = tcg_temp_new_i32();
2652 
2653     vfp_load_reg32(tmp, a->vm);
2654     gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
2655     tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t));
2656     return true;
2657 }
2658 
2659 static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
2660 {
2661     TCGv_ptr fpst;
2662     TCGv_i32 ahp_mode;
2663     TCGv_i32 tmp;
2664     TCGv_i64 vm;
2665 
2666     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2667         return false;
2668     }
2669 
2670     if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
2671         return false;
2672     }
2673 
2674     /* UNDEF accesses to D16-D31 if they don't exist. */
2675     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm  & 0x10)) {
2676         return false;
2677     }
2678 
2679     if (!vfp_access_check(s)) {
2680         return true;
2681     }
2682 
2683     fpst = fpstatus_ptr(FPST_FPCR);
2684     ahp_mode = get_ahp_flag();
2685     tmp = tcg_temp_new_i32();
2686     vm = tcg_temp_new_i64();
2687 
2688     vfp_load_reg64(vm, a->vm);
2689     gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
2690     tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t));
2691     return true;
2692 }
2693 
2694 static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
2695 {
2696     TCGv_ptr fpst;
2697     TCGv_i32 tmp;
2698 
2699     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2700         return false;
2701     }
2702 
2703     if (!vfp_access_check(s)) {
2704         return true;
2705     }
2706 
2707     tmp = tcg_temp_new_i32();
2708     vfp_load_reg16(tmp, a->vm);
2709     fpst = fpstatus_ptr(FPST_FPCR_F16);
2710     gen_helper_rinth(tmp, tmp, fpst);
2711     vfp_store_reg32(tmp, a->vd);
2712     return true;
2713 }
2714 
2715 static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
2716 {
2717     TCGv_ptr fpst;
2718     TCGv_i32 tmp;
2719 
2720     if (!dc_isar_feature(aa32_vrint, s)) {
2721         return false;
2722     }
2723 
2724     if (!vfp_access_check(s)) {
2725         return true;
2726     }
2727 
2728     tmp = tcg_temp_new_i32();
2729     vfp_load_reg32(tmp, a->vm);
2730     fpst = fpstatus_ptr(FPST_FPCR);
2731     gen_helper_rints(tmp, tmp, fpst);
2732     vfp_store_reg32(tmp, a->vd);
2733     return true;
2734 }
2735 
2736 static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
2737 {
2738     TCGv_ptr fpst;
2739     TCGv_i64 tmp;
2740 
2741     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2742         return false;
2743     }
2744 
2745     if (!dc_isar_feature(aa32_vrint, s)) {
2746         return false;
2747     }
2748 
2749     /* UNDEF accesses to D16-D31 if they don't exist. */
2750     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2751         return false;
2752     }
2753 
2754     if (!vfp_access_check(s)) {
2755         return true;
2756     }
2757 
2758     tmp = tcg_temp_new_i64();
2759     vfp_load_reg64(tmp, a->vm);
2760     fpst = fpstatus_ptr(FPST_FPCR);
2761     gen_helper_rintd(tmp, tmp, fpst);
2762     vfp_store_reg64(tmp, a->vd);
2763     return true;
2764 }
2765 
2766 static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
2767 {
2768     TCGv_ptr fpst;
2769     TCGv_i32 tmp;
2770     TCGv_i32 tcg_rmode;
2771 
2772     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2773         return false;
2774     }
2775 
2776     if (!vfp_access_check(s)) {
2777         return true;
2778     }
2779 
2780     tmp = tcg_temp_new_i32();
2781     vfp_load_reg16(tmp, a->vm);
2782     fpst = fpstatus_ptr(FPST_FPCR_F16);
2783     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
2784     gen_helper_rinth(tmp, tmp, fpst);
2785     gen_restore_rmode(tcg_rmode, fpst);
2786     vfp_store_reg32(tmp, a->vd);
2787     return true;
2788 }
2789 
2790 static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
2791 {
2792     TCGv_ptr fpst;
2793     TCGv_i32 tmp;
2794     TCGv_i32 tcg_rmode;
2795 
2796     if (!dc_isar_feature(aa32_vrint, s)) {
2797         return false;
2798     }
2799 
2800     if (!vfp_access_check(s)) {
2801         return true;
2802     }
2803 
2804     tmp = tcg_temp_new_i32();
2805     vfp_load_reg32(tmp, a->vm);
2806     fpst = fpstatus_ptr(FPST_FPCR);
2807     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
2808     gen_helper_rints(tmp, tmp, fpst);
2809     gen_restore_rmode(tcg_rmode, fpst);
2810     vfp_store_reg32(tmp, a->vd);
2811     return true;
2812 }
2813 
2814 static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
2815 {
2816     TCGv_ptr fpst;
2817     TCGv_i64 tmp;
2818     TCGv_i32 tcg_rmode;
2819 
2820     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2821         return false;
2822     }
2823 
2824     if (!dc_isar_feature(aa32_vrint, s)) {
2825         return false;
2826     }
2827 
2828     /* UNDEF accesses to D16-D31 if they don't exist. */
2829     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2830         return false;
2831     }
2832 
2833     if (!vfp_access_check(s)) {
2834         return true;
2835     }
2836 
2837     tmp = tcg_temp_new_i64();
2838     vfp_load_reg64(tmp, a->vm);
2839     fpst = fpstatus_ptr(FPST_FPCR);
2840     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
2841     gen_helper_rintd(tmp, tmp, fpst);
2842     gen_restore_rmode(tcg_rmode, fpst);
2843     vfp_store_reg64(tmp, a->vd);
2844     return true;
2845 }
2846 
2847 static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
2848 {
2849     TCGv_ptr fpst;
2850     TCGv_i32 tmp;
2851 
2852     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2853         return false;
2854     }
2855 
2856     if (!vfp_access_check(s)) {
2857         return true;
2858     }
2859 
2860     tmp = tcg_temp_new_i32();
2861     vfp_load_reg16(tmp, a->vm);
2862     fpst = fpstatus_ptr(FPST_FPCR_F16);
2863     gen_helper_rinth_exact(tmp, tmp, fpst);
2864     vfp_store_reg32(tmp, a->vd);
2865     return true;
2866 }
2867 
2868 static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
2869 {
2870     TCGv_ptr fpst;
2871     TCGv_i32 tmp;
2872 
2873     if (!dc_isar_feature(aa32_vrint, s)) {
2874         return false;
2875     }
2876 
2877     if (!vfp_access_check(s)) {
2878         return true;
2879     }
2880 
2881     tmp = tcg_temp_new_i32();
2882     vfp_load_reg32(tmp, a->vm);
2883     fpst = fpstatus_ptr(FPST_FPCR);
2884     gen_helper_rints_exact(tmp, tmp, fpst);
2885     vfp_store_reg32(tmp, a->vd);
2886     return true;
2887 }
2888 
2889 static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
2890 {
2891     TCGv_ptr fpst;
2892     TCGv_i64 tmp;
2893 
2894     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2895         return false;
2896     }
2897 
2898     if (!dc_isar_feature(aa32_vrint, s)) {
2899         return false;
2900     }
2901 
2902     /* UNDEF accesses to D16-D31 if they don't exist. */
2903     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2904         return false;
2905     }
2906 
2907     if (!vfp_access_check(s)) {
2908         return true;
2909     }
2910 
2911     tmp = tcg_temp_new_i64();
2912     vfp_load_reg64(tmp, a->vm);
2913     fpst = fpstatus_ptr(FPST_FPCR);
2914     gen_helper_rintd_exact(tmp, tmp, fpst);
2915     vfp_store_reg64(tmp, a->vd);
2916     return true;
2917 }
2918 
2919 static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
2920 {
2921     TCGv_i64 vd;
2922     TCGv_i32 vm;
2923 
2924     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2925         return false;
2926     }
2927 
2928     /* UNDEF accesses to D16-D31 if they don't exist. */
2929     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
2930         return false;
2931     }
2932 
2933     if (!vfp_access_check(s)) {
2934         return true;
2935     }
2936 
2937     vm = tcg_temp_new_i32();
2938     vd = tcg_temp_new_i64();
2939     vfp_load_reg32(vm, a->vm);
2940     gen_helper_vfp_fcvtds(vd, vm, tcg_env);
2941     vfp_store_reg64(vd, a->vd);
2942     return true;
2943 }
2944 
2945 static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
2946 {
2947     TCGv_i64 vm;
2948     TCGv_i32 vd;
2949 
2950     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2951         return false;
2952     }
2953 
2954     /* UNDEF accesses to D16-D31 if they don't exist. */
2955     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
2956         return false;
2957     }
2958 
2959     if (!vfp_access_check(s)) {
2960         return true;
2961     }
2962 
2963     vd = tcg_temp_new_i32();
2964     vm = tcg_temp_new_i64();
2965     vfp_load_reg64(vm, a->vm);
2966     gen_helper_vfp_fcvtsd(vd, vm, tcg_env);
2967     vfp_store_reg32(vd, a->vd);
2968     return true;
2969 }
2970 
2971 static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
2972 {
2973     TCGv_i32 vm;
2974     TCGv_ptr fpst;
2975 
2976     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2977         return false;
2978     }
2979 
2980     if (!vfp_access_check(s)) {
2981         return true;
2982     }
2983 
2984     vm = tcg_temp_new_i32();
2985     vfp_load_reg32(vm, a->vm);
2986     fpst = fpstatus_ptr(FPST_FPCR_F16);
2987     if (a->s) {
2988         /* i32 -> f16 */
2989         gen_helper_vfp_sitoh(vm, vm, fpst);
2990     } else {
2991         /* u32 -> f16 */
2992         gen_helper_vfp_uitoh(vm, vm, fpst);
2993     }
2994     vfp_store_reg32(vm, a->vd);
2995     return true;
2996 }
2997 
2998 static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
2999 {
3000     TCGv_i32 vm;
3001     TCGv_ptr fpst;
3002 
3003     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
3004         return false;
3005     }
3006 
3007     if (!vfp_access_check(s)) {
3008         return true;
3009     }
3010 
3011     vm = tcg_temp_new_i32();
3012     vfp_load_reg32(vm, a->vm);
3013     fpst = fpstatus_ptr(FPST_FPCR);
3014     if (a->s) {
3015         /* i32 -> f32 */
3016         gen_helper_vfp_sitos(vm, vm, fpst);
3017     } else {
3018         /* u32 -> f32 */
3019         gen_helper_vfp_uitos(vm, vm, fpst);
3020     }
3021     vfp_store_reg32(vm, a->vd);
3022     return true;
3023 }
3024 
3025 static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
3026 {
3027     TCGv_i32 vm;
3028     TCGv_i64 vd;
3029     TCGv_ptr fpst;
3030 
3031     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3032         return false;
3033     }
3034 
3035     /* UNDEF accesses to D16-D31 if they don't exist. */
3036     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
3037         return false;
3038     }
3039 
3040     if (!vfp_access_check(s)) {
3041         return true;
3042     }
3043 
3044     vm = tcg_temp_new_i32();
3045     vd = tcg_temp_new_i64();
3046     vfp_load_reg32(vm, a->vm);
3047     fpst = fpstatus_ptr(FPST_FPCR);
3048     if (a->s) {
3049         /* i32 -> f64 */
3050         gen_helper_vfp_sitod(vd, vm, fpst);
3051     } else {
3052         /* u32 -> f64 */
3053         gen_helper_vfp_uitod(vd, vm, fpst);
3054     }
3055     vfp_store_reg64(vd, a->vd);
3056     return true;
3057 }
3058 
3059 static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
3060 {
3061     TCGv_i32 vd;
3062     TCGv_i64 vm;
3063 
3064     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3065         return false;
3066     }
3067 
3068     if (!dc_isar_feature(aa32_jscvt, s)) {
3069         return false;
3070     }
3071 
3072     /* UNDEF accesses to D16-D31 if they don't exist. */
3073     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3074         return false;
3075     }
3076 
3077     if (!vfp_access_check(s)) {
3078         return true;
3079     }
3080 
3081     vm = tcg_temp_new_i64();
3082     vd = tcg_temp_new_i32();
3083     vfp_load_reg64(vm, a->vm);
3084     gen_helper_vjcvt(vd, vm, tcg_env);
3085     vfp_store_reg32(vd, a->vd);
3086     return true;
3087 }
3088 
3089 static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
3090 {
3091     TCGv_i32 vd, shift;
3092     TCGv_ptr fpst;
3093     int frac_bits;
3094 
3095     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3096         return false;
3097     }
3098 
3099     if (!vfp_access_check(s)) {
3100         return true;
3101     }
3102 
3103     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3104 
3105     vd = tcg_temp_new_i32();
3106     vfp_load_reg32(vd, a->vd);
3107 
3108     fpst = fpstatus_ptr(FPST_FPCR_F16);
3109     shift = tcg_constant_i32(frac_bits);
3110 
3111     /* Switch on op:U:sx bits */
3112     switch (a->opc) {
3113     case 0:
3114         gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst);
3115         break;
3116     case 1:
3117         gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst);
3118         break;
3119     case 2:
3120         gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst);
3121         break;
3122     case 3:
3123         gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst);
3124         break;
3125     case 4:
3126         gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst);
3127         break;
3128     case 5:
3129         gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst);
3130         break;
3131     case 6:
3132         gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst);
3133         break;
3134     case 7:
3135         gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst);
3136         break;
3137     default:
3138         g_assert_not_reached();
3139     }
3140 
3141     vfp_store_reg32(vd, a->vd);
3142     return true;
3143 }
3144 
3145 static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
3146 {
3147     TCGv_i32 vd, shift;
3148     TCGv_ptr fpst;
3149     int frac_bits;
3150 
3151     if (!dc_isar_feature(aa32_fpsp_v3, s)) {
3152         return false;
3153     }
3154 
3155     if (!vfp_access_check(s)) {
3156         return true;
3157     }
3158 
3159     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3160 
3161     vd = tcg_temp_new_i32();
3162     vfp_load_reg32(vd, a->vd);
3163 
3164     fpst = fpstatus_ptr(FPST_FPCR);
3165     shift = tcg_constant_i32(frac_bits);
3166 
3167     /* Switch on op:U:sx bits */
3168     switch (a->opc) {
3169     case 0:
3170         gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst);
3171         break;
3172     case 1:
3173         gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst);
3174         break;
3175     case 2:
3176         gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst);
3177         break;
3178     case 3:
3179         gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst);
3180         break;
3181     case 4:
3182         gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
3183         break;
3184     case 5:
3185         gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
3186         break;
3187     case 6:
3188         gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
3189         break;
3190     case 7:
3191         gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
3192         break;
3193     default:
3194         g_assert_not_reached();
3195     }
3196 
3197     vfp_store_reg32(vd, a->vd);
3198     return true;
3199 }
3200 
3201 static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
3202 {
3203     TCGv_i64 vd;
3204     TCGv_i32 shift;
3205     TCGv_ptr fpst;
3206     int frac_bits;
3207 
3208     if (!dc_isar_feature(aa32_fpdp_v3, s)) {
3209         return false;
3210     }
3211 
3212     /* UNDEF accesses to D16-D31 if they don't exist. */
3213     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
3214         return false;
3215     }
3216 
3217     if (!vfp_access_check(s)) {
3218         return true;
3219     }
3220 
3221     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3222 
3223     vd = tcg_temp_new_i64();
3224     vfp_load_reg64(vd, a->vd);
3225 
3226     fpst = fpstatus_ptr(FPST_FPCR);
3227     shift = tcg_constant_i32(frac_bits);
3228 
3229     /* Switch on op:U:sx bits */
3230     switch (a->opc) {
3231     case 0:
3232         gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst);
3233         break;
3234     case 1:
3235         gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst);
3236         break;
3237     case 2:
3238         gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst);
3239         break;
3240     case 3:
3241         gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst);
3242         break;
3243     case 4:
3244         gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
3245         break;
3246     case 5:
3247         gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
3248         break;
3249     case 6:
3250         gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
3251         break;
3252     case 7:
3253         gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
3254         break;
3255     default:
3256         g_assert_not_reached();
3257     }
3258 
3259     vfp_store_reg64(vd, a->vd);
3260     return true;
3261 }
3262 
3263 static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
3264 {
3265     TCGv_i32 vm;
3266     TCGv_ptr fpst;
3267 
3268     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3269         return false;
3270     }
3271 
3272     if (!vfp_access_check(s)) {
3273         return true;
3274     }
3275 
3276     fpst = fpstatus_ptr(FPST_FPCR_F16);
3277     vm = tcg_temp_new_i32();
3278     vfp_load_reg16(vm, a->vm);
3279 
3280     if (a->s) {
3281         if (a->rz) {
3282             gen_helper_vfp_tosizh(vm, vm, fpst);
3283         } else {
3284             gen_helper_vfp_tosih(vm, vm, fpst);
3285         }
3286     } else {
3287         if (a->rz) {
3288             gen_helper_vfp_touizh(vm, vm, fpst);
3289         } else {
3290             gen_helper_vfp_touih(vm, vm, fpst);
3291         }
3292     }
3293     vfp_store_reg32(vm, a->vd);
3294     return true;
3295 }
3296 
3297 static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
3298 {
3299     TCGv_i32 vm;
3300     TCGv_ptr fpst;
3301 
3302     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
3303         return false;
3304     }
3305 
3306     if (!vfp_access_check(s)) {
3307         return true;
3308     }
3309 
3310     fpst = fpstatus_ptr(FPST_FPCR);
3311     vm = tcg_temp_new_i32();
3312     vfp_load_reg32(vm, a->vm);
3313 
3314     if (a->s) {
3315         if (a->rz) {
3316             gen_helper_vfp_tosizs(vm, vm, fpst);
3317         } else {
3318             gen_helper_vfp_tosis(vm, vm, fpst);
3319         }
3320     } else {
3321         if (a->rz) {
3322             gen_helper_vfp_touizs(vm, vm, fpst);
3323         } else {
3324             gen_helper_vfp_touis(vm, vm, fpst);
3325         }
3326     }
3327     vfp_store_reg32(vm, a->vd);
3328     return true;
3329 }
3330 
3331 static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
3332 {
3333     TCGv_i32 vd;
3334     TCGv_i64 vm;
3335     TCGv_ptr fpst;
3336 
3337     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3338         return false;
3339     }
3340 
3341     /* UNDEF accesses to D16-D31 if they don't exist. */
3342     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3343         return false;
3344     }
3345 
3346     if (!vfp_access_check(s)) {
3347         return true;
3348     }
3349 
3350     fpst = fpstatus_ptr(FPST_FPCR);
3351     vm = tcg_temp_new_i64();
3352     vd = tcg_temp_new_i32();
3353     vfp_load_reg64(vm, a->vm);
3354 
3355     if (a->s) {
3356         if (a->rz) {
3357             gen_helper_vfp_tosizd(vd, vm, fpst);
3358         } else {
3359             gen_helper_vfp_tosid(vd, vm, fpst);
3360         }
3361     } else {
3362         if (a->rz) {
3363             gen_helper_vfp_touizd(vd, vm, fpst);
3364         } else {
3365             gen_helper_vfp_touid(vd, vm, fpst);
3366         }
3367     }
3368     vfp_store_reg32(vd, a->vd);
3369     return true;
3370 }
3371 
3372 static bool trans_VINS(DisasContext *s, arg_VINS *a)
3373 {
3374     TCGv_i32 rd, rm;
3375 
3376     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3377         return false;
3378     }
3379 
3380     if (s->vec_len != 0 || s->vec_stride != 0) {
3381         return false;
3382     }
3383 
3384     if (!vfp_access_check(s)) {
3385         return true;
3386     }
3387 
3388     /* Insert low half of Vm into high half of Vd */
3389     rm = tcg_temp_new_i32();
3390     rd = tcg_temp_new_i32();
3391     vfp_load_reg16(rm, a->vm);
3392     vfp_load_reg16(rd, a->vd);
3393     tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
3394     vfp_store_reg32(rd, a->vd);
3395     return true;
3396 }
3397 
3398 static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
3399 {
3400     TCGv_i32 rm;
3401 
3402     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3403         return false;
3404     }
3405 
3406     if (s->vec_len != 0 || s->vec_stride != 0) {
3407         return false;
3408     }
3409 
3410     if (!vfp_access_check(s)) {
3411         return true;
3412     }
3413 
3414     /* Set Vd to high half of Vm */
3415     rm = tcg_temp_new_i32();
3416     vfp_load_reg32(rm, a->vm);
3417     tcg_gen_shri_i32(rm, rm, 16);
3418     vfp_store_reg32(rm, a->vd);
3419     return true;
3420 }
3421