xref: /openbmc/qemu/target/arm/tcg/translate-vfp.c (revision 136cb9cc)
1 /*
2  *  ARM translation: AArch32 VFP instructions
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *  Copyright (c) 2005-2007 CodeSourcery
6  *  Copyright (c) 2007 OpenedHand, Ltd.
7  *  Copyright (c) 2019 Linaro, Ltd.
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21  */
22 
23 #include "qemu/osdep.h"
24 #include "translate.h"
25 #include "translate-a32.h"
26 
27 /* Include the generated VFP decoder */
28 #include "decode-vfp.c.inc"
29 #include "decode-vfp-uncond.c.inc"
30 
31 static inline void vfp_load_reg64(TCGv_i64 var, int reg)
32 {
33     tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(true, reg));
34 }
35 
36 static inline void vfp_store_reg64(TCGv_i64 var, int reg)
37 {
38     tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(true, reg));
39 }
40 
41 static inline void vfp_load_reg32(TCGv_i32 var, int reg)
42 {
43     tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
44 }
45 
46 static inline void vfp_store_reg32(TCGv_i32 var, int reg)
47 {
48     tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
49 }
50 
51 /*
52  * The imm8 encodes the sign bit, enough bits to represent an exponent in
53  * the range 01....1xx to 10....0xx, and the most significant 4 bits of
54  * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
55  */
56 uint64_t vfp_expand_imm(int size, uint8_t imm8)
57 {
58     uint64_t imm;
59 
60     switch (size) {
61     case MO_64:
62         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
63             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
64             extract32(imm8, 0, 6);
65         imm <<= 48;
66         break;
67     case MO_32:
68         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
69             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
70             (extract32(imm8, 0, 6) << 3);
71         imm <<= 16;
72         break;
73     case MO_16:
74         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
75             (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
76             (extract32(imm8, 0, 6) << 6);
77         break;
78     default:
79         g_assert_not_reached();
80     }
81     return imm;
82 }
83 
/*
 * Compute the CPU-state offset of one 16-bit half of the VFP
 * single-precision register @reg: the top 16 bits when @top is true,
 * the bottom 16 bits otherwise.
 */
static inline long vfp_f16_offset(unsigned reg, bool top)
{
    long offs = vfp_reg_offset(false, reg);
    /*
     * Which half sits 2 bytes into the 32-bit register depends on the
     * byte order of the host.
     */
#if HOST_BIG_ENDIAN
    const bool at_byte_2 = !top;
#else
    const bool at_byte_2 = top;
#endif

    return at_byte_2 ? offs + 2 : offs;
}
103 
104 /*
105  * Generate code for M-profile lazy FP state preservation if needed;
106  * this corresponds to the pseudocode PreserveFPState() function.
107  */
108 static void gen_preserve_fp_state(DisasContext *s, bool skip_context_update)
109 {
110     if (s->v7m_lspact) {
111         /*
112          * Lazy state saving affects external memory and also the NVIC,
113          * so we must mark it as an IO operation for icount (and cause
114          * this to be the last insn in the TB).
115          */
116         if (translator_io_start(&s->base)) {
117             s->base.is_jmp = DISAS_UPDATE_EXIT;
118         }
119         gen_helper_v7m_preserve_fp_state(cpu_env);
120         /*
121          * If the preserve_fp_state helper doesn't throw an exception
122          * then it will clear LSPACT; we don't need to repeat this for
123          * any further FP insns in this TB.
124          */
125         s->v7m_lspact = false;
126         /*
127          * The helper might have zeroed VPR, so we do not know the
128          * correct value for the MVE_NO_PRED TB flag any more.
129          * If we're about to create a new fp context then that
130          * will precisely determine the MVE_NO_PRED value (see
131          * gen_update_fp_context()). Otherwise, we must:
132          *  - set s->mve_no_pred to false, so this instruction
133          *    is generated to use helper functions
134          *  - end the TB now, without chaining to the next TB
135          */
136         if (skip_context_update || !s->v7m_new_fp_ctxt_needed) {
137             s->mve_no_pred = false;
138             s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
139         }
140     }
141 }
142 
143 /*
144  * Generate code for M-profile FP context handling: update the
145  * ownership of the FP context, and create a new context if
146  * necessary. This corresponds to the parts of the pseudocode
147  * ExecuteFPCheck() after the initial PreserveFPState() call.
148  */
149 static void gen_update_fp_context(DisasContext *s)
150 {
151     /* Update ownership of FP context: set FPCCR.S to match current state */
152     if (s->v8m_fpccr_s_wrong) {
153         TCGv_i32 tmp;
154 
155         tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
156         if (s->v8m_secure) {
157             tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
158         } else {
159             tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
160         }
161         store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
162         /* Don't need to do this for any further FP insns in this TB */
163         s->v8m_fpccr_s_wrong = false;
164     }
165 
166     if (s->v7m_new_fp_ctxt_needed) {
167         /*
168          * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA,
169          * the FPSCR, and VPR.
170          */
171         TCGv_i32 control, fpscr;
172         uint32_t bits = R_V7M_CONTROL_FPCA_MASK;
173 
174         fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
175         gen_helper_vfp_set_fpscr(cpu_env, fpscr);
176         if (dc_isar_feature(aa32_mve, s)) {
177             store_cpu_field(tcg_constant_i32(0), v7m.vpr);
178         }
179         /*
180          * We just updated the FPSCR and VPR. Some of this state is cached
181          * in the MVE_NO_PRED TB flag. We want to avoid having to end the
182          * TB here, which means we need the new value of the MVE_NO_PRED
183          * flag to be exactly known here and the same for all executions.
184          * Luckily FPDSCR.LTPSIZE is always constant 4 and the VPR is
185          * always set to 0, so the new MVE_NO_PRED flag is always 1
186          * if and only if we have MVE.
187          *
188          * (The other FPSCR state cached in TB flags is VECLEN and VECSTRIDE,
189          * but those do not exist for M-profile, so are not relevant here.)
190          */
191         s->mve_no_pred = dc_isar_feature(aa32_mve, s);
192 
193         if (s->v8m_secure) {
194             bits |= R_V7M_CONTROL_SFPA_MASK;
195         }
196         control = load_cpu_field(v7m.control[M_REG_S]);
197         tcg_gen_ori_i32(control, control, bits);
198         store_cpu_field(control, v7m.control[M_REG_S]);
199         /* Don't need to do this for any further FP insns in this TB */
200         s->v7m_new_fp_ctxt_needed = false;
201     }
202 }
203 
204 /*
205  * Check that VFP access is enabled, A-profile specific version.
206  *
207  * If VFP is enabled, return true. If not, emit code to generate an
208  * appropriate exception and return false.
209  * The ignore_vfp_enabled argument specifies that we should ignore
210  * whether VFP is enabled via FPEXC.EN: this should be true for FMXR/FMRX
211  * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
212  */
213 static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled)
214 {
215     if (s->fp_excp_el) {
216         /*
217          * The full syndrome is only used for HSR when HCPTR traps:
218          * For v8, when TA==0, coproc is RES0.
219          * For v7, any use of a Floating-point instruction or access
220          * to a Floating-point Extension register that is trapped to
221          * Hyp mode because of a trap configured in the HCPTR sets
222          * this field to 0xA.
223          */
224         int coproc = arm_dc_feature(s, ARM_FEATURE_V8) ? 0 : 0xa;
225         uint32_t syn = syn_fp_access_trap(1, 0xe, false, coproc);
226 
227         gen_exception_insn_el(s, 0, EXCP_UDEF, syn, s->fp_excp_el);
228         return false;
229     }
230 
231     /*
232      * Note that rebuild_hflags_a32 has already accounted for being in EL0
233      * and the higher EL in A64 mode, etc.  Unlike A64 mode, there do not
234      * appear to be any insns which touch VFP which are allowed.
235      */
236     if (s->sme_trap_nonstreaming) {
237         gen_exception_insn(s, 0, EXCP_UDEF,
238                            syn_smetrap(SME_ET_Streaming,
239                                        curr_insn_len(s) == 2));
240         return false;
241     }
242 
243     if (!s->vfp_enabled && !ignore_vfp_enabled) {
244         assert(!arm_dc_feature(s, ARM_FEATURE_M));
245         unallocated_encoding(s);
246         return false;
247     }
248     return true;
249 }
250 
251 /*
252  * Check that VFP access is enabled, M-profile specific version.
253  *
254  * If VFP is enabled, do the necessary M-profile lazy-FP handling and then
255  * return true. If not, emit code to generate an appropriate exception and
256  * return false.
257  * skip_context_update is true to skip the "update FP context" part of this.
258  */
259 bool vfp_access_check_m(DisasContext *s, bool skip_context_update)
260 {
261     if (s->fp_excp_el) {
262         /*
263          * M-profile mostly catches the "FPU disabled" case early, in
264          * disas_m_nocp(), but a few insns (eg LCTP, WLSTP, DLSTP)
265          * which do coprocessor-checks are outside the large ranges of
266          * the encoding space handled by the patterns in m-nocp.decode,
267          * and for them we may need to raise NOCP here.
268          */
269         gen_exception_insn_el(s, 0, EXCP_NOCP,
270                               syn_uncategorized(), s->fp_excp_el);
271         return false;
272     }
273 
274     /* Handle M-profile lazy FP state mechanics */
275 
276     /* Trigger lazy-state preservation if necessary */
277     gen_preserve_fp_state(s, skip_context_update);
278 
279     if (!skip_context_update) {
280         /* Update ownership of FP context and create new FP context if needed */
281         gen_update_fp_context(s);
282     }
283 
284     return true;
285 }
286 
287 /*
288  * The most usual kind of VFP access check, for everything except
289  * FMXR/FMRX to the always-available special registers.
290  */
291 bool vfp_access_check(DisasContext *s)
292 {
293     if (arm_dc_feature(s, ARM_FEATURE_M)) {
294         return vfp_access_check_m(s, false);
295     } else {
296         return vfp_access_check_a(s, false);
297     }
298 }
299 
300 static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
301 {
302     uint32_t rd, rn, rm;
303     int sz = a->sz;
304 
305     if (!dc_isar_feature(aa32_vsel, s)) {
306         return false;
307     }
308 
309     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
310         return false;
311     }
312 
313     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
314         return false;
315     }
316 
317     /* UNDEF accesses to D16-D31 if they don't exist */
318     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
319         ((a->vm | a->vn | a->vd) & 0x10)) {
320         return false;
321     }
322 
323     rd = a->vd;
324     rn = a->vn;
325     rm = a->vm;
326 
327     if (!vfp_access_check(s)) {
328         return true;
329     }
330 
331     if (sz == 3) {
332         TCGv_i64 frn, frm, dest;
333         TCGv_i64 tmp, zero, zf, nf, vf;
334 
335         zero = tcg_constant_i64(0);
336 
337         frn = tcg_temp_new_i64();
338         frm = tcg_temp_new_i64();
339         dest = tcg_temp_new_i64();
340 
341         zf = tcg_temp_new_i64();
342         nf = tcg_temp_new_i64();
343         vf = tcg_temp_new_i64();
344 
345         tcg_gen_extu_i32_i64(zf, cpu_ZF);
346         tcg_gen_ext_i32_i64(nf, cpu_NF);
347         tcg_gen_ext_i32_i64(vf, cpu_VF);
348 
349         vfp_load_reg64(frn, rn);
350         vfp_load_reg64(frm, rm);
351         switch (a->cc) {
352         case 0: /* eq: Z */
353             tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero, frn, frm);
354             break;
355         case 1: /* vs: V */
356             tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero, frn, frm);
357             break;
358         case 2: /* ge: N == V -> N ^ V == 0 */
359             tmp = tcg_temp_new_i64();
360             tcg_gen_xor_i64(tmp, vf, nf);
361             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, frn, frm);
362             break;
363         case 3: /* gt: !Z && N == V */
364             tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero, frn, frm);
365             tmp = tcg_temp_new_i64();
366             tcg_gen_xor_i64(tmp, vf, nf);
367             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, dest, frm);
368             break;
369         }
370         vfp_store_reg64(dest, rd);
371     } else {
372         TCGv_i32 frn, frm, dest;
373         TCGv_i32 tmp, zero;
374 
375         zero = tcg_constant_i32(0);
376 
377         frn = tcg_temp_new_i32();
378         frm = tcg_temp_new_i32();
379         dest = tcg_temp_new_i32();
380         vfp_load_reg32(frn, rn);
381         vfp_load_reg32(frm, rm);
382         switch (a->cc) {
383         case 0: /* eq: Z */
384             tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero, frn, frm);
385             break;
386         case 1: /* vs: V */
387             tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero, frn, frm);
388             break;
389         case 2: /* ge: N == V -> N ^ V == 0 */
390             tmp = tcg_temp_new_i32();
391             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
392             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, frn, frm);
393             break;
394         case 3: /* gt: !Z && N == V */
395             tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero, frn, frm);
396             tmp = tcg_temp_new_i32();
397             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
398             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, dest, frm);
399             break;
400         }
401         /* For fp16 the top half is always zeroes */
402         if (sz == 1) {
403             tcg_gen_andi_i32(dest, dest, 0xffff);
404         }
405         vfp_store_reg32(dest, rd);
406     }
407 
408     return true;
409 }
410 
411 /*
412  * Table for converting the most common AArch32 encoding of
413  * rounding mode to arm_fprounding order (which matches the
414  * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
415  */
416 static const uint8_t fp_decode_rm[] = {
417     FPROUNDING_TIEAWAY,
418     FPROUNDING_TIEEVEN,
419     FPROUNDING_POSINF,
420     FPROUNDING_NEGINF,
421 };
422 
423 static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
424 {
425     uint32_t rd, rm;
426     int sz = a->sz;
427     TCGv_ptr fpst;
428     TCGv_i32 tcg_rmode;
429     int rounding = fp_decode_rm[a->rm];
430 
431     if (!dc_isar_feature(aa32_vrint, s)) {
432         return false;
433     }
434 
435     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
436         return false;
437     }
438 
439     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
440         return false;
441     }
442 
443     /* UNDEF accesses to D16-D31 if they don't exist */
444     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
445         ((a->vm | a->vd) & 0x10)) {
446         return false;
447     }
448 
449     rd = a->vd;
450     rm = a->vm;
451 
452     if (!vfp_access_check(s)) {
453         return true;
454     }
455 
456     if (sz == 1) {
457         fpst = fpstatus_ptr(FPST_FPCR_F16);
458     } else {
459         fpst = fpstatus_ptr(FPST_FPCR);
460     }
461 
462     tcg_rmode = gen_set_rmode(rounding, fpst);
463 
464     if (sz == 3) {
465         TCGv_i64 tcg_op;
466         TCGv_i64 tcg_res;
467         tcg_op = tcg_temp_new_i64();
468         tcg_res = tcg_temp_new_i64();
469         vfp_load_reg64(tcg_op, rm);
470         gen_helper_rintd(tcg_res, tcg_op, fpst);
471         vfp_store_reg64(tcg_res, rd);
472     } else {
473         TCGv_i32 tcg_op;
474         TCGv_i32 tcg_res;
475         tcg_op = tcg_temp_new_i32();
476         tcg_res = tcg_temp_new_i32();
477         vfp_load_reg32(tcg_op, rm);
478         if (sz == 1) {
479             gen_helper_rinth(tcg_res, tcg_op, fpst);
480         } else {
481             gen_helper_rints(tcg_res, tcg_op, fpst);
482         }
483         vfp_store_reg32(tcg_res, rd);
484     }
485 
486     gen_restore_rmode(tcg_rmode, fpst);
487     return true;
488 }
489 
490 static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
491 {
492     uint32_t rd, rm;
493     int sz = a->sz;
494     TCGv_ptr fpst;
495     TCGv_i32 tcg_rmode, tcg_shift;
496     int rounding = fp_decode_rm[a->rm];
497     bool is_signed = a->op;
498 
499     if (!dc_isar_feature(aa32_vcvt_dr, s)) {
500         return false;
501     }
502 
503     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
504         return false;
505     }
506 
507     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
508         return false;
509     }
510 
511     /* UNDEF accesses to D16-D31 if they don't exist */
512     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
513         return false;
514     }
515 
516     rd = a->vd;
517     rm = a->vm;
518 
519     if (!vfp_access_check(s)) {
520         return true;
521     }
522 
523     if (sz == 1) {
524         fpst = fpstatus_ptr(FPST_FPCR_F16);
525     } else {
526         fpst = fpstatus_ptr(FPST_FPCR);
527     }
528 
529     tcg_shift = tcg_constant_i32(0);
530     tcg_rmode = gen_set_rmode(rounding, fpst);
531 
532     if (sz == 3) {
533         TCGv_i64 tcg_double, tcg_res;
534         TCGv_i32 tcg_tmp;
535         tcg_double = tcg_temp_new_i64();
536         tcg_res = tcg_temp_new_i64();
537         tcg_tmp = tcg_temp_new_i32();
538         vfp_load_reg64(tcg_double, rm);
539         if (is_signed) {
540             gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
541         } else {
542             gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
543         }
544         tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
545         vfp_store_reg32(tcg_tmp, rd);
546     } else {
547         TCGv_i32 tcg_single, tcg_res;
548         tcg_single = tcg_temp_new_i32();
549         tcg_res = tcg_temp_new_i32();
550         vfp_load_reg32(tcg_single, rm);
551         if (sz == 1) {
552             if (is_signed) {
553                 gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
554             } else {
555                 gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst);
556             }
557         } else {
558             if (is_signed) {
559                 gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
560             } else {
561                 gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
562             }
563         }
564         vfp_store_reg32(tcg_res, rd);
565     }
566 
567     gen_restore_rmode(tcg_rmode, fpst);
568     return true;
569 }
570 
571 bool mve_skip_vmov(DisasContext *s, int vn, int index, int size)
572 {
573     /*
574      * In a CPU with MVE, the VMOV (vector lane to general-purpose register)
575      * and VMOV (general-purpose register to vector lane) insns are not
576      * predicated, but they are subject to beatwise execution if they are
577      * not in an IT block.
578      *
579      * Since our implementation always executes all 4 beats in one tick,
580      * this means only that if PSR.ECI says we should not be executing
581      * the beat corresponding to the lane of the vector register being
582      * accessed then we should skip performing the move, and that we need
583      * to do the usual check for bad ECI state and advance of ECI state.
584      *
585      * Note that if PSR.ECI is non-zero then we cannot be in an IT block.
586      *
587      * Return true if this VMOV scalar <-> gpreg should be skipped because
588      * the MVE PSR.ECI state says we skip the beat where the store happens.
589      */
590 
591     /* Calculate the byte offset into Qn which we're going to access */
592     int ofs = (index << size) + ((vn & 1) * 8);
593 
594     if (!dc_isar_feature(aa32_mve, s)) {
595         return false;
596     }
597 
598     switch (s->eci) {
599     case ECI_NONE:
600         return false;
601     case ECI_A0:
602         return ofs < 4;
603     case ECI_A0A1:
604         return ofs < 8;
605     case ECI_A0A1A2:
606     case ECI_A0A1A2B0:
607         return ofs < 12;
608     default:
609         g_assert_not_reached();
610     }
611 }
612 
613 static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
614 {
615     /* VMOV scalar to general purpose register */
616     TCGv_i32 tmp;
617 
618     /*
619      * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
620      * all sizes, whether the CPU has fp or not.
621      */
622     if (!dc_isar_feature(aa32_mve, s)) {
623         if (a->size == MO_32
624             ? !dc_isar_feature(aa32_fpsp_v2, s)
625             : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
626             return false;
627         }
628     }
629 
630     /* UNDEF accesses to D16-D31 if they don't exist */
631     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
632         return false;
633     }
634 
635     if (dc_isar_feature(aa32_mve, s)) {
636         if (!mve_eci_check(s)) {
637             return true;
638         }
639     }
640 
641     if (!vfp_access_check(s)) {
642         return true;
643     }
644 
645     if (!mve_skip_vmov(s, a->vn, a->index, a->size)) {
646         tmp = tcg_temp_new_i32();
647         read_neon_element32(tmp, a->vn, a->index,
648                             a->size | (a->u ? 0 : MO_SIGN));
649         store_reg(s, a->rt, tmp);
650     }
651 
652     if (dc_isar_feature(aa32_mve, s)) {
653         mve_update_and_store_eci(s);
654     }
655     return true;
656 }
657 
658 static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
659 {
660     /* VMOV general purpose register to scalar */
661     TCGv_i32 tmp;
662 
663     /*
664      * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
665      * all sizes, whether the CPU has fp or not.
666      */
667     if (!dc_isar_feature(aa32_mve, s)) {
668         if (a->size == MO_32
669             ? !dc_isar_feature(aa32_fpsp_v2, s)
670             : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
671             return false;
672         }
673     }
674 
675     /* UNDEF accesses to D16-D31 if they don't exist */
676     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
677         return false;
678     }
679 
680     if (dc_isar_feature(aa32_mve, s)) {
681         if (!mve_eci_check(s)) {
682             return true;
683         }
684     }
685 
686     if (!vfp_access_check(s)) {
687         return true;
688     }
689 
690     if (!mve_skip_vmov(s, a->vn, a->index, a->size)) {
691         tmp = load_reg(s, a->rt);
692         write_neon_element32(tmp, a->vn, a->index, a->size);
693     }
694 
695     if (dc_isar_feature(aa32_mve, s)) {
696         mve_update_and_store_eci(s);
697     }
698     return true;
699 }
700 
701 static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
702 {
703     /* VDUP (general purpose register) */
704     TCGv_i32 tmp;
705     int size, vec_size;
706 
707     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
708         return false;
709     }
710 
711     /* UNDEF accesses to D16-D31 if they don't exist */
712     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
713         return false;
714     }
715 
716     if (a->b && a->e) {
717         return false;
718     }
719 
720     if (a->q && (a->vn & 1)) {
721         return false;
722     }
723 
724     vec_size = a->q ? 16 : 8;
725     if (a->b) {
726         size = 0;
727     } else if (a->e) {
728         size = 1;
729     } else {
730         size = 2;
731     }
732 
733     if (!vfp_access_check(s)) {
734         return true;
735     }
736 
737     tmp = load_reg(s, a->rt);
738     tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn),
739                          vec_size, vec_size, tmp);
740     return true;
741 }
742 
743 static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
744 {
745     TCGv_i32 tmp;
746     bool ignore_vfp_enabled = false;
747 
748     if (arm_dc_feature(s, ARM_FEATURE_M)) {
749         /* M profile version was already handled in m-nocp.decode */
750         return false;
751     }
752 
753     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
754         return false;
755     }
756 
757     switch (a->reg) {
758     case ARM_VFP_FPSID:
759         /*
760          * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
761          * all ID registers to privileged access only.
762          */
763         if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) {
764             return false;
765         }
766         ignore_vfp_enabled = true;
767         break;
768     case ARM_VFP_MVFR0:
769     case ARM_VFP_MVFR1:
770         if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
771             return false;
772         }
773         ignore_vfp_enabled = true;
774         break;
775     case ARM_VFP_MVFR2:
776         if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
777             return false;
778         }
779         ignore_vfp_enabled = true;
780         break;
781     case ARM_VFP_FPSCR:
782         break;
783     case ARM_VFP_FPEXC:
784         if (IS_USER(s)) {
785             return false;
786         }
787         ignore_vfp_enabled = true;
788         break;
789     case ARM_VFP_FPINST:
790     case ARM_VFP_FPINST2:
791         /* Not present in VFPv3 */
792         if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) {
793             return false;
794         }
795         break;
796     default:
797         return false;
798     }
799 
800     /*
801      * Call vfp_access_check_a() directly, because we need to tell
802      * it to ignore FPEXC.EN for some register accesses.
803      */
804     if (!vfp_access_check_a(s, ignore_vfp_enabled)) {
805         return true;
806     }
807 
808     if (a->l) {
809         /* VMRS, move VFP special register to gp register */
810         switch (a->reg) {
811         case ARM_VFP_MVFR0:
812         case ARM_VFP_MVFR1:
813         case ARM_VFP_MVFR2:
814         case ARM_VFP_FPSID:
815             if (s->current_el == 1) {
816                 gen_set_condexec(s);
817                 gen_update_pc(s, 0);
818                 gen_helper_check_hcr_el2_trap(cpu_env,
819                                               tcg_constant_i32(a->rt),
820                                               tcg_constant_i32(a->reg));
821             }
822             /* fall through */
823         case ARM_VFP_FPEXC:
824         case ARM_VFP_FPINST:
825         case ARM_VFP_FPINST2:
826             tmp = load_cpu_field(vfp.xregs[a->reg]);
827             break;
828         case ARM_VFP_FPSCR:
829             if (a->rt == 15) {
830                 tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
831                 tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
832             } else {
833                 tmp = tcg_temp_new_i32();
834                 gen_helper_vfp_get_fpscr(tmp, cpu_env);
835             }
836             break;
837         default:
838             g_assert_not_reached();
839         }
840 
841         if (a->rt == 15) {
842             /* Set the 4 flag bits in the CPSR.  */
843             gen_set_nzcv(tmp);
844         } else {
845             store_reg(s, a->rt, tmp);
846         }
847     } else {
848         /* VMSR, move gp register to VFP special register */
849         switch (a->reg) {
850         case ARM_VFP_FPSID:
851         case ARM_VFP_MVFR0:
852         case ARM_VFP_MVFR1:
853         case ARM_VFP_MVFR2:
854             /* Writes are ignored.  */
855             break;
856         case ARM_VFP_FPSCR:
857             tmp = load_reg(s, a->rt);
858             gen_helper_vfp_set_fpscr(cpu_env, tmp);
859             gen_lookup_tb(s);
860             break;
861         case ARM_VFP_FPEXC:
862             /*
863              * TODO: VFP subarchitecture support.
864              * For now, keep the EN bit only
865              */
866             tmp = load_reg(s, a->rt);
867             tcg_gen_andi_i32(tmp, tmp, 1 << 30);
868             store_cpu_field(tmp, vfp.xregs[a->reg]);
869             gen_lookup_tb(s);
870             break;
871         case ARM_VFP_FPINST:
872         case ARM_VFP_FPINST2:
873             tmp = load_reg(s, a->rt);
874             store_cpu_field(tmp, vfp.xregs[a->reg]);
875             break;
876         default:
877             g_assert_not_reached();
878         }
879     }
880 
881     return true;
882 }
883 
884 
885 static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
886 {
887     TCGv_i32 tmp;
888 
889     if (!dc_isar_feature(aa32_fp16_arith, s)) {
890         return false;
891     }
892 
893     if (a->rt == 15) {
894         /* UNPREDICTABLE; we choose to UNDEF */
895         return false;
896     }
897 
898     if (!vfp_access_check(s)) {
899         return true;
900     }
901 
902     if (a->l) {
903         /* VFP to general purpose register */
904         tmp = tcg_temp_new_i32();
905         vfp_load_reg32(tmp, a->vn);
906         tcg_gen_andi_i32(tmp, tmp, 0xffff);
907         store_reg(s, a->rt, tmp);
908     } else {
909         /* general purpose register to VFP */
910         tmp = load_reg(s, a->rt);
911         tcg_gen_andi_i32(tmp, tmp, 0xffff);
912         vfp_store_reg32(tmp, a->vn);
913     }
914 
915     return true;
916 }
917 
918 static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
919 {
920     TCGv_i32 tmp;
921 
922     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
923         return false;
924     }
925 
926     if (!vfp_access_check(s)) {
927         return true;
928     }
929 
930     if (a->l) {
931         /* VFP to general purpose register */
932         tmp = tcg_temp_new_i32();
933         vfp_load_reg32(tmp, a->vn);
934         if (a->rt == 15) {
935             /* Set the 4 flag bits in the CPSR.  */
936             gen_set_nzcv(tmp);
937         } else {
938             store_reg(s, a->rt, tmp);
939         }
940     } else {
941         /* general purpose register to VFP */
942         tmp = load_reg(s, a->rt);
943         vfp_store_reg32(tmp, a->vn);
944     }
945 
946     return true;
947 }
948 
949 static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
950 {
951     TCGv_i32 tmp;
952 
953     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
954         return false;
955     }
956 
957     /*
958      * VMOV between two general-purpose registers and two single precision
959      * floating point registers
960      */
961     if (!vfp_access_check(s)) {
962         return true;
963     }
964 
965     if (a->op) {
966         /* fpreg to gpreg */
967         tmp = tcg_temp_new_i32();
968         vfp_load_reg32(tmp, a->vm);
969         store_reg(s, a->rt, tmp);
970         tmp = tcg_temp_new_i32();
971         vfp_load_reg32(tmp, a->vm + 1);
972         store_reg(s, a->rt2, tmp);
973     } else {
974         /* gpreg to fpreg */
975         tmp = load_reg(s, a->rt);
976         vfp_store_reg32(tmp, a->vm);
977         tmp = load_reg(s, a->rt2);
978         vfp_store_reg32(tmp, a->vm + 1);
979     }
980 
981     return true;
982 }
983 
984 static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
985 {
986     TCGv_i32 tmp;
987 
988     /*
989      * VMOV between two general-purpose registers and one double precision
990      * floating point register.  Note that this does not require support
991      * for double precision arithmetic.
992      */
993     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
994         return false;
995     }
996 
997     /* UNDEF accesses to D16-D31 if they don't exist */
998     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
999         return false;
1000     }
1001 
1002     if (!vfp_access_check(s)) {
1003         return true;
1004     }
1005 
1006     if (a->op) {
1007         /* fpreg to gpreg */
1008         tmp = tcg_temp_new_i32();
1009         vfp_load_reg32(tmp, a->vm * 2);
1010         store_reg(s, a->rt, tmp);
1011         tmp = tcg_temp_new_i32();
1012         vfp_load_reg32(tmp, a->vm * 2 + 1);
1013         store_reg(s, a->rt2, tmp);
1014     } else {
1015         /* gpreg to fpreg */
1016         tmp = load_reg(s, a->rt);
1017         vfp_store_reg32(tmp, a->vm * 2);
1018         tmp = load_reg(s, a->rt2);
1019         vfp_store_reg32(tmp, a->vm * 2 + 1);
1020     }
1021 
1022     return true;
1023 }
1024 
1025 static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
1026 {
1027     uint32_t offset;
1028     TCGv_i32 addr, tmp;
1029 
1030     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1031         return false;
1032     }
1033 
1034     if (!vfp_access_check(s)) {
1035         return true;
1036     }
1037 
1038     /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */
1039     offset = a->imm << 1;
1040     if (!a->u) {
1041         offset = -offset;
1042     }
1043 
1044     /* For thumb, use of PC is UNPREDICTABLE.  */
1045     addr = add_reg_for_lit(s, a->rn, offset);
1046     tmp = tcg_temp_new_i32();
1047     if (a->l) {
1048         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
1049         vfp_store_reg32(tmp, a->vd);
1050     } else {
1051         vfp_load_reg32(tmp, a->vd);
1052         gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
1053     }
1054     return true;
1055 }
1056 
1057 static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
1058 {
1059     uint32_t offset;
1060     TCGv_i32 addr, tmp;
1061 
1062     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1063         return false;
1064     }
1065 
1066     if (!vfp_access_check(s)) {
1067         return true;
1068     }
1069 
1070     offset = a->imm << 2;
1071     if (!a->u) {
1072         offset = -offset;
1073     }
1074 
1075     /* For thumb, use of PC is UNPREDICTABLE.  */
1076     addr = add_reg_for_lit(s, a->rn, offset);
1077     tmp = tcg_temp_new_i32();
1078     if (a->l) {
1079         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1080         vfp_store_reg32(tmp, a->vd);
1081     } else {
1082         vfp_load_reg32(tmp, a->vd);
1083         gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1084     }
1085     return true;
1086 }
1087 
1088 static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
1089 {
1090     uint32_t offset;
1091     TCGv_i32 addr;
1092     TCGv_i64 tmp;
1093 
1094     /* Note that this does not require support for double arithmetic.  */
1095     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1096         return false;
1097     }
1098 
1099     /* UNDEF accesses to D16-D31 if they don't exist */
1100     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
1101         return false;
1102     }
1103 
1104     if (!vfp_access_check(s)) {
1105         return true;
1106     }
1107 
1108     offset = a->imm << 2;
1109     if (!a->u) {
1110         offset = -offset;
1111     }
1112 
1113     /* For thumb, use of PC is UNPREDICTABLE.  */
1114     addr = add_reg_for_lit(s, a->rn, offset);
1115     tmp = tcg_temp_new_i64();
1116     if (a->l) {
1117         gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
1118         vfp_store_reg64(tmp, a->vd);
1119     } else {
1120         vfp_load_reg64(tmp, a->vd);
1121         gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
1122     }
1123     return true;
1124 }
1125 
1126 static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
1127 {
1128     uint32_t offset;
1129     TCGv_i32 addr, tmp;
1130     int i, n;
1131 
1132     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1133         return false;
1134     }
1135 
1136     n = a->imm;
1137 
1138     if (n == 0 || (a->vd + n) > 32) {
1139         /*
1140          * UNPREDICTABLE cases for bad immediates: we choose to
1141          * UNDEF to avoid generating huge numbers of TCG ops
1142          */
1143         return false;
1144     }
1145     if (a->rn == 15 && a->w) {
1146         /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1147         return false;
1148     }
1149 
1150     s->eci_handled = true;
1151 
1152     if (!vfp_access_check(s)) {
1153         return true;
1154     }
1155 
1156     /* For thumb, use of PC is UNPREDICTABLE.  */
1157     addr = add_reg_for_lit(s, a->rn, 0);
1158     if (a->p) {
1159         /* pre-decrement */
1160         tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1161     }
1162 
1163     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1164         /*
1165          * Here 'addr' is the lowest address we will store to,
1166          * and is either the old SP (if post-increment) or
1167          * the new SP (if pre-decrement). For post-increment
1168          * where the old value is below the limit and the new
1169          * value is above, it is UNKNOWN whether the limit check
1170          * triggers; we choose to trigger.
1171          */
1172         gen_helper_v8m_stackcheck(cpu_env, addr);
1173     }
1174 
1175     offset = 4;
1176     tmp = tcg_temp_new_i32();
1177     for (i = 0; i < n; i++) {
1178         if (a->l) {
1179             /* load */
1180             gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1181             vfp_store_reg32(tmp, a->vd + i);
1182         } else {
1183             /* store */
1184             vfp_load_reg32(tmp, a->vd + i);
1185             gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1186         }
1187         tcg_gen_addi_i32(addr, addr, offset);
1188     }
1189     if (a->w) {
1190         /* writeback */
1191         if (a->p) {
1192             offset = -offset * n;
1193             tcg_gen_addi_i32(addr, addr, offset);
1194         }
1195         store_reg(s, a->rn, addr);
1196     }
1197 
1198     clear_eci_state(s);
1199     return true;
1200 }
1201 
1202 static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
1203 {
1204     uint32_t offset;
1205     TCGv_i32 addr;
1206     TCGv_i64 tmp;
1207     int i, n;
1208 
1209     /* Note that this does not require support for double arithmetic.  */
1210     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1211         return false;
1212     }
1213 
1214     n = a->imm >> 1;
1215 
1216     if (n == 0 || (a->vd + n) > 32 || n > 16) {
1217         /*
1218          * UNPREDICTABLE cases for bad immediates: we choose to
1219          * UNDEF to avoid generating huge numbers of TCG ops
1220          */
1221         return false;
1222     }
1223     if (a->rn == 15 && a->w) {
1224         /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1225         return false;
1226     }
1227 
1228     /* UNDEF accesses to D16-D31 if they don't exist */
1229     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {
1230         return false;
1231     }
1232 
1233     s->eci_handled = true;
1234 
1235     if (!vfp_access_check(s)) {
1236         return true;
1237     }
1238 
1239     /* For thumb, use of PC is UNPREDICTABLE.  */
1240     addr = add_reg_for_lit(s, a->rn, 0);
1241     if (a->p) {
1242         /* pre-decrement */
1243         tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1244     }
1245 
1246     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1247         /*
1248          * Here 'addr' is the lowest address we will store to,
1249          * and is either the old SP (if post-increment) or
1250          * the new SP (if pre-decrement). For post-increment
1251          * where the old value is below the limit and the new
1252          * value is above, it is UNKNOWN whether the limit check
1253          * triggers; we choose to trigger.
1254          */
1255         gen_helper_v8m_stackcheck(cpu_env, addr);
1256     }
1257 
1258     offset = 8;
1259     tmp = tcg_temp_new_i64();
1260     for (i = 0; i < n; i++) {
1261         if (a->l) {
1262             /* load */
1263             gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
1264             vfp_store_reg64(tmp, a->vd + i);
1265         } else {
1266             /* store */
1267             vfp_load_reg64(tmp, a->vd + i);
1268             gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
1269         }
1270         tcg_gen_addi_i32(addr, addr, offset);
1271     }
1272     if (a->w) {
1273         /* writeback */
1274         if (a->p) {
1275             offset = -offset * n;
1276         } else if (a->imm & 1) {
1277             offset = 4;
1278         } else {
1279             offset = 0;
1280         }
1281 
1282         if (offset != 0) {
1283             tcg_gen_addi_i32(addr, addr, offset);
1284         }
1285         store_reg(s, a->rn, addr);
1286     }
1287 
1288     clear_eci_state(s);
1289     return true;
1290 }
1291 
1292 /*
1293  * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
1294  * The callback should emit code to write a value to vd. If
1295  * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
1296  * will contain the old value of the relevant VFP register;
1297  * otherwise it must be written to only.
1298  */
1299 typedef void VFPGen3OpSPFn(TCGv_i32 vd,
1300                            TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
1301 typedef void VFPGen3OpDPFn(TCGv_i64 vd,
1302                            TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);
1303 
1304 /*
1305  * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
1306  * The callback should emit code to write a value to vd (which
1307  * should be written to only).
1308  */
1309 typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
1310 typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);
1311 
/*
 * Tell whether an S register lives in the scalar bank, i.e. is one
 * of s0..s7 (bits 3 and 4 of the register number are both clear).
 */
static inline bool vfp_sreg_is_scalar(int reg)
{
    const int bank_bits = reg & 0x18;

    return !bank_bits;
}
1320 
/*
 * Tell whether a D register lives in a scalar bank, i.e. is one of
 * d0..d3 or d16..d19 (bits 2 and 3 of the register number are clear).
 */
static inline bool vfp_dreg_is_scalar(int reg)
{
    const int bank_bits = reg & 0xc;

    return !bank_bits;
}
1329 
/*
 * Step an S register number forwards by delta, wrapping around inside
 * its bank: only the low 3 bits change, the bank bits are preserved.
 */
static inline int vfp_advance_sreg(int reg, int delta)
{
    const int bank = reg & ~0x7;
    const int index = (reg + delta) & 0x7;

    return index | bank;
}
1338 
/*
 * Step a D register number forwards by delta, wrapping around inside
 * its bank: only the low 2 bits change, the bank bits are preserved.
 */
static inline int vfp_advance_dreg(int reg, int delta)
{
    const int bank = reg & ~0x3;
    const int index = (reg + delta) & 0x3;

    return index | bank;
}
1347 
1348 /*
1349  * Perform a 3-operand VFP data processing instruction. fn is the
1350  * callback to do the actual operation; this function deals with the
1351  * code to handle looping around for VFP vector processing.
1352  */
1353 static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
1354                           int vd, int vn, int vm, bool reads_vd)
1355 {
1356     uint32_t delta_m = 0;
1357     uint32_t delta_d = 0;
1358     int veclen = s->vec_len;
1359     TCGv_i32 f0, f1, fd;
1360     TCGv_ptr fpst;
1361 
1362     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1363         return false;
1364     }
1365 
1366     if (!dc_isar_feature(aa32_fpshvec, s) &&
1367         (veclen != 0 || s->vec_stride != 0)) {
1368         return false;
1369     }
1370 
1371     if (!vfp_access_check(s)) {
1372         return true;
1373     }
1374 
1375     if (veclen > 0) {
1376         /* Figure out what type of vector operation this is.  */
1377         if (vfp_sreg_is_scalar(vd)) {
1378             /* scalar */
1379             veclen = 0;
1380         } else {
1381             delta_d = s->vec_stride + 1;
1382 
1383             if (vfp_sreg_is_scalar(vm)) {
1384                 /* mixed scalar/vector */
1385                 delta_m = 0;
1386             } else {
1387                 /* vector */
1388                 delta_m = delta_d;
1389             }
1390         }
1391     }
1392 
1393     f0 = tcg_temp_new_i32();
1394     f1 = tcg_temp_new_i32();
1395     fd = tcg_temp_new_i32();
1396     fpst = fpstatus_ptr(FPST_FPCR);
1397 
1398     vfp_load_reg32(f0, vn);
1399     vfp_load_reg32(f1, vm);
1400 
1401     for (;;) {
1402         if (reads_vd) {
1403             vfp_load_reg32(fd, vd);
1404         }
1405         fn(fd, f0, f1, fpst);
1406         vfp_store_reg32(fd, vd);
1407 
1408         if (veclen == 0) {
1409             break;
1410         }
1411 
1412         /* Set up the operands for the next iteration */
1413         veclen--;
1414         vd = vfp_advance_sreg(vd, delta_d);
1415         vn = vfp_advance_sreg(vn, delta_d);
1416         vfp_load_reg32(f0, vn);
1417         if (delta_m) {
1418             vm = vfp_advance_sreg(vm, delta_m);
1419             vfp_load_reg32(f1, vm);
1420         }
1421     }
1422     return true;
1423 }
1424 
1425 static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
1426                           int vd, int vn, int vm, bool reads_vd)
1427 {
1428     /*
1429      * Do a half-precision operation. Functionally this is
1430      * the same as do_vfp_3op_sp(), except:
1431      *  - it uses the FPST_FPCR_F16
1432      *  - it doesn't need the VFP vector handling (fp16 is a
1433      *    v8 feature, and in v8 VFP vectors don't exist)
1434      *  - it does the aa32_fp16_arith feature test
1435      */
1436     TCGv_i32 f0, f1, fd;
1437     TCGv_ptr fpst;
1438 
1439     if (!dc_isar_feature(aa32_fp16_arith, s)) {
1440         return false;
1441     }
1442 
1443     if (s->vec_len != 0 || s->vec_stride != 0) {
1444         return false;
1445     }
1446 
1447     if (!vfp_access_check(s)) {
1448         return true;
1449     }
1450 
1451     f0 = tcg_temp_new_i32();
1452     f1 = tcg_temp_new_i32();
1453     fd = tcg_temp_new_i32();
1454     fpst = fpstatus_ptr(FPST_FPCR_F16);
1455 
1456     vfp_load_reg32(f0, vn);
1457     vfp_load_reg32(f1, vm);
1458 
1459     if (reads_vd) {
1460         vfp_load_reg32(fd, vd);
1461     }
1462     fn(fd, f0, f1, fpst);
1463     vfp_store_reg32(fd, vd);
1464     return true;
1465 }
1466 
1467 static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
1468                           int vd, int vn, int vm, bool reads_vd)
1469 {
1470     uint32_t delta_m = 0;
1471     uint32_t delta_d = 0;
1472     int veclen = s->vec_len;
1473     TCGv_i64 f0, f1, fd;
1474     TCGv_ptr fpst;
1475 
1476     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
1477         return false;
1478     }
1479 
1480     /* UNDEF accesses to D16-D31 if they don't exist */
1481     if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {
1482         return false;
1483     }
1484 
1485     if (!dc_isar_feature(aa32_fpshvec, s) &&
1486         (veclen != 0 || s->vec_stride != 0)) {
1487         return false;
1488     }
1489 
1490     if (!vfp_access_check(s)) {
1491         return true;
1492     }
1493 
1494     if (veclen > 0) {
1495         /* Figure out what type of vector operation this is.  */
1496         if (vfp_dreg_is_scalar(vd)) {
1497             /* scalar */
1498             veclen = 0;
1499         } else {
1500             delta_d = (s->vec_stride >> 1) + 1;
1501 
1502             if (vfp_dreg_is_scalar(vm)) {
1503                 /* mixed scalar/vector */
1504                 delta_m = 0;
1505             } else {
1506                 /* vector */
1507                 delta_m = delta_d;
1508             }
1509         }
1510     }
1511 
1512     f0 = tcg_temp_new_i64();
1513     f1 = tcg_temp_new_i64();
1514     fd = tcg_temp_new_i64();
1515     fpst = fpstatus_ptr(FPST_FPCR);
1516 
1517     vfp_load_reg64(f0, vn);
1518     vfp_load_reg64(f1, vm);
1519 
1520     for (;;) {
1521         if (reads_vd) {
1522             vfp_load_reg64(fd, vd);
1523         }
1524         fn(fd, f0, f1, fpst);
1525         vfp_store_reg64(fd, vd);
1526 
1527         if (veclen == 0) {
1528             break;
1529         }
1530         /* Set up the operands for the next iteration */
1531         veclen--;
1532         vd = vfp_advance_dreg(vd, delta_d);
1533         vn = vfp_advance_dreg(vn, delta_d);
1534         vfp_load_reg64(f0, vn);
1535         if (delta_m) {
1536             vm = vfp_advance_dreg(vm, delta_m);
1537             vfp_load_reg64(f1, vm);
1538         }
1539     }
1540     return true;
1541 }
1542 
1543 static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
1544 {
1545     uint32_t delta_m = 0;
1546     uint32_t delta_d = 0;
1547     int veclen = s->vec_len;
1548     TCGv_i32 f0, fd;
1549 
1550     /* Note that the caller must check the aa32_fpsp_v2 feature. */
1551 
1552     if (!dc_isar_feature(aa32_fpshvec, s) &&
1553         (veclen != 0 || s->vec_stride != 0)) {
1554         return false;
1555     }
1556 
1557     if (!vfp_access_check(s)) {
1558         return true;
1559     }
1560 
1561     if (veclen > 0) {
1562         /* Figure out what type of vector operation this is.  */
1563         if (vfp_sreg_is_scalar(vd)) {
1564             /* scalar */
1565             veclen = 0;
1566         } else {
1567             delta_d = s->vec_stride + 1;
1568 
1569             if (vfp_sreg_is_scalar(vm)) {
1570                 /* mixed scalar/vector */
1571                 delta_m = 0;
1572             } else {
1573                 /* vector */
1574                 delta_m = delta_d;
1575             }
1576         }
1577     }
1578 
1579     f0 = tcg_temp_new_i32();
1580     fd = tcg_temp_new_i32();
1581 
1582     vfp_load_reg32(f0, vm);
1583 
1584     for (;;) {
1585         fn(fd, f0);
1586         vfp_store_reg32(fd, vd);
1587 
1588         if (veclen == 0) {
1589             break;
1590         }
1591 
1592         if (delta_m == 0) {
1593             /* single source one-many */
1594             while (veclen--) {
1595                 vd = vfp_advance_sreg(vd, delta_d);
1596                 vfp_store_reg32(fd, vd);
1597             }
1598             break;
1599         }
1600 
1601         /* Set up the operands for the next iteration */
1602         veclen--;
1603         vd = vfp_advance_sreg(vd, delta_d);
1604         vm = vfp_advance_sreg(vm, delta_m);
1605         vfp_load_reg32(f0, vm);
1606     }
1607     return true;
1608 }
1609 
1610 static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
1611 {
1612     /*
1613      * Do a half-precision operation. Functionally this is
1614      * the same as do_vfp_2op_sp(), except:
1615      *  - it doesn't need the VFP vector handling (fp16 is a
1616      *    v8 feature, and in v8 VFP vectors don't exist)
1617      *  - it does the aa32_fp16_arith feature test
1618      */
1619     TCGv_i32 f0;
1620 
1621     /* Note that the caller must check the aa32_fp16_arith feature */
1622 
1623     if (!dc_isar_feature(aa32_fp16_arith, s)) {
1624         return false;
1625     }
1626 
1627     if (s->vec_len != 0 || s->vec_stride != 0) {
1628         return false;
1629     }
1630 
1631     if (!vfp_access_check(s)) {
1632         return true;
1633     }
1634 
1635     f0 = tcg_temp_new_i32();
1636     vfp_load_reg32(f0, vm);
1637     fn(f0, f0);
1638     vfp_store_reg32(f0, vd);
1639 
1640     return true;
1641 }
1642 
1643 static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
1644 {
1645     uint32_t delta_m = 0;
1646     uint32_t delta_d = 0;
1647     int veclen = s->vec_len;
1648     TCGv_i64 f0, fd;
1649 
1650     /* Note that the caller must check the aa32_fpdp_v2 feature. */
1651 
1652     /* UNDEF accesses to D16-D31 if they don't exist */
1653     if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
1654         return false;
1655     }
1656 
1657     if (!dc_isar_feature(aa32_fpshvec, s) &&
1658         (veclen != 0 || s->vec_stride != 0)) {
1659         return false;
1660     }
1661 
1662     if (!vfp_access_check(s)) {
1663         return true;
1664     }
1665 
1666     if (veclen > 0) {
1667         /* Figure out what type of vector operation this is.  */
1668         if (vfp_dreg_is_scalar(vd)) {
1669             /* scalar */
1670             veclen = 0;
1671         } else {
1672             delta_d = (s->vec_stride >> 1) + 1;
1673 
1674             if (vfp_dreg_is_scalar(vm)) {
1675                 /* mixed scalar/vector */
1676                 delta_m = 0;
1677             } else {
1678                 /* vector */
1679                 delta_m = delta_d;
1680             }
1681         }
1682     }
1683 
1684     f0 = tcg_temp_new_i64();
1685     fd = tcg_temp_new_i64();
1686 
1687     vfp_load_reg64(f0, vm);
1688 
1689     for (;;) {
1690         fn(fd, f0);
1691         vfp_store_reg64(fd, vd);
1692 
1693         if (veclen == 0) {
1694             break;
1695         }
1696 
1697         if (delta_m == 0) {
1698             /* single source one-many */
1699             while (veclen--) {
1700                 vd = vfp_advance_dreg(vd, delta_d);
1701                 vfp_store_reg64(fd, vd);
1702             }
1703             break;
1704         }
1705 
1706         /* Set up the operands for the next iteration */
1707         veclen--;
1708         vd = vfp_advance_dreg(vd, delta_d);
1709         vd = vfp_advance_dreg(vm, delta_m);
1710         vfp_load_reg64(f0, vm);
1711     }
1712     return true;
1713 }
1714 
1715 static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1716 {
1717     /* Note that order of inputs to the add matters for NaNs */
1718     TCGv_i32 tmp = tcg_temp_new_i32();
1719 
1720     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
1721     gen_helper_vfp_addh(vd, vd, tmp, fpst);
1722 }
1723 
1724 static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
1725 {
1726     return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
1727 }
1728 
1729 static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1730 {
1731     /* Note that order of inputs to the add matters for NaNs */
1732     TCGv_i32 tmp = tcg_temp_new_i32();
1733 
1734     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1735     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1736 }
1737 
1738 static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
1739 {
1740     return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
1741 }
1742 
1743 static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1744 {
1745     /* Note that order of inputs to the add matters for NaNs */
1746     TCGv_i64 tmp = tcg_temp_new_i64();
1747 
1748     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1749     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1750 }
1751 
1752 static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
1753 {
1754     return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
1755 }
1756 
1757 static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1758 {
1759     /*
1760      * VMLS: vd = vd + -(vn * vm)
1761      * Note that order of inputs to the add matters for NaNs.
1762      */
1763     TCGv_i32 tmp = tcg_temp_new_i32();
1764 
1765     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
1766     gen_helper_vfp_negh(tmp, tmp);
1767     gen_helper_vfp_addh(vd, vd, tmp, fpst);
1768 }
1769 
1770 static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
1771 {
1772     return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
1773 }
1774 
1775 static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1776 {
1777     /*
1778      * VMLS: vd = vd + -(vn * vm)
1779      * Note that order of inputs to the add matters for NaNs.
1780      */
1781     TCGv_i32 tmp = tcg_temp_new_i32();
1782 
1783     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1784     gen_helper_vfp_negs(tmp, tmp);
1785     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1786 }
1787 
1788 static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
1789 {
1790     return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
1791 }
1792 
1793 static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1794 {
1795     /*
1796      * VMLS: vd = vd + -(vn * vm)
1797      * Note that order of inputs to the add matters for NaNs.
1798      */
1799     TCGv_i64 tmp = tcg_temp_new_i64();
1800 
1801     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1802     gen_helper_vfp_negd(tmp, tmp);
1803     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1804 }
1805 
1806 static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
1807 {
1808     return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
1809 }
1810 
1811 static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1812 {
1813     /*
1814      * VNMLS: -fd + (fn * fm)
1815      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1816      * plausible looking simplifications because this will give wrong results
1817      * for NaNs.
1818      */
1819     TCGv_i32 tmp = tcg_temp_new_i32();
1820 
1821     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
1822     gen_helper_vfp_negh(vd, vd);
1823     gen_helper_vfp_addh(vd, vd, tmp, fpst);
1824 }
1825 
1826 static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
1827 {
1828     return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
1829 }
1830 
1831 static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1832 {
1833     /*
1834      * VNMLS: -fd + (fn * fm)
1835      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1836      * plausible looking simplifications because this will give wrong results
1837      * for NaNs.
1838      */
1839     TCGv_i32 tmp = tcg_temp_new_i32();
1840 
1841     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1842     gen_helper_vfp_negs(vd, vd);
1843     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1844 }
1845 
1846 static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
1847 {
1848     return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
1849 }
1850 
1851 static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1852 {
1853     /*
1854      * VNMLS: -fd + (fn * fm)
1855      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1856      * plausible looking simplifications because this will give wrong results
1857      * for NaNs.
1858      */
1859     TCGv_i64 tmp = tcg_temp_new_i64();
1860 
1861     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1862     gen_helper_vfp_negd(vd, vd);
1863     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1864 }
1865 
1866 static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
1867 {
1868     return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
1869 }
1870 
1871 static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1872 {
1873     /* VNMLA: -fd + -(fn * fm) */
1874     TCGv_i32 tmp = tcg_temp_new_i32();
1875 
1876     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
1877     gen_helper_vfp_negh(tmp, tmp);
1878     gen_helper_vfp_negh(vd, vd);
1879     gen_helper_vfp_addh(vd, vd, tmp, fpst);
1880 }
1881 
1882 static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
1883 {
1884     return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
1885 }
1886 
1887 static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1888 {
1889     /* VNMLA: -fd + -(fn * fm) */
1890     TCGv_i32 tmp = tcg_temp_new_i32();
1891 
1892     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1893     gen_helper_vfp_negs(tmp, tmp);
1894     gen_helper_vfp_negs(vd, vd);
1895     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1896 }
1897 
1898 static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
1899 {
1900     return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
1901 }
1902 
1903 static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1904 {
1905     /* VNMLA: -fd + (fn * fm) */
1906     TCGv_i64 tmp = tcg_temp_new_i64();
1907 
1908     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1909     gen_helper_vfp_negd(tmp, tmp);
1910     gen_helper_vfp_negd(vd, vd);
1911     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1912 }
1913 
1914 static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
1915 {
1916     return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
1917 }
1918 
1919 static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
1920 {
1921     return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
1922 }
1923 
1924 static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
1925 {
1926     return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
1927 }
1928 
1929 static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
1930 {
1931     return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
1932 }
1933 
1934 static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1935 {
1936     /* VNMUL: -(fn * fm) */
1937     gen_helper_vfp_mulh(vd, vn, vm, fpst);
1938     gen_helper_vfp_negh(vd, vd);
1939 }
1940 
1941 static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
1942 {
1943     return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
1944 }
1945 
1946 static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1947 {
1948     /* VNMUL: -(fn * fm) */
1949     gen_helper_vfp_muls(vd, vn, vm, fpst);
1950     gen_helper_vfp_negs(vd, vd);
1951 }
1952 
1953 static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
1954 {
1955     return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
1956 }
1957 
1958 static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1959 {
1960     /* VNMUL: -(fn * fm) */
1961     gen_helper_vfp_muld(vd, vn, vm, fpst);
1962     gen_helper_vfp_negd(vd, vd);
1963 }
1964 
1965 static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
1966 {
1967     return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
1968 }
1969 
1970 static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
1971 {
1972     return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);
1973 }
1974 
1975 static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
1976 {
1977     return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
1978 }
1979 
1980 static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
1981 {
1982     return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
1983 }
1984 
1985 static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
1986 {
1987     return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);
1988 }
1989 
1990 static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
1991 {
1992     return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
1993 }
1994 
1995 static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
1996 {
1997     return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
1998 }
1999 
2000 static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
2001 {
2002     return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);
2003 }
2004 
2005 static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
2006 {
2007     return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
2008 }
2009 
2010 static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
2011 {
2012     return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
2013 }
2014 
2015 static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
2016 {
2017     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2018         return false;
2019     }
2020     return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
2021                          a->vd, a->vn, a->vm, false);
2022 }
2023 
2024 static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
2025 {
2026     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2027         return false;
2028     }
2029     return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
2030                          a->vd, a->vn, a->vm, false);
2031 }
2032 
2033 static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
2034 {
2035     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2036         return false;
2037     }
2038     return do_vfp_3op_sp(s, gen_helper_vfp_minnums,
2039                          a->vd, a->vn, a->vm, false);
2040 }
2041 
2042 static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a)
2043 {
2044     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2045         return false;
2046     }
2047     return do_vfp_3op_sp(s, gen_helper_vfp_maxnums,
2048                          a->vd, a->vn, a->vm, false);
2049 }
2050 
2051 static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a)
2052 {
2053     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2054         return false;
2055     }
2056     return do_vfp_3op_dp(s, gen_helper_vfp_minnumd,
2057                          a->vd, a->vn, a->vm, false);
2058 }
2059 
2060 static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
2061 {
2062     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2063         return false;
2064     }
2065     return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd,
2066                          a->vd, a->vn, a->vm, false);
2067 }
2068 
2069 static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
2070 {
2071     /*
2072      * VFNMA : fd = muladd(-fd,  fn, fm)
2073      * VFNMS : fd = muladd(-fd, -fn, fm)
2074      * VFMA  : fd = muladd( fd,  fn, fm)
2075      * VFMS  : fd = muladd( fd, -fn, fm)
2076      *
2077      * These are fused multiply-add, and must be done as one floating
2078      * point operation with no rounding between the multiplication and
2079      * addition steps.  NB that doing the negations here as separate
2080      * steps is correct : an input NaN should come out with its sign
2081      * bit flipped if it is a negated-input.
2082      */
2083     TCGv_ptr fpst;
2084     TCGv_i32 vn, vm, vd;
2085 
2086     /*
2087      * Present in VFPv4 only, and only with the FP16 extension.
2088      * Note that we can't rely on the SIMDFMAC check alone, because
2089      * in a Neon-no-VFP core that ID register field will be non-zero.
2090      */
2091     if (!dc_isar_feature(aa32_fp16_arith, s) ||
2092         !dc_isar_feature(aa32_simdfmac, s) ||
2093         !dc_isar_feature(aa32_fpsp_v2, s)) {
2094         return false;
2095     }
2096 
2097     if (s->vec_len != 0 || s->vec_stride != 0) {
2098         return false;
2099     }
2100 
2101     if (!vfp_access_check(s)) {
2102         return true;
2103     }
2104 
2105     vn = tcg_temp_new_i32();
2106     vm = tcg_temp_new_i32();
2107     vd = tcg_temp_new_i32();
2108 
2109     vfp_load_reg32(vn, a->vn);
2110     vfp_load_reg32(vm, a->vm);
2111     if (neg_n) {
2112         /* VFNMS, VFMS */
2113         gen_helper_vfp_negh(vn, vn);
2114     }
2115     vfp_load_reg32(vd, a->vd);
2116     if (neg_d) {
2117         /* VFNMA, VFNMS */
2118         gen_helper_vfp_negh(vd, vd);
2119     }
2120     fpst = fpstatus_ptr(FPST_FPCR_F16);
2121     gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
2122     vfp_store_reg32(vd, a->vd);
2123     return true;
2124 }
2125 
2126 static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
2127 {
2128     /*
2129      * VFNMA : fd = muladd(-fd,  fn, fm)
2130      * VFNMS : fd = muladd(-fd, -fn, fm)
2131      * VFMA  : fd = muladd( fd,  fn, fm)
2132      * VFMS  : fd = muladd( fd, -fn, fm)
2133      *
2134      * These are fused multiply-add, and must be done as one floating
2135      * point operation with no rounding between the multiplication and
2136      * addition steps.  NB that doing the negations here as separate
2137      * steps is correct : an input NaN should come out with its sign
2138      * bit flipped if it is a negated-input.
2139      */
2140     TCGv_ptr fpst;
2141     TCGv_i32 vn, vm, vd;
2142 
2143     /*
2144      * Present in VFPv4 only.
2145      * Note that we can't rely on the SIMDFMAC check alone, because
2146      * in a Neon-no-VFP core that ID register field will be non-zero.
2147      */
2148     if (!dc_isar_feature(aa32_simdfmac, s) ||
2149         !dc_isar_feature(aa32_fpsp_v2, s)) {
2150         return false;
2151     }
2152     /*
2153      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
2154      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
2155      */
2156     if (s->vec_len != 0 || s->vec_stride != 0) {
2157         return false;
2158     }
2159 
2160     if (!vfp_access_check(s)) {
2161         return true;
2162     }
2163 
2164     vn = tcg_temp_new_i32();
2165     vm = tcg_temp_new_i32();
2166     vd = tcg_temp_new_i32();
2167 
2168     vfp_load_reg32(vn, a->vn);
2169     vfp_load_reg32(vm, a->vm);
2170     if (neg_n) {
2171         /* VFNMS, VFMS */
2172         gen_helper_vfp_negs(vn, vn);
2173     }
2174     vfp_load_reg32(vd, a->vd);
2175     if (neg_d) {
2176         /* VFNMA, VFNMS */
2177         gen_helper_vfp_negs(vd, vd);
2178     }
2179     fpst = fpstatus_ptr(FPST_FPCR);
2180     gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
2181     vfp_store_reg32(vd, a->vd);
2182     return true;
2183 }
2184 
2185 static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
2186 {
2187     /*
2188      * VFNMA : fd = muladd(-fd,  fn, fm)
2189      * VFNMS : fd = muladd(-fd, -fn, fm)
2190      * VFMA  : fd = muladd( fd,  fn, fm)
2191      * VFMS  : fd = muladd( fd, -fn, fm)
2192      *
2193      * These are fused multiply-add, and must be done as one floating
2194      * point operation with no rounding between the multiplication and
2195      * addition steps.  NB that doing the negations here as separate
2196      * steps is correct : an input NaN should come out with its sign
2197      * bit flipped if it is a negated-input.
2198      */
2199     TCGv_ptr fpst;
2200     TCGv_i64 vn, vm, vd;
2201 
2202     /*
2203      * Present in VFPv4 only.
2204      * Note that we can't rely on the SIMDFMAC check alone, because
2205      * in a Neon-no-VFP core that ID register field will be non-zero.
2206      */
2207     if (!dc_isar_feature(aa32_simdfmac, s) ||
2208         !dc_isar_feature(aa32_fpdp_v2, s)) {
2209         return false;
2210     }
2211     /*
2212      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
2213      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
2214      */
2215     if (s->vec_len != 0 || s->vec_stride != 0) {
2216         return false;
2217     }
2218 
2219     /* UNDEF accesses to D16-D31 if they don't exist. */
2220     if (!dc_isar_feature(aa32_simd_r32, s) &&
2221         ((a->vd | a->vn | a->vm) & 0x10)) {
2222         return false;
2223     }
2224 
2225     if (!vfp_access_check(s)) {
2226         return true;
2227     }
2228 
2229     vn = tcg_temp_new_i64();
2230     vm = tcg_temp_new_i64();
2231     vd = tcg_temp_new_i64();
2232 
2233     vfp_load_reg64(vn, a->vn);
2234     vfp_load_reg64(vm, a->vm);
2235     if (neg_n) {
2236         /* VFNMS, VFMS */
2237         gen_helper_vfp_negd(vn, vn);
2238     }
2239     vfp_load_reg64(vd, a->vd);
2240     if (neg_d) {
2241         /* VFNMA, VFNMS */
2242         gen_helper_vfp_negd(vd, vd);
2243     }
2244     fpst = fpstatus_ptr(FPST_FPCR);
2245     gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
2246     vfp_store_reg64(vd, a->vd);
2247     return true;
2248 }
2249 
2250 #define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD)                   \
2251     static bool trans_##INSN##_##PREC(DisasContext *s,                  \
2252                                       arg_##INSN##_##PREC *a)           \
2253     {                                                                   \
2254         return do_vfm_##PREC(s, a, NEGN, NEGD);                         \
2255     }
2256 
2257 #define MAKE_VFM_TRANS_FNS(PREC) \
2258     MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
2259     MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
2260     MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
2261     MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)
2262 
2263 MAKE_VFM_TRANS_FNS(hp)
2264 MAKE_VFM_TRANS_FNS(sp)
2265 MAKE_VFM_TRANS_FNS(dp)
2266 
2267 static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
2268 {
2269     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2270         return false;
2271     }
2272 
2273     if (s->vec_len != 0 || s->vec_stride != 0) {
2274         return false;
2275     }
2276 
2277     if (!vfp_access_check(s)) {
2278         return true;
2279     }
2280 
2281     vfp_store_reg32(tcg_constant_i32(vfp_expand_imm(MO_16, a->imm)), a->vd);
2282     return true;
2283 }
2284 
2285 static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
2286 {
2287     uint32_t delta_d = 0;
2288     int veclen = s->vec_len;
2289     TCGv_i32 fd;
2290     uint32_t vd;
2291 
2292     vd = a->vd;
2293 
2294     if (!dc_isar_feature(aa32_fpsp_v3, s)) {
2295         return false;
2296     }
2297 
2298     if (!dc_isar_feature(aa32_fpshvec, s) &&
2299         (veclen != 0 || s->vec_stride != 0)) {
2300         return false;
2301     }
2302 
2303     if (!vfp_access_check(s)) {
2304         return true;
2305     }
2306 
2307     if (veclen > 0) {
2308         /* Figure out what type of vector operation this is.  */
2309         if (vfp_sreg_is_scalar(vd)) {
2310             /* scalar */
2311             veclen = 0;
2312         } else {
2313             delta_d = s->vec_stride + 1;
2314         }
2315     }
2316 
2317     fd = tcg_constant_i32(vfp_expand_imm(MO_32, a->imm));
2318 
2319     for (;;) {
2320         vfp_store_reg32(fd, vd);
2321 
2322         if (veclen == 0) {
2323             break;
2324         }
2325 
2326         /* Set up the operands for the next iteration */
2327         veclen--;
2328         vd = vfp_advance_sreg(vd, delta_d);
2329     }
2330 
2331     return true;
2332 }
2333 
2334 static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
2335 {
2336     uint32_t delta_d = 0;
2337     int veclen = s->vec_len;
2338     TCGv_i64 fd;
2339     uint32_t vd;
2340 
2341     vd = a->vd;
2342 
2343     if (!dc_isar_feature(aa32_fpdp_v3, s)) {
2344         return false;
2345     }
2346 
2347     /* UNDEF accesses to D16-D31 if they don't exist. */
2348     if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {
2349         return false;
2350     }
2351 
2352     if (!dc_isar_feature(aa32_fpshvec, s) &&
2353         (veclen != 0 || s->vec_stride != 0)) {
2354         return false;
2355     }
2356 
2357     if (!vfp_access_check(s)) {
2358         return true;
2359     }
2360 
2361     if (veclen > 0) {
2362         /* Figure out what type of vector operation this is.  */
2363         if (vfp_dreg_is_scalar(vd)) {
2364             /* scalar */
2365             veclen = 0;
2366         } else {
2367             delta_d = (s->vec_stride >> 1) + 1;
2368         }
2369     }
2370 
2371     fd = tcg_constant_i64(vfp_expand_imm(MO_64, a->imm));
2372 
2373     for (;;) {
2374         vfp_store_reg64(fd, vd);
2375 
2376         if (veclen == 0) {
2377             break;
2378         }
2379 
2380         /* Set up the operands for the next iteration */
2381         veclen--;
2382         vd = vfp_advance_dreg(vd, delta_d);
2383     }
2384 
2385     return true;
2386 }
2387 
/*
 * Define trans_<OP>_<SZ>() for a two-operand insn: return false unless
 * ISA feature FEAT is present, otherwise hand GEN to do_vfp_2op_<SZ>().
 */
#define DO_VFP_2OP(OP, SZ, GEN, FEAT)                                   \
    static bool trans_##OP##_##SZ(DisasContext *s, arg_##OP##_##SZ *a)  \
    {                                                                   \
        if (dc_isar_feature(FEAT, s)) {                                 \
            return do_vfp_2op_##SZ(s, GEN, a->vd, a->vm);               \
        }                                                               \
        return false;                                                   \
    }
2397 
2398 #define DO_VFP_VMOV(INSN, PREC, FN)                             \
2399     static bool trans_##INSN##_##PREC(DisasContext *s,          \
2400                                       arg_##INSN##_##PREC *a)   \
2401     {                                                           \
2402         if (!dc_isar_feature(aa32_fp##PREC##_v2, s) &&          \
2403             !dc_isar_feature(aa32_mve, s)) {                    \
2404             return false;                                       \
2405         }                                                       \
2406         return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
2407     }
2408 
2409 DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32)
2410 DO_VFP_VMOV(VMOV_reg, dp, tcg_gen_mov_i64)
2411 
2412 DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh, aa32_fp16_arith)
2413 DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss, aa32_fpsp_v2)
2414 DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd, aa32_fpdp_v2)
2415 
2416 DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh, aa32_fp16_arith)
2417 DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs, aa32_fpsp_v2)
2418 DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd, aa32_fpdp_v2)
2419 
2420 static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
2421 {
2422     gen_helper_vfp_sqrth(vd, vm, cpu_env);
2423 }
2424 
2425 static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
2426 {
2427     gen_helper_vfp_sqrts(vd, vm, cpu_env);
2428 }
2429 
2430 static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
2431 {
2432     gen_helper_vfp_sqrtd(vd, vm, cpu_env);
2433 }
2434 
2435 DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith)
2436 DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp, aa32_fpsp_v2)
2437 DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp, aa32_fpdp_v2)
2438 
2439 static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
2440 {
2441     TCGv_i32 vd, vm;
2442 
2443     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2444         return false;
2445     }
2446 
2447     /* Vm/M bits must be zero for the Z variant */
2448     if (a->z && a->vm != 0) {
2449         return false;
2450     }
2451 
2452     if (!vfp_access_check(s)) {
2453         return true;
2454     }
2455 
2456     vd = tcg_temp_new_i32();
2457     vm = tcg_temp_new_i32();
2458 
2459     vfp_load_reg32(vd, a->vd);
2460     if (a->z) {
2461         tcg_gen_movi_i32(vm, 0);
2462     } else {
2463         vfp_load_reg32(vm, a->vm);
2464     }
2465 
2466     if (a->e) {
2467         gen_helper_vfp_cmpeh(vd, vm, cpu_env);
2468     } else {
2469         gen_helper_vfp_cmph(vd, vm, cpu_env);
2470     }
2471     return true;
2472 }
2473 
2474 static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
2475 {
2476     TCGv_i32 vd, vm;
2477 
2478     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
2479         return false;
2480     }
2481 
2482     /* Vm/M bits must be zero for the Z variant */
2483     if (a->z && a->vm != 0) {
2484         return false;
2485     }
2486 
2487     if (!vfp_access_check(s)) {
2488         return true;
2489     }
2490 
2491     vd = tcg_temp_new_i32();
2492     vm = tcg_temp_new_i32();
2493 
2494     vfp_load_reg32(vd, a->vd);
2495     if (a->z) {
2496         tcg_gen_movi_i32(vm, 0);
2497     } else {
2498         vfp_load_reg32(vm, a->vm);
2499     }
2500 
2501     if (a->e) {
2502         gen_helper_vfp_cmpes(vd, vm, cpu_env);
2503     } else {
2504         gen_helper_vfp_cmps(vd, vm, cpu_env);
2505     }
2506     return true;
2507 }
2508 
2509 static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
2510 {
2511     TCGv_i64 vd, vm;
2512 
2513     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2514         return false;
2515     }
2516 
2517     /* Vm/M bits must be zero for the Z variant */
2518     if (a->z && a->vm != 0) {
2519         return false;
2520     }
2521 
2522     /* UNDEF accesses to D16-D31 if they don't exist. */
2523     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2524         return false;
2525     }
2526 
2527     if (!vfp_access_check(s)) {
2528         return true;
2529     }
2530 
2531     vd = tcg_temp_new_i64();
2532     vm = tcg_temp_new_i64();
2533 
2534     vfp_load_reg64(vd, a->vd);
2535     if (a->z) {
2536         tcg_gen_movi_i64(vm, 0);
2537     } else {
2538         vfp_load_reg64(vm, a->vm);
2539     }
2540 
2541     if (a->e) {
2542         gen_helper_vfp_cmped(vd, vm, cpu_env);
2543     } else {
2544         gen_helper_vfp_cmpd(vd, vm, cpu_env);
2545     }
2546     return true;
2547 }
2548 
2549 static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
2550 {
2551     TCGv_ptr fpst;
2552     TCGv_i32 ahp_mode;
2553     TCGv_i32 tmp;
2554 
2555     if (!dc_isar_feature(aa32_fp16_spconv, s)) {
2556         return false;
2557     }
2558 
2559     if (!vfp_access_check(s)) {
2560         return true;
2561     }
2562 
2563     fpst = fpstatus_ptr(FPST_FPCR);
2564     ahp_mode = get_ahp_flag();
2565     tmp = tcg_temp_new_i32();
2566     /* The T bit tells us if we want the low or high 16 bits of Vm */
2567     tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
2568     gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
2569     vfp_store_reg32(tmp, a->vd);
2570     return true;
2571 }
2572 
2573 static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
2574 {
2575     TCGv_ptr fpst;
2576     TCGv_i32 ahp_mode;
2577     TCGv_i32 tmp;
2578     TCGv_i64 vd;
2579 
2580     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2581         return false;
2582     }
2583 
2584     if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
2585         return false;
2586     }
2587 
2588     /* UNDEF accesses to D16-D31 if they don't exist. */
2589     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd  & 0x10)) {
2590         return false;
2591     }
2592 
2593     if (!vfp_access_check(s)) {
2594         return true;
2595     }
2596 
2597     fpst = fpstatus_ptr(FPST_FPCR);
2598     ahp_mode = get_ahp_flag();
2599     tmp = tcg_temp_new_i32();
2600     /* The T bit tells us if we want the low or high 16 bits of Vm */
2601     tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
2602     vd = tcg_temp_new_i64();
2603     gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
2604     vfp_store_reg64(vd, a->vd);
2605     return true;
2606 }
2607 
2608 static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a)
2609 {
2610     TCGv_ptr fpst;
2611     TCGv_i32 tmp;
2612 
2613     if (!dc_isar_feature(aa32_bf16, s)) {
2614         return false;
2615     }
2616 
2617     if (!vfp_access_check(s)) {
2618         return true;
2619     }
2620 
2621     fpst = fpstatus_ptr(FPST_FPCR);
2622     tmp = tcg_temp_new_i32();
2623 
2624     vfp_load_reg32(tmp, a->vm);
2625     gen_helper_bfcvt(tmp, tmp, fpst);
2626     tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
2627     return true;
2628 }
2629 
2630 static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
2631 {
2632     TCGv_ptr fpst;
2633     TCGv_i32 ahp_mode;
2634     TCGv_i32 tmp;
2635 
2636     if (!dc_isar_feature(aa32_fp16_spconv, s)) {
2637         return false;
2638     }
2639 
2640     if (!vfp_access_check(s)) {
2641         return true;
2642     }
2643 
2644     fpst = fpstatus_ptr(FPST_FPCR);
2645     ahp_mode = get_ahp_flag();
2646     tmp = tcg_temp_new_i32();
2647 
2648     vfp_load_reg32(tmp, a->vm);
2649     gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
2650     tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
2651     return true;
2652 }
2653 
2654 static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
2655 {
2656     TCGv_ptr fpst;
2657     TCGv_i32 ahp_mode;
2658     TCGv_i32 tmp;
2659     TCGv_i64 vm;
2660 
2661     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2662         return false;
2663     }
2664 
2665     if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
2666         return false;
2667     }
2668 
2669     /* UNDEF accesses to D16-D31 if they don't exist. */
2670     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm  & 0x10)) {
2671         return false;
2672     }
2673 
2674     if (!vfp_access_check(s)) {
2675         return true;
2676     }
2677 
2678     fpst = fpstatus_ptr(FPST_FPCR);
2679     ahp_mode = get_ahp_flag();
2680     tmp = tcg_temp_new_i32();
2681     vm = tcg_temp_new_i64();
2682 
2683     vfp_load_reg64(vm, a->vm);
2684     gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
2685     tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
2686     return true;
2687 }
2688 
2689 static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
2690 {
2691     TCGv_ptr fpst;
2692     TCGv_i32 tmp;
2693 
2694     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2695         return false;
2696     }
2697 
2698     if (!vfp_access_check(s)) {
2699         return true;
2700     }
2701 
2702     tmp = tcg_temp_new_i32();
2703     vfp_load_reg32(tmp, a->vm);
2704     fpst = fpstatus_ptr(FPST_FPCR_F16);
2705     gen_helper_rinth(tmp, tmp, fpst);
2706     vfp_store_reg32(tmp, a->vd);
2707     return true;
2708 }
2709 
2710 static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
2711 {
2712     TCGv_ptr fpst;
2713     TCGv_i32 tmp;
2714 
2715     if (!dc_isar_feature(aa32_vrint, s)) {
2716         return false;
2717     }
2718 
2719     if (!vfp_access_check(s)) {
2720         return true;
2721     }
2722 
2723     tmp = tcg_temp_new_i32();
2724     vfp_load_reg32(tmp, a->vm);
2725     fpst = fpstatus_ptr(FPST_FPCR);
2726     gen_helper_rints(tmp, tmp, fpst);
2727     vfp_store_reg32(tmp, a->vd);
2728     return true;
2729 }
2730 
2731 static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
2732 {
2733     TCGv_ptr fpst;
2734     TCGv_i64 tmp;
2735 
2736     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2737         return false;
2738     }
2739 
2740     if (!dc_isar_feature(aa32_vrint, s)) {
2741         return false;
2742     }
2743 
2744     /* UNDEF accesses to D16-D31 if they don't exist. */
2745     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2746         return false;
2747     }
2748 
2749     if (!vfp_access_check(s)) {
2750         return true;
2751     }
2752 
2753     tmp = tcg_temp_new_i64();
2754     vfp_load_reg64(tmp, a->vm);
2755     fpst = fpstatus_ptr(FPST_FPCR);
2756     gen_helper_rintd(tmp, tmp, fpst);
2757     vfp_store_reg64(tmp, a->vd);
2758     return true;
2759 }
2760 
2761 static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
2762 {
2763     TCGv_ptr fpst;
2764     TCGv_i32 tmp;
2765     TCGv_i32 tcg_rmode;
2766 
2767     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2768         return false;
2769     }
2770 
2771     if (!vfp_access_check(s)) {
2772         return true;
2773     }
2774 
2775     tmp = tcg_temp_new_i32();
2776     vfp_load_reg32(tmp, a->vm);
2777     fpst = fpstatus_ptr(FPST_FPCR_F16);
2778     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
2779     gen_helper_rinth(tmp, tmp, fpst);
2780     gen_restore_rmode(tcg_rmode, fpst);
2781     vfp_store_reg32(tmp, a->vd);
2782     return true;
2783 }
2784 
2785 static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
2786 {
2787     TCGv_ptr fpst;
2788     TCGv_i32 tmp;
2789     TCGv_i32 tcg_rmode;
2790 
2791     if (!dc_isar_feature(aa32_vrint, s)) {
2792         return false;
2793     }
2794 
2795     if (!vfp_access_check(s)) {
2796         return true;
2797     }
2798 
2799     tmp = tcg_temp_new_i32();
2800     vfp_load_reg32(tmp, a->vm);
2801     fpst = fpstatus_ptr(FPST_FPCR);
2802     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
2803     gen_helper_rints(tmp, tmp, fpst);
2804     gen_restore_rmode(tcg_rmode, fpst);
2805     vfp_store_reg32(tmp, a->vd);
2806     return true;
2807 }
2808 
2809 static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
2810 {
2811     TCGv_ptr fpst;
2812     TCGv_i64 tmp;
2813     TCGv_i32 tcg_rmode;
2814 
2815     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2816         return false;
2817     }
2818 
2819     if (!dc_isar_feature(aa32_vrint, s)) {
2820         return false;
2821     }
2822 
2823     /* UNDEF accesses to D16-D31 if they don't exist. */
2824     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2825         return false;
2826     }
2827 
2828     if (!vfp_access_check(s)) {
2829         return true;
2830     }
2831 
2832     tmp = tcg_temp_new_i64();
2833     vfp_load_reg64(tmp, a->vm);
2834     fpst = fpstatus_ptr(FPST_FPCR);
2835     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
2836     gen_helper_rintd(tmp, tmp, fpst);
2837     gen_restore_rmode(tcg_rmode, fpst);
2838     vfp_store_reg64(tmp, a->vd);
2839     return true;
2840 }
2841 
2842 static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
2843 {
2844     TCGv_ptr fpst;
2845     TCGv_i32 tmp;
2846 
2847     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2848         return false;
2849     }
2850 
2851     if (!vfp_access_check(s)) {
2852         return true;
2853     }
2854 
2855     tmp = tcg_temp_new_i32();
2856     vfp_load_reg32(tmp, a->vm);
2857     fpst = fpstatus_ptr(FPST_FPCR_F16);
2858     gen_helper_rinth_exact(tmp, tmp, fpst);
2859     vfp_store_reg32(tmp, a->vd);
2860     return true;
2861 }
2862 
2863 static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
2864 {
2865     TCGv_ptr fpst;
2866     TCGv_i32 tmp;
2867 
2868     if (!dc_isar_feature(aa32_vrint, s)) {
2869         return false;
2870     }
2871 
2872     if (!vfp_access_check(s)) {
2873         return true;
2874     }
2875 
2876     tmp = tcg_temp_new_i32();
2877     vfp_load_reg32(tmp, a->vm);
2878     fpst = fpstatus_ptr(FPST_FPCR);
2879     gen_helper_rints_exact(tmp, tmp, fpst);
2880     vfp_store_reg32(tmp, a->vd);
2881     return true;
2882 }
2883 
2884 static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
2885 {
2886     TCGv_ptr fpst;
2887     TCGv_i64 tmp;
2888 
2889     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2890         return false;
2891     }
2892 
2893     if (!dc_isar_feature(aa32_vrint, s)) {
2894         return false;
2895     }
2896 
2897     /* UNDEF accesses to D16-D31 if they don't exist. */
2898     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2899         return false;
2900     }
2901 
2902     if (!vfp_access_check(s)) {
2903         return true;
2904     }
2905 
2906     tmp = tcg_temp_new_i64();
2907     vfp_load_reg64(tmp, a->vm);
2908     fpst = fpstatus_ptr(FPST_FPCR);
2909     gen_helper_rintd_exact(tmp, tmp, fpst);
2910     vfp_store_reg64(tmp, a->vd);
2911     return true;
2912 }
2913 
2914 static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
2915 {
2916     TCGv_i64 vd;
2917     TCGv_i32 vm;
2918 
2919     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2920         return false;
2921     }
2922 
2923     /* UNDEF accesses to D16-D31 if they don't exist. */
2924     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
2925         return false;
2926     }
2927 
2928     if (!vfp_access_check(s)) {
2929         return true;
2930     }
2931 
2932     vm = tcg_temp_new_i32();
2933     vd = tcg_temp_new_i64();
2934     vfp_load_reg32(vm, a->vm);
2935     gen_helper_vfp_fcvtds(vd, vm, cpu_env);
2936     vfp_store_reg64(vd, a->vd);
2937     return true;
2938 }
2939 
2940 static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
2941 {
2942     TCGv_i64 vm;
2943     TCGv_i32 vd;
2944 
2945     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2946         return false;
2947     }
2948 
2949     /* UNDEF accesses to D16-D31 if they don't exist. */
2950     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
2951         return false;
2952     }
2953 
2954     if (!vfp_access_check(s)) {
2955         return true;
2956     }
2957 
2958     vd = tcg_temp_new_i32();
2959     vm = tcg_temp_new_i64();
2960     vfp_load_reg64(vm, a->vm);
2961     gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
2962     vfp_store_reg32(vd, a->vd);
2963     return true;
2964 }
2965 
2966 static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
2967 {
2968     TCGv_i32 vm;
2969     TCGv_ptr fpst;
2970 
2971     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2972         return false;
2973     }
2974 
2975     if (!vfp_access_check(s)) {
2976         return true;
2977     }
2978 
2979     vm = tcg_temp_new_i32();
2980     vfp_load_reg32(vm, a->vm);
2981     fpst = fpstatus_ptr(FPST_FPCR_F16);
2982     if (a->s) {
2983         /* i32 -> f16 */
2984         gen_helper_vfp_sitoh(vm, vm, fpst);
2985     } else {
2986         /* u32 -> f16 */
2987         gen_helper_vfp_uitoh(vm, vm, fpst);
2988     }
2989     vfp_store_reg32(vm, a->vd);
2990     return true;
2991 }
2992 
2993 static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
2994 {
2995     TCGv_i32 vm;
2996     TCGv_ptr fpst;
2997 
2998     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
2999         return false;
3000     }
3001 
3002     if (!vfp_access_check(s)) {
3003         return true;
3004     }
3005 
3006     vm = tcg_temp_new_i32();
3007     vfp_load_reg32(vm, a->vm);
3008     fpst = fpstatus_ptr(FPST_FPCR);
3009     if (a->s) {
3010         /* i32 -> f32 */
3011         gen_helper_vfp_sitos(vm, vm, fpst);
3012     } else {
3013         /* u32 -> f32 */
3014         gen_helper_vfp_uitos(vm, vm, fpst);
3015     }
3016     vfp_store_reg32(vm, a->vd);
3017     return true;
3018 }
3019 
3020 static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
3021 {
3022     TCGv_i32 vm;
3023     TCGv_i64 vd;
3024     TCGv_ptr fpst;
3025 
3026     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3027         return false;
3028     }
3029 
3030     /* UNDEF accesses to D16-D31 if they don't exist. */
3031     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
3032         return false;
3033     }
3034 
3035     if (!vfp_access_check(s)) {
3036         return true;
3037     }
3038 
3039     vm = tcg_temp_new_i32();
3040     vd = tcg_temp_new_i64();
3041     vfp_load_reg32(vm, a->vm);
3042     fpst = fpstatus_ptr(FPST_FPCR);
3043     if (a->s) {
3044         /* i32 -> f64 */
3045         gen_helper_vfp_sitod(vd, vm, fpst);
3046     } else {
3047         /* u32 -> f64 */
3048         gen_helper_vfp_uitod(vd, vm, fpst);
3049     }
3050     vfp_store_reg64(vd, a->vd);
3051     return true;
3052 }
3053 
3054 static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
3055 {
3056     TCGv_i32 vd;
3057     TCGv_i64 vm;
3058 
3059     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3060         return false;
3061     }
3062 
3063     if (!dc_isar_feature(aa32_jscvt, s)) {
3064         return false;
3065     }
3066 
3067     /* UNDEF accesses to D16-D31 if they don't exist. */
3068     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3069         return false;
3070     }
3071 
3072     if (!vfp_access_check(s)) {
3073         return true;
3074     }
3075 
3076     vm = tcg_temp_new_i64();
3077     vd = tcg_temp_new_i32();
3078     vfp_load_reg64(vm, a->vm);
3079     gen_helper_vjcvt(vd, vm, cpu_env);
3080     vfp_store_reg32(vd, a->vd);
3081     return true;
3082 }
3083 
3084 static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
3085 {
3086     TCGv_i32 vd, shift;
3087     TCGv_ptr fpst;
3088     int frac_bits;
3089 
3090     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3091         return false;
3092     }
3093 
3094     if (!vfp_access_check(s)) {
3095         return true;
3096     }
3097 
3098     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3099 
3100     vd = tcg_temp_new_i32();
3101     vfp_load_reg32(vd, a->vd);
3102 
3103     fpst = fpstatus_ptr(FPST_FPCR_F16);
3104     shift = tcg_constant_i32(frac_bits);
3105 
3106     /* Switch on op:U:sx bits */
3107     switch (a->opc) {
3108     case 0:
3109         gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst);
3110         break;
3111     case 1:
3112         gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst);
3113         break;
3114     case 2:
3115         gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst);
3116         break;
3117     case 3:
3118         gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst);
3119         break;
3120     case 4:
3121         gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst);
3122         break;
3123     case 5:
3124         gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst);
3125         break;
3126     case 6:
3127         gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst);
3128         break;
3129     case 7:
3130         gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst);
3131         break;
3132     default:
3133         g_assert_not_reached();
3134     }
3135 
3136     vfp_store_reg32(vd, a->vd);
3137     return true;
3138 }
3139 
3140 static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
3141 {
3142     TCGv_i32 vd, shift;
3143     TCGv_ptr fpst;
3144     int frac_bits;
3145 
3146     if (!dc_isar_feature(aa32_fpsp_v3, s)) {
3147         return false;
3148     }
3149 
3150     if (!vfp_access_check(s)) {
3151         return true;
3152     }
3153 
3154     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3155 
3156     vd = tcg_temp_new_i32();
3157     vfp_load_reg32(vd, a->vd);
3158 
3159     fpst = fpstatus_ptr(FPST_FPCR);
3160     shift = tcg_constant_i32(frac_bits);
3161 
3162     /* Switch on op:U:sx bits */
3163     switch (a->opc) {
3164     case 0:
3165         gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst);
3166         break;
3167     case 1:
3168         gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst);
3169         break;
3170     case 2:
3171         gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst);
3172         break;
3173     case 3:
3174         gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst);
3175         break;
3176     case 4:
3177         gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
3178         break;
3179     case 5:
3180         gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
3181         break;
3182     case 6:
3183         gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
3184         break;
3185     case 7:
3186         gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
3187         break;
3188     default:
3189         g_assert_not_reached();
3190     }
3191 
3192     vfp_store_reg32(vd, a->vd);
3193     return true;
3194 }
3195 
3196 static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
3197 {
3198     TCGv_i64 vd;
3199     TCGv_i32 shift;
3200     TCGv_ptr fpst;
3201     int frac_bits;
3202 
3203     if (!dc_isar_feature(aa32_fpdp_v3, s)) {
3204         return false;
3205     }
3206 
3207     /* UNDEF accesses to D16-D31 if they don't exist. */
3208     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
3209         return false;
3210     }
3211 
3212     if (!vfp_access_check(s)) {
3213         return true;
3214     }
3215 
3216     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3217 
3218     vd = tcg_temp_new_i64();
3219     vfp_load_reg64(vd, a->vd);
3220 
3221     fpst = fpstatus_ptr(FPST_FPCR);
3222     shift = tcg_constant_i32(frac_bits);
3223 
3224     /* Switch on op:U:sx bits */
3225     switch (a->opc) {
3226     case 0:
3227         gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst);
3228         break;
3229     case 1:
3230         gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst);
3231         break;
3232     case 2:
3233         gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst);
3234         break;
3235     case 3:
3236         gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst);
3237         break;
3238     case 4:
3239         gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
3240         break;
3241     case 5:
3242         gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
3243         break;
3244     case 6:
3245         gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
3246         break;
3247     case 7:
3248         gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
3249         break;
3250     default:
3251         g_assert_not_reached();
3252     }
3253 
3254     vfp_store_reg64(vd, a->vd);
3255     return true;
3256 }
3257 
3258 static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
3259 {
3260     TCGv_i32 vm;
3261     TCGv_ptr fpst;
3262 
3263     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3264         return false;
3265     }
3266 
3267     if (!vfp_access_check(s)) {
3268         return true;
3269     }
3270 
3271     fpst = fpstatus_ptr(FPST_FPCR_F16);
3272     vm = tcg_temp_new_i32();
3273     vfp_load_reg32(vm, a->vm);
3274 
3275     if (a->s) {
3276         if (a->rz) {
3277             gen_helper_vfp_tosizh(vm, vm, fpst);
3278         } else {
3279             gen_helper_vfp_tosih(vm, vm, fpst);
3280         }
3281     } else {
3282         if (a->rz) {
3283             gen_helper_vfp_touizh(vm, vm, fpst);
3284         } else {
3285             gen_helper_vfp_touih(vm, vm, fpst);
3286         }
3287     }
3288     vfp_store_reg32(vm, a->vd);
3289     return true;
3290 }
3291 
3292 static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
3293 {
3294     TCGv_i32 vm;
3295     TCGv_ptr fpst;
3296 
3297     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
3298         return false;
3299     }
3300 
3301     if (!vfp_access_check(s)) {
3302         return true;
3303     }
3304 
3305     fpst = fpstatus_ptr(FPST_FPCR);
3306     vm = tcg_temp_new_i32();
3307     vfp_load_reg32(vm, a->vm);
3308 
3309     if (a->s) {
3310         if (a->rz) {
3311             gen_helper_vfp_tosizs(vm, vm, fpst);
3312         } else {
3313             gen_helper_vfp_tosis(vm, vm, fpst);
3314         }
3315     } else {
3316         if (a->rz) {
3317             gen_helper_vfp_touizs(vm, vm, fpst);
3318         } else {
3319             gen_helper_vfp_touis(vm, vm, fpst);
3320         }
3321     }
3322     vfp_store_reg32(vm, a->vd);
3323     return true;
3324 }
3325 
3326 static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
3327 {
3328     TCGv_i32 vd;
3329     TCGv_i64 vm;
3330     TCGv_ptr fpst;
3331 
3332     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3333         return false;
3334     }
3335 
3336     /* UNDEF accesses to D16-D31 if they don't exist. */
3337     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3338         return false;
3339     }
3340 
3341     if (!vfp_access_check(s)) {
3342         return true;
3343     }
3344 
3345     fpst = fpstatus_ptr(FPST_FPCR);
3346     vm = tcg_temp_new_i64();
3347     vd = tcg_temp_new_i32();
3348     vfp_load_reg64(vm, a->vm);
3349 
3350     if (a->s) {
3351         if (a->rz) {
3352             gen_helper_vfp_tosizd(vd, vm, fpst);
3353         } else {
3354             gen_helper_vfp_tosid(vd, vm, fpst);
3355         }
3356     } else {
3357         if (a->rz) {
3358             gen_helper_vfp_touizd(vd, vm, fpst);
3359         } else {
3360             gen_helper_vfp_touid(vd, vm, fpst);
3361         }
3362     }
3363     vfp_store_reg32(vd, a->vd);
3364     return true;
3365 }
3366 
3367 static bool trans_VINS(DisasContext *s, arg_VINS *a)
3368 {
3369     TCGv_i32 rd, rm;
3370 
3371     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3372         return false;
3373     }
3374 
3375     if (s->vec_len != 0 || s->vec_stride != 0) {
3376         return false;
3377     }
3378 
3379     if (!vfp_access_check(s)) {
3380         return true;
3381     }
3382 
3383     /* Insert low half of Vm into high half of Vd */
3384     rm = tcg_temp_new_i32();
3385     rd = tcg_temp_new_i32();
3386     vfp_load_reg32(rm, a->vm);
3387     vfp_load_reg32(rd, a->vd);
3388     tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
3389     vfp_store_reg32(rd, a->vd);
3390     return true;
3391 }
3392 
3393 static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
3394 {
3395     TCGv_i32 rm;
3396 
3397     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3398         return false;
3399     }
3400 
3401     if (s->vec_len != 0 || s->vec_stride != 0) {
3402         return false;
3403     }
3404 
3405     if (!vfp_access_check(s)) {
3406         return true;
3407     }
3408 
3409     /* Set Vd to high half of Vm */
3410     rm = tcg_temp_new_i32();
3411     vfp_load_reg32(rm, a->vm);
3412     tcg_gen_shri_i32(rm, rm, 16);
3413     vfp_store_reg32(rm, a->vd);
3414     return true;
3415 }
3416