/*
 *  ARM translation: M-profile MVE instructions
 *
 *  Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVCVTRmodeFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_no_predication(DisasContext *s)
{
    /*
     * Return true if we are executing the entire MVE instruction
     * with no predication or partial-execution, and so we can safely
     * use an inline TCG vector implementation.
     */
    return s->eci == 0 && s->mve_no_pred;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
        return false;
    }
}

void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}
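
/*
 * Worked example (illustrative, not normative): if this insn resumed
 * with s->eci == ECI_A0A1A2B0 then beat B0 of the *next* insn has
 * already executed as well, so once our remaining beat completes the
 * following insn still sees ECI_A0. For any other non-zero ECI value
 * only the current insn was partially executed, so the next insn
 * starts clean with ECI_NONE. This is what the single assignment
 * above encodes.
 */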

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

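    /*
     * Scaling sketch (assumed example values): the immediate is in
     * units of the memory element size, so for a halfword access
     * (msize == MO_16) an imm of 3 becomes a byte offset of 6; the
     * 'a' bit then selects add versus subtract.
     */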
    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }
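
/*
 * For reference, a sketch of what the first instantiation below is
 * expected to expand to (illustrative only, not an extra definition):
 *
 *   static bool trans_VLDSTB_H(DisasContext *s, arg_VLDR_VSTR *a)
 *   {
 *       static MVEGenLdStFn * const ldstfns[2][2] = {
 *           { gen_helper_mve_vstrb_h, gen_helper_mve_vldrb_sh },
 *           { NULL, gen_helper_mve_vldrb_uh },
 *       };
 *       return do_ldst(s, a, ldstfns[a->u][a->l], MO_8);
 *   }
 */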

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)

static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
{
    TCGv_i32 addr;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn || a->rn == 15) {
        /* Rn == 15 is UNPREDICTABLE */
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    addr = load_reg(s, a->rn);

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, addr);
    mve_update_eci(s);
    return true;
}

/*
 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
 * sign-extended to halfword elements in register". _os_ indicates that
 * the offsets in Qm should be scaled by the element size.
 */
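/*
 * So, reading the scheme off the names below: vldrb_sg_sw loads bytes
 * and sign-extends them to word elements, while vldrw_sg_os_uw loads
 * words with the offsets in Qm scaled by 4.
 */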
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

/* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
            { NULL, NULL,           F(vldrh_sg_sw), NULL },
            { NULL, NULL,           NULL,           NULL },
            { NULL, NULL,           NULL,           NULL }
        }, {
            { NULL, NULL,              NULL,              NULL },
            { NULL, NULL,              F(vldrh_sg_os_sw), NULL },
            { NULL, NULL,              NULL,              NULL },
            { NULL, NULL,              NULL,              NULL }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
            { NULL,           F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
            { NULL,           NULL,           F(vldrw_sg_uw), NULL },
            { NULL,           NULL,           NULL,           F(vldrd_sg_ud) }
        }, {
            { NULL, NULL,              NULL,              NULL },
            { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
            { NULL, NULL,              F(vldrw_sg_os_uw), NULL },
            { NULL, NULL,              NULL,              F(vldrd_sg_os_ud) }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
            { NULL,           F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
            { NULL,           NULL,           F(vstrw_sg_uw), NULL },
            { NULL,           NULL,           NULL,           F(vstrd_sg_ud) }
        }, {
            { NULL, NULL,              NULL,              NULL },
            { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
            { NULL, NULL,              F(vstrw_sg_os_uw), NULL },
            { NULL, NULL,              NULL,              F(vstrd_sg_os_ud) }
        }
    };
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

#undef F

static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a,
                           MVEGenLdStSGFn *fn, unsigned msize)
{
    uint32_t offset;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(offset));
    mve_update_eci(s);
    return true;
}

static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrw_sg_uw,
        gen_helper_mve_vldrw_sg_wb_uw,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrd_sg_ud,
        gen_helper_mve_vldrd_sg_wb_ud,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrw_sg_uw,
        gen_helper_mve_vstrw_sg_wb_uw,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrd_sg_ud,
        gen_helper_mve_vstrd_sg_wb_ud,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn,
                        int addrinc)
{
    TCGv_i32 rn;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn || (a->rn == 13 && a->w) || a->rn == 15) {
        /* Variously UNPREDICTABLE or UNDEF or related-encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rn = load_reg(s, a->rn);
    /*
     * We pass the index of Qd, not a pointer, because the helper must
     * access multiple Q registers starting at Qd and working up.
     */
    fn(cpu_env, tcg_constant_i32(a->qd), rn);

    if (a->w) {
        tcg_gen_addi_i32(rn, rn, addrinc);
        store_reg(s, a->rn, rn);
    }
    mve_update_and_store_eci(s);
    return true;
}

/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N
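
/*
 * Note on the a->qd bounds checks below: VLD2/VST2 access Q registers
 * Qd and Qd+1, and VLD4/VST4 access Qd through Qd+3, so the trans
 * functions reject any Qd value that would run past Q7.
 */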

static bool trans_VLD2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld20b), F(vld20h), F(vld20w), NULL, },
        { F(vld21b), F(vld21h), F(vld21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VLD4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld40b), F(vld40h), F(vld40w), NULL, },
        { F(vld41b), F(vld41h), F(vld41w), NULL, },
        { F(vld42b), F(vld42h), F(vld42w), NULL, },
        { F(vld43b), F(vld43h), F(vld43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

static bool trans_VST2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst20b), F(vst20h), F(vst20w), NULL, },
        { F(vst21b), F(vst21h), F(vst21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VST4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst40b), F(vst40h), F(vst40w), NULL, },
        { F(vst41b), F(vst41h), F(vst41w), NULL, },
        { F(vst42b), F(vst42h), F(vst42w), NULL, },
        { F(vst43b), F(vst43h), F(vst43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

#undef F

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rt = load_reg(s, a->rt);
    if (mve_no_predication(s)) {
        tcg_gen_gvec_dup_i32(a->size, mve_qreg_offset(a->qd), 16, 16, rt);
    } else {
        qd = mve_qreg_ptr(a->qd);
        tcg_gen_dup_i32(a->size, rt, rt);
        gen_helper_mve_vdup(cpu_env, qd, rt);
    }
    mve_update_eci(s);
    return true;
}

static bool do_1op_vec(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn,
                       GVecGen2Fn vecfn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    return do_1op_vec(s, a, fn, NULL);
}

#define DO_1OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_1OP(INSN, FN) DO_1OP_VEC(INSN, FN, NULL)

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP_VEC(VABS, vabs, tcg_gen_gvec_abs)
DO_1OP_VEC(VNEG, vneg, tcg_gen_gvec_neg)
DO_1OP(VQABS, vqabs)
DO_1OP(VQNEG, vqneg)
DO_1OP(VMAXA, vmaxa)
DO_1OP(VMINA, vmina)

/*
 * For simple float/int conversions we use the fixed-point
 * conversion helpers with a zero shift count
 */
#define DO_VCVT(INSN, HFN, SFN)                                         \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VCVT(VCVT_SF, vcvt_sh, vcvt_sf)
DO_VCVT(VCVT_UF, vcvt_uh, vcvt_uf)
DO_VCVT(VCVT_FS, vcvt_hs, vcvt_fs)
DO_VCVT(VCVT_FU, vcvt_hu, vcvt_fu)

static bool do_vcvt_rmode(DisasContext *s, arg_1op *a,
                          ARMFPRounding rmode, bool u)
{
    /*
     * Handle VCVT fp to int with specified rounding mode.
     * This is a 1op fn but we must pass the rounding mode as
     * an immediate to the helper.
     */
    TCGv_ptr qd, qm;
    static MVEGenVCVTRmodeFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vcvt_rm_sh, gen_helper_mve_vcvt_rm_uh },
        { gen_helper_mve_vcvt_rm_ss, gen_helper_mve_vcvt_rm_us },
        { NULL, NULL },
    };
    MVEGenVCVTRmodeFn *fn = fns[a->size][u];

    if (!dc_isar_feature(aa32_mve_fp, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(arm_rmode_to_sf(rmode)));
    mve_update_eci(s);
    return true;
}

#define DO_VCVT_RMODE(INSN, RMODE, U)                           \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        return do_vcvt_rmode(s, a, RMODE, U);                   \
    }                                                           \

DO_VCVT_RMODE(VCVTAS, FPROUNDING_TIEAWAY, false)
DO_VCVT_RMODE(VCVTAU, FPROUNDING_TIEAWAY, true)
DO_VCVT_RMODE(VCVTNS, FPROUNDING_TIEEVEN, false)
DO_VCVT_RMODE(VCVTNU, FPROUNDING_TIEEVEN, true)
DO_VCVT_RMODE(VCVTPS, FPROUNDING_POSINF, false)
DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true)
DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false)
DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true)

#define DO_VCVT_SH(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_1op(s, a, gen_helper_mve_##FN);               \
    }                                                           \

DO_VCVT_SH(VCVTB_SH, vcvtb_sh)
DO_VCVT_SH(VCVTT_SH, vcvtt_sh)
DO_VCVT_SH(VCVTB_HS, vcvtb_hs)
DO_VCVT_SH(VCVTT_HS, vcvtt_hs)

#define DO_VRINT(INSN, RMODE)                                           \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_h(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_s(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
DO_VRINT(VRINTZ, FPROUNDING_ZERO)
DO_VRINT(VRINTM, FPROUNDING_NEGINF)
DO_VRINT(VRINTP, FPROUNDING_POSINF)

static bool trans_VRINTX(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vrintx_h,
        gen_helper_mve_vrintx_s,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op_vec(s, a, gen_helper_mve_vmvn, tcg_gen_gvec_not);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op_vec(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn,
                       GVecGen3Fn *vecfn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qn),
              mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qn = mve_qreg_ptr(a->qn);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qn, qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn *fn)
{
    return do_2op_vec(s, a, fn, NULL);
}

#define DO_LOGIC(INSN, HELPER, VECFN)                           \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op_vec(s, a, HELPER, VECFN);                 \
    }

DO_LOGIC(VAND, gen_helper_mve_vand, tcg_gen_gvec_and)
DO_LOGIC(VBIC, gen_helper_mve_vbic, tcg_gen_gvec_andc)
DO_LOGIC(VORR, gen_helper_mve_vorr, tcg_gen_gvec_or)
DO_LOGIC(VORN, gen_helper_mve_vorn, tcg_gen_gvec_orc)
DO_LOGIC(VEOR, gen_helper_mve_veor, tcg_gen_gvec_xor)

static bool trans_VPSEL(DisasContext *s, arg_2op *a)
{
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    return do_2op(s, a, gen_helper_mve_vpsel);
}

#define DO_2OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_2OP(INSN, FN) DO_2OP_VEC(INSN, FN, NULL)

DO_2OP_VEC(VADD, vadd, tcg_gen_gvec_add)
DO_2OP_VEC(VSUB, vsub, tcg_gen_gvec_sub)
DO_2OP_VEC(VMUL, vmul, tcg_gen_gvec_mul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP_VEC(VMAX_S, vmaxs, tcg_gen_gvec_smax)
DO_2OP_VEC(VMAX_U, vmaxu, tcg_gen_gvec_umax)
DO_2OP_VEC(VMIN_S, vmins, tcg_gen_gvec_smin)
DO_2OP_VEC(VMIN_U, vminu, tcg_gen_gvec_umin)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next.  The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution.  Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
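/*
 * Worked example with assumed lane values: a VADCI of {0xffffffff, 0, 0, 0}
 * plus {1, 0, 0, 0} starts with carry-in 0, so lane 0 produces 0 with a
 * carry-out of 1, lane 1 produces 1, and the remaining lanes pass 0 along;
 * the final carry-out (0 here) is what ends up in FPSCR.C.
 */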
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

#define DO_2OP_FP(INSN, FN)                                     \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP_FP(VADD_fp, vfadd)
DO_2OP_FP(VSUB_fp, vfsub)
DO_2OP_FP(VMUL_fp, vfmul)
DO_2OP_FP(VABD_fp, vfabd)
DO_2OP_FP(VMAXNM, vmaxnm)
DO_2OP_FP(VMINNM, vminnm)
DO_2OP_FP(VCADD90_fp, vfcadd90)
DO_2OP_FP(VCADD270_fp, vfcadd270)
DO_2OP_FP(VFMA, vfma)
DO_2OP_FP(VFMS, vfms)
DO_2OP_FP(VCMUL0, vcmul0)
DO_2OP_FP(VCMUL90, vcmul90)
DO_2OP_FP(VCMUL180, vcmul180)
DO_2OP_FP(VCMUL270, vcmul270)
DO_2OP_FP(VCMLA0, vcmla0)
DO_2OP_FP(VCMLA90, vcmla90)
DO_2OP_FP(VCMLA180, vcmla180)
DO_2OP_FP(VCMLA270, vcmla270)
DO_2OP_FP(VMAXNMA, vmaxnma)
DO_2OP_FP(VMINNMA, vminnma)

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}


#define DO_2OP_FP_SCALAR(INSN, FN)                              \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_FP_SCALAR(VADD_fp_scalar, vfadd_scalar)
DO_2OP_FP_SCALAR(VSUB_fp_scalar, vfsub_scalar)
DO_2OP_FP_SCALAR(VMUL_fp_scalar, vfmul_scalar)
DO_2OP_FP_SCALAR(VFMA_scalar, vfma_scalar)
DO_2OP_FP_SCALAR(VFMAS_scalar, vfmas_scalar)

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda_i, rda_o;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    rda_o = tcg_temp_new_i64();
    if (a->a || mve_skip_first_beat(s)) {
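        /*
         * Note that rda_i aliases rda_o here: the helper reads its
         * accumulator input from and writes its result to the same temp.
         */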
        rda_i = rda_o;
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda_i, rdalo, rdahi);
    } else {
        rda_i = tcg_constant_i64(0);
    }

    fn(rda_o, cpu_env, qn, qm, rda_i);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda_o);
    tcg_gen_extrh_i64_i32(rdahi, rda_o);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda_i, rda_o;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda_o = rda_i = load_reg(s, a->rda);
    } else {
        rda_i = tcg_constant_i32(0);
        rda_o = tcg_temp_new_i32();
    }

    fn(rda_o, cpu_env, qn, qm, rda_i);
    store_reg(s, a->rda, rda_o);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
    {                                                                   \
        static MVEGenDualAccOpFn * const fns[4][2] = {                  \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
            { NULL, NULL },                                             \
        };                                                              \
        return do_dual_acc(s, a, fns[a->size][a->x]);                   \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
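    /*
     * (Because the fields are adjacent, depositing mask | (mask << 4)
     * over the combined field width below writes the same 4-bit mask
     * into both MASK01 and MASK23 with a single deposit operation.)
     */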
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
{
    /*
     * Invert the predicate in VPR.P0. We have to call out to
     * a helper because this insn itself is beatwise and can
     * be predicated.
     */
    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    gen_helper_mve_vpnot(cpu_env);
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda_i, rda_o;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda_o = rda_i = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda_i = tcg_constant_i32(0);
        rda_o = tcg_temp_new_i32();
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda_o, cpu_env, qm, rda_i);
    store_reg(s, a->rda, rda_o);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda_i, rda_o;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    rda_o = tcg_temp_new_i64();
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda_i = rda_o;
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda_i, rdalo, rdahi);
    } else {
        /* Accumulate starting at zero */
        rda_i = tcg_constant_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda_o, cpu_env, qm, rda_i);
    } else {
        gen_helper_mve_vaddlv_s(rda_o, cpu_env, qm, rda_i);
    }

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda_o);
    tcg_gen_extrh_i64_i32(rdahi, rda_o);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn,
                    GVecGen2iFn *vecfn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    if (vecfn && mve_no_predication(s)) {
        vecfn(MO_64, mve_qreg_offset(a->qd), mve_qreg_offset(a->qd),
              imm, 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        fn(cpu_env, qd, tcg_constant_i64(imm));
    }
    mve_update_eci(s);
    return true;
}

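/*
 * Adapter: do_1imm() wants a GVecGen2iFn, which takes a source-operand
 * offset, but tcg_gen_gvec_dup_imm() has no source operand; this
 * wrapper simply drops the unused aofs argument.
 */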
1521 static void gen_gvec_vmovi(unsigned vece, uint32_t dofs, uint32_t aofs,
1522                            int64_t c, uint32_t oprsz, uint32_t maxsz)
1523 {
1524     tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, c);
1525 }
1526 
1527 static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
1528 {
1529     /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
1530     MVEGenOneOpImmFn *fn;
1531     GVecGen2iFn *vecfn;
1532 
1533     if ((a->cmode & 1) && a->cmode < 12) {
1534         if (a->op) {
1535             /*
1536              * For op=1, the immediate will be inverted by asimd_imm_const(),
1537              * so the VBIC becomes a logical AND operation.
1538              */
1539             fn = gen_helper_mve_vandi;
1540             vecfn = tcg_gen_gvec_andi;
1541         } else {
1542             fn = gen_helper_mve_vorri;
1543             vecfn = tcg_gen_gvec_ori;
1544         }
1545     } else {
1546         /* There is one unallocated cmode/op combination in this space */
1547         if (a->cmode == 15 && a->op == 1) {
1548             return false;
1549         }
1550         /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
1551         fn = gen_helper_mve_vmovi;
1552         vecfn = gen_gvec_vmovi;
1553     }
1554     return do_1imm(s, a, fn, vecfn);
1555 }
1556 
1557 static bool do_2shift_vec(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
1558                           bool negateshift, GVecGen2iFn vecfn)
1559 {
1560     TCGv_ptr qd, qm;
1561     int shift = a->shift;
1562 
1563     if (!dc_isar_feature(aa32_mve, s) ||
1564         !mve_check_qreg_bank(s, a->qd | a->qm) ||
1565         !fn) {
1566         return false;
1567     }
1568     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1569         return true;
1570     }
1571 
1572     /*
1573      * When we handle a right shift insn using a left-shift helper
1574      * which permits a negative shift count to indicate a right-shift,
1575      * we must negate the shift count.
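          * For example, VSHRI.S16 #3 reaches the vshli_s helper as a
          * shift count of -3.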
1576      */
1577     if (negateshift) {
1578         shift = -shift;
1579     }
1580 
1581     if (vecfn && mve_no_predication(s)) {
1582         vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm),
1583               shift, 16, 16);
1584     } else {
1585         qd = mve_qreg_ptr(a->qd);
1586         qm = mve_qreg_ptr(a->qm);
1587         fn(cpu_env, qd, qm, tcg_constant_i32(shift));
1588     }
1589     mve_update_eci(s);
1590     return true;
1591 }
1592 
1593 static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
1594                       bool negateshift)
1595 {
1596     return do_2shift_vec(s, a, fn, negateshift, NULL);
1597 }
1598 
1599 #define DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, VECFN)                     \
1600     static bool trans_##INSN(DisasContext *s, arg_2shift *a)            \
1601     {                                                                   \
1602         static MVEGenTwoOpShiftFn * const fns[] = {                     \
1603             gen_helper_mve_##FN##b,                                     \
1604             gen_helper_mve_##FN##h,                                     \
1605             gen_helper_mve_##FN##w,                                     \
1606             NULL,                                                       \
1607         };                                                              \
1608         return do_2shift_vec(s, a, fns[a->size], NEGATESHIFT, VECFN);   \
1609     }
1610 
1611 #define DO_2SHIFT(INSN, FN, NEGATESHIFT)        \
1612     DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, NULL)
1613 
1614 static void do_gvec_shri_s(unsigned vece, uint32_t dofs, uint32_t aofs,
1615                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
1616 {
1617     /*
1618      * We get here with a negated shift count, and we must handle
1619      * shifts by the element size, which tcg_gen_gvec_sari() does not do.
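          * An arithmetic shift by (element size - 1) already fills the
          * element with copies of the sign bit, so clamping the count
          * to size - 1 gives the architecturally correct result.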
1620      */
1621     shift = -shift;
1622     if (shift == (8 << vece)) {
1623         shift--;
1624     }
1625     tcg_gen_gvec_sari(vece, dofs, aofs, shift, oprsz, maxsz);
1626 }
1627 
1628 static void do_gvec_shri_u(unsigned vece, uint32_t dofs, uint32_t aofs,
1629                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
1630 {
1631     /*
1632      * We get here with a negated shift count, and we must handle
1633      * shifts by the element size, which tcg_gen_gvec_shri() does not do.
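          * A logical right shift by the element size must produce
          * zero, so that case writes zeroes to the destination instead.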
1634      */
1635     shift = -shift;
1636     if (shift == (8 << vece)) {
1637         tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, 0);
1638     } else {
1639         tcg_gen_gvec_shri(vece, dofs, aofs, shift, oprsz, maxsz);
1640     }
1641 }
1642 
1643 DO_2SHIFT_VEC(VSHLI, vshli_u, false, tcg_gen_gvec_shli)
1644 DO_2SHIFT(VQSHLI_S, vqshli_s, false)
1645 DO_2SHIFT(VQSHLI_U, vqshli_u, false)
1646 DO_2SHIFT(VQSHLUI, vqshlui_s, false)
1647 /* These right shifts use a left-shift helper with negated shift count */
1648 DO_2SHIFT_VEC(VSHRI_S, vshli_s, true, do_gvec_shri_s)
1649 DO_2SHIFT_VEC(VSHRI_U, vshli_u, true, do_gvec_shri_u)
1650 DO_2SHIFT(VRSHRI_S, vrshli_s, true)
1651 DO_2SHIFT(VRSHRI_U, vrshli_u, true)
1652 
1653 DO_2SHIFT_VEC(VSRI, vsri, false, gen_gvec_sri)
1654 DO_2SHIFT_VEC(VSLI, vsli, false, gen_gvec_sli)
1655 
1656 #define DO_2SHIFT_FP(INSN, FN)                                  \
1657     static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
1658     {                                                           \
1659         if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
1660             return false;                                       \
1661         }                                                       \
1662         return do_2shift(s, a, gen_helper_mve_##FN, false);     \
1663     }
1664 
1665 DO_2SHIFT_FP(VCVT_SH_fixed, vcvt_sh)
1666 DO_2SHIFT_FP(VCVT_UH_fixed, vcvt_uh)
1667 DO_2SHIFT_FP(VCVT_HS_fixed, vcvt_hs)
1668 DO_2SHIFT_FP(VCVT_HU_fixed, vcvt_hu)
1669 DO_2SHIFT_FP(VCVT_SF_fixed, vcvt_sf)
1670 DO_2SHIFT_FP(VCVT_UF_fixed, vcvt_uf)
1671 DO_2SHIFT_FP(VCVT_FS_fixed, vcvt_fs)
1672 DO_2SHIFT_FP(VCVT_FU_fixed, vcvt_fu)
1673 
1674 static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
1675                              MVEGenTwoOpShiftFn *fn)
1676 {
1677     TCGv_ptr qda;
1678     TCGv_i32 rm;
1679 
1680     if (!dc_isar_feature(aa32_mve, s) ||
1681         !mve_check_qreg_bank(s, a->qda) ||
1682         a->rm == 13 || a->rm == 15 || !fn) {
1683         /* Rm cases are UNPREDICTABLE */
1684         return false;
1685     }
1686     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1687         return true;
1688     }
1689 
1690     qda = mve_qreg_ptr(a->qda);
1691     rm = load_reg(s, a->rm);
1692     fn(cpu_env, qda, qda, rm);
1693     mve_update_eci(s);
1694     return true;
1695 }
1696 
1697 #define DO_2SHIFT_SCALAR(INSN, FN)                                      \
1698     static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)        \
1699     {                                                                   \
1700         static MVEGenTwoOpShiftFn * const fns[] = {                     \
1701             gen_helper_mve_##FN##b,                                     \
1702             gen_helper_mve_##FN##h,                                     \
1703             gen_helper_mve_##FN##w,                                     \
1704             NULL,                                                       \
1705         };                                                              \
1706         return do_2shift_scalar(s, a, fns[a->size]);                    \
1707     }
1708 
1709 DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
1710 DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
1711 DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
1712 DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
1713 DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
1714 DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
1715 DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
1716 DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)
1717 
1718 #define DO_VSHLL(INSN, FN)                                              \
1719     static bool trans_##INSN(DisasContext *s, arg_2shift *a)            \
1720     {                                                                   \
1721         static MVEGenTwoOpShiftFn * const fns[] = {                     \
1722             gen_helper_mve_##FN##b,                                     \
1723             gen_helper_mve_##FN##h,                                     \
1724         };                                                              \
1725         return do_2shift_vec(s, a, fns[a->size], false, do_gvec_##FN);  \
1726     }
1727 
1728 /*
1729  * For the VSHLL vector helpers, the vece is the size of the input
1730  * (ie MO_8 or MO_16); the helpers want to work in the output size.
1731  * The shift count can be 0..<input size>, inclusive. (0 is VMOVL.)
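      * For example (illustrative values): in do_gvec_vshllbs below, a
      * VSHLL.S8 with shift == 3 has vece == MO_8, so ovece == MO_16 and
      * ibits == 8. An input byte 0x9a (-102) in the low half of a lane
      * is shifted left by 8 to 0x9a00, then arithmetic-shifted right by
      * 8 - 3 == 5, giving 0xfcd0 == -816 == -102 << 3.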
1732  */
1733 static void do_gvec_vshllbs(unsigned vece, uint32_t dofs, uint32_t aofs,
1734                             int64_t shift, uint32_t oprsz, uint32_t maxsz)
1735 {
1736     unsigned ovece = vece + 1;
1737     unsigned ibits = vece == MO_8 ? 8 : 16;
1738     tcg_gen_gvec_shli(ovece, dofs, aofs, ibits, oprsz, maxsz);
1739     tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
1740 }
1741 
1742 static void do_gvec_vshllbu(unsigned vece, uint32_t dofs, uint32_t aofs,
1743                             int64_t shift, uint32_t oprsz, uint32_t maxsz)
1744 {
1745     unsigned ovece = vece + 1;
1746     tcg_gen_gvec_andi(ovece, dofs, aofs,
1747                       ovece == MO_16 ? 0xff : 0xffff, oprsz, maxsz);
1748     tcg_gen_gvec_shli(ovece, dofs, dofs, shift, oprsz, maxsz);
1749 }
1750 
1751 static void do_gvec_vshllts(unsigned vece, uint32_t dofs, uint32_t aofs,
1752                             int64_t shift, uint32_t oprsz, uint32_t maxsz)
1753 {
1754     unsigned ovece = vece + 1;
1755     unsigned ibits = vece == MO_8 ? 8 : 16;
1756     if (shift == 0) {
1757         tcg_gen_gvec_sari(ovece, dofs, aofs, ibits, oprsz, maxsz);
1758     } else {
1759         tcg_gen_gvec_andi(ovece, dofs, aofs,
1760                           ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
1761         tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
1762     }
1763 }
1764 
1765 static void do_gvec_vshlltu(unsigned vece, uint32_t dofs, uint32_t aofs,
1766                             int64_t shift, uint32_t oprsz, uint32_t maxsz)
1767 {
1768     unsigned ovece = vece + 1;
1769     unsigned ibits = vece == MO_8 ? 8 : 16;
1770     if (shift == 0) {
1771         tcg_gen_gvec_shri(ovece, dofs, aofs, ibits, oprsz, maxsz);
1772     } else {
1773         tcg_gen_gvec_andi(ovece, dofs, aofs,
1774                           ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
1775         tcg_gen_gvec_shri(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
1776     }
1777 }
1778 
1779 DO_VSHLL(VSHLL_BS, vshllbs)
1780 DO_VSHLL(VSHLL_BU, vshllbu)
1781 DO_VSHLL(VSHLL_TS, vshllts)
1782 DO_VSHLL(VSHLL_TU, vshlltu)
1783 
1784 #define DO_2SHIFT_N(INSN, FN)                                   \
1785     static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
1786     {                                                           \
1787         static MVEGenTwoOpShiftFn * const fns[] = {             \
1788             gen_helper_mve_##FN##b,                             \
1789             gen_helper_mve_##FN##h,                             \
1790         };                                                      \
1791         return do_2shift(s, a, fns[a->size], false);            \
1792     }
1793 
1794 DO_2SHIFT_N(VSHRNB, vshrnb)
1795 DO_2SHIFT_N(VSHRNT, vshrnt)
1796 DO_2SHIFT_N(VRSHRNB, vrshrnb)
1797 DO_2SHIFT_N(VRSHRNT, vrshrnt)
1798 DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
1799 DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
1800 DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
1801 DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
1802 DO_2SHIFT_N(VQSHRUNB, vqshrunb)
1803 DO_2SHIFT_N(VQSHRUNT, vqshrunt)
1804 DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
1805 DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
1806 DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
1807 DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
1808 DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
1809 DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)
1810 
1811 static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
1812 {
1813     /*
1814      * Whole Vector Left Shift with Carry. The carry is taken
1815      * from a general purpose register and written back there.
1816      * An imm of 0 means "shift by 32".
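          * Conceptually, each 32-bit element is shifted left by imm
          * with its low bits filled from the old top bits of the
          * previous element (or, for element 0, from the low bits of
          * Rdm); the bits shifted out of the last element become the
          * new Rdm value.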
1817      */
1818     TCGv_ptr qd;
1819     TCGv_i32 rdm;
1820 
1821     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
1822         return false;
1823     }
1824     if (a->rdm == 13 || a->rdm == 15) {
1825         /* CONSTRAINED UNPREDICTABLE: we UNDEF */
1826         return false;
1827     }
1828     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1829         return true;
1830     }
1831 
1832     qd = mve_qreg_ptr(a->qd);
1833     rdm = load_reg(s, a->rdm);
1834     gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
1835     store_reg(s, a->rdm, rdm);
1836     mve_update_eci(s);
1837     return true;
1838 }
1839 
1840 static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
1841 {
1842     TCGv_ptr qd;
1843     TCGv_i32 rn;
1844 
1845     /*
1846      * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
1847      * This fills the vector with elements of successively increasing
1848      * or decreasing values, starting from Rn.
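          * For example (illustrative values), VIDUP.U16 Qd, Rn, #2 with
          * Rn == 10 writes 10, 12, ..., 24 to the eight lanes and then
          * writes 26 back to Rn.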
1849      */
1850     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
1851         return false;
1852     }
1853     if (a->size == MO_64) {
1854         /* size 0b11 is another encoding */
1855         return false;
1856     }
1857     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1858         return true;
1859     }
1860 
1861     qd = mve_qreg_ptr(a->qd);
1862     rn = load_reg(s, a->rn);
1863     fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
1864     store_reg(s, a->rn, rn);
1865     mve_update_eci(s);
1866     return true;
1867 }
1868 
1869 static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
1870 {
1871     TCGv_ptr qd;
1872     TCGv_i32 rn, rm;
1873 
1874     /*
1875      * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
1876      * This fills the vector with elements of successively increasing
1877      * or decreasing values, starting from Rn. Rm specifies a point where
1878      * the count wraps back around to 0. The updated offset is written back
1879      * to Rn.
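          * For example (illustrative values), VIWDUP.U16 Qd, Rn, Rm, #1
          * with Rn == 6 and Rm == 8 writes 6, 7, 0, 1, 2, 3, 4, 5 to
          * the lanes and writes 6 back to Rn.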
1880      */
1881     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
1882         return false;
1883     }
1884     if (!fn || a->rm == 13 || a->rm == 15) {
1885         /*
1886          * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
1887          * Rm == 15 is also UNPREDICTABLE.
1888          */
1889         return false;
1890     }
1891     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1892         return true;
1893     }
1894 
1895     qd = mve_qreg_ptr(a->qd);
1896     rn = load_reg(s, a->rn);
1897     rm = load_reg(s, a->rm);
1898     fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
1899     store_reg(s, a->rn, rn);
1900     mve_update_eci(s);
1901     return true;
1902 }
1903 
1904 static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
1905 {
1906     static MVEGenVIDUPFn * const fns[] = {
1907         gen_helper_mve_vidupb,
1908         gen_helper_mve_viduph,
1909         gen_helper_mve_vidupw,
1910         NULL,
1911     };
1912     return do_vidup(s, a, fns[a->size]);
1913 }
1914 
1915 static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
1916 {
1917     static MVEGenVIDUPFn * const fns[] = {
1918         gen_helper_mve_vidupb,
1919         gen_helper_mve_viduph,
1920         gen_helper_mve_vidupw,
1921         NULL,
1922     };
1923     /* VDDUP is just like VIDUP but with a negative immediate */
1924     a->imm = -a->imm;
1925     return do_vidup(s, a, fns[a->size]);
1926 }
1927 
1928 static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
1929 {
1930     static MVEGenVIWDUPFn * const fns[] = {
1931         gen_helper_mve_viwdupb,
1932         gen_helper_mve_viwduph,
1933         gen_helper_mve_viwdupw,
1934         NULL,
1935     };
1936     return do_viwdup(s, a, fns[a->size]);
1937 }
1938 
1939 static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
1940 {
1941     static MVEGenVIWDUPFn * const fns[] = {
1942         gen_helper_mve_vdwdupb,
1943         gen_helper_mve_vdwduph,
1944         gen_helper_mve_vdwdupw,
1945         NULL,
1946     };
1947     return do_viwdup(s, a, fns[a->size]);
1948 }
1949 
1950 static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
1951 {
1952     TCGv_ptr qn, qm;
1953 
1954     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
1955         !fn) {
1956         return false;
1957     }
1958     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1959         return true;
1960     }
1961 
1962     qn = mve_qreg_ptr(a->qn);
1963     qm = mve_qreg_ptr(a->qm);
1964     fn(cpu_env, qn, qm);
1965     if (a->mask) {
1966         /* VPT */
1967         gen_vpst(s, a->mask);
1968     }
1969     /* This insn updates predication bits */
1970     s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
1971     mve_update_eci(s);
1972     return true;
1973 }
1974 
1975 static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
1976                            MVEGenScalarCmpFn *fn)
1977 {
1978     TCGv_ptr qn;
1979     TCGv_i32 rm;
1980 
1981     if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
1982         return false;
1983     }
1984     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1985         return true;
1986     }
1987 
1988     qn = mve_qreg_ptr(a->qn);
1989     if (a->rm == 15) {
1990         /* Encoding Rm=0b1111 means "constant zero" */
1991         rm = tcg_constant_i32(0);
1992     } else {
1993         rm = load_reg(s, a->rm);
1994     }
1995     fn(cpu_env, qn, rm);
1996     if (a->mask) {
1997         /* VPT */
1998         gen_vpst(s, a->mask);
1999     }
2000     /* This insn updates predication bits */
2001     s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2002     mve_update_eci(s);
2003     return true;
2004 }
2005 
2006 #define DO_VCMP(INSN, FN)                                       \
2007     static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
2008     {                                                           \
2009         static MVEGenCmpFn * const fns[] = {                    \
2010             gen_helper_mve_##FN##b,                             \
2011             gen_helper_mve_##FN##h,                             \
2012             gen_helper_mve_##FN##w,                             \
2013             NULL,                                               \
2014         };                                                      \
2015         return do_vcmp(s, a, fns[a->size]);                     \
2016     }                                                           \
2017     static bool trans_##INSN##_scalar(DisasContext *s,          \
2018                                       arg_vcmp_scalar *a)       \
2019     {                                                           \
2020         static MVEGenScalarCmpFn * const fns[] = {              \
2021             gen_helper_mve_##FN##_scalarb,                      \
2022             gen_helper_mve_##FN##_scalarh,                      \
2023             gen_helper_mve_##FN##_scalarw,                      \
2024             NULL,                                               \
2025         };                                                      \
2026         return do_vcmp_scalar(s, a, fns[a->size]);              \
2027     }
2028 
2029 DO_VCMP(VCMPEQ, vcmpeq)
2030 DO_VCMP(VCMPNE, vcmpne)
2031 DO_VCMP(VCMPCS, vcmpcs)
2032 DO_VCMP(VCMPHI, vcmphi)
2033 DO_VCMP(VCMPGE, vcmpge)
2034 DO_VCMP(VCMPLT, vcmplt)
2035 DO_VCMP(VCMPGT, vcmpgt)
2036 DO_VCMP(VCMPLE, vcmple)
2037 
2038 #define DO_VCMP_FP(INSN, FN)                                    \
2039     static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
2040     {                                                           \
2041         static MVEGenCmpFn * const fns[] = {                    \
2042             NULL,                                               \
2043             gen_helper_mve_##FN##h,                             \
2044             gen_helper_mve_##FN##s,                             \
2045             NULL,                                               \
2046         };                                                      \
2047         if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
2048             return false;                                       \
2049         }                                                       \
2050         return do_vcmp(s, a, fns[a->size]);                     \
2051     }                                                           \
2052     static bool trans_##INSN##_scalar(DisasContext *s,          \
2053                                       arg_vcmp_scalar *a)       \
2054     {                                                           \
2055         static MVEGenScalarCmpFn * const fns[] = {              \
2056             NULL,                                               \
2057             gen_helper_mve_##FN##_scalarh,                      \
2058             gen_helper_mve_##FN##_scalars,                      \
2059             NULL,                                               \
2060         };                                                      \
2061         if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
2062             return false;                                       \
2063         }                                                       \
2064         return do_vcmp_scalar(s, a, fns[a->size]);              \
2065     }
2066 
2067 DO_VCMP_FP(VCMPEQ_fp, vfcmpeq)
2068 DO_VCMP_FP(VCMPNE_fp, vfcmpne)
2069 DO_VCMP_FP(VCMPGE_fp, vfcmpge)
2070 DO_VCMP_FP(VCMPLT_fp, vfcmplt)
2071 DO_VCMP_FP(VCMPGT_fp, vfcmpgt)
2072 DO_VCMP_FP(VCMPLE_fp, vfcmple)
2073 
2074 static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
2075 {
2076     /*
2077      * MIN/MAX operations across a vector: compute the min or
2078      * max of the initial value in a general purpose register
2079      * and all the elements in the vector, and store it back
2080      * into the general purpose register.
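          * For example, VMAXV.S16 Rda, Qm writes
          * max(Rda, Qm[0], ..., Qm[7]) back to Rda.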
2081      */
2082     TCGv_ptr qm;
2083     TCGv_i32 rda;
2084 
2085     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
2086         !fn || a->rda == 13 || a->rda == 15) {
2087         /* Rda cases are UNPREDICTABLE */
2088         return false;
2089     }
2090     if (!mve_eci_check(s) || !vfp_access_check(s)) {
2091         return true;
2092     }
2093 
2094     qm = mve_qreg_ptr(a->qm);
2095     rda = load_reg(s, a->rda);
2096     fn(rda, cpu_env, qm, rda);
2097     store_reg(s, a->rda, rda);
2098     mve_update_eci(s);
2099     return true;
2100 }
2101 
2102 #define DO_VMAXV(INSN, FN)                                      \
2103     static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
2104     {                                                           \
2105         static MVEGenVADDVFn * const fns[] = {                  \
2106             gen_helper_mve_##FN##b,                             \
2107             gen_helper_mve_##FN##h,                             \
2108             gen_helper_mve_##FN##w,                             \
2109             NULL,                                               \
2110         };                                                      \
2111         return do_vmaxv(s, a, fns[a->size]);                    \
2112     }
2113 
2114 DO_VMAXV(VMAXV_S, vmaxvs)
2115 DO_VMAXV(VMAXV_U, vmaxvu)
2116 DO_VMAXV(VMAXAV, vmaxav)
2117 DO_VMAXV(VMINV_S, vminvs)
2118 DO_VMAXV(VMINV_U, vminvu)
2119 DO_VMAXV(VMINAV, vminav)
2120 
2121 #define DO_VMAXV_FP(INSN, FN)                                   \
2122     static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
2123     {                                                           \
2124         static MVEGenVADDVFn * const fns[] = {                  \
2125             NULL,                                               \
2126             gen_helper_mve_##FN##h,                             \
2127             gen_helper_mve_##FN##s,                             \
2128             NULL,                                               \
2129         };                                                      \
2130         if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
2131             return false;                                       \
2132         }                                                       \
2133         return do_vmaxv(s, a, fns[a->size]);                    \
2134     }
2135 
2136 DO_VMAXV_FP(VMAXNMV, vmaxnmv)
2137 DO_VMAXV_FP(VMINNMV, vminnmv)
2138 DO_VMAXV_FP(VMAXNMAV, vmaxnmav)
2139 DO_VMAXV_FP(VMINNMAV, vminnmav)
2140 
2141 static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
2142 {
2143     /* Absolute difference accumulated across vector */
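         /* i.e. Rda += |Qn[i] - Qm[i]|, summed across all the elements */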
2144     TCGv_ptr qn, qm;
2145     TCGv_i32 rda;
2146 
2147     if (!dc_isar_feature(aa32_mve, s) ||
2148         !mve_check_qreg_bank(s, a->qm | a->qn) ||
2149         !fn || a->rda == 13 || a->rda == 15) {
2150         /* Rda cases are UNPREDICTABLE */
2151         return false;
2152     }
2153     if (!mve_eci_check(s) || !vfp_access_check(s)) {
2154         return true;
2155     }
2156 
2157     qm = mve_qreg_ptr(a->qm);
2158     qn = mve_qreg_ptr(a->qn);
2159     rda = load_reg(s, a->rda);
2160     fn(rda, cpu_env, qn, qm, rda);
2161     store_reg(s, a->rda, rda);
2162     mve_update_eci(s);
2163     return true;
2164 }
2165 
2166 #define DO_VABAV(INSN, FN)                                      \
2167     static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
2168     {                                                           \
2169         static MVEGenVABAVFn * const fns[] = {                  \
2170             gen_helper_mve_##FN##b,                             \
2171             gen_helper_mve_##FN##h,                             \
2172             gen_helper_mve_##FN##w,                             \
2173             NULL,                                               \
2174         };                                                      \
2175         return do_vabav(s, a, fns[a->size]);                    \
2176     }
2177 
2178 DO_VABAV(VABAV_S, vabavs)
2179 DO_VABAV(VABAV_U, vabavu)
2180 
2181 static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
2182 {
2183     /*
2184      * VMOV two 32-bit vector lanes to two general-purpose registers.
2185      * This insn is not predicated but it is subject to beat-wise
2186      * execution if it is not in an IT block. For us this means
2187      * only that if PSR.ECI says we should not be executing the beat
2188      * corresponding to the lane of the vector register being accessed
2189      * then we should skip performing the move, and that we need to do
2190      * the usual check for bad ECI state and advance of ECI state.
2191      * (If PSR.ECI is non-zero then we cannot be in an IT block.)
2192      */
2193     TCGv_i32 tmp;
2194     int vd;
2195 
2196     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
2197         a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
2198         a->rt == a->rt2) {
2199         /* Rt/Rt2 cases are UNPREDICTABLE */
2200         return false;
2201     }
2202     if (!mve_eci_check(s) || !vfp_access_check(s)) {
2203         return true;
2204     }
2205 
2206     /* Convert Qreg index to Dreg for read_neon_element32() etc */
2207     vd = a->qd * 2;
2208 
2209     if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
2210         tmp = tcg_temp_new_i32();
2211         read_neon_element32(tmp, vd, a->idx, MO_32);
2212         store_reg(s, a->rt, tmp);
2213     }
2214     if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
2215         tmp = tcg_temp_new_i32();
2216         read_neon_element32(tmp, vd + 1, a->idx, MO_32);
2217         store_reg(s, a->rt2, tmp);
2218     }
2219 
2220     mve_update_and_store_eci(s);
2221     return true;
2222 }
2223 
2224 static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
2225 {
2226     /*
2227      * VMOV two general-purpose registers to two 32-bit vector lanes.
2228      * This insn is not predicated but it is subject to beat-wise
2229      * execution if it is not in an IT block. For us this means
2230      * only that if PSR.ECI says we should not be executing the beat
2231      * corresponding to the lane of the vector register being accessed
2232      * then we should skip performing the move, and that we need to do
2233      * the usual check for bad ECI state and advance of ECI state.
2234      * (If PSR.ECI is non-zero then we cannot be in an IT block.)
2235      */
2236     TCGv_i32 tmp;
2237     int vd;
2238 
2239     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
2240         a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
2241         /* Rt/Rt2 cases are UNPREDICTABLE */
2242         return false;
2243     }
2244     if (!mve_eci_check(s) || !vfp_access_check(s)) {
2245         return true;
2246     }
2247 
2248     /* Convert Qreg idx to Dreg for read_neon_element32() etc */
2249     vd = a->qd * 2;
2250 
2251     if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
2252         tmp = load_reg(s, a->rt);
2253         write_neon_element32(tmp, vd, a->idx, MO_32);
2254     }
2255     if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
2256         tmp = load_reg(s, a->rt2);
2257         write_neon_element32(tmp, vd + 1, a->idx, MO_32);
2258     }
2259 
2260     mve_update_and_store_eci(s);
2261     return true;
2262 }
2263