/*
 *  ARM translation: AArch32 Neon instructions
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *  Copyright (c) 2005-2007 CodeSourcery
 *  Copyright (c) 2007 OpenedHand, Ltd.
 *  Copyright (c) 2020 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"
#include "translate-a32.h"

/* Include the generated Neon decoder */
#include "decode-neon-dp.c.inc"
#include "decode-neon-ls.c.inc"
#include "decode-neon-shared.c.inc"

static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, tcg_env, vfp_reg_offset(dp, reg));
    return ret;
}

static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
{
    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);

    switch (mop) {
    case MO_UB:
        tcg_gen_ld8u_i32(var, tcg_env, offset);
        break;
    case MO_UW:
        tcg_gen_ld16u_i32(var, tcg_env, offset);
        break;
    case MO_UL:
        tcg_gen_ld_i32(var, tcg_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
{
    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);

    switch (mop) {
    case MO_UB:
        tcg_gen_ld8u_i64(var, tcg_env, offset);
        break;
    case MO_UW:
        tcg_gen_ld16u_i64(var, tcg_env, offset);
        break;
    case MO_UL:
        tcg_gen_ld32u_i64(var, tcg_env, offset);
        break;
    case MO_UQ:
        tcg_gen_ld_i64(var, tcg_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
{
    long offset = neon_element_offset(reg, ele, size);

    switch (size) {
    case MO_8:
        tcg_gen_st8_i32(var, tcg_env, offset);
        break;
    case MO_16:
        tcg_gen_st16_i32(var, tcg_env, offset);
        break;
    case MO_32:
        tcg_gen_st_i32(var, tcg_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
{
    long offset = neon_element_offset(reg, ele, size);

    switch (size) {
    case MO_8:
        tcg_gen_st8_i64(var, tcg_env, offset);
        break;
    case MO_16:
        tcg_gen_st16_i64(var, tcg_env, offset);
        break;
    case MO_32:
        tcg_gen_st32_i64(var, tcg_env, offset);
        break;
    case MO_64:
        tcg_gen_st_i64(var, tcg_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm,
                         int data, gen_helper_gvec_4 *fn_gvec)
{
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    /*
     * UNDEF accesses to odd registers for each bit of Q.
     * Q will be 0b111 for all Q-reg instructions; otherwise
     * we have a mix of Q- and D-reg inputs.
     */
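    /*
     * For example: with q == 0b111 (a Q-reg insn) and an odd vd,
     * (vd & 1) * 4 == 4 and 4 & 0b111 != 0, so the insn UNDEFs;
     * operands whose bit of q is clear are D regs and may be odd.
     */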
    if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    int opr_sz = q ? 16 : 8;
    tcg_gen_gvec_4_ool(vfp_reg_offset(1, vd),
                       vfp_reg_offset(1, vn),
                       vfp_reg_offset(1, vm),
                       vfp_reg_offset(1, vd),
                       opr_sz, opr_sz, data, fn_gvec);
    return true;
}

static bool do_neon_ddda_env(DisasContext *s, int q, int vd, int vn, int vm,
                             int data, gen_helper_gvec_4_ptr *fn_gvec)
{
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    /*
     * UNDEF accesses to odd registers for each bit of Q.
     * Q will be 0b111 for all Q-reg instructions; otherwise
     * we have a mix of Q- and D-reg inputs.
     */
    if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    int opr_sz = q ? 16 : 8;
    tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd),
                       vfp_reg_offset(1, vn),
                       vfp_reg_offset(1, vm),
                       vfp_reg_offset(1, vd),
                       tcg_env,
                       opr_sz, opr_sz, data, fn_gvec);
    return true;
}

static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm,
                              int data, ARMFPStatusFlavour fp_flavour,
                              gen_helper_gvec_4_ptr *fn_gvec_ptr)
{
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    /*
     * UNDEF accesses to odd registers for each bit of Q.
     * Q will be 0b111 for all Q-reg instructions; otherwise
     * we have a mix of Q- and D-reg inputs.
     */
    if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    int opr_sz = q ? 16 : 8;
    TCGv_ptr fpst = fpstatus_ptr(fp_flavour);

    tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd),
                       vfp_reg_offset(1, vn),
                       vfp_reg_offset(1, vm),
                       vfp_reg_offset(1, vd),
                       fpst, opr_sz, opr_sz, data, fn_gvec_ptr);
    return true;
}

static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
{
    if (!dc_isar_feature(aa32_vcma, s)) {
        return false;
    }
    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
                                 FPST_STD_F16, gen_helper_gvec_fcmlah);
    }
    return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
                             FPST_STD, gen_helper_gvec_fcmlas);
}

static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
{
    int opr_sz;
    TCGv_ptr fpst;
    gen_helper_gvec_3_ptr *fn_gvec_ptr;

    if (!dc_isar_feature(aa32_vcma, s)
        || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    fn_gvec_ptr = (a->size == MO_16) ?
        gen_helper_gvec_fcaddh : gen_helper_gvec_fcadds;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpst, opr_sz, opr_sz, a->rot,
                       fn_gvec_ptr);
    return true;
}

static bool trans_VSDOT(DisasContext *s, arg_VSDOT *a)
{
    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
                        gen_helper_gvec_sdot_b);
}

static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a)
{
    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
                        gen_helper_gvec_udot_b);
}

static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a)
{
    if (!dc_isar_feature(aa32_i8mm, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
                        gen_helper_gvec_usdot_b);
}

static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
{
    if (!dc_isar_feature(aa32_bf16, s)) {
        return false;
    }
    return do_neon_ddda_env(s, a->q * 7, a->vd, a->vn, a->vm, 0,
                            gen_helper_gvec_bfdot);
}

static bool trans_VFML(DisasContext *s, arg_VFML *a)
{
    int opr_sz;

    if (!dc_isar_feature(aa32_fhm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        (a->vd & 0x10)) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(a->q, a->vn),
                       vfp_reg_offset(a->q, a->vm),
                       tcg_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
                       gen_helper_gvec_fmlal_a32);
    return true;
}

static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
{
    int data = (a->index << 2) | a->rot;

    if (!dc_isar_feature(aa32_vcma, s)) {
        return false;
    }
    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
                                 FPST_STD_F16, gen_helper_gvec_fcmlah_idx);
    }
    return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
                             FPST_STD, gen_helper_gvec_fcmlas_idx);
}

static bool trans_VSDOT_scalar(DisasContext *s, arg_VSDOT_scalar *a)
{
    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
                        gen_helper_gvec_sdot_idx_b);
}

static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a)
{
    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
                        gen_helper_gvec_udot_idx_b);
}

static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a)
{
    if (!dc_isar_feature(aa32_i8mm, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
                        gen_helper_gvec_usdot_idx_b);
}

static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a)
{
    if (!dc_isar_feature(aa32_i8mm, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
                        gen_helper_gvec_sudot_idx_b);
}

static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a)
{
    if (!dc_isar_feature(aa32_bf16, s)) {
        return false;
    }
    return do_neon_ddda_env(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
                            gen_helper_gvec_bfdot_idx);
}

static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
{
    int opr_sz;

    if (!dc_isar_feature(aa32_fhm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(a->q, a->vn),
                       vfp_reg_offset(a->q, a->rm),
                       tcg_env, opr_sz, opr_sz,
                       (a->index << 2) | a->s, /* is_2 == 0 */
                       gen_helper_gvec_fmlal_idx_a32);
    return true;
}

static struct {
    int nregs;
    int interleave;
    int spacing;
} const neon_ls_element_type[11] = {
    {1, 4, 1},
    {1, 4, 2},
    {4, 1, 1},
    {2, 2, 2},
    {1, 3, 1},
    {1, 3, 2},
    {3, 1, 1},
    {1, 1, 1},
    {1, 2, 1},
    {1, 2, 2},
    {2, 1, 1}
};
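/*
 * Per the A32 encoding, itype maps to the VLDn/VSTn multiple-structure
 * forms: interleave gives n, and nregs * interleave gives the total
 * register count.
 *   itype 0,1: VLD4/VST4     itype 4,5: VLD3/VST3
 *   itype 3,8,9: VLD2/VST2   itype 2,6,7,10: VLD1/VST1 (4/3/1/2 regs)
 */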

static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
                                      int stride)
{
    if (rm != 15) {
        TCGv_i32 base;

        base = load_reg(s, rn);
        if (rm == 13) {
            tcg_gen_addi_i32(base, base, stride);
        } else {
            TCGv_i32 index;
            index = load_reg(s, rm);
            tcg_gen_add_i32(base, base, index);
        }
        store_reg(s, rn, base);
    }
}

static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
{
    /* Neon load/store multiple structures */
    int nregs, interleave, spacing, reg, n;
    MemOp mop, align, endian;
    int mmu_idx = get_mem_index(s);
    int size = a->size;
    TCGv_i64 tmp64;
    TCGv_i32 addr;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }
    if (a->itype > 10) {
        return false;
    }
    /* Catch UNDEF cases for bad values of align field */
    switch (a->itype & 0xc) {
    case 4:
        if (a->align >= 2) {
            return false;
        }
        break;
    case 8:
        if (a->align == 3) {
            return false;
        }
        break;
    default:
        break;
    }
    nregs = neon_ls_element_type[a->itype].nregs;
    interleave = neon_ls_element_type[a->itype].interleave;
    spacing = neon_ls_element_type[a->itype].spacing;
    if (size == 3 && (interleave | spacing) != 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For our purposes, bytes are always little-endian.  */
    endian = s->be_data;
    if (size == 0) {
        endian = MO_LE;
    }

    /* Enforce alignment requested by the instruction */
    if (a->align) {
        align = pow2_align(a->align + 2); /* 1 << (a->align + 2): 8/16/32 bytes */
    } else {
        align = s->align_mem ? MO_ALIGN : 0;
    }

    /*
     * Consecutive little-endian elements from a single register
     * can be promoted to a larger little-endian operation.
     */
    if (interleave == 1 && endian == MO_LE) {
        /* Retain any natural alignment. */
        if (align == MO_ALIGN) {
            align = pow2_align(size);
        }
        size = 3;
    }
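    /*
     * For example, a little-endian VLD1.16 to one D register
     * (interleave == 1) loads four 16-bit elements; with size promoted
     * to 3 it becomes a single 64-bit load per register.
     */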

    tmp64 = tcg_temp_new_i64();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);

    mop = endian | size | align;
    for (reg = 0; reg < nregs; reg++) {
        for (n = 0; n < 8 >> size; n++) {
            int xs;
            for (xs = 0; xs < interleave; xs++) {
                int tt = a->vd + reg + spacing * xs;

                if (a->l) {
                    gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, mop);
                    neon_store_element64(tt, n, size, tmp64);
                } else {
                    neon_load_element64(tmp64, tt, n, size);
                    gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, mop);
                }
                tcg_gen_addi_i32(addr, addr, 1 << size);

                /* Subsequent memory operations inherit alignment */
                mop &= ~MO_AMASK;
            }
        }
    }

    gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
    return true;
}

static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
{
    /* Neon load single structure to all lanes */
    int reg, stride, vec_size;
    int vd = a->vd;
    int size = a->size;
    int nregs = a->n + 1;
    TCGv_i32 addr, tmp;
    MemOp mop, align;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    align = 0;
    if (size == 3) {
        if (nregs != 4 || a->a == 0) {
            return false;
        }
        /* For VLD4, size == 3 && a == 1 means 32 bits at 16-byte alignment */
        size = MO_32;
        align = MO_ALIGN_16;
    } else if (a->a) {
        switch (nregs) {
        case 1:
            if (size == 0) {
                return false;
            }
            align = MO_ALIGN;
            break;
        case 2:
            align = pow2_align(size + 1);
            break;
        case 3:
            return false;
        case 4:
            if (size == 2) {
                align = pow2_align(3);
            } else {
                align = pow2_align(size + 2);
            }
            break;
        default:
            g_assert_not_reached();
        }
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * VLD1 to all lanes: T bit indicates how many Dregs to write.
     * VLD2/3/4 to all lanes: T bit indicates register stride.
     */
    stride = a->t ? 2 : 1;
    vec_size = nregs == 1 ? stride * 8 : 8;
    mop = size | align;
    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);
    for (reg = 0; reg < nregs; reg++) {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop);
        if ((vd & 1) && vec_size == 16) {
            /*
             * We cannot write 16 bytes at once because the
             * destination is unaligned.
             */
            tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
                                 8, 8, tmp);
            tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1),
                             neon_full_reg_offset(vd), 8, 8);
        } else {
            tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
                                 vec_size, vec_size, tmp);
        }
        tcg_gen_addi_i32(addr, addr, 1 << size);
        vd += stride;

        /* Subsequent memory operations inherit alignment */
        mop &= ~MO_AMASK;
    }

    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);

    return true;
}

static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
{
    /* Neon load/store single structure to one lane */
    int reg;
    int nregs = a->n + 1;
    int vd = a->vd;
    TCGv_i32 addr, tmp;
    MemOp mop;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    /* Catch the UNDEF cases. This is unavoidably a bit messy. */
    switch (nregs) {
    case 1:
        if (a->stride != 1) {
            return false;
        }
        if (((a->align & (1 << a->size)) != 0) ||
            (a->size == 2 && (a->align == 1 || a->align == 2))) {
            return false;
        }
        break;
    case 2:
        if (a->size == 2 && (a->align & 2) != 0) {
            return false;
        }
        break;
    case 3:
        if (a->align != 0) {
            return false;
        }
        break;
    case 4:
        if (a->size == 2 && a->align == 3) {
            return false;
        }
        break;
    default:
        g_assert_not_reached();
    }
    if ((vd + a->stride * (nregs - 1)) > 31) {
        /*
         * Attempts to write off the end of the register file are
         * UNPREDICTABLE; we choose to UNDEF because otherwise we would
         * access off the end of the array that holds the register data.
         */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Pick up SCTLR settings */
    mop = finalize_memop(s, a->size);

    if (a->align) {
        MemOp align_op;

        switch (nregs) {
        case 1:
            /* For VLD1, use natural alignment. */
            align_op = MO_ALIGN;
            break;
        case 2:
            /* For VLD2, use double alignment. */
            align_op = pow2_align(a->size + 1);
            break;
        case 4:
            if (a->size == MO_32) {
                /*
                 * For VLD4.32, align = 1 is double alignment, align = 2 is
                 * quad alignment; align = 3 is rejected above.
                 */
                align_op = pow2_align(a->size + a->align);
            } else {
                /* For VLD4.8 and VLD4.16, we want quad alignment. */
                align_op = pow2_align(a->size + 2);
            }
            break;
        default:
            /* For VLD3, the alignment field is zero and rejected above. */
            g_assert_not_reached();
        }

        mop = (mop & ~MO_AMASK) | align_op;
    }

    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);

    for (reg = 0; reg < nregs; reg++) {
        if (a->l) {
            gen_aa32_ld_internal_i32(s, tmp, addr, get_mem_index(s), mop);
            neon_store_element(vd, a->reg_idx, a->size, tmp);
        } else { /* Store */
            neon_load_element(tmp, vd, a->reg_idx, a->size);
            gen_aa32_st_internal_i32(s, tmp, addr, get_mem_index(s), mop);
        }
        vd += a->stride;
        tcg_gen_addi_i32(addr, addr, 1 << a->size);

        /* Subsequent memory operations inherit alignment */
        mop &= ~MO_AMASK;
    }

    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);

    return true;
}

static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
{
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rn_ofs = neon_full_reg_offset(a->vn);
    int rm_ofs = neon_full_reg_offset(a->vm);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
    return true;
}

#define DO_3SAME(INSN, FUNC)                                            \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        return do_3same(s, a, FUNC);                                    \
    }

DO_3SAME(VADD, tcg_gen_gvec_add)
DO_3SAME(VSUB, tcg_gen_gvec_sub)
DO_3SAME(VAND, tcg_gen_gvec_and)
DO_3SAME(VBIC, tcg_gen_gvec_andc)
DO_3SAME(VORR, tcg_gen_gvec_or)
DO_3SAME(VORN, tcg_gen_gvec_orc)
DO_3SAME(VEOR, tcg_gen_gvec_xor)
DO_3SAME(VSHL_S, gen_gvec_sshl)
DO_3SAME(VSHL_U, gen_gvec_ushl)
DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)
DO_3SAME(VRSHL_S, gen_gvec_srshl)
DO_3SAME(VRSHL_U, gen_gvec_urshl)
DO_3SAME(VQSHL_S, gen_neon_sqshl)
DO_3SAME(VQSHL_U, gen_neon_uqshl)
DO_3SAME(VQRSHL_S, gen_neon_sqrshl)
DO_3SAME(VQRSHL_U, gen_neon_uqrshl)

/* These insns are all gvec_bitsel but with the inputs in various orders. */
#define DO_3SAME_BITSEL(INSN, O1, O2, O3)                               \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz);    \
    }                                                                   \
    DO_3SAME(INSN, gen_##INSN##_3s)

DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
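/*
 * tcg_gen_gvec_bitsel(vece, rd, sel, x, y) computes
 * rd = (x & sel) | (y & ~sel), so the operand orders above yield:
 *   VBSL: rd = (rn & rd) | (rm & ~rd)
 *   VBIT: rd = (rn & rm) | (rd & ~rm)
 *   VBIF: rd = (rd & rm) | (rn & ~rm)
 */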

#define DO_3SAME_NO_SZ_3(INSN, FUNC)                                    \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size == 3) {                                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, FUNC);                                    \
    }

DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)
DO_3SAME_NO_SZ_3(VPADD, gen_gvec_addp)
DO_3SAME_NO_SZ_3(VPMAX_S, gen_gvec_smaxp)
DO_3SAME_NO_SZ_3(VPMIN_S, gen_gvec_sminp)
DO_3SAME_NO_SZ_3(VPMAX_U, gen_gvec_umaxp)
DO_3SAME_NO_SZ_3(VPMIN_U, gen_gvec_uminp)
DO_3SAME_NO_SZ_3(VHADD_S, gen_gvec_shadd)
DO_3SAME_NO_SZ_3(VHADD_U, gen_gvec_uhadd)
DO_3SAME_NO_SZ_3(VHSUB_S, gen_gvec_shsub)
DO_3SAME_NO_SZ_3(VHSUB_U, gen_gvec_uhsub)
DO_3SAME_NO_SZ_3(VRHADD_S, gen_gvec_srhadd)
DO_3SAME_NO_SZ_3(VRHADD_U, gen_gvec_urhadd)

#define DO_3SAME_CMP(INSN, COND)                                        \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
    }                                                                   \
    DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)

DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
DO_3SAME_CMP(VCEQ, TCG_COND_EQ)

#define WRAP_OOL_FN(WRAPNAME, FUNC)                                        \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,  \
                         uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)  \
    {                                                                      \
        tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \
    }

WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b)

static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
{
    if (a->size != 0) {
        return false;
    }
    return do_3same(s, a, gen_VMUL_p_3s);
}

#define DO_VQRDMLAH(INSN, FUNC)                                         \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_rdm, s)) {                            \
            return false;                                               \
        }                                                               \
        if (a->size != 1 && a->size != 2) {                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, FUNC);                                    \
    }

DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)

#define DO_SHA1(NAME, FUNC)                                             \
    WRAP_OOL_FN(gen_##NAME##_3s, FUNC)                                  \
    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_sha1, s)) {                           \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##NAME##_3s);                         \
    }

DO_SHA1(SHA1C, gen_helper_crypto_sha1c)
DO_SHA1(SHA1P, gen_helper_crypto_sha1p)
DO_SHA1(SHA1M, gen_helper_crypto_sha1m)
DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0)

#define DO_SHA2(NAME, FUNC)                                             \
    WRAP_OOL_FN(gen_##NAME##_3s, FUNC)                                  \
    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_sha2, s)) {                           \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##NAME##_3s);                         \
    }

DO_SHA2(SHA256H, gen_helper_crypto_sha256h)
DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2)
DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1)

/*
 * Some helper functions need to be passed tcg_env. In order
 * to use those with the gvec APIs like tcg_gen_gvec_3() we need
 * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
 * and which call a NeonGenTwoOpEnvFn().
 */
#define WRAP_ENV_FN(WRAPNAME, FUNC)                                     \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m)            \
    {                                                                   \
        FUNC(d, tcg_env, n, m);                                         \
    }
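/*
 * A hypothetical instance, for illustration only:
 * WRAP_ENV_FN(gen_VFOO_16, gen_helper_neon_foo_u16) would define
 * gen_VFOO_16(d, n, m) as a call to gen_helper_neon_foo_u16(d, tcg_env, n, m).
 */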

#define DO_3SAME_VQDMULH(INSN, FUNC)                                    \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    { return a->size >= 1 && a->size <= 2 && do_3same(s, a, FUNC); }

DO_3SAME_VQDMULH(VQDMULH, gen_gvec_sqdmulh_qc)
DO_3SAME_VQDMULH(VQRDMULH, gen_gvec_sqrdmulh_qc)

#define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC)                              \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
                         uint32_t rn_ofs, uint32_t rm_ofs,              \
                         uint32_t oprsz, uint32_t maxsz)                \
    {                                                                   \
        TCGv_ptr fpst = fpstatus_ptr(FPST);                             \
        tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst,                \
                           oprsz, maxsz, 0, FUNC);                      \
    }

#define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC)                                 \
    WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC)                 \
    WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC)             \
    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a)     \
    {                                                                   \
        if (a->size == MO_16) {                                         \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
                return false;                                           \
            }                                                           \
            return do_3same(s, a, gen_##INSN##_fp16_3s);                \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_fp32_3s);                    \
    }

DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h)
DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h)
DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h)
DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h)
DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h)
DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h)
DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h)
DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h)
DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h)
DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h)
DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h)
DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h)
DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)
DO_3S_FP_GVEC(VPADD, gen_helper_gvec_faddp_s, gen_helper_gvec_faddp_h)
DO_3S_FP_GVEC(VPMAX, gen_helper_gvec_fmaxp_s, gen_helper_gvec_fmaxp_h)
DO_3S_FP_GVEC(VPMIN, gen_helper_gvec_fminp_s, gen_helper_gvec_fminp_h)

WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s)
WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h)

static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_3same(s, a, gen_VMAXNM_fp16_3s);
    }
    return do_3same(s, a, gen_VMAXNM_fp32_3s);
}

static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_3same(s, a, gen_VMINNM_fp16_3s);
    }
    return do_3same(s, a, gen_VMINNM_fp32_3s);
}

static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
{
    /* Handle a 2-reg-shift insn which can be vectorized. */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rm_ofs = neon_full_reg_offset(a->vm);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size);
    return true;
}

#define DO_2SH(INSN, FUNC)                                              \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_vector_2sh(s, a, FUNC);                               \
    }                                                                   \

DO_2SH(VSHL, tcg_gen_gvec_shli)
DO_2SH(VSLI, gen_gvec_sli)
DO_2SH(VSRI, gen_gvec_sri)
DO_2SH(VSRA_S, gen_gvec_ssra)
DO_2SH(VSRA_U, gen_gvec_usra)
DO_2SH(VRSHR_S, gen_gvec_srshr)
DO_2SH(VRSHR_U, gen_gvec_urshr)
DO_2SH(VRSRA_S, gen_gvec_srsra)
DO_2SH(VRSRA_U, gen_gvec_ursra)

static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
{
    /* Signed shift out of range results in all-sign-bits */
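    /* e.g. VSHR.S8 #8 is clamped to an arithmetic shift of 7, which
     * still yields all sign bits, the architecturally required result */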
    a->shift = MIN(a->shift, (8 << a->size) - 1);
    return do_vector_2sh(s, a, tcg_gen_gvec_sari);
}

static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0);
}

static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
{
    /* Shift out of range is architecturally valid and results in zero. */
    if (a->shift >= (8 << a->size)) {
        return do_vector_2sh(s, a, gen_zero_rd_2sh);
    } else {
        return do_vector_2sh(s, a, tcg_gen_gvec_shri);
    }
}

static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
                             NeonGenTwo64OpEnvFn *fn)
{
    /*
     * 2-reg-and-shift operations, size == 3 case, where the
     * function needs to be passed tcg_env.
     */
    TCGv_i64 constimm;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * To avoid excessive duplication of ops we implement shift
     * by immediate using the variable shift operations.
     */
    constimm = tcg_constant_i64(dup_const(a->size, a->shift));

    for (pass = 0; pass < a->q + 1; pass++) {
        TCGv_i64 tmp = tcg_temp_new_i64();

        read_neon_element64(tmp, a->vm, pass, MO_64);
        fn(tmp, tcg_env, tmp, constimm);
        write_neon_element64(tmp, a->vd, pass, MO_64);
    }
    return true;
}

static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
                             NeonGenTwoOpEnvFn *fn)
{
    /*
     * 2-reg-and-shift operations, size < 3 case, where the
     * helper needs to be passed tcg_env.
     */
    TCGv_i32 constimm, tmp;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * To avoid excessive duplication of ops we implement shift
     * by immediate using the variable shift operations.
     */
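    /*
     * e.g. with size == 1 (16-bit lanes) and shift == 3 this yields
     * constimm == 0x00030003, i.e. 3 replicated into each lane.
     */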
    constimm = tcg_constant_i32(dup_const(a->size, a->shift));
    tmp = tcg_temp_new_i32();

    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        read_neon_element32(tmp, a->vm, pass, MO_32);
        fn(tmp, tcg_env, tmp, constimm);
        write_neon_element32(tmp, a->vd, pass, MO_32);
    }
    return true;
}

#define DO_2SHIFT_ENV(INSN, FUNC)                                       \
    static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \
    {                                                                   \
        return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64);      \
    }                                                                   \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        static NeonGenTwoOpEnvFn * const fns[] = {                      \
            gen_helper_neon_##FUNC##8,                                  \
            gen_helper_neon_##FUNC##16,                                 \
            gen_helper_neon_##FUNC##32,                                 \
        };                                                              \
        assert(a->size < ARRAY_SIZE(fns));                              \
        return do_2shift_env_32(s, a, fns[a->size]);                    \
    }

DO_2SHIFT_ENV(VQSHLU, qshlu_s)
DO_2SHIFT_ENV(VQSHL_U, qshl_u)
DO_2SHIFT_ENV(VQSHL_S, qshl_s)

static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
                                NeonGenTwo64OpFn *shiftfn,
                                NeonGenNarrowEnvFn *narrowfn)
{
    /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
    TCGv_i64 constimm, rm1, rm2;
    TCGv_i32 rd;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vm & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is always a right shift, and the shiftfn is always a
     * left-shift helper, which thus needs the negated shift count.
     */
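    /*
     * e.g. a narrowing right shift by 8 passes -8; the variable-shift
     * helpers treat a negative count as a shift to the right.
     */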
    constimm = tcg_constant_i64(-a->shift);
    rm1 = tcg_temp_new_i64();
    rm2 = tcg_temp_new_i64();
    rd = tcg_temp_new_i32();

    /* Load both inputs first to avoid potential overwrite if rm == rd */
    read_neon_element64(rm1, a->vm, 0, MO_64);
    read_neon_element64(rm2, a->vm, 1, MO_64);

    shiftfn(rm1, rm1, constimm);
    narrowfn(rd, tcg_env, rm1);
    write_neon_element32(rd, a->vd, 0, MO_32);

    shiftfn(rm2, rm2, constimm);
    narrowfn(rd, tcg_env, rm2);
    write_neon_element32(rd, a->vd, 1, MO_32);

    return true;
}

static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
                                NeonGenTwoOpFn *shiftfn,
                                NeonGenNarrowEnvFn *narrowfn)
{
    /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
    TCGv_i32 constimm, rm1, rm2, rm3, rm4;
    TCGv_i64 rtmp;
    uint32_t imm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vm & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is always a right shift, and the shiftfn is always a
     * left-shift helper, which thus needs the negated shift count
     * duplicated into each lane of the immediate value.
     */
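    /*
     * e.g. a 16-bit narrowing shift by 5 gives imm = 0xfffbfffb:
     * -5 replicated into both 16-bit lanes.
     */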
    if (a->size == 1) {
        imm = (uint16_t)(-a->shift);
        imm |= imm << 16;
    } else {
        /* size == 2 */
        imm = -a->shift;
    }
    constimm = tcg_constant_i32(imm);

    /* Load all inputs first to avoid potential overwrite */
    rm1 = tcg_temp_new_i32();
    rm2 = tcg_temp_new_i32();
    rm3 = tcg_temp_new_i32();
    rm4 = tcg_temp_new_i32();
    read_neon_element32(rm1, a->vm, 0, MO_32);
    read_neon_element32(rm2, a->vm, 1, MO_32);
    read_neon_element32(rm3, a->vm, 2, MO_32);
    read_neon_element32(rm4, a->vm, 3, MO_32);
    rtmp = tcg_temp_new_i64();

    shiftfn(rm1, rm1, constimm);
    shiftfn(rm2, rm2, constimm);

    tcg_gen_concat_i32_i64(rtmp, rm1, rm2);

    narrowfn(rm1, tcg_env, rtmp);
    write_neon_element32(rm1, a->vd, 0, MO_32);

    shiftfn(rm3, rm3, constimm);
    shiftfn(rm4, rm4, constimm);

    tcg_gen_concat_i32_i64(rtmp, rm3, rm4);

    narrowfn(rm3, tcg_env, rtmp);
    write_neon_element32(rm3, a->vd, 1, MO_32);
    return true;
}

#define DO_2SN_64(INSN, FUNC, NARROWFUNC)                               \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC);             \
    }
#define DO_2SN_32(INSN, FUNC, NARROWFUNC)                               \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC);             \
    }

static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    tcg_gen_extrl_i64_i32(dest, src);
}

static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    gen_helper_neon_narrow_u16(dest, src);
}

static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    gen_helper_neon_narrow_u8(dest, src);
}

DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)

DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)

DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)

DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)
DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32)
DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16)
DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8)

DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32)
DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16)
DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8)

DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32)
DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16)
DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)

DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)

static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
                         NeonGenWidenFn *widenfn, bool u)
{
    TCGv_i64 tmp;
    TCGv_i32 rm0, rm1;
    uint64_t widen_mask = 0;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is a widen-and-shift operation. The shift is always less
     * than the width of the source type, so after widening the input
     * vector we can simply shift the whole 64-bit widened register,
     * and then clear the potential overflow bits resulting from left
     * bits of the narrow input appearing as right bits of the left
     * neighbour narrow input. Calculate a mask of bits to clear.
     */
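    /*
     * Worked example: size == 0 (8->16 widen) with shift == 3 gives
     * esize == 8 and widen_mask == 0x07 duplicated into each 16-bit
     * lane (0x0007000700070007); clearing those low bits drops the
     * neighbouring element's bits dragged in by the 64-bit shift.
     */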
    if ((a->shift != 0) && (a->size < 2 || u)) {
        int esize = 8 << a->size;
        widen_mask = MAKE_64BIT_MASK(0, esize);
        widen_mask >>= esize - a->shift;
        widen_mask = dup_const(a->size + 1, widen_mask);
    }

    rm0 = tcg_temp_new_i32();
    rm1 = tcg_temp_new_i32();
    read_neon_element32(rm0, a->vm, 0, MO_32);
    read_neon_element32(rm1, a->vm, 1, MO_32);
    tmp = tcg_temp_new_i64();

    widenfn(tmp, rm0);
    if (a->shift != 0) {
        tcg_gen_shli_i64(tmp, tmp, a->shift);
        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
    }
    write_neon_element64(tmp, a->vd, 0, MO_64);

    widenfn(tmp, rm1);
    if (a->shift != 0) {
        tcg_gen_shli_i64(tmp, tmp, a->shift);
        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
    }
    write_neon_element64(tmp, a->vd, 1, MO_64);
    return true;
}

static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
    };
    return do_vshll_2sh(s, a, widenfn[a->size], false);
}

static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
    };
    return do_vshll_2sh(s, a, widenfn[a->size], true);
}

static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
                      gen_helper_gvec_2_ptr *fn)
{
    /* FP operations in 2-reg-and-shift group */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rm_ofs = neon_full_reg_offset(a->vm);
    TCGv_ptr fpst;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn);
    return true;
}

#define DO_FP_2SH(INSN, FUNC)                                           \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_fp_2sh(s, a, FUNC);                                   \
    }

DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)

DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)

static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
                        GVecGen2iFn *fn)
{
    uint64_t imm;
    int reg_ofs, vec_size;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    reg_ofs = neon_full_reg_offset(a->vd);
    vec_size = a->q ? 16 : 8;
    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size);
    return true;
}

static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
}

static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
1586     GVecGen2iFn *fn;
1587 
1588     if ((a->cmode & 1) && a->cmode < 12) {
1589         /* for op=1, the imm will be inverted, so BIC becomes AND. */
1590         fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
1591     } else {
1592         /* There is one unallocated cmode/op combination in this space */
1593         if (a->cmode == 15 && a->op == 1) {
1594             return false;
1595         }
1596         fn = gen_VMOV_1r;
1597     }
1598     return do_1reg_imm(s, a, fn);
1599 }
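/*
 * Worked example for the decode above: cmode = 0b1001, op = 1 is an
 * odd cmode below 12, so it takes the VORR/VBIC arm; because op = 1
 * means asimd_imm_const() returns the immediate already inverted,
 * VBIC can be implemented with tcg_gen_gvec_andi.
 */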
1600 
1601 static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
1602                            NeonGenWidenFn *widenfn,
1603                            NeonGenTwo64OpFn *opfn,
1604                            int src1_mop, int src2_mop)
1605 {
1606     /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
1607     TCGv_i64 rn0_64, rn1_64, rm_64;
1608 
1609     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1610         return false;
1611     }
1612 
1613     /* UNDEF accesses to D16-D31 if they don't exist. */
1614     if (!dc_isar_feature(aa32_simd_r32, s) &&
1615         ((a->vd | a->vn | a->vm) & 0x10)) {
1616         return false;
1617     }
1618 
1619     if (!opfn) {
1620         /* size == 3 case, which is an entirely different insn group */
1621         return false;
1622     }
1623 
1624     if ((a->vd & 1) || (src1_mop == MO_UQ && (a->vn & 1))) {
1625         return false;
1626     }
1627 
1628     if (!vfp_access_check(s)) {
1629         return true;
1630     }
1631 
1632     rn0_64 = tcg_temp_new_i64();
1633     rn1_64 = tcg_temp_new_i64();
1634     rm_64 = tcg_temp_new_i64();
1635 
1636     if (src1_mop >= 0) {
1637         read_neon_element64(rn0_64, a->vn, 0, src1_mop);
1638     } else {
1639         TCGv_i32 tmp = tcg_temp_new_i32();
1640         read_neon_element32(tmp, a->vn, 0, MO_32);
1641         widenfn(rn0_64, tmp);
1642     }
1643     if (src2_mop >= 0) {
1644         read_neon_element64(rm_64, a->vm, 0, src2_mop);
1645     } else {
1646         TCGv_i32 tmp = tcg_temp_new_i32();
1647         read_neon_element32(tmp, a->vm, 0, MO_32);
1648         widenfn(rm_64, tmp);
1649     }
1650 
1651     opfn(rn0_64, rn0_64, rm_64);
1652 
1653     /*
1654      * Load second pass inputs before storing the first pass result, to
1655      * avoid incorrect results if a narrow input overlaps with the result.
1656      */
1657     if (src1_mop >= 0) {
1658         read_neon_element64(rn1_64, a->vn, 1, src1_mop);
1659     } else {
1660         TCGv_i32 tmp = tcg_temp_new_i32();
1661         read_neon_element32(tmp, a->vn, 1, MO_32);
1662         widenfn(rn1_64, tmp);
1663     }
1664     if (src2_mop >= 0) {
1665         read_neon_element64(rm_64, a->vm, 1, src2_mop);
1666     } else {
1667         TCGv_i32 tmp = tcg_temp_new_i32();
1668         read_neon_element32(tmp, a->vm, 1, MO_32);
1669         widenfn(rm_64, tmp);
1670     }
1671 
1672     write_neon_element64(rn0_64, a->vd, 0, MO_64);
1673 
1674     opfn(rn1_64, rn1_64, rm_64);
1675     write_neon_element64(rn1_64, a->vd, 1, MO_64);
1676 
1677     return true;
1678 }
1679 
1680 #define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN)                        \
1681     static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
1682     {                                                                   \
1683         static NeonGenWidenFn * const widenfn[] = {                     \
1684             gen_helper_neon_widen_##S##8,                               \
1685             gen_helper_neon_widen_##S##16,                              \
1686             NULL, NULL,                                                 \
1687         };                                                              \
1688         static NeonGenTwo64OpFn * const addfn[] = {                     \
1689             gen_helper_neon_##OP##l_u16,                                \
1690             gen_helper_neon_##OP##l_u32,                                \
1691             tcg_gen_##OP##_i64,                                         \
1692             NULL,                                                       \
1693         };                                                              \
1694         int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1;          \
1695         return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size],   \
1696                               SRC1WIDE ? MO_UQ : narrow_mop,            \
1697                               narrow_mop);                              \
1698     }
1699 
1700 DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
1701 DO_PREWIDEN(VADDL_U, u, add, false, 0)
1702 DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
1703 DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
1704 DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
1705 DO_PREWIDEN(VADDW_U, u, add, true, 0)
1706 DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
1707 DO_PREWIDEN(VSUBW_U, u, sub, true, 0)
1708 
1709 static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
1710                          NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
1711 {
1712     /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
1713     TCGv_i64 rn_64, rm_64;
1714     TCGv_i32 rd0, rd1;
1715 
1716     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1717         return false;
1718     }
1719 
1720     /* UNDEF accesses to D16-D31 if they don't exist. */
1721     if (!dc_isar_feature(aa32_simd_r32, s) &&
1722         ((a->vd | a->vn | a->vm) & 0x10)) {
1723         return false;
1724     }
1725 
1726     if (!opfn || !narrowfn) {
1727         /* size == 3 case, which is an entirely different insn group */
1728         return false;
1729     }
1730 
1731     if ((a->vn | a->vm) & 1) {
1732         return false;
1733     }
1734 
1735     if (!vfp_access_check(s)) {
1736         return true;
1737     }
1738 
1739     rn_64 = tcg_temp_new_i64();
1740     rm_64 = tcg_temp_new_i64();
1741     rd0 = tcg_temp_new_i32();
1742     rd1 = tcg_temp_new_i32();
1743 
1744     read_neon_element64(rn_64, a->vn, 0, MO_64);
1745     read_neon_element64(rm_64, a->vm, 0, MO_64);
1746 
1747     opfn(rn_64, rn_64, rm_64);
1748 
1749     narrowfn(rd0, rn_64);
1750 
1751     read_neon_element64(rn_64, a->vn, 1, MO_64);
1752     read_neon_element64(rm_64, a->vm, 1, MO_64);
1753 
1754     opfn(rn_64, rn_64, rm_64);
1755 
1756     narrowfn(rd1, rn_64);
1757 
1758     write_neon_element32(rd0, a->vd, 0, MO_32);
1759     write_neon_element32(rd1, a->vd, 1, MO_32);
1760 
1761     return true;
1762 }
1763 
1764 #define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP)                       \
1765     static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
1766     {                                                                   \
1767         static NeonGenTwo64OpFn * const addfn[] = {                     \
1768             gen_helper_neon_##OP##l_u16,                                \
1769             gen_helper_neon_##OP##l_u32,                                \
1770             tcg_gen_##OP##_i64,                                         \
1771             NULL,                                                       \
1772         };                                                              \
1773         static NeonGenNarrowFn * const narrowfn[] = {                   \
1774             gen_helper_neon_##NARROWTYPE##_high_u8,                     \
1775             gen_helper_neon_##NARROWTYPE##_high_u16,                    \
1776             EXTOP,                                                      \
1777             NULL,                                                       \
1778         };                                                              \
1779         return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]);   \
1780     }
1781 
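/*
 * Adding 1 << 31 before taking the high half implements rounding to
 * nearest: e.g. 0x00000001_80000000 + 0x80000000 = 0x00000002_00000000,
 * so the extracted high word rounds up to 2, whereas
 * 0x00000001_7fffffff stays at 1.
 */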
1782 static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
1783 {
1784     tcg_gen_addi_i64(rn, rn, 1u << 31);
1785     tcg_gen_extrh_i64_i32(rd, rn);
1786 }
1787 
1788 DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
1789 DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
1790 DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
1791 DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
1792 
1793 static bool do_long_3d(DisasContext *s, arg_3diff *a,
1794                        NeonGenTwoOpWidenFn *opfn,
1795                        NeonGenTwo64OpFn *accfn)
1796 {
1797     /*
1798      * 3-regs different lengths, long operations.
1799      * These perform an operation on two inputs that returns a double-width
1800      * result, and then possibly perform an accumulation operation of
1801      * that result into the double-width destination.
1802      */
1803     TCGv_i64 rd0, rd1, tmp;
1804     TCGv_i32 rn, rm;
1805 
1806     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1807         return false;
1808     }
1809 
1810     /* UNDEF accesses to D16-D31 if they don't exist. */
1811     if (!dc_isar_feature(aa32_simd_r32, s) &&
1812         ((a->vd | a->vn | a->vm) & 0x10)) {
1813         return false;
1814     }
1815 
1816     if (!opfn) {
1817         /* size == 3 case, which is an entirely different insn group */
1818         return false;
1819     }
1820 
1821     if (a->vd & 1) {
1822         return false;
1823     }
1824 
1825     if (!vfp_access_check(s)) {
1826         return true;
1827     }
1828 
1829     rd0 = tcg_temp_new_i64();
1830     rd1 = tcg_temp_new_i64();
1831 
1832     rn = tcg_temp_new_i32();
1833     rm = tcg_temp_new_i32();
1834     read_neon_element32(rn, a->vn, 0, MO_32);
1835     read_neon_element32(rm, a->vm, 0, MO_32);
1836     opfn(rd0, rn, rm);
1837 
1838     read_neon_element32(rn, a->vn, 1, MO_32);
1839     read_neon_element32(rm, a->vm, 1, MO_32);
1840     opfn(rd1, rn, rm);
1841 
1842     /* Don't store results until after all loads: they might overlap */
1843     if (accfn) {
1844         tmp = tcg_temp_new_i64();
1845         read_neon_element64(tmp, a->vd, 0, MO_64);
1846         accfn(rd0, tmp, rd0);
1847         read_neon_element64(tmp, a->vd, 1, MO_64);
1848         accfn(rd1, tmp, rd1);
1849     }
1850 
1851     write_neon_element64(rd0, a->vd, 0, MO_64);
1852     write_neon_element64(rd1, a->vd, 1, MO_64);
1853 
1854     return true;
1855 }
1856 
1857 static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
1858 {
1859     static NeonGenTwoOpWidenFn * const opfn[] = {
1860         gen_helper_neon_abdl_s16,
1861         gen_helper_neon_abdl_s32,
1862         gen_helper_neon_abdl_s64,
1863         NULL,
1864     };
1865 
1866     return do_long_3d(s, a, opfn[a->size], NULL);
1867 }
1868 
1869 static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
1870 {
1871     static NeonGenTwoOpWidenFn * const opfn[] = {
1872         gen_helper_neon_abdl_u16,
1873         gen_helper_neon_abdl_u32,
1874         gen_helper_neon_abdl_u64,
1875         NULL,
1876     };
1877 
1878     return do_long_3d(s, a, opfn[a->size], NULL);
1879 }
1880 
1881 static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
1882 {
1883     static NeonGenTwoOpWidenFn * const opfn[] = {
1884         gen_helper_neon_abdl_s16,
1885         gen_helper_neon_abdl_s32,
1886         gen_helper_neon_abdl_s64,
1887         NULL,
1888     };
1889     static NeonGenTwo64OpFn * const addfn[] = {
1890         gen_helper_neon_addl_u16,
1891         gen_helper_neon_addl_u32,
1892         tcg_gen_add_i64,
1893         NULL,
1894     };
1895 
1896     return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
1897 }
1898 
1899 static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
1900 {
1901     static NeonGenTwoOpWidenFn * const opfn[] = {
1902         gen_helper_neon_abdl_u16,
1903         gen_helper_neon_abdl_u32,
1904         gen_helper_neon_abdl_u64,
1905         NULL,
1906     };
1907     static NeonGenTwo64OpFn * const addfn[] = {
1908         gen_helper_neon_addl_u16,
1909         gen_helper_neon_addl_u32,
1910         tcg_gen_add_i64,
1911         NULL,
1912     };
1913 
1914     return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
1915 }
1916 
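/*
 * tcg_gen_mul[su]2_i32 computes the full 64-bit product of two
 * 32-bit values as separate low and high halves;
 * tcg_gen_concat_i32_i64 then reassembles rd = (hi << 32) | lo.
 */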
1917 static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
1918 {
1919     TCGv_i32 lo = tcg_temp_new_i32();
1920     TCGv_i32 hi = tcg_temp_new_i32();
1921 
1922     tcg_gen_muls2_i32(lo, hi, rn, rm);
1923     tcg_gen_concat_i32_i64(rd, lo, hi);
1924 }
1925 
1926 static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
1927 {
1928     TCGv_i32 lo = tcg_temp_new_i32();
1929     TCGv_i32 hi = tcg_temp_new_i32();
1930 
1931     tcg_gen_mulu2_i32(lo, hi, rn, rm);
1932     tcg_gen_concat_i32_i64(rd, lo, hi);
1933 }
1934 
1935 static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
1936 {
1937     static NeonGenTwoOpWidenFn * const opfn[] = {
1938         gen_helper_neon_mull_s8,
1939         gen_helper_neon_mull_s16,
1940         gen_mull_s32,
1941         NULL,
1942     };
1943 
1944     return do_long_3d(s, a, opfn[a->size], NULL);
1945 }
1946 
1947 static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
1948 {
1949     static NeonGenTwoOpWidenFn * const opfn[] = {
1950         gen_helper_neon_mull_u8,
1951         gen_helper_neon_mull_u16,
1952         gen_mull_u32,
1953         NULL,
1954     };
1955 
1956     return do_long_3d(s, a, opfn[a->size], NULL);
1957 }
1958 
1959 #define DO_VMLAL(INSN, MULL, ACC)                                       \
1960     static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
1961     {                                                                   \
1962         static NeonGenTwoOpWidenFn * const opfn[] = {                   \
1963             gen_helper_neon_##MULL##8,                                  \
1964             gen_helper_neon_##MULL##16,                                 \
1965             gen_##MULL##32,                                             \
1966             NULL,                                                       \
1967         };                                                              \
1968         static NeonGenTwo64OpFn * const accfn[] = {                     \
1969             gen_helper_neon_##ACC##l_u16,                               \
1970             gen_helper_neon_##ACC##l_u32,                               \
1971             tcg_gen_##ACC##_i64,                                        \
1972             NULL,                                                       \
1973         };                                                              \
1974         return do_long_3d(s, a, opfn[a->size], accfn[a->size]);         \
1975     }
1976 
1977 DO_VMLAL(VMLAL_S, mull_s, add)
1978 DO_VMLAL(VMLAL_U, mull_u, add)
1979 DO_VMLAL(VMLSL_S, mull_s, sub)
1980 DO_VMLAL(VMLSL_U, mull_u, sub)
1981 
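/*
 * VQDMULL doubles the product. Doing the doubling as a saturating
 * add of the product to itself (rather than as a shift) provides the
 * required saturation: e.g. 0x8000 * 0x8000 = 0x40000000, and
 * 0x40000000 +sat 0x40000000 saturates to 0x7fffffff.
 */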
1982 static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
1983 {
1984     gen_helper_neon_mull_s16(rd, rn, rm);
1985     gen_helper_neon_addl_saturate_s32(rd, tcg_env, rd, rd);
1986 }
1987 
1988 static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
1989 {
1990     gen_mull_s32(rd, rn, rm);
1991     gen_helper_neon_addl_saturate_s64(rd, tcg_env, rd, rd);
1992 }
1993 
1994 static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
1995 {
1996     static NeonGenTwoOpWidenFn * const opfn[] = {
1997         NULL,
1998         gen_VQDMULL_16,
1999         gen_VQDMULL_32,
2000         NULL,
2001     };
2002 
2003     return do_long_3d(s, a, opfn[a->size], NULL);
2004 }
2005 
2006 static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2007 {
2008     gen_helper_neon_addl_saturate_s32(rd, tcg_env, rn, rm);
2009 }
2010 
2011 static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2012 {
2013     gen_helper_neon_addl_saturate_s64(rd, tcg_env, rn, rm);
2014 }
2015 
2016 static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
2017 {
2018     static NeonGenTwoOpWidenFn * const opfn[] = {
2019         NULL,
2020         gen_VQDMULL_16,
2021         gen_VQDMULL_32,
2022         NULL,
2023     };
2024     static NeonGenTwo64OpFn * const accfn[] = {
2025         NULL,
2026         gen_VQDMLAL_acc_16,
2027         gen_VQDMLAL_acc_32,
2028         NULL,
2029     };
2030 
2031     return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
2032 }
2033 
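/*
 * The accumulate step of VQDMLSL subtracts: it is implemented as a
 * negation of rm followed by a saturating add, reusing the same
 * saturating-add helpers as VQDMLAL.
 */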
2034 static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2035 {
2036     gen_helper_neon_negl_u32(rm, rm);
2037     gen_helper_neon_addl_saturate_s32(rd, tcg_env, rn, rm);
2038 }
2039 
2040 static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2041 {
2042     tcg_gen_neg_i64(rm, rm);
2043     gen_helper_neon_addl_saturate_s64(rd, tcg_env, rn, rm);
2044 }
2045 
2046 static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
2047 {
2048     static NeonGenTwoOpWidenFn * const opfn[] = {
2049         NULL,
2050         gen_VQDMULL_16,
2051         gen_VQDMULL_32,
2052         NULL,
2053     };
2054     static NeonGenTwo64OpFn * const accfn[] = {
2055         NULL,
2056         gen_VQDMLSL_acc_16,
2057         gen_VQDMLSL_acc_32,
2058         NULL,
2059     };
2060 
2061     return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
2062 }
2063 
2064 static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
2065 {
2066     gen_helper_gvec_3 *fn_gvec;
2067 
2068     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2069         return false;
2070     }
2071 
2072     /* UNDEF accesses to D16-D31 if they don't exist. */
2073     if (!dc_isar_feature(aa32_simd_r32, s) &&
2074         ((a->vd | a->vn | a->vm) & 0x10)) {
2075         return false;
2076     }
2077 
2078     if (a->vd & 1) {
2079         return false;
2080     }
2081 
2082     switch (a->size) {
2083     case 0:
2084         fn_gvec = gen_helper_neon_pmull_h;
2085         break;
2086     case 2:
2087         if (!dc_isar_feature(aa32_pmull, s)) {
2088             return false;
2089         }
2090         fn_gvec = gen_helper_gvec_pmull_q;
2091         break;
2092     default:
2093         return false;
2094     }
2095 
2096     if (!vfp_access_check(s)) {
2097         return true;
2098     }
2099 
2100     tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd),
2101                        neon_full_reg_offset(a->vn),
2102                        neon_full_reg_offset(a->vm),
2103                        16, 16, 0, fn_gvec);
2104     return true;
2105 }
2106 
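/*
 * Duplicate one 16-bit half of 'var' into both halves, so that a
 * 16-bit scalar can be fed to helpers which operate on a pair of
 * 16-bit elements packed into an i32.
 */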
2107 static void gen_neon_dup_low16(TCGv_i32 var)
2108 {
2109     TCGv_i32 tmp = tcg_temp_new_i32();
2110     tcg_gen_ext16u_i32(var, var);
2111     tcg_gen_shli_i32(tmp, var, 16);
2112     tcg_gen_or_i32(var, var, tmp);
2113 }
2114 
2115 static void gen_neon_dup_high16(TCGv_i32 var)
2116 {
2117     TCGv_i32 tmp = tcg_temp_new_i32();
2118     tcg_gen_andi_i32(var, var, 0xffff0000);
2119     tcg_gen_shri_i32(tmp, var, 16);
2120     tcg_gen_or_i32(var, var, tmp);
2121 }
2122 
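/*
 * Here 'reg' is the raw scalar field: for a 16-bit scalar, bits [2:0]
 * select the D register, bit 3 picks the 16-bit half within a 32-bit
 * word and bit 4 picks the word; for a 32-bit scalar, bits [3:0]
 * select the register and bit 4 the word.
 */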
2123 static inline TCGv_i32 neon_get_scalar(int size, int reg)
2124 {
2125     TCGv_i32 tmp = tcg_temp_new_i32();
2126     if (size == MO_16) {
2127         read_neon_element32(tmp, reg & 7, reg >> 4, MO_32);
2128         if (reg & 8) {
2129             gen_neon_dup_high16(tmp);
2130         } else {
2131             gen_neon_dup_low16(tmp);
2132         }
2133     } else {
2134         read_neon_element32(tmp, reg & 15, reg >> 4, MO_32);
2135     }
2136     return tmp;
2137 }
2138 
2139 static bool do_2scalar(DisasContext *s, arg_2scalar *a,
2140                        NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
2141 {
2142     /*
2143      * Two registers and a scalar: perform an operation between
2144      * the input elements and the scalar, and then possibly
2145      * perform an accumulation operation of that result into the
2146      * destination.
2147      */
2148     TCGv_i32 scalar, tmp;
2149     int pass;
2150 
2151     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2152         return false;
2153     }
2154 
2155     /* UNDEF accesses to D16-D31 if they don't exist. */
2156     if (!dc_isar_feature(aa32_simd_r32, s) &&
2157         ((a->vd | a->vn | a->vm) & 0x10)) {
2158         return false;
2159     }
2160 
2161     if (!opfn) {
2162         /* Bad size (including size == 3, which is a different insn group) */
2163         return false;
2164     }
2165 
2166     if (a->q && ((a->vd | a->vn) & 1)) {
2167         return false;
2168     }
2169 
2170     if (!vfp_access_check(s)) {
2171         return true;
2172     }
2173 
2174     scalar = neon_get_scalar(a->size, a->vm);
2175     tmp = tcg_temp_new_i32();
2176 
2177     for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
2178         read_neon_element32(tmp, a->vn, pass, MO_32);
2179         opfn(tmp, tmp, scalar);
2180         if (accfn) {
2181             TCGv_i32 rd = tcg_temp_new_i32();
2182             read_neon_element32(rd, a->vd, pass, MO_32);
2183             accfn(tmp, rd, tmp);
2184         }
2185         write_neon_element32(tmp, a->vd, pass, MO_32);
2186     }
2187     return true;
2188 }
2189 
2190 static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
2191 {
2192     static NeonGenTwoOpFn * const opfn[] = {
2193         NULL,
2194         gen_helper_neon_mul_u16,
2195         tcg_gen_mul_i32,
2196         NULL,
2197     };
2198 
2199     return do_2scalar(s, a, opfn[a->size], NULL);
2200 }
2201 
2202 static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
2203 {
2204     static NeonGenTwoOpFn * const opfn[] = {
2205         NULL,
2206         gen_helper_neon_mul_u16,
2207         tcg_gen_mul_i32,
2208         NULL,
2209     };
2210     static NeonGenTwoOpFn * const accfn[] = {
2211         NULL,
2212         gen_helper_neon_add_u16,
2213         tcg_gen_add_i32,
2214         NULL,
2215     };
2216 
2217     return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
2218 }
2219 
2220 static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
2221 {
2222     static NeonGenTwoOpFn * const opfn[] = {
2223         NULL,
2224         gen_helper_neon_mul_u16,
2225         tcg_gen_mul_i32,
2226         NULL,
2227     };
2228     static NeonGenTwoOpFn * const accfn[] = {
2229         NULL,
2230         gen_helper_neon_sub_u16,
2231         tcg_gen_sub_i32,
2232         NULL,
2233     };
2234 
2235     return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
2236 }
2237 
2238 static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
2239                               gen_helper_gvec_3_ptr *fn)
2240 {
2241     /* Two registers and a scalar, using gvec */
2242     int vec_size = a->q ? 16 : 8;
2243     int rd_ofs = neon_full_reg_offset(a->vd);
2244     int rn_ofs = neon_full_reg_offset(a->vn);
2245     int rm_ofs;
2246     int idx;
2247     TCGv_ptr fpstatus;
2248 
2249     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2250         return false;
2251     }
2252 
2253     /* UNDEF accesses to D16-D31 if they don't exist. */
2254     if (!dc_isar_feature(aa32_simd_r32, s) &&
2255         ((a->vd | a->vn | a->vm) & 0x10)) {
2256         return false;
2257     }
2258 
2259     if (!fn) {
2260         /* Bad size (including size == 3, which is a different insn group) */
2261         return false;
2262     }
2263 
2264     if (a->q && ((a->vd | a->vn) & 1)) {
2265         return false;
2266     }
2267 
2268     if (!vfp_access_check(s)) {
2269         return true;
2270     }
2271 
2272     /* a->vm is M:Vm, which encodes both register and index */
2273     idx = extract32(a->vm, a->size + 2, 2);
2274     a->vm = extract32(a->vm, 0, a->size + 2);
2275     rm_ofs = neon_full_reg_offset(a->vm);
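    /*
     * e.g. for fp16 (a->size == 1) the index is bits [4:3] (four
     * 16-bit elements per 64-bit register) and the register number is
     * bits [2:0]; for fp32 (a->size == 2) the index comes from bit 4
     * and the register number from bits [3:0].
     */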
2276 
2277     fpstatus = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
2278     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
2279                        vec_size, vec_size, idx, fn);
2280     return true;
2281 }
2282 
2283 #define DO_VMUL_F_2sc(NAME, FUNC)                                       \
2284     static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a)   \
2285     {                                                                   \
2286         static gen_helper_gvec_3_ptr * const opfn[] = {                 \
2287             NULL,                                                       \
2288             gen_helper_##FUNC##_h,                                      \
2289             gen_helper_##FUNC##_s,                                      \
2290             NULL,                                                       \
2291         };                                                              \
2292         if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \
2293             return false;                                               \
2294         }                                                               \
2295         return do_2scalar_fp_vec(s, a, opfn[a->size]);                  \
2296     }
2297 
2298 DO_VMUL_F_2sc(VMUL, gvec_fmul_idx)
2299 DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx)
2300 DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx)
2301 
2302 WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
2303 WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
2304 WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
2305 WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)
2306 
2307 static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
2308 {
2309     static NeonGenTwoOpFn * const opfn[] = {
2310         NULL,
2311         gen_VQDMULH_16,
2312         gen_VQDMULH_32,
2313         NULL,
2314     };
2315 
2316     return do_2scalar(s, a, opfn[a->size], NULL);
2317 }
2318 
2319 static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
2320 {
2321     static NeonGenTwoOpFn * const opfn[] = {
2322         NULL,
2323         gen_VQRDMULH_16,
2324         gen_VQRDMULH_32,
2325         NULL,
2326     };
2327 
2328     return do_2scalar(s, a, opfn[a->size], NULL);
2329 }
2330 
2331 static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
2332                             NeonGenThreeOpEnvFn *opfn)
2333 {
2334     /*
2335      * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
2336      * performs a kind of fused op-then-accumulate using a helper
2337      * function that takes all of rd, rn and the scalar at once.
2338      */
2339     TCGv_i32 scalar, rn, rd;
2340     int pass;
2341 
2342     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2343         return false;
2344     }
2345 
2346     if (!dc_isar_feature(aa32_rdm, s)) {
2347         return false;
2348     }
2349 
2350     /* UNDEF accesses to D16-D31 if they don't exist. */
2351     if (!dc_isar_feature(aa32_simd_r32, s) &&
2352         ((a->vd | a->vn | a->vm) & 0x10)) {
2353         return false;
2354     }
2355 
2356     if (!opfn) {
2357         /* Bad size (including size == 3, which is a different insn group) */
2358         return false;
2359     }
2360 
2361     if (a->q && ((a->vd | a->vn) & 1)) {
2362         return false;
2363     }
2364 
2365     if (!vfp_access_check(s)) {
2366         return true;
2367     }
2368 
2369     scalar = neon_get_scalar(a->size, a->vm);
2370     rn = tcg_temp_new_i32();
2371     rd = tcg_temp_new_i32();
2372 
2373     for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
2374         read_neon_element32(rn, a->vn, pass, MO_32);
2375         read_neon_element32(rd, a->vd, pass, MO_32);
2376         opfn(rd, tcg_env, rn, scalar, rd);
2377         write_neon_element32(rd, a->vd, pass, MO_32);
2378     }
2379     return true;
2380 }
2381 
2382 static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
2383 {
2384     static NeonGenThreeOpEnvFn * const opfn[] = {
2385         NULL,
2386         gen_helper_neon_qrdmlah_s16,
2387         gen_helper_neon_qrdmlah_s32,
2388         NULL,
2389     };
2390     return do_vqrdmlah_2sc(s, a, opfn[a->size]);
2391 }
2392 
2393 static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
2394 {
2395     static NeonGenThreeOpEnvFn * const opfn[] = {
2396         NULL,
2397         gen_helper_neon_qrdmlsh_s16,
2398         gen_helper_neon_qrdmlsh_s32,
2399         NULL,
2400     };
2401     return do_vqrdmlah_2sc(s, a, opfn[a->size]);
2402 }
2403 
2404 static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
2405                             NeonGenTwoOpWidenFn *opfn,
2406                             NeonGenTwo64OpFn *accfn)
2407 {
2408     /*
2409      * Two registers and a scalar, long operations: perform an
2410      * operation on the input elements and the scalar which produces
2411      * a double-width result, and then possibly perform an accumulation
2412      * operation of that result into the destination.
2413      */
2414     TCGv_i32 scalar, rn;
2415     TCGv_i64 rn0_64, rn1_64;
2416 
2417     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2418         return false;
2419     }
2420 
2421     /* UNDEF accesses to D16-D31 if they don't exist. */
2422     if (!dc_isar_feature(aa32_simd_r32, s) &&
2423         ((a->vd | a->vn | a->vm) & 0x10)) {
2424         return false;
2425     }
2426 
2427     if (!opfn) {
2428         /* Bad size (including size == 3, which is a different insn group) */
2429         return false;
2430     }
2431 
2432     if (a->vd & 1) {
2433         return false;
2434     }
2435 
2436     if (!vfp_access_check(s)) {
2437         return true;
2438     }
2439 
2440     scalar = neon_get_scalar(a->size, a->vm);
2441 
2442     /* Load all inputs before writing any outputs, in case of overlap */
2443     rn = tcg_temp_new_i32();
2444     read_neon_element32(rn, a->vn, 0, MO_32);
2445     rn0_64 = tcg_temp_new_i64();
2446     opfn(rn0_64, rn, scalar);
2447 
2448     read_neon_element32(rn, a->vn, 1, MO_32);
2449     rn1_64 = tcg_temp_new_i64();
2450     opfn(rn1_64, rn, scalar);
2451 
2452     if (accfn) {
2453         TCGv_i64 t64 = tcg_temp_new_i64();
2454         read_neon_element64(t64, a->vd, 0, MO_64);
2455         accfn(rn0_64, t64, rn0_64);
2456         read_neon_element64(t64, a->vd, 1, MO_64);
2457         accfn(rn1_64, t64, rn1_64);
2458     }
2459 
2460     write_neon_element64(rn0_64, a->vd, 0, MO_64);
2461     write_neon_element64(rn1_64, a->vd, 1, MO_64);
2462     return true;
2463 }
2464 
2465 static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
2466 {
2467     static NeonGenTwoOpWidenFn * const opfn[] = {
2468         NULL,
2469         gen_helper_neon_mull_s16,
2470         gen_mull_s32,
2471         NULL,
2472     };
2473 
2474     return do_2scalar_long(s, a, opfn[a->size], NULL);
2475 }
2476 
2477 static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
2478 {
2479     static NeonGenTwoOpWidenFn * const opfn[] = {
2480         NULL,
2481         gen_helper_neon_mull_u16,
2482         gen_mull_u32,
2483         NULL,
2484     };
2485 
2486     return do_2scalar_long(s, a, opfn[a->size], NULL);
2487 }
2488 
2489 #define DO_VMLAL_2SC(INSN, MULL, ACC)                                   \
2490     static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a)     \
2491     {                                                                   \
2492         static NeonGenTwoOpWidenFn * const opfn[] = {                   \
2493             NULL,                                                       \
2494             gen_helper_neon_##MULL##16,                                 \
2495             gen_##MULL##32,                                             \
2496             NULL,                                                       \
2497         };                                                              \
2498         static NeonGenTwo64OpFn * const accfn[] = {                     \
2499             NULL,                                                       \
2500             gen_helper_neon_##ACC##l_u32,                               \
2501             tcg_gen_##ACC##_i64,                                        \
2502             NULL,                                                       \
2503         };                                                              \
2504         return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);    \
2505     }
2506 
2507 DO_VMLAL_2SC(VMLAL_S, mull_s, add)
2508 DO_VMLAL_2SC(VMLAL_U, mull_u, add)
2509 DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
2510 DO_VMLAL_2SC(VMLSL_U, mull_u, sub)
2511 
2512 static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
2513 {
2514     static NeonGenTwoOpWidenFn * const opfn[] = {
2515         NULL,
2516         gen_VQDMULL_16,
2517         gen_VQDMULL_32,
2518         NULL,
2519     };
2520 
2521     return do_2scalar_long(s, a, opfn[a->size], NULL);
2522 }
2523 
2524 static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
2525 {
2526     static NeonGenTwoOpWidenFn * const opfn[] = {
2527         NULL,
2528         gen_VQDMULL_16,
2529         gen_VQDMULL_32,
2530         NULL,
2531     };
2532     static NeonGenTwo64OpFn * const accfn[] = {
2533         NULL,
2534         gen_VQDMLAL_acc_16,
2535         gen_VQDMLAL_acc_32,
2536         NULL,
2537     };
2538 
2539     return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
2540 }
2541 
2542 static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
2543 {
2544     static NeonGenTwoOpWidenFn * const opfn[] = {
2545         NULL,
2546         gen_VQDMULL_16,
2547         gen_VQDMULL_32,
2548         NULL,
2549     };
2550     static NeonGenTwo64OpFn * const accfn[] = {
2551         NULL,
2552         gen_VQDMLSL_acc_16,
2553         gen_VQDMLSL_acc_32,
2554         NULL,
2555     };
2556 
2557     return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
2558 }
2559 
2560 static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
2561 {
2562     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2563         return false;
2564     }
2565 
2566     /* UNDEF accesses to D16-D31 if they don't exist. */
2567     if (!dc_isar_feature(aa32_simd_r32, s) &&
2568         ((a->vd | a->vn | a->vm) & 0x10)) {
2569         return false;
2570     }
2571 
2572     if ((a->vn | a->vm | a->vd) & a->q) {
2573         return false;
2574     }
2575 
2576     if (a->imm > 7 && !a->q) {
2577         return false;
2578     }
2579 
2580     if (!vfp_access_check(s)) {
2581         return true;
2582     }
2583 
2584     if (!a->q) {
2585         /* Extract 64 bits from <Vm:Vn> */
2586         TCGv_i64 left, right, dest;
2587 
2588         left = tcg_temp_new_i64();
2589         right = tcg_temp_new_i64();
2590         dest = tcg_temp_new_i64();
2591 
2592         read_neon_element64(right, a->vn, 0, MO_64);
2593         read_neon_element64(left, a->vm, 0, MO_64);
2594         tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
2595         write_neon_element64(dest, a->vd, 0, MO_64);
2596     } else {
2597         /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
2598         TCGv_i64 left, middle, right, destleft, destright;
2599 
2600         left = tcg_temp_new_i64();
2601         middle = tcg_temp_new_i64();
2602         right = tcg_temp_new_i64();
2603         destleft = tcg_temp_new_i64();
2604         destright = tcg_temp_new_i64();
2605 
2606         if (a->imm < 8) {
2607             read_neon_element64(right, a->vn, 0, MO_64);
2608             read_neon_element64(middle, a->vn, 1, MO_64);
2609             tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
2610             read_neon_element64(left, a->vm, 0, MO_64);
2611             tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
2612         } else {
2613             read_neon_element64(right, a->vn, 1, MO_64);
2614             read_neon_element64(middle, a->vm, 0, MO_64);
2615             tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
2616             read_neon_element64(left, a->vm, 1, MO_64);
2617             tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
2618         }
2619 
2620         write_neon_element64(destright, a->vd, 0, MO_64);
2621         write_neon_element64(destleft, a->vd, 1, MO_64);
2622     }
2623     return true;
2624 }
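/*
 * Illustration of the tcg_gen_extract2_i64() calls above: the op
 * produces the low 64 bits of the 128-bit value (high:low) >> ofs.
 * So in the non-quad case with imm == 3, dest gets bytes 7..3 of Vn
 * as its low five bytes and bytes 2..0 of Vm as its high three
 * bytes: exactly the VEXT byte window starting at byte 3 of <Vm:Vn>.
 */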
2625 
2626 static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
2627 {
2628     TCGv_i64 val, def;
2629     TCGv_i32 desc;
2630 
2631     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2632         return false;
2633     }
2634 
2635     /* UNDEF accesses to D16-D31 if they don't exist. */
2636     if (!dc_isar_feature(aa32_simd_r32, s) &&
2637         ((a->vd | a->vn | a->vm) & 0x10)) {
2638         return false;
2639     }
2640 
2641     if ((a->vn + a->len + 1) > 32) {
2642         /*
2643          * This is UNPREDICTABLE; we choose to UNDEF to avoid the
2644          * helper function running off the end of the register file.
2645          */
2646         return false;
2647     }
2648 
2649     if (!vfp_access_check(s)) {
2650         return true;
2651     }
2652 
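    /*
     * The helper needs to know which registers make up the table:
     * desc packs the first table register (vn) together with the
     * number of additional table registers (len) into one constant.
     */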
2653     desc = tcg_constant_i32((a->vn << 2) | a->len);
2654     def = tcg_temp_new_i64();
2655     if (a->op) {
2656         read_neon_element64(def, a->vd, 0, MO_64);
2657     } else {
2658         tcg_gen_movi_i64(def, 0);
2659     }
2660     val = tcg_temp_new_i64();
2661     read_neon_element64(val, a->vm, 0, MO_64);
2662 
2663     gen_helper_neon_tbl(val, tcg_env, desc, val, def);
2664     write_neon_element64(val, a->vd, 0, MO_64);
2665     return true;
2666 }
2667 
2668 static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
2669 {
2670     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2671         return false;
2672     }
2673 
2674     /* UNDEF accesses to D16-D31 if they don't exist. */
2675     if (!dc_isar_feature(aa32_simd_r32, s) &&
2676         ((a->vd | a->vm) & 0x10)) {
2677         return false;
2678     }
2679 
2680     if (a->vd & a->q) {
2681         return false;
2682     }
2683 
2684     if (!vfp_access_check(s)) {
2685         return true;
2686     }
2687 
2688     tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd),
2689                          neon_element_offset(a->vm, a->index, a->size),
2690                          a->q ? 16 : 8, a->q ? 16 : 8);
2691     return true;
2692 }
2693 
2694 static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
2695 {
2696     int pass, half;
2697     TCGv_i32 tmp[2];
2698 
2699     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2700         return false;
2701     }
2702 
2703     /* UNDEF accesses to D16-D31 if they don't exist. */
2704     if (!dc_isar_feature(aa32_simd_r32, s) &&
2705         ((a->vd | a->vm) & 0x10)) {
2706         return false;
2707     }
2708 
2709     if ((a->vd | a->vm) & a->q) {
2710         return false;
2711     }
2712 
2713     if (a->size == 3) {
2714         return false;
2715     }
2716 
2717     if (!vfp_access_check(s)) {
2718         return true;
2719     }
2720 
2721     tmp[0] = tcg_temp_new_i32();
2722     tmp[1] = tcg_temp_new_i32();
2723 
2724     for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
2725         for (half = 0; half < 2; half++) {
2726             read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32);
2727             switch (a->size) {
2728             case 0:
2729                 tcg_gen_bswap32_i32(tmp[half], tmp[half]);
2730                 break;
2731             case 1:
2732                 gen_swap_half(tmp[half], tmp[half]);
2733                 break;
2734             case 2:
2735                 break;
2736             default:
2737                 g_assert_not_reached();
2738             }
2739         }
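        /*
         * The per-word operation above reverses elements within each
         * 32-bit half; writing the two halves back swapped (below)
         * completes the 64-bit reversal.
         */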
2740         write_neon_element32(tmp[1], a->vd, pass * 2, MO_32);
2741         write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32);
2742     }
2743     return true;
2744 }
2745 
2746 static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
2747                               NeonGenWidenFn *widenfn,
2748                               NeonGenTwo64OpFn *opfn,
2749                               NeonGenTwo64OpFn *accfn)
2750 {
2751     /*
2752      * Pairwise long operations: widen both halves of the pair,
2753      * combine the pairs with the opfn, and then possibly accumulate
2754      * into the destination with the accfn.
2755      */
2756     int pass;
2757 
2758     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2759         return false;
2760     }
2761 
2762     /* UNDEF accesses to D16-D31 if they don't exist. */
2763     if (!dc_isar_feature(aa32_simd_r32, s) &&
2764         ((a->vd | a->vm) & 0x10)) {
2765         return false;
2766     }
2767 
2768     if ((a->vd | a->vm) & a->q) {
2769         return false;
2770     }
2771 
2772     if (!widenfn) {
2773         return false;
2774     }
2775 
2776     if (!vfp_access_check(s)) {
2777         return true;
2778     }
2779 
2780     for (pass = 0; pass < a->q + 1; pass++) {
2781         TCGv_i32 tmp;
2782         TCGv_i64 rm0_64, rm1_64, rd_64;
2783 
2784         rm0_64 = tcg_temp_new_i64();
2785         rm1_64 = tcg_temp_new_i64();
2786         rd_64 = tcg_temp_new_i64();
2787 
2788         tmp = tcg_temp_new_i32();
2789         read_neon_element32(tmp, a->vm, pass * 2, MO_32);
2790         widenfn(rm0_64, tmp);
2791         read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32);
2792         widenfn(rm1_64, tmp);
2793 
2794         opfn(rd_64, rm0_64, rm1_64);
2795 
2796         if (accfn) {
2797             TCGv_i64 tmp64 = tcg_temp_new_i64();
2798             read_neon_element64(tmp64, a->vd, pass, MO_64);
2799             accfn(rd_64, tmp64, rd_64);
2800         }
2801         write_neon_element64(rd_64, a->vd, pass, MO_64);
2802     }
2803     return true;
2804 }
2805 
2806 static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
2807 {
2808     static NeonGenWidenFn * const widenfn[] = {
2809         gen_helper_neon_widen_s8,
2810         gen_helper_neon_widen_s16,
2811         tcg_gen_ext_i32_i64,
2812         NULL,
2813     };
2814     static NeonGenTwo64OpFn * const opfn[] = {
2815         gen_helper_neon_paddl_u16,
2816         gen_helper_neon_paddl_u32,
2817         tcg_gen_add_i64,
2818         NULL,
2819     };
2820 
2821     return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
2822 }
2823 
2824 static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
2825 {
2826     static NeonGenWidenFn * const widenfn[] = {
2827         gen_helper_neon_widen_u8,
2828         gen_helper_neon_widen_u16,
2829         tcg_gen_extu_i32_i64,
2830         NULL,
2831     };
2832     static NeonGenTwo64OpFn * const opfn[] = {
2833         gen_helper_neon_paddl_u16,
2834         gen_helper_neon_paddl_u32,
2835         tcg_gen_add_i64,
2836         NULL,
2837     };
2838 
2839     return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
2840 }
2841 
2842 static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
2843 {
2844     static NeonGenWidenFn * const widenfn[] = {
2845         gen_helper_neon_widen_s8,
2846         gen_helper_neon_widen_s16,
2847         tcg_gen_ext_i32_i64,
2848         NULL,
2849     };
2850     static NeonGenTwo64OpFn * const opfn[] = {
2851         gen_helper_neon_paddl_u16,
2852         gen_helper_neon_paddl_u32,
2853         tcg_gen_add_i64,
2854         NULL,
2855     };
2856     static NeonGenTwo64OpFn * const accfn[] = {
2857         gen_helper_neon_addl_u16,
2858         gen_helper_neon_addl_u32,
2859         tcg_gen_add_i64,
2860         NULL,
2861     };
2862 
2863     return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
2864                              accfn[a->size]);
2865 }
2866 
2867 static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
2868 {
2869     static NeonGenWidenFn * const widenfn[] = {
2870         gen_helper_neon_widen_u8,
2871         gen_helper_neon_widen_u16,
2872         tcg_gen_extu_i32_i64,
2873         NULL,
2874     };
2875     static NeonGenTwo64OpFn * const opfn[] = {
2876         gen_helper_neon_paddl_u16,
2877         gen_helper_neon_paddl_u32,
2878         tcg_gen_add_i64,
2879         NULL,
2880     };
2881     static NeonGenTwo64OpFn * const accfn[] = {
2882         gen_helper_neon_addl_u16,
2883         gen_helper_neon_addl_u32,
2884         tcg_gen_add_i64,
2885         NULL,
2886     };
2887 
2888     return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
2889                              accfn[a->size]);
2890 }
2891 
2892 typedef void ZipFn(TCGv_ptr, TCGv_ptr);
2893 
2894 static bool do_zip_uzp(DisasContext *s, arg_2misc *a, ZipFn *fn)
2896 {
2897     TCGv_ptr pd, pm;
2898 
2899     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2900         return false;
2901     }
2902 
2903     /* UNDEF accesses to D16-D31 if they don't exist. */
2904     if (!dc_isar_feature(aa32_simd_r32, s) &&
2905         ((a->vd | a->vm) & 0x10)) {
2906         return false;
2907     }
2908 
2909     if ((a->vd | a->vm) & a->q) {
2910         return false;
2911     }
2912 
2913     if (!fn) {
2914         /* Bad size or size/q combination */
2915         return false;
2916     }
2917 
2918     if (!vfp_access_check(s)) {
2919         return true;
2920     }
2921 
2922     pd = vfp_reg_ptr(true, a->vd);
2923     pm = vfp_reg_ptr(true, a->vm);
2924     fn(pd, pm);
2925     return true;
2926 }
2927 
2928 static bool trans_VUZP(DisasContext *s, arg_2misc *a)
2929 {
2930     static ZipFn * const fn[2][4] = {
2931         {
2932             gen_helper_neon_unzip8,
2933             gen_helper_neon_unzip16,
2934             NULL,
2935             NULL,
2936         }, {
2937             gen_helper_neon_qunzip8,
2938             gen_helper_neon_qunzip16,
2939             gen_helper_neon_qunzip32,
2940             NULL,
2941         }
2942     };
2943     return do_zip_uzp(s, a, fn[a->q][a->size]);
2944 }
2945 
2946 static bool trans_VZIP(DisasContext *s, arg_2misc *a)
2947 {
2948     static ZipFn * const fn[2][4] = {
2949         {
2950             gen_helper_neon_zip8,
2951             gen_helper_neon_zip16,
2952             NULL,
2953             NULL,
2954         }, {
2955             gen_helper_neon_qzip8,
2956             gen_helper_neon_qzip16,
2957             gen_helper_neon_qzip32,
2958             NULL,
2959         }
2960     };
2961     return do_zip_uzp(s, a, fn[a->q][a->size]);
2962 }
2963 
2964 static bool do_vmovn(DisasContext *s, arg_2misc *a,
2965                      NeonGenNarrowEnvFn *narrowfn)
2966 {
2967     TCGv_i64 rm;
2968     TCGv_i32 rd0, rd1;
2969 
2970     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2971         return false;
2972     }
2973 
2974     /* UNDEF accesses to D16-D31 if they don't exist. */
2975     if (!dc_isar_feature(aa32_simd_r32, s) &&
2976         ((a->vd | a->vm) & 0x10)) {
2977         return false;
2978     }
2979 
2980     if (a->vm & 1) {
2981         return false;
2982     }
2983 
2984     if (!narrowfn) {
2985         return false;
2986     }
2987 
2988     if (!vfp_access_check(s)) {
2989         return true;
2990     }
2991 
2992     rm = tcg_temp_new_i64();
2993     rd0 = tcg_temp_new_i32();
2994     rd1 = tcg_temp_new_i32();
2995 
2996     read_neon_element64(rm, a->vm, 0, MO_64);
2997     narrowfn(rd0, tcg_env, rm);
2998     read_neon_element64(rm, a->vm, 1, MO_64);
2999     narrowfn(rd1, tcg_env, rm);
3000     write_neon_element32(rd0, a->vd, 0, MO_32);
3001     write_neon_element32(rd1, a->vd, 1, MO_32);
3002     return true;
3003 }
3004 
3005 #define DO_VMOVN(INSN, FUNC)                                    \
3006     static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
3007     {                                                           \
3008         static NeonGenNarrowEnvFn * const narrowfn[] = {        \
3009             FUNC##8,                                            \
3010             FUNC##16,                                           \
3011             FUNC##32,                                           \
3012             NULL,                                               \
3013         };                                                      \
3014         return do_vmovn(s, a, narrowfn[a->size]);               \
3015     }
3016 
3017 DO_VMOVN(VMOVN, gen_neon_narrow_u)
3018 DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat)
3019 DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s)
3020 DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u)
3021 
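/*
 * Unlike the 2-reg-and-shift VSHLL handled by do_vshll_2sh, this
 * 2-reg-misc form always shifts by the source element width
 * (8 << size): e.g. for size 0 each u8 is widened to u16 and then
 * shifted left by 8.
 */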
3022 static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
3023 {
3024     TCGv_i32 rm0, rm1;
3025     TCGv_i64 rd;
3026     static NeonGenWidenFn * const widenfns[] = {
3027         gen_helper_neon_widen_u8,
3028         gen_helper_neon_widen_u16,
3029         tcg_gen_extu_i32_i64,
3030         NULL,
3031     };
3032     NeonGenWidenFn *widenfn = widenfns[a->size];
3033 
3034     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3035         return false;
3036     }
3037 
3038     /* UNDEF accesses to D16-D31 if they don't exist. */
3039     if (!dc_isar_feature(aa32_simd_r32, s) &&
3040         ((a->vd | a->vm) & 0x10)) {
3041         return false;
3042     }
3043 
3044     if (a->vd & 1) {
3045         return false;
3046     }
3047 
3048     if (!widenfn) {
3049         return false;
3050     }
3051 
3052     if (!vfp_access_check(s)) {
3053         return true;
3054     }
3055 
3056     rd = tcg_temp_new_i64();
3057     rm0 = tcg_temp_new_i32();
3058     rm1 = tcg_temp_new_i32();
3059 
3060     read_neon_element32(rm0, a->vm, 0, MO_32);
3061     read_neon_element32(rm1, a->vm, 1, MO_32);
3062 
3063     widenfn(rd, rm0);
3064     tcg_gen_shli_i64(rd, rd, 8 << a->size);
3065     write_neon_element64(rd, a->vd, 0, MO_64);
3066     widenfn(rd, rm1);
3067     tcg_gen_shli_i64(rd, rd, 8 << a->size);
3068     write_neon_element64(rd, a->vd, 1, MO_64);
3069     return true;
3070 }
3071 
3072 static bool trans_VCVT_B16_F32(DisasContext *s, arg_2misc *a)
3073 {
3074     TCGv_ptr fpst;
3075     TCGv_i64 tmp;
3076     TCGv_i32 dst0, dst1;
3077 
3078     if (!dc_isar_feature(aa32_bf16, s)) {
3079         return false;
3080     }
3081 
3082     /* UNDEF accesses to D16-D31 if they don't exist. */
3083     if (!dc_isar_feature(aa32_simd_r32, s) &&
3084         ((a->vd | a->vm) & 0x10)) {
3085         return false;
3086     }
3087 
3088     if ((a->vm & 1) || (a->size != 1)) {
3089         return false;
3090     }
3091 
3092     if (!vfp_access_check(s)) {
3093         return true;
3094     }
3095 
3096     fpst = fpstatus_ptr(FPST_STD);
3097     tmp = tcg_temp_new_i64();
3098     dst0 = tcg_temp_new_i32();
3099     dst1 = tcg_temp_new_i32();
3100 
3101     read_neon_element64(tmp, a->vm, 0, MO_64);
3102     gen_helper_bfcvt_pair(dst0, tmp, fpst);
3103 
3104     read_neon_element64(tmp, a->vm, 1, MO_64);
3105     gen_helper_bfcvt_pair(dst1, tmp, fpst);
3106 
3107     write_neon_element32(dst0, a->vd, 0, MO_32);
3108     write_neon_element32(dst1, a->vd, 1, MO_32);
3109     return true;
3110 }
3111 
3112 static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
3113 {
3114     TCGv_ptr fpst;
3115     TCGv_i32 ahp, tmp, tmp2, tmp3;
3116 
3117     if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
3118         !dc_isar_feature(aa32_fp16_spconv, s)) {
3119         return false;
3120     }
3121 
3122     /* UNDEF accesses to D16-D31 if they don't exist. */
3123     if (!dc_isar_feature(aa32_simd_r32, s) &&
3124         ((a->vd | a->vm) & 0x10)) {
3125         return false;
3126     }
3127 
3128     if ((a->vm & 1) || (a->size != 1)) {
3129         return false;
3130     }
3131 
3132     if (!vfp_access_check(s)) {
3133         return true;
3134     }
3135 
3136     fpst = fpstatus_ptr(FPST_STD);
3137     ahp = get_ahp_flag();
3138     tmp = tcg_temp_new_i32();
3139     read_neon_element32(tmp, a->vm, 0, MO_32);
3140     gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
3141     tmp2 = tcg_temp_new_i32();
3142     read_neon_element32(tmp2, a->vm, 1, MO_32);
3143     gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
3144     tcg_gen_shli_i32(tmp2, tmp2, 16);
3145     tcg_gen_or_i32(tmp2, tmp2, tmp);
3146     read_neon_element32(tmp, a->vm, 2, MO_32);
3147     gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
3148     tmp3 = tcg_temp_new_i32();
3149     read_neon_element32(tmp3, a->vm, 3, MO_32);
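    /*
     * All four source words have now been read; it is safe to start
     * writing vd, which matters if vd overlaps the vm register pair.
     */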
3150     write_neon_element32(tmp2, a->vd, 0, MO_32);
3151     gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
3152     tcg_gen_shli_i32(tmp3, tmp3, 16);
3153     tcg_gen_or_i32(tmp3, tmp3, tmp);
3154     write_neon_element32(tmp3, a->vd, 1, MO_32);
3155     return true;
3156 }
3157 
3158 static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
3159 {
3160     TCGv_ptr fpst;
3161     TCGv_i32 ahp, tmp, tmp2, tmp3;
3162 
3163     if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
3164         !dc_isar_feature(aa32_fp16_spconv, s)) {
3165         return false;
3166     }
3167 
3168     /* UNDEF accesses to D16-D31 if they don't exist. */
3169     if (!dc_isar_feature(aa32_simd_r32, s) &&
3170         ((a->vd | a->vm) & 0x10)) {
3171         return false;
3172     }
3173 
3174     if ((a->vd & 1) || (a->size != 1)) {
3175         return false;
3176     }
3177 
3178     if (!vfp_access_check(s)) {
3179         return true;
3180     }
3181 
3182     fpst = fpstatus_ptr(FPST_STD);
3183     ahp = get_ahp_flag();
3184     tmp3 = tcg_temp_new_i32();
3185     tmp2 = tcg_temp_new_i32();
3186     tmp = tcg_temp_new_i32();
3187     read_neon_element32(tmp, a->vm, 0, MO_32);
3188     read_neon_element32(tmp2, a->vm, 1, MO_32);
3189     tcg_gen_ext16u_i32(tmp3, tmp);
3190     gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
3191     write_neon_element32(tmp3, a->vd, 0, MO_32);
3192     tcg_gen_shri_i32(tmp, tmp, 16);
3193     gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
3194     write_neon_element32(tmp, a->vd, 1, MO_32);
3195     tcg_gen_ext16u_i32(tmp3, tmp2);
3196     gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
3197     write_neon_element32(tmp3, a->vd, 2, MO_32);
3198     tcg_gen_shri_i32(tmp2, tmp2, 16);
3199     gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
3200     write_neon_element32(tmp2, a->vd, 3, MO_32);
3201     return true;
3202 }
3203 
3204 static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
3205 {
3206     int vec_size = a->q ? 16 : 8;
3207     int rd_ofs = neon_full_reg_offset(a->vd);
3208     int rm_ofs = neon_full_reg_offset(a->vm);
3209 
3210     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3211         return false;
3212     }
3213 
3214     /* UNDEF accesses to D16-D31 if they don't exist. */
3215     if (!dc_isar_feature(aa32_simd_r32, s) &&
3216         ((a->vd | a->vm) & 0x10)) {
3217         return false;
3218     }
3219 
3220     if (a->size == 3) {
3221         return false;
3222     }
3223 
3224     if ((a->vd | a->vm) & a->q) {
3225         return false;
3226     }
3227 
3228     if (!vfp_access_check(s)) {
3229         return true;
3230     }
3231 
3232     fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size);
3233 
3234     return true;
3235 }
3236 
3237 #define DO_2MISC_VEC(INSN, FN)                                  \
3238     static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
3239     {                                                           \
3240         return do_2misc_vec(s, a, FN);                          \
3241     }
3242 
3243 DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg)
3244 DO_2MISC_VEC(VABS, tcg_gen_gvec_abs)
3245 DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0)
3246 DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
3247 DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
3248 DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
3249 DO_2MISC_VEC(VCLT0, gen_gvec_clt0)
3250 
3251 static bool trans_VMVN(DisasContext *s, arg_2misc *a)
3252 {
3253     if (a->size != 0) {
3254         return false;
3255     }
3256     return do_2misc_vec(s, a, tcg_gen_gvec_not);
3257 }
3258 
3259 #define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA)                          \
3260     static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
3261                          uint32_t rm_ofs, uint32_t oprsz,               \
3262                          uint32_t maxsz)                                \
3263     {                                                                   \
3264         tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz,        \
3265                            DATA, FUNC);                                 \
3266     }
3267 
3268 #define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA)                          \
3269     static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
3270                          uint32_t rm_ofs, uint32_t oprsz,               \
3271                          uint32_t maxsz)                                \
3272     {                                                                   \
3273         tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC);   \
3274     }
3275 
3276 WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0)
3277 WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aesd, 0)
3278 WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0)
3279 WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesimc, 0)
3280 WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0)
3281 WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0)
3282 WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0)
3283 
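/*
 * Expand a trans function for a 2-reg-misc crypto op: check the
 * relevant ISA extension and the fixed size field the encoding requires
 * (0 for the AES ops, 2 for the SHA ops), then reuse do_2misc_vec()
 * for the remaining checks and the expansion itself.
 */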
3284 #define DO_2M_CRYPTO(INSN, FEATURE, SIZE)                       \
3285     static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
3286     {                                                           \
3287         if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) {  \
3288             return false;                                       \
3289         }                                                       \
3290         return do_2misc_vec(s, a, gen_##INSN);                  \
3291     }
3292 
3293 DO_2M_CRYPTO(AESE, aa32_aes, 0)
3294 DO_2M_CRYPTO(AESD, aa32_aes, 0)
3295 DO_2M_CRYPTO(AESMC, aa32_aes, 0)
3296 DO_2M_CRYPTO(AESIMC, aa32_aes, 0)
3297 DO_2M_CRYPTO(SHA1H, aa32_sha1, 2)
3298 DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2)
3299 DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)
3300 
3301 static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
3302 {
3303     TCGv_i32 tmp;
3304     int pass;
3305 
3306     /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
3307     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3308         return false;
3309     }
3310 
3311     /* UNDEF accesses to D16-D31 if they don't exist. */
3312     if (!dc_isar_feature(aa32_simd_r32, s) &&
3313         ((a->vd | a->vm) & 0x10)) {
3314         return false;
3315     }
3316 
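    /* A NULL entry in the caller's size-indexed table means UNDEF. */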
3317     if (!fn) {
3318         return false;
3319     }
3320 
3321     if ((a->vd | a->vm) & a->q) {
3322         return false;
3323     }
3324 
3325     if (!vfp_access_check(s)) {
3326         return true;
3327     }
3328 
3329     tmp = tcg_temp_new_i32();
3330     for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
3331         read_neon_element32(tmp, a->vm, pass, MO_32);
3332         fn(tmp, tmp);
3333         write_neon_element32(tmp, a->vd, pass, MO_32);
3334     }
3335     return true;
3336 }
3337 
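/*
 * VREV32: reverse the elements within each 32-bit group. For bytes this
 * is a plain word bswap; for halfwords it is a swap of the two halves.
 * 32-bit (and 64-bit) element sizes are reserved, hence the NULLs.
 */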
3338 static bool trans_VREV32(DisasContext *s, arg_2misc *a)
3339 {
3340     static NeonGenOneOpFn * const fn[] = {
3341         tcg_gen_bswap32_i32,
3342         gen_swap_half,
3343         NULL,
3344         NULL,
3345     };
3346     return do_2misc(s, a, fn[a->size]);
3347 }
3348 
3349 static bool trans_VREV16(DisasContext *s, arg_2misc *a)
3350 {
3351     if (a->size != 0) {
3352         return false;
3353     }
3354     return do_2misc(s, a, gen_rev16);
3355 }
3356 
3357 static bool trans_VCLS(DisasContext *s, arg_2misc *a)
3358 {
3359     static NeonGenOneOpFn * const fn[] = {
3360         gen_helper_neon_cls_s8,
3361         gen_helper_neon_cls_s16,
3362         gen_helper_neon_cls_s32,
3363         NULL,
3364     };
3365     return do_2misc(s, a, fn[a->size]);
3366 }
3367 
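/*
 * The final argument to tcg_gen_clzi_i32 is the result to produce for a
 * zero input, which for CLZ on a 32-bit element is 32.
 */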
3368 static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
3369 {
3370     tcg_gen_clzi_i32(rd, rm, 32);
3371 }
3372 
3373 static bool trans_VCLZ(DisasContext *s, arg_2misc *a)
3374 {
3375     static NeonGenOneOpFn * const fn[] = {
3376         gen_helper_neon_clz_u8,
3377         gen_helper_neon_clz_u16,
3378         do_VCLZ_32,
3379         NULL,
3380     };
3381     return do_2misc(s, a, fn[a->size]);
3382 }
3383 
3384 static bool trans_VCNT(DisasContext *s, arg_2misc *a)
3385 {
3386     if (a->size != 0) {
3387         return false;
3388     }
3389     return do_2misc(s, a, gen_helper_neon_cnt_u8);
3390 }
3391 
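/*
 * Float VABS only needs to clear the sign bit, so it can be expanded as
 * a vector AND with a mask covering everything below the sign bit.
 * For example, 0xc0400000 (-3.0f) & 0x7fffffff == 0x40400000 (3.0f).
 */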
3392 static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3393                        uint32_t oprsz, uint32_t maxsz)
3394 {
3395     tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs,
3396                       vece == MO_16 ? 0x7fff : 0x7fffffff,
3397                       oprsz, maxsz);
3398 }
3399 
3400 static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
3401 {
3402     if (a->size == MO_16) {
3403         if (!dc_isar_feature(aa32_fp16_arith, s)) {
3404             return false;
3405         }
3406     } else if (a->size != MO_32) {
3407         return false;
3408     }
3409     return do_2misc_vec(s, a, gen_VABS_F);
3410 }
3411 
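/*
 * Likewise float VNEG is just an XOR that flips the sign bit:
 * 0x40400000 (3.0f) ^ 0x80000000 == 0xc0400000 (-3.0f).
 */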
3412 static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3413                        uint32_t oprsz, uint32_t maxsz)
3414 {
3415     tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs,
3416                       vece == MO_16 ? 0x8000 : 0x80000000,
3417                       oprsz, maxsz);
3418 }
3419 
3420 static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
3421 {
3422     if (a->size == MO_16) {
3423         if (!dc_isar_feature(aa32_fp16_arith, s)) {
3424             return false;
3425         }
3426     } else if (a->size != MO_32) {
3427         return false;
3428     }
3429     return do_2misc_vec(s, a, gen_VNEG_F);
3430 }
3431 
3432 static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
3433 {
3434     if (a->size != 2) {
3435         return false;
3436     }
3437     return do_2misc(s, a, gen_helper_recpe_u32);
3438 }
3439 
3440 static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
3441 {
3442     if (a->size != 2) {
3443         return false;
3444     }
3445     return do_2misc(s, a, gen_helper_rsqrte_u32);
3446 }
3447 
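/*
 * The saturating ops need tcg_env so the helpers can set the QC flag in
 * FPSCR when saturation occurs; these wrappers adapt the env-taking
 * helpers to the two-operand NeonGenOneOpFn shape do_2misc() expects.
 */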
3448 #define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \
3449     static void WRAPNAME(TCGv_i32 d, TCGv_i32 m)        \
3450     {                                                   \
3451         FUNC(d, tcg_env, m);                            \
3452     }
3453 
3454 WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8)
3455 WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16)
3456 WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32)
3457 WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8)
3458 WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16)
3459 WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32)
3460 
3461 static bool trans_VQABS(DisasContext *s, arg_2misc *a)
3462 {
3463     static NeonGenOneOpFn * const fn[] = {
3464         gen_VQABS_s8,
3465         gen_VQABS_s16,
3466         gen_VQABS_s32,
3467         NULL,
3468     };
3469     return do_2misc(s, a, fn[a->size]);
3470 }
3471 
3472 static bool trans_VQNEG(DisasContext *s, arg_2misc *a)
3473 {
3474     static NeonGenOneOpFn * const fn[] = {
3475         gen_VQNEG_s8,
3476         gen_VQNEG_s16,
3477         gen_VQNEG_s32,
3478         NULL,
3479     };
3480     return do_2misc(s, a, fn[a->size]);
3481 }
3482 
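/*
 * Expand a 2-reg-misc FP operation via gvec. The helper table is
 * indexed by vece, with only the MO_16 and MO_32 slots populated; the
 * fp16 forms additionally require the FP16 arithmetic extension. The
 * Neon "standard FPSCR value" status is used, in its fp16-specific
 * flavour for MO_16.
 */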
3483 #define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC)                             \
3484     static void gen_##INSN(unsigned vece, uint32_t rd_ofs,              \
3485                            uint32_t rm_ofs,                             \
3486                            uint32_t oprsz, uint32_t maxsz)              \
3487     {                                                                   \
3488         static gen_helper_gvec_2_ptr * const fns[4] = {                 \
3489             NULL, HFUNC, SFUNC, NULL,                                   \
3490         };                                                              \
3491         TCGv_ptr fpst;                                                  \
3492         fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD);   \
3493         tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0,       \
3494                            fns[vece]);                                  \
3495     }                                                                   \
3496     static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
3497     {                                                                   \
3498         if (a->size == MO_16) {                                         \
3499             if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
3500                 return false;                                           \
3501             }                                                           \
3502         } else if (a->size != MO_32) {                                  \
3503             return false;                                               \
3504         }                                                               \
3505         return do_2misc_vec(s, a, gen_##INSN);                          \
3506     }
3507 
3508 DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s)
3509 DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s)
3510 DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s)
3511 DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s)
3512 DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s)
3513 DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s)
3514 DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s)
3515 DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos)
3516 DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos)
3517 DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs)
3518 DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs)
3519 
3520 DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s)
3521 
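/*
 * VRINTX rounds to integral using the ambient rounding mode while still
 * signalling Inexact; the Neon encoding only exists from v8 onwards, so
 * wrap the generated trans function with an extra feature check.
 */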
3522 static bool trans_VRINTX(DisasContext *s, arg_2misc *a)
3523 {
3524     if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
3525         return false;
3526     }
3527     return trans_VRINTX_impl(s, a);
3528 }
3529 
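/*
 * Expand the VCVT/VRINT forms that take an explicit rounding mode. The
 * mode is translated with arm_rmode_to_sf() and handed to the helper
 * through the gvec 'data' field; OP is pasted with a trailing 'h' or
 * 's' to select the fp16 or fp32 helper.
 */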
3530 #define DO_VEC_RMODE(INSN, RMODE, OP)                                   \
3531     static void gen_##INSN(unsigned vece, uint32_t rd_ofs,              \
3532                            uint32_t rm_ofs,                             \
3533                            uint32_t oprsz, uint32_t maxsz)              \
3534     {                                                                   \
3535         static gen_helper_gvec_2_ptr * const fns[4] = {                 \
3536             NULL,                                                       \
3537             gen_helper_gvec_##OP##h,                                    \
3538             gen_helper_gvec_##OP##s,                                    \
3539             NULL,                                                       \
3540         };                                                              \
3541         TCGv_ptr fpst;                                                  \
3542         fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD);   \
3543         tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz,          \
3544                            arm_rmode_to_sf(RMODE), fns[vece]);          \
3545     }                                                                   \
3546     static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
3547     {                                                                   \
3548         if (!arm_dc_feature(s, ARM_FEATURE_V8)) {                       \
3549             return false;                                               \
3550         }                                                               \
3551         if (a->size == MO_16) {                                         \
3552             if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
3553                 return false;                                           \
3554             }                                                           \
3555         } else if (a->size != MO_32) {                                  \
3556             return false;                                               \
3557         }                                                               \
3558         return do_2misc_vec(s, a, gen_##INSN);                          \
3559     }
3560 
3561 DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u)
3562 DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s)
3563 DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u)
3564 DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s)
3565 DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u)
3566 DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s)
3567 DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u)
3568 DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s)
3569 
3570 DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_)
3571 DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_)
3572 DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_)
3573 DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_)
3574 DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_)
3575 
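/*
 * VSWP exchanges the contents of the two registers, 64 bits per pass,
 * so no per-element work is needed; size fields other than 0 are
 * reserved in this encoding.
 */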
3576 static bool trans_VSWP(DisasContext *s, arg_2misc *a)
3577 {
3578     TCGv_i64 rm, rd;
3579     int pass;
3580 
3581     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3582         return false;
3583     }
3584 
3585     /* UNDEF accesses to D16-D31 if they don't exist. */
3586     if (!dc_isar_feature(aa32_simd_r32, s) &&
3587         ((a->vd | a->vm) & 0x10)) {
3588         return false;
3589     }
3590 
3591     if (a->size != 0) {
3592         return false;
3593     }
3594 
3595     if ((a->vd | a->vm) & a->q) {
3596         return false;
3597     }
3598 
3599     if (!vfp_access_check(s)) {
3600         return true;
3601     }
3602 
3603     rm = tcg_temp_new_i64();
3604     rd = tcg_temp_new_i64();
3605     for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
3606         read_neon_element64(rm, a->vm, pass, MO_64);
3607         read_neon_element64(rd, a->vd, pass, MO_64);
3608         write_neon_element64(rm, a->vd, pass, MO_64);
3609         write_neon_element64(rd, a->vm, pass, MO_64);
3610     }
3611     return true;
3612 }
3613 
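/*
 * Byte transposition step for VTRN.8: t0 ends up holding the
 * even-indexed bytes of both inputs and t1 the odd-indexed bytes.
 * For example t0 = 0x44332211, t1 = 0x88776655 yields
 * t0 = 0x33771155 and t1 = 0x44882266.
 */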
3614 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3615 {
3616     TCGv_i32 rd, tmp;
3617 
3618     rd = tcg_temp_new_i32();
3619     tmp = tcg_temp_new_i32();
3620 
3621     tcg_gen_shli_i32(rd, t0, 8);
3622     tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3623     tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3624     tcg_gen_or_i32(rd, rd, tmp);
3625 
3626     tcg_gen_shri_i32(t1, t1, 8);
3627     tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3628     tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3629     tcg_gen_or_i32(t1, t1, tmp);
3630     tcg_gen_mov_i32(t0, rd);
3631 }
3632 
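/*
 * Halfword transposition step for VTRN.16: the low halves of t0 and t1
 * are gathered into t0 and the high halves into t1.
 */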
3633 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3634 {
3635     TCGv_i32 rd, tmp;
3636 
3637     rd = tcg_temp_new_i32();
3638     tmp = tcg_temp_new_i32();
3639 
3640     tcg_gen_shli_i32(rd, t0, 16);
3641     tcg_gen_andi_i32(tmp, t1, 0xffff);
3642     tcg_gen_or_i32(rd, rd, tmp);
3643     tcg_gen_shri_i32(t1, t1, 16);
3644     tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3645     tcg_gen_or_i32(t1, t1, tmp);
3646     tcg_gen_mov_i32(t0, rd);
3647 }
3648 
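/*
 * VTRN treats the register pair as a set of 2x2 matrices and transposes
 * them: elements d[2i+1] and m[2i] are exchanged. For 32-bit elements
 * this is a straight swap across each pass pair; for 8- and 16-bit
 * elements the helpers above do the within-word shuffling.
 */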
3649 static bool trans_VTRN(DisasContext *s, arg_2misc *a)
3650 {
3651     TCGv_i32 tmp, tmp2;
3652     int pass;
3653 
3654     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3655         return false;
3656     }
3657 
3658     /* UNDEF accesses to D16-D31 if they don't exist. */
3659     if (!dc_isar_feature(aa32_simd_r32, s) &&
3660         ((a->vd | a->vm) & 0x10)) {
3661         return false;
3662     }
3663 
3664     if ((a->vd | a->vm) & a->q) {
3665         return false;
3666     }
3667 
3668     if (a->size == 3) {
3669         return false;
3670     }
3671 
3672     if (!vfp_access_check(s)) {
3673         return true;
3674     }
3675 
3676     tmp = tcg_temp_new_i32();
3677     tmp2 = tcg_temp_new_i32();
3678     if (a->size == MO_32) {
3679         for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
3680             read_neon_element32(tmp, a->vm, pass, MO_32);
3681             read_neon_element32(tmp2, a->vd, pass + 1, MO_32);
3682             write_neon_element32(tmp2, a->vm, pass, MO_32);
3683             write_neon_element32(tmp, a->vd, pass + 1, MO_32);
3684         }
3685     } else {
3686         for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
3687             read_neon_element32(tmp, a->vm, pass, MO_32);
3688             read_neon_element32(tmp2, a->vd, pass, MO_32);
3689             if (a->size == MO_8) {
3690                 gen_neon_trn_u8(tmp, tmp2);
3691             } else {
3692                 gen_neon_trn_u16(tmp, tmp2);
3693             }
3694             write_neon_element32(tmp2, a->vm, pass, MO_32);
3695             write_neon_element32(tmp, a->vd, pass, MO_32);
3696         }
3697     }
3698     return true;
3699 }
3700 
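/*
 * The I8MM matrix-multiply and BF16 ops below accumulate into Vd and so
 * expand through the four-operand do_neon_ddda() helpers, with Vd
 * supplied as both destination and addend; the first constant argument
 * encodes which operands are full Q registers (the scalar BFMLAL form
 * keeps Vm as a D register).
 */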
3701 static bool trans_VSMMLA(DisasContext *s, arg_VSMMLA *a)
3702 {
3703     if (!dc_isar_feature(aa32_i8mm, s)) {
3704         return false;
3705     }
3706     return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
3707                         gen_helper_gvec_smmla_b);
3708 }
3709 
3710 static bool trans_VUMMLA(DisasContext *s, arg_VUMMLA *a)
3711 {
3712     if (!dc_isar_feature(aa32_i8mm, s)) {
3713         return false;
3714     }
3715     return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
3716                         gen_helper_gvec_ummla_b);
3717 }
3718 
3719 static bool trans_VUSMMLA(DisasContext *s, arg_VUSMMLA *a)
3720 {
3721     if (!dc_isar_feature(aa32_i8mm, s)) {
3722         return false;
3723     }
3724     return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
3725                         gen_helper_gvec_usmmla_b);
3726 }
3727 
3728 static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a)
3729 {
3730     if (!dc_isar_feature(aa32_bf16, s)) {
3731         return false;
3732     }
3733     return do_neon_ddda_env(s, 7, a->vd, a->vn, a->vm, 0,
3734                             gen_helper_gvec_bfmmla);
3735 }
3736 
3737 static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a)
3738 {
3739     if (!dc_isar_feature(aa32_bf16, s)) {
3740         return false;
3741     }
3742     return do_neon_ddda_fpst(s, 7, a->vd, a->vn, a->vm, a->q, FPST_STD,
3743                              gen_helper_gvec_bfmlal);
3744 }
3745 
3746 static bool trans_VFMA_b16_scal(DisasContext *s, arg_VFMA_b16_scal *a)
3747 {
3748     if (!dc_isar_feature(aa32_bf16, s)) {
3749         return false;
3750     }
3751     return do_neon_ddda_fpst(s, 6, a->vd, a->vn, a->vm,
3752                              (a->index << 1) | a->q, FPST_STD,
3753                              gen_helper_gvec_bfmlal_idx);
3754 }
3755