xref: /openbmc/qemu/target/arm/tcg/translate-neon.c (revision a11e54ed)
1 /*
2  *  ARM translation: AArch32 Neon instructions
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *  Copyright (c) 2005-2007 CodeSourcery
6  *  Copyright (c) 2007 OpenedHand, Ltd.
7  *  Copyright (c) 2020 Linaro, Ltd.
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21  */
22 
23 #include "qemu/osdep.h"
24 #include "translate.h"
25 #include "translate-a32.h"
26 
27 /* Include the generated Neon decoder */
28 #include "decode-neon-dp.c.inc"
29 #include "decode-neon-ls.c.inc"
30 #include "decode-neon-shared.c.inc"
31 
32 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
33 {
34     TCGv_ptr ret = tcg_temp_new_ptr();
35     tcg_gen_addi_ptr(ret, tcg_env, vfp_reg_offset(dp, reg));
36     return ret;
37 }
38 
39 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
40 {
41     long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
42 
43     switch (mop) {
44     case MO_UB:
45         tcg_gen_ld8u_i32(var, tcg_env, offset);
46         break;
47     case MO_UW:
48         tcg_gen_ld16u_i32(var, tcg_env, offset);
49         break;
50     case MO_UL:
51         tcg_gen_ld_i32(var, tcg_env, offset);
52         break;
53     default:
54         g_assert_not_reached();
55     }
56 }
57 
58 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
59 {
60     long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
61 
62     switch (mop) {
63     case MO_UB:
64         tcg_gen_ld8u_i64(var, tcg_env, offset);
65         break;
66     case MO_UW:
67         tcg_gen_ld16u_i64(var, tcg_env, offset);
68         break;
69     case MO_UL:
70         tcg_gen_ld32u_i64(var, tcg_env, offset);
71         break;
72     case MO_UQ:
73         tcg_gen_ld_i64(var, tcg_env, offset);
74         break;
75     default:
76         g_assert_not_reached();
77     }
78 }
79 
80 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
81 {
82     long offset = neon_element_offset(reg, ele, size);
83 
84     switch (size) {
85     case MO_8:
86         tcg_gen_st8_i32(var, tcg_env, offset);
87         break;
88     case MO_16:
89         tcg_gen_st16_i32(var, tcg_env, offset);
90         break;
91     case MO_32:
92         tcg_gen_st_i32(var, tcg_env, offset);
93         break;
94     default:
95         g_assert_not_reached();
96     }
97 }
98 
99 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
100 {
101     long offset = neon_element_offset(reg, ele, size);
102 
103     switch (size) {
104     case MO_8:
105         tcg_gen_st8_i64(var, tcg_env, offset);
106         break;
107     case MO_16:
108         tcg_gen_st16_i64(var, tcg_env, offset);
109         break;
110     case MO_32:
111         tcg_gen_st32_i64(var, tcg_env, offset);
112         break;
113     case MO_64:
114         tcg_gen_st_i64(var, tcg_env, offset);
115         break;
116     default:
117         g_assert_not_reached();
118     }
119 }
120 
/*
 * Common expansion for insns of the form d = fn(d, n, m) using an
 * out-of-line gvec helper (e.g. the dot-product family).
 * @q is a mask of which registers must be even: bit 2 = vd, bit 1 = vn,
 * bit 0 = vm; @data is passed through to the helper.
 * Returns false to UNDEF the insn, true once it is fully handled.
 */
static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm,
                         int data, gen_helper_gvec_4 *fn_gvec)
{
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    /*
     * UNDEF accesses to odd registers for each bit of Q.
     * Q will be 0b111 for all Q-reg instructions, otherwise
     * when we have mixed Q- and D-reg inputs.
     */
    if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    int opr_sz = q ? 16 : 8;
    /* The destination doubles as the accumulator input (4th operand). */
    tcg_gen_gvec_4_ool(vfp_reg_offset(1, vd),
                       vfp_reg_offset(1, vn),
                       vfp_reg_offset(1, vm),
                       vfp_reg_offset(1, vd),
                       opr_sz, opr_sz, data, fn_gvec);
    return true;
}
150 
/*
 * As do_neon_ddda, but for helpers that also take a float-status pointer
 * of the given @fp_flavour (e.g. VCMLA).  @q is the same even-register
 * mask (bit 2 = vd, bit 1 = vn, bit 0 = vm); @data is passed to the helper.
 */
static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm,
                              int data, ARMFPStatusFlavour fp_flavour,
                              gen_helper_gvec_4_ptr *fn_gvec_ptr)
{
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    /*
     * UNDEF accesses to odd registers for each bit of Q.
     * Q will be 0b111 for all Q-reg instructions, otherwise
     * when we have mixed Q- and D-reg inputs.
     */
    if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    int opr_sz = q ? 16 : 8;
    TCGv_ptr fpst = fpstatus_ptr(fp_flavour);

    /* The destination doubles as the accumulator input (4th operand). */
    tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd),
                       vfp_reg_offset(1, vn),
                       vfp_reg_offset(1, vm),
                       vfp_reg_offset(1, vd),
                       fpst, opr_sz, opr_sz, data, fn_gvec_ptr);
    return true;
}
183 
184 static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
185 {
186     if (!dc_isar_feature(aa32_vcma, s)) {
187         return false;
188     }
189     if (a->size == MO_16) {
190         if (!dc_isar_feature(aa32_fp16_arith, s)) {
191             return false;
192         }
193         return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
194                                  FPST_STD_F16, gen_helper_gvec_fcmlah);
195     }
196     return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
197                              FPST_STD, gen_helper_gvec_fcmlas);
198 }
199 
/*
 * VCADD (vector): complex add with rotate.  Requires the VCMA extension,
 * plus FP16 arithmetic for the half-precision form.  a->rot selects the
 * rotation and is passed to the helper as 'data'.
 */
static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
{
    int opr_sz;
    TCGv_ptr fpst;
    gen_helper_gvec_3_ptr *fn_gvec_ptr;

    if (!dc_isar_feature(aa32_vcma, s)
        || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    /* For the Q-reg form (q == 1) all register numbers must be even. */
    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    fn_gvec_ptr = (a->size == MO_16) ?
        gen_helper_gvec_fcaddh : gen_helper_gvec_fcadds;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpst, opr_sz, opr_sz, a->rot,
                       fn_gvec_ptr);
    return true;
}
236 
237 static bool trans_VSDOT(DisasContext *s, arg_VSDOT *a)
238 {
239     if (!dc_isar_feature(aa32_dp, s)) {
240         return false;
241     }
242     return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
243                         gen_helper_gvec_sdot_b);
244 }
245 
246 static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a)
247 {
248     if (!dc_isar_feature(aa32_dp, s)) {
249         return false;
250     }
251     return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
252                         gen_helper_gvec_udot_b);
253 }
254 
255 static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a)
256 {
257     if (!dc_isar_feature(aa32_i8mm, s)) {
258         return false;
259     }
260     return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
261                         gen_helper_gvec_usdot_b);
262 }
263 
264 static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
265 {
266     if (!dc_isar_feature(aa32_bf16, s)) {
267         return false;
268     }
269     return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
270                         gen_helper_gvec_bfdot);
271 }
272 
/*
 * VFMAL/VFMSL (vector): half-precision multiply-long with accumulate.
 * Requires the FHM extension; a->s (add vs subtract) is passed to the
 * helper as 'data'.
 */
static bool trans_VFML(DisasContext *s, arg_VFML *a)
{
    int opr_sz;

    if (!dc_isar_feature(aa32_fhm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        (a->vd & 0x10)) {
        return false;
    }

    /* Q-reg form requires an even destination register number. */
    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    /*
     * The destination is full-width (dp=1); the sources are half the
     * destination width, hence offset via vfp_reg_offset(a->q, ...).
     */
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(a->q, a->vn),
                       vfp_reg_offset(a->q, a->vm),
                       tcg_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
                       gen_helper_gvec_fmlal_a32);
    return true;
}
303 
304 static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
305 {
306     int data = (a->index << 2) | a->rot;
307 
308     if (!dc_isar_feature(aa32_vcma, s)) {
309         return false;
310     }
311     if (a->size == MO_16) {
312         if (!dc_isar_feature(aa32_fp16_arith, s)) {
313             return false;
314         }
315         return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
316                                  FPST_STD_F16, gen_helper_gvec_fcmlah_idx);
317     }
318     return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
319                              FPST_STD, gen_helper_gvec_fcmlas_idx);
320 }
321 
322 static bool trans_VSDOT_scalar(DisasContext *s, arg_VSDOT_scalar *a)
323 {
324     if (!dc_isar_feature(aa32_dp, s)) {
325         return false;
326     }
327     return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
328                         gen_helper_gvec_sdot_idx_b);
329 }
330 
331 static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a)
332 {
333     if (!dc_isar_feature(aa32_dp, s)) {
334         return false;
335     }
336     return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
337                         gen_helper_gvec_udot_idx_b);
338 }
339 
340 static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a)
341 {
342     if (!dc_isar_feature(aa32_i8mm, s)) {
343         return false;
344     }
345     return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
346                         gen_helper_gvec_usdot_idx_b);
347 }
348 
349 static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a)
350 {
351     if (!dc_isar_feature(aa32_i8mm, s)) {
352         return false;
353     }
354     return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
355                         gen_helper_gvec_sudot_idx_b);
356 }
357 
358 static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a)
359 {
360     if (!dc_isar_feature(aa32_bf16, s)) {
361         return false;
362     }
363     return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
364                         gen_helper_gvec_bfdot_idx);
365 }
366 
/*
 * VFMAL/VFMSL (by element): half-precision multiply-long with accumulate
 * against a scalar element.  Requires the FHM extension; the helper's
 * 'data' packs the element index and a->s (add vs subtract).
 */
static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
{
    int opr_sz;

    if (!dc_isar_feature(aa32_fhm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) {
        return false;
    }

    /* Q-reg form requires an even destination register number. */
    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    /* Destination is full-width (dp=1); the sources are half-width. */
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(a->q, a->vn),
                       vfp_reg_offset(a->q, a->rm),
                       tcg_env, opr_sz, opr_sz,
                       (a->index << 2) | a->s, /* is_2 == 0 */
                       gen_helper_gvec_fmlal_idx_a32);
    return true;
}
398 
/*
 * Layout table for the "load/store multiple structures" insns, indexed
 * by the instruction's itype field (0..10).  As used by
 * trans_VLDST_multiple: nregs passes are made over the element loop;
 * within each pass, interleave consecutive memory elements go to
 * registers vd + reg + spacing * xs for xs in [0, interleave).
 */
static struct {
    int nregs;
    int interleave;
    int spacing;
} const neon_ls_element_type[11] = {
    {1, 4, 1},
    {1, 4, 2},
    {4, 1, 1},
    {2, 2, 2},
    {1, 3, 1},
    {1, 3, 2},
    {3, 1, 1},
    {1, 1, 1},
    {1, 2, 1},
    {1, 2, 2},
    {2, 1, 1}
};
416 
417 static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
418                                       int stride)
419 {
420     if (rm != 15) {
421         TCGv_i32 base;
422 
423         base = load_reg(s, rn);
424         if (rm == 13) {
425             tcg_gen_addi_i32(base, base, stride);
426         } else {
427             TCGv_i32 index;
428             index = load_reg(s, rm);
429             tcg_gen_add_i32(base, base, index);
430         }
431         store_reg(s, rn, base);
432     }
433 }
434 
/*
 * Neon load/store multiple structures (VLD1-4/VST1-4, "multiple"):
 * transfer whole registers between memory at [rn] and the D registers
 * starting at vd, with the layout given by neon_ls_element_type[itype].
 * Returns false to UNDEF the insn, true once it is fully handled.
 */
static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
{
    int nregs, interleave, spacing, reg, n;
    MemOp mop, align, endian;
    int mmu_idx = get_mem_index(s);
    int size = a->size;
    TCGv_i64 tmp64;
    TCGv_i32 addr;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }
    /* Only itype values 0..10 are defined (see neon_ls_element_type). */
    if (a->itype > 10) {
        return false;
    }
    /* Catch UNDEF cases for bad values of align field */
    switch (a->itype & 0xc) {
    case 4:
        if (a->align >= 2) {
            return false;
        }
        break;
    case 8:
        if (a->align == 3) {
            return false;
        }
        break;
    default:
        break;
    }
    nregs = neon_ls_element_type[a->itype].nregs;
    interleave = neon_ls_element_type[a->itype].interleave;
    spacing = neon_ls_element_type[a->itype].spacing;
    /* size == 3 (64-bit elements) is only valid without interleaving. */
    if (size == 3 && (interleave | spacing) != 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For our purposes, bytes are always little-endian.  */
    endian = s->be_data;
    if (size == 0) {
        endian = MO_LE;
    }

    /* Enforce alignment requested by the instruction */
    if (a->align) {
        align = pow2_align(a->align + 2); /* 4 ** a->align */
    } else {
        align = s->align_mem ? MO_ALIGN : 0;
    }

    /*
     * Consecutive little-endian elements from a single register
     * can be promoted to a larger little-endian operation.
     */
    if (interleave == 1 && endian == MO_LE) {
        /* Retain any natural alignment. */
        if (align == MO_ALIGN) {
            align = pow2_align(size);
        }
        size = 3;
    }

    tmp64 = tcg_temp_new_i64();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);

    mop = endian | size | align;
    for (reg = 0; reg < nregs; reg++) {
        for (n = 0; n < 8 >> size; n++) {
            int xs;
            for (xs = 0; xs < interleave; xs++) {
                /* tt is the D register holding this element. */
                int tt = a->vd + reg + spacing * xs;

                if (a->l) {
                    gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, mop);
                    neon_store_element64(tt, n, size, tmp64);
                } else {
                    neon_load_element64(tmp64, tt, n, size);
                    gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, mop);
                }
                tcg_gen_addi_i32(addr, addr, 1 << size);

                /* Subsequent memory operations inherit alignment */
                mop &= ~MO_AMASK;
            }
        }
    }

    /* Writeback: nregs * interleave * 8 bytes were transferred in total. */
    gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
    return true;
}
536 
/*
 * Neon load single structure to all lanes (VLD1-4 "to all lanes"):
 * load one element per structure register and replicate it across every
 * lane of the destination.  Returns false to UNDEF, true once handled.
 */
static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
{
    int reg, stride, vec_size;
    int vd = a->vd;
    int size = a->size;
    int nregs = a->n + 1;
    TCGv_i32 addr, tmp;
    MemOp mop, align;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    align = 0;
    if (size == 3) {
        if (nregs != 4 || a->a == 0) {
            return false;
        }
        /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
        size = MO_32;
        align = MO_ALIGN_16;
    } else if (a->a) {
        /* Alignment hint requested: amount depends on nregs and size. */
        switch (nregs) {
        case 1:
            if (size == 0) {
                return false;
            }
            align = MO_ALIGN;
            break;
        case 2:
            align = pow2_align(size + 1);
            break;
        case 3:
            /* VLD3 to all lanes never allows an alignment hint. */
            return false;
        case 4:
            if (size == 2) {
                align = pow2_align(3);
            } else {
                align = pow2_align(size + 2);
            }
            break;
        default:
            g_assert_not_reached();
        }
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * VLD1 to all lanes: T bit indicates how many Dregs to write.
     * VLD2/3/4 to all lanes: T bit indicates register stride.
     */
    stride = a->t ? 2 : 1;
    vec_size = nregs == 1 ? stride * 8 : 8;
    mop = size | align;
    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);
    for (reg = 0; reg < nregs; reg++) {
        /* Load one element, then broadcast it over the destination. */
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop);
        if ((vd & 1) && vec_size == 16) {
            /*
             * We cannot write 16 bytes at once because the
             * destination is unaligned.
             */
            tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
                                 8, 8, tmp);
            tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1),
                             neon_full_reg_offset(vd), 8, 8);
        } else {
            tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
                                 vec_size, vec_size, tmp);
        }
        tcg_gen_addi_i32(addr, addr, 1 << size);
        vd += stride;

        /* Subsequent memory operations inherit alignment */
        mop &= ~MO_AMASK;
    }

    /* Writeback: one element of (1 << size) bytes per register. */
    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);

    return true;
}
629 
/*
 * Neon load/store single structure to one lane (VLD1-4/VST1-4 "single"):
 * transfer one element per structure register to/from lane a->reg_idx.
 * Returns false to UNDEF the insn, true once it is fully handled.
 */
static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
{
    int reg;
    int nregs = a->n + 1;
    int vd = a->vd;
    TCGv_i32 addr, tmp;
    MemOp mop;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    /* Catch the UNDEF cases. This is unavoidably a bit messy. */
    switch (nregs) {
    case 1:
        if (a->stride != 1) {
            return false;
        }
        if (((a->align & (1 << a->size)) != 0) ||
            (a->size == 2 && (a->align == 1 || a->align == 2))) {
            return false;
        }
        break;
    case 2:
        if (a->size == 2 && (a->align & 2) != 0) {
            return false;
        }
        break;
    case 3:
        if (a->align != 0) {
            return false;
        }
        break;
    case 4:
        if (a->size == 2 && a->align == 3) {
            return false;
        }
        break;
    default:
        g_assert_not_reached();
    }
    if ((vd + a->stride * (nregs - 1)) > 31) {
        /*
         * Attempts to write off the end of the register file are
         * UNPREDICTABLE; we choose to UNDEF because otherwise we would
         * access off the end of the array that holds the register data.
         */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Pick up SCTLR settings */
    mop = finalize_memop(s, a->size);

    if (a->align) {
        MemOp align_op;

        switch (nregs) {
        case 1:
            /* For VLD1, use natural alignment. */
            align_op = MO_ALIGN;
            break;
        case 2:
            /* For VLD2, use double alignment. */
            align_op = pow2_align(a->size + 1);
            break;
        case 4:
            if (a->size == MO_32) {
                /*
                 * For VLD4.32, align = 1 is double alignment, align = 2 is
                 * quad alignment; align = 3 is rejected above.
                 */
                align_op = pow2_align(a->size + a->align);
            } else {
                /* For VLD4.8 and VLD.16, we want quad alignment. */
                align_op = pow2_align(a->size + 2);
            }
            break;
        default:
            /* For VLD3, the alignment field is zero and rejected above. */
            g_assert_not_reached();
        }

        mop = (mop & ~MO_AMASK) | align_op;
    }

    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);

    for (reg = 0; reg < nregs; reg++) {
        if (a->l) {
            /* Load: memory -> lane reg_idx of register vd */
            gen_aa32_ld_internal_i32(s, tmp, addr, get_mem_index(s), mop);
            neon_store_element(vd, a->reg_idx, a->size, tmp);
        } else { /* Store */
            neon_load_element(tmp, vd, a->reg_idx, a->size);
            gen_aa32_st_internal_i32(s, tmp, addr, get_mem_index(s), mop);
        }
        vd += a->stride;
        tcg_gen_addi_i32(addr, addr, 1 << a->size);

        /* Subsequent memory operations inherit alignment */
        mop &= ~MO_AMASK;
    }

    /* Writeback: one element of (1 << size) bytes per register. */
    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);

    return true;
}
748 
/*
 * Common decode checks for "three registers of the same length" insns,
 * then expand via the supplied gvec expander @fn.
 * Returns false to UNDEF the insn, true once it is fully handled.
 */
static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
{
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rn_ofs = neon_full_reg_offset(a->vn);
    int rm_ofs = neon_full_reg_offset(a->vm);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    /* For the Q-reg form (q == 1) all register numbers must be even. */
    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
    return true;
}
777 
/*
 * DO_3SAME: define trans_<INSN>_3s as a straight do_3same expansion
 * using the given GVecGen3Fn expander FUNC (all element sizes allowed).
 */
#define DO_3SAME(INSN, FUNC)                                            \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        return do_3same(s, a, FUNC);                                    \
    }

DO_3SAME(VADD, tcg_gen_gvec_add)
DO_3SAME(VSUB, tcg_gen_gvec_sub)
DO_3SAME(VAND, tcg_gen_gvec_and)
DO_3SAME(VBIC, tcg_gen_gvec_andc)
DO_3SAME(VORR, tcg_gen_gvec_or)
DO_3SAME(VORN, tcg_gen_gvec_orc)
DO_3SAME(VEOR, tcg_gen_gvec_xor)
DO_3SAME(VSHL_S, gen_gvec_sshl)
DO_3SAME(VSHL_U, gen_gvec_ushl)
DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)
797 
/*
 * These insns are all gvec_bitsel but with the inputs in various orders.
 * O1 is the select mask, O2/O3 the two data inputs; the difference
 * between VBSL/VBIT/VBIF is only which of rd/rn/rm plays which role.
 */
#define DO_3SAME_BITSEL(INSN, O1, O2, O3)                               \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz);    \
    }                                                                   \
    DO_3SAME(INSN, gen_##INSN##_3s)

DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
810 DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
811 
/*
 * DO_3SAME_NO_SZ_3: as DO_3SAME, but size == 3 (64-bit elements) UNDEFs.
 */
#define DO_3SAME_NO_SZ_3(INSN, FUNC)                                    \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size == 3) {                                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, FUNC);                                    \
    }

DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)
DO_3SAME_NO_SZ_3(VPADD, gen_gvec_addp)
834 
/*
 * DO_3SAME_CMP: 3-same comparison insns, expanded via tcg_gen_gvec_cmp
 * with the given TCG condition (size == 3 UNDEFs, via DO_3SAME_NO_SZ_3).
 */
#define DO_3SAME_CMP(INSN, COND)                                        \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
    }                                                                   \
    DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)

DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
849 
/*
 * WRAP_OOL_FN: adapt an out-of-line gvec helper to the GVecGen3Fn
 * signature expected by do_3same (vece ignored, data fixed at 0).
 */
#define WRAP_OOL_FN(WRAPNAME, FUNC)                                        \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,  \
                         uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)  \
    {                                                                      \
        tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \
    }

WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b)
858 
859 static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
860 {
861     if (a->size != 0) {
862         return false;
863     }
864     return do_3same(s, a, gen_VMUL_p_3s);
865 }
866 
/*
 * DO_VQRDMLAH: VQRDMLAH/VQRDMLSH require the RDM extension and allow
 * only the 16- and 32-bit element sizes (size 1 or 2).
 */
#define DO_VQRDMLAH(INSN, FUNC)                                         \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_rdm, s)) {                            \
            return false;                                               \
        }                                                               \
        if (a->size != 1 && a->size != 2) {                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, FUNC);                                    \
    }

DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)
881 
/*
 * DO_SHA1: SHA-1 crypto 3-same insns; gated on the SHA1 extension and
 * expanded via an out-of-line helper (WRAP_OOL_FN).
 */
#define DO_SHA1(NAME, FUNC)                                             \
    WRAP_OOL_FN(gen_##NAME##_3s, FUNC)                                  \
    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_sha1, s)) {                           \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##NAME##_3s);                         \
    }

DO_SHA1(SHA1C, gen_helper_crypto_sha1c)
DO_SHA1(SHA1P, gen_helper_crypto_sha1p)
DO_SHA1(SHA1M, gen_helper_crypto_sha1m)
DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0)
896 
/*
 * DO_SHA2: SHA-256 crypto 3-same insns; gated on the SHA2 extension and
 * expanded via an out-of-line helper (WRAP_OOL_FN).
 */
#define DO_SHA2(NAME, FUNC)                                             \
    WRAP_OOL_FN(gen_##NAME##_3s, FUNC)                                  \
    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_sha2, s)) {                           \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##NAME##_3s);                         \
    }

DO_SHA2(SHA256H, gen_helper_crypto_sha256h)
DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2)
DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1)
910 
/*
 * DO_3SAME_64: 3-same insns implemented with a per-element i64 function
 * (GVecGen3.fni8).  DO_3SAME_64_ENV additionally wraps a helper that
 * takes tcg_env as its first argument (for saturation flag updates).
 */
#define DO_3SAME_64(INSN, FUNC)                                         \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 op = { .fni8 = FUNC };                    \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op);      \
    }                                                                   \
    DO_3SAME(INSN, gen_##INSN##_3s)

#define DO_3SAME_64_ENV(INSN, FUNC)                                     \
    static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)    \
    {                                                                   \
        FUNC(d, tcg_env, n, m);                                         \
    }                                                                   \
    DO_3SAME_64(INSN, gen_##INSN##_elt)

DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64)
DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64)
DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
934 
/*
 * 3-reg-same ops with one out-of-line helper per element size
 * (8/16/32 bits); size == 3 (64-bit elements) is rejected by the
 * trans function, so the empty ops[3] entry is never used.
 */
#define DO_3SAME_32(INSN, FUNC)                                         \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 ops[4] = {                                \
            { .fni4 = gen_helper_neon_##FUNC##8 },                      \
            { .fni4 = gen_helper_neon_##FUNC##16 },                     \
            { .fni4 = gen_helper_neon_##FUNC##32 },                     \
            { 0 },                                                      \
        };                                                              \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
    }                                                                   \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size > 2) {                                              \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_3s);                         \
    }
955 
956 /*
957  * Some helper functions need to be passed the tcg_env. In order
958  * to use those with the gvec APIs like tcg_gen_gvec_3() we need
959  * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
960  * and which call a NeonGenTwoOpEnvFn().
961  */
#define WRAP_ENV_FN(WRAPNAME, FUNC)                                     \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m)            \
    {                                                                   \
        FUNC(d, tcg_env, n, m);                                         \
    }

/*
 * As DO_3SAME_32, but the per-size helpers need tcg_env, so first
 * generate a WRAP_ENV_FN trampoline for each element size.
 */
#define DO_3SAME_32_ENV(INSN, FUNC)                                     \
    WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8);        \
    WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16);      \
    WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32);      \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 ops[4] = {                                \
            { .fni4 = gen_##INSN##_tramp8 },                            \
            { .fni4 = gen_##INSN##_tramp16 },                           \
            { .fni4 = gen_##INSN##_tramp32 },                           \
            { 0 },                                                      \
        };                                                              \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
    }                                                                   \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size > 2) {                                              \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_3s);                         \
    }

DO_3SAME_32(VHADD_S, hadd_s)
DO_3SAME_32(VHADD_U, hadd_u)
DO_3SAME_32(VHSUB_S, hsub_s)
DO_3SAME_32(VHSUB_U, hsub_u)
DO_3SAME_32(VRHADD_S, rhadd_s)
DO_3SAME_32(VRHADD_U, rhadd_u)
DO_3SAME_32(VRSHL_S, rshl_s)
DO_3SAME_32(VRSHL_U, rshl_u)

DO_3SAME_32_ENV(VQSHL_S, qshl_s)
DO_3SAME_32_ENV(VQSHL_U, qshl_u)
DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)
1005 
/*
 * Expand a Neon 3-reg-same pairwise op (VPMAX/VPMIN and friends).
 *
 * @fn combines two 32-bit inputs into one 32-bit result; for 8- and
 * 16-bit element sizes the helper is expected to do the sub-word
 * pairing itself.
 *
 * Returns false if the insn should UNDEF; true once code has been
 * generated (or once vfp_access_check() has dealt with a disabled
 * FP/Neon unit).
 */
static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
{
    /* Operations handled pairwise 32 bits at a time */
    TCGv_i32 tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (a->size == 3) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    assert(a->q == 0); /* enforced by decode patterns */

    /*
     * Note that we have to be careful not to clobber the source operands
     * in the "vm == vd" case by storing the result of the first pass too
     * early. Since Q is 0 there are always just two passes, so instead
     * of a complicated loop over each pass we just unroll.
     */
    tmp = tcg_temp_new_i32();
    tmp2 = tcg_temp_new_i32();
    tmp3 = tcg_temp_new_i32();

    read_neon_element32(tmp, a->vn, 0, MO_32);
    read_neon_element32(tmp2, a->vn, 1, MO_32);
    fn(tmp, tmp, tmp2);

    read_neon_element32(tmp3, a->vm, 0, MO_32);
    read_neon_element32(tmp2, a->vm, 1, MO_32);
    fn(tmp3, tmp3, tmp2);

    write_neon_element32(tmp, a->vd, 0, MO_32);
    write_neon_element32(tmp3, a->vd, 1, MO_32);

    return true;
}
1054 
/*
 * Build the trans function for a pairwise insn: the helper is picked
 * by element size, and size == 3 (64-bit elements) always UNDEFs.
 */
#define DO_3SAME_PAIR(INSN, func)                                       \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        static NeonGenTwoOpFn * const fns[] = {                         \
            gen_helper_neon_##func##8,                                  \
            gen_helper_neon_##func##16,                                 \
            gen_helper_neon_##func##32,                                 \
        };                                                              \
        if (a->size > 2) {                                              \
            return false;                                               \
        }                                                               \
        return do_3same_pair(s, a, fns[a->size]);                       \
    }

/* 32-bit pairwise ops end up the same as the elementwise versions.  */
#define gen_helper_neon_pmax_s32  tcg_gen_smax_i32
#define gen_helper_neon_pmax_u32  tcg_gen_umax_i32
#define gen_helper_neon_pmin_s32  tcg_gen_smin_i32
#define gen_helper_neon_pmin_u32  tcg_gen_umin_i32

DO_3SAME_PAIR(VPMAX_S, pmax_s)
DO_3SAME_PAIR(VPMIN_S, pmin_s)
DO_3SAME_PAIR(VPMAX_U, pmax_u)
DO_3SAME_PAIR(VPMIN_U, pmin_u)
1079 
/*
 * VQDMULH/VQRDMULH: only 16- and 32-bit element sizes exist, so the
 * two-entry ops[] table is indexed with (vece - 1); the trans
 * function rejects any other size.  The helpers need tcg_env (they
 * can saturate), hence the WRAP_ENV_FN trampolines.
 */
#define DO_3SAME_VQDMULH(INSN, FUNC)                                    \
    WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16);    \
    WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32);    \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 ops[2] = {                                \
            { .fni4 = gen_##INSN##_tramp16 },                           \
            { .fni4 = gen_##INSN##_tramp32 },                           \
        };                                                              \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \
    }                                                                   \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size != 1 && a->size != 2) {                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_3s);                         \
    }

DO_3SAME_VQDMULH(VQDMULH, qdmulh)
DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
1103 
/*
 * Wrap a gvec 2-source FP helper: the wrapper obtains an fp-status
 * pointer of the requested flavour and passes it through
 * tcg_gen_gvec_3_ptr() to FUNC.
 */
#define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC)                              \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
                         uint32_t rn_ofs, uint32_t rm_ofs,              \
                         uint32_t oprsz, uint32_t maxsz)                \
    {                                                                   \
        TCGv_ptr fpst = fpstatus_ptr(FPST);                             \
        tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst,                \
                           oprsz, maxsz, 0, FUNC);                      \
    }
1113 
/*
 * FP 3-reg-same ops: dispatch on element size to the fp32 or fp16
 * gvec expander; the half-precision form requires the fp16
 * arithmetic extension.
 */
#define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC)                                 \
    WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC)                 \
    WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC)             \
    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a)     \
    {                                                                   \
        if (a->size == MO_16) {                                         \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
                return false;                                           \
            }                                                           \
            return do_3same(s, a, gen_##INSN##_fp16_3s);                \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_fp32_3s);                    \
    }


DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h)
DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h)
DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h)
DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h)
DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h)
DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h)
DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h)
DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h)
DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h)
DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h)
DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h)
DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h)
DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)
DO_3S_FP_GVEC(VPADD, gen_helper_gvec_faddp_s, gen_helper_gvec_faddp_h)
DO_3S_FP_GVEC(VPMAX, gen_helper_gvec_fmaxp_s, gen_helper_gvec_fmaxp_h)
DO_3S_FP_GVEC(VPMIN, gen_helper_gvec_fminp_s, gen_helper_gvec_fminp_h)

/*
 * VMAXNM/VMINNM additionally require ARMv8, so their trans functions
 * are written out by hand below rather than via DO_3S_FP_GVEC.
 */
WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s)
WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h)
1154 
1155 static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
1156 {
1157     if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
1158         return false;
1159     }
1160 
1161     if (a->size == MO_16) {
1162         if (!dc_isar_feature(aa32_fp16_arith, s)) {
1163             return false;
1164         }
1165         return do_3same(s, a, gen_VMAXNM_fp16_3s);
1166     }
1167     return do_3same(s, a, gen_VMAXNM_fp32_3s);
1168 }
1169 
1170 static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
1171 {
1172     if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
1173         return false;
1174     }
1175 
1176     if (a->size == MO_16) {
1177         if (!dc_isar_feature(aa32_fp16_arith, s)) {
1178             return false;
1179         }
1180         return do_3same(s, a, gen_VMINNM_fp16_3s);
1181     }
1182     return do_3same(s, a, gen_VMINNM_fp32_3s);
1183 }
1184 
1185 static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
1186 {
1187     /* Handle a 2-reg-shift insn which can be vectorized. */
1188     int vec_size = a->q ? 16 : 8;
1189     int rd_ofs = neon_full_reg_offset(a->vd);
1190     int rm_ofs = neon_full_reg_offset(a->vm);
1191 
1192     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1193         return false;
1194     }
1195 
1196     /* UNDEF accesses to D16-D31 if they don't exist. */
1197     if (!dc_isar_feature(aa32_simd_r32, s) &&
1198         ((a->vd | a->vm) & 0x10)) {
1199         return false;
1200     }
1201 
1202     if ((a->vm | a->vd) & a->q) {
1203         return false;
1204     }
1205 
1206     if (!vfp_access_check(s)) {
1207         return true;
1208     }
1209 
1210     fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size);
1211     return true;
1212 }
1213 
/*
 * Trivial 2-reg-shift trans functions: all the work is in the gvec
 * expander FUNC.
 */
#define DO_2SH(INSN, FUNC)                                              \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_vector_2sh(s, a, FUNC);                               \
    }                                                                   \

DO_2SH(VSHL, tcg_gen_gvec_shli)
DO_2SH(VSLI, gen_gvec_sli)
DO_2SH(VSRI, gen_gvec_sri)
DO_2SH(VSRA_S, gen_gvec_ssra)
DO_2SH(VSRA_U, gen_gvec_usra)
DO_2SH(VRSHR_S, gen_gvec_srshr)
DO_2SH(VRSHR_U, gen_gvec_urshr)
DO_2SH(VRSRA_S, gen_gvec_srsra)
DO_2SH(VRSRA_U, gen_gvec_ursra)
1229 
1230 static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
1231 {
1232     /* Signed shift out of range results in all-sign-bits */
1233     a->shift = MIN(a->shift, (8 << a->size) - 1);
1234     return do_vector_2sh(s, a, tcg_gen_gvec_sari);
1235 }
1236 
/*
 * GVecGen2iFn-shaped helper which ignores its source register and
 * shift count and just zeroes Rd; used for shift counts which are
 * architecturally valid but always produce an all-zeroes result.
 */
static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0);
}
1242 
1243 static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
1244 {
1245     /* Shift out of range is architecturally valid and results in zero. */
1246     if (a->shift >= (8 << a->size)) {
1247         return do_vector_2sh(s, a, gen_zero_rd_2sh);
1248     } else {
1249         return do_vector_2sh(s, a, tcg_gen_gvec_shri);
1250     }
1251 }
1252 
1253 static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
1254                              NeonGenTwo64OpEnvFn *fn)
1255 {
1256     /*
1257      * 2-reg-and-shift operations, size == 3 case, where the
1258      * function needs to be passed tcg_env.
1259      */
1260     TCGv_i64 constimm;
1261     int pass;
1262 
1263     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1264         return false;
1265     }
1266 
1267     /* UNDEF accesses to D16-D31 if they don't exist. */
1268     if (!dc_isar_feature(aa32_simd_r32, s) &&
1269         ((a->vd | a->vm) & 0x10)) {
1270         return false;
1271     }
1272 
1273     if ((a->vm | a->vd) & a->q) {
1274         return false;
1275     }
1276 
1277     if (!vfp_access_check(s)) {
1278         return true;
1279     }
1280 
1281     /*
1282      * To avoid excessive duplication of ops we implement shift
1283      * by immediate using the variable shift operations.
1284      */
1285     constimm = tcg_constant_i64(dup_const(a->size, a->shift));
1286 
1287     for (pass = 0; pass < a->q + 1; pass++) {
1288         TCGv_i64 tmp = tcg_temp_new_i64();
1289 
1290         read_neon_element64(tmp, a->vm, pass, MO_64);
1291         fn(tmp, tcg_env, tmp, constimm);
1292         write_neon_element64(tmp, a->vd, pass, MO_64);
1293     }
1294     return true;
1295 }
1296 
1297 static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
1298                              NeonGenTwoOpEnvFn *fn)
1299 {
1300     /*
1301      * 2-reg-and-shift operations, size < 3 case, where the
1302      * helper needs to be passed tcg_env.
1303      */
1304     TCGv_i32 constimm, tmp;
1305     int pass;
1306 
1307     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1308         return false;
1309     }
1310 
1311     /* UNDEF accesses to D16-D31 if they don't exist. */
1312     if (!dc_isar_feature(aa32_simd_r32, s) &&
1313         ((a->vd | a->vm) & 0x10)) {
1314         return false;
1315     }
1316 
1317     if ((a->vm | a->vd) & a->q) {
1318         return false;
1319     }
1320 
1321     if (!vfp_access_check(s)) {
1322         return true;
1323     }
1324 
1325     /*
1326      * To avoid excessive duplication of ops we implement shift
1327      * by immediate using the variable shift operations.
1328      */
1329     constimm = tcg_constant_i32(dup_const(a->size, a->shift));
1330     tmp = tcg_temp_new_i32();
1331 
1332     for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
1333         read_neon_element32(tmp, a->vm, pass, MO_32);
1334         fn(tmp, tcg_env, tmp, constimm);
1335         write_neon_element32(tmp, a->vd, pass, MO_32);
1336     }
1337     return true;
1338 }
1339 
/*
 * Build 2-reg-shift trans functions for ops needing tcg_env: the
 * _64_2sh variant handles size == 3, the _2sh variant sizes 0..2
 * (with the helper chosen by element size).
 */
#define DO_2SHIFT_ENV(INSN, FUNC)                                       \
    static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \
    {                                                                   \
        return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64);      \
    }                                                                   \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        static NeonGenTwoOpEnvFn * const fns[] = {                      \
            gen_helper_neon_##FUNC##8,                                  \
            gen_helper_neon_##FUNC##16,                                 \
            gen_helper_neon_##FUNC##32,                                 \
        };                                                              \
        assert(a->size < ARRAY_SIZE(fns));                              \
        return do_2shift_env_32(s, a, fns[a->size]);                    \
    }

DO_2SHIFT_ENV(VQSHLU, qshlu_s)
DO_2SHIFT_ENV(VQSHL_U, qshl_u)
DO_2SHIFT_ENV(VQSHL_S, qshl_s)
1359 
/*
 * 2-reg-and-shift narrowing operations, size == 3 case: shift each
 * 64-bit half of Qm with shiftfn (a variable left-shift helper given
 * the negated count), then narrow it to 32 bits with narrowfn to
 * build the two halves of Dd.
 */
static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
                                NeonGenTwo64OpFn *shiftfn,
                                NeonGenNarrowEnvFn *narrowfn)
{
    /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
    TCGv_i64 constimm, rm1, rm2;
    TCGv_i32 rd;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* The source must be a Q register, i.e. an even register number. */
    if (a->vm & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is always a right shift, and the shiftfn is always a
     * left-shift helper, which thus needs the negated shift count.
     */
    constimm = tcg_constant_i64(-a->shift);
    rm1 = tcg_temp_new_i64();
    rm2 = tcg_temp_new_i64();
    rd = tcg_temp_new_i32();

    /* Load both inputs first to avoid potential overwrite if rm == rd */
    read_neon_element64(rm1, a->vm, 0, MO_64);
    read_neon_element64(rm2, a->vm, 1, MO_64);

    shiftfn(rm1, rm1, constimm);
    narrowfn(rd, tcg_env, rm1);
    write_neon_element32(rd, a->vd, 0, MO_32);

    shiftfn(rm2, rm2, constimm);
    narrowfn(rd, tcg_env, rm2);
    write_neon_element32(rd, a->vd, 1, MO_32);

    return true;
}
1409 
/*
 * 2-reg-and-shift narrowing operations, size < 3 case: shift all
 * four 32-bit chunks of Qm with shiftfn (a left-shift helper given
 * the negated count replicated into each lane), then concatenate
 * pairs into 64 bits and narrow each pair with narrowfn to produce
 * the two 32-bit halves of Dd.
 */
static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
                                NeonGenTwoOpFn *shiftfn,
                                NeonGenNarrowEnvFn *narrowfn)
{
    /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
    TCGv_i32 constimm, rm1, rm2, rm3, rm4;
    TCGv_i64 rtmp;
    uint32_t imm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* The source must be a Q register, i.e. an even register number. */
    if (a->vm & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is always a right shift, and the shiftfn is always a
     * left-shift helper, which thus needs the negated shift count
     * duplicated into each lane of the immediate value.
     */
    if (a->size == 1) {
        imm = (uint16_t)(-a->shift);
        imm |= imm << 16;
    } else {
        /* size == 2 */
        imm = -a->shift;
    }
    constimm = tcg_constant_i32(imm);

    /* Load all inputs first to avoid potential overwrite */
    rm1 = tcg_temp_new_i32();
    rm2 = tcg_temp_new_i32();
    rm3 = tcg_temp_new_i32();
    rm4 = tcg_temp_new_i32();
    read_neon_element32(rm1, a->vm, 0, MO_32);
    read_neon_element32(rm2, a->vm, 1, MO_32);
    read_neon_element32(rm3, a->vm, 2, MO_32);
    read_neon_element32(rm4, a->vm, 3, MO_32);
    rtmp = tcg_temp_new_i64();

    shiftfn(rm1, rm1, constimm);
    shiftfn(rm2, rm2, constimm);

    tcg_gen_concat_i32_i64(rtmp, rm1, rm2);

    narrowfn(rm1, tcg_env, rtmp);
    write_neon_element32(rm1, a->vd, 0, MO_32);

    shiftfn(rm3, rm3, constimm);
    shiftfn(rm4, rm4, constimm);

    tcg_gen_concat_i32_i64(rtmp, rm3, rm4);

    narrowfn(rm3, tcg_env, rtmp);
    write_neon_element32(rm3, a->vd, 1, MO_32);
    return true;
}
1479 
/* Build a narrowing-shift trans function from a shift/narrow helper pair. */
#define DO_2SN_64(INSN, FUNC, NARROWFUNC)                               \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC);             \
    }
#define DO_2SN_32(INSN, FUNC, NARROWFUNC)                               \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC);             \
    }
1490 
/*
 * Plain (non-saturating) narrows never touch CPU state; these
 * wrappers discard the unused env argument so the helpers match the
 * NeonGenNarrowEnvFn prototype expected by do_2shift_narrow_*().
 */
static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    tcg_gen_extrl_i64_i32(dest, src);
}

static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    gen_helper_neon_narrow_u16(dest, src);
}

static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    gen_helper_neon_narrow_u8(dest, src);
}
1505 
/*
 * The narrowing right shifts (plain, rounding and/or saturating) are
 * all combinations of a variable left-shift helper (invoked with a
 * negated count) and a narrowing helper.
 */
DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)

DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)

DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)

DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)
DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32)
DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16)
DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8)

DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32)
DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16)
DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8)

DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32)
DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16)
DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)

DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)
1536 
/*
 * VSHLL (and VMOVL, which is VSHLL with shift == 0): widen each
 * 32-bit half of Dm with @widenfn and shift the widened value left
 * into the corresponding 64-bit half of Qd.
 * @u is true for the unsigned forms.
 */
static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
                         NeonGenWidenFn *widenfn, bool u)
{
    TCGv_i64 tmp;
    TCGv_i32 rm0, rm1;
    uint64_t widen_mask = 0;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* The destination is a Q register, i.e. an even register number. */
    if (a->vd & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is a widen-and-shift operation. The shift is always less
     * than the width of the source type, so after widening the input
     * vector we can simply shift the whole 64-bit widened register,
     * and then clear the potential overflow bits resulting from left
     * bits of the narrow input appearing as right bits of the left
     * neighbour narrow input. Calculate a mask of bits to clear.
     */
    if ((a->shift != 0) && (a->size < 2 || u)) {
        int esize = 8 << a->size;
        widen_mask = MAKE_64BIT_MASK(0, esize);
        widen_mask >>= esize - a->shift;
        widen_mask = dup_const(a->size + 1, widen_mask);
    }

    rm0 = tcg_temp_new_i32();
    rm1 = tcg_temp_new_i32();
    read_neon_element32(rm0, a->vm, 0, MO_32);
    read_neon_element32(rm1, a->vm, 1, MO_32);
    tmp = tcg_temp_new_i64();

    /* Low half first, so that vd == vm works even though vd is wider. */
    widenfn(tmp, rm0);
    if (a->shift != 0) {
        tcg_gen_shli_i64(tmp, tmp, a->shift);
        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
    }
    write_neon_element64(tmp, a->vd, 0, MO_64);

    widenfn(tmp, rm1);
    if (a->shift != 0) {
        tcg_gen_shli_i64(tmp, tmp, a->shift);
        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
    }
    write_neon_element64(tmp, a->vd, 1, MO_64);
    return true;
}
1598 
static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
{
    /*
     * Signed widen helper, selected by source element size.
     * NOTE(review): the table has no size == 3 entry, so this relies
     * on the decode never producing a->size > 2 for VSHLL.
     */
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
    };
    return do_vshll_2sh(s, a, widenfn[a->size], false);
}
1608 
static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
{
    /*
     * Unsigned widen helper, selected by source element size.
     * NOTE(review): the table has no size == 3 entry, so this relies
     * on the decode never producing a->size > 2 for VSHLL.
     */
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
    };
    return do_vshll_2sh(s, a, widenfn[a->size], true);
}
1618 
1619 static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
1620                       gen_helper_gvec_2_ptr *fn)
1621 {
1622     /* FP operations in 2-reg-and-shift group */
1623     int vec_size = a->q ? 16 : 8;
1624     int rd_ofs = neon_full_reg_offset(a->vd);
1625     int rm_ofs = neon_full_reg_offset(a->vm);
1626     TCGv_ptr fpst;
1627 
1628     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1629         return false;
1630     }
1631 
1632     if (a->size == MO_16) {
1633         if (!dc_isar_feature(aa32_fp16_arith, s)) {
1634             return false;
1635         }
1636     }
1637 
1638     /* UNDEF accesses to D16-D31 if they don't exist. */
1639     if (!dc_isar_feature(aa32_simd_r32, s) &&
1640         ((a->vd | a->vm) & 0x10)) {
1641         return false;
1642     }
1643 
1644     if ((a->vm | a->vd) & a->q) {
1645         return false;
1646     }
1647 
1648     if (!vfp_access_check(s)) {
1649         return true;
1650     }
1651 
1652     fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
1653     tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn);
1654     return true;
1655 }
1656 
/*
 * Trivial FP 2-reg-shift trans functions: fixed-point <-> FP
 * conversions, all the work is in the gvec helper FUNC.
 */
#define DO_FP_2SH(INSN, FUNC)                                           \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_fp_2sh(s, a, FUNC);                                   \
    }

DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)

DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)
1672 
1673 static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
1674                         GVecGen2iFn *fn)
1675 {
1676     uint64_t imm;
1677     int reg_ofs, vec_size;
1678 
1679     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1680         return false;
1681     }
1682 
1683     /* UNDEF accesses to D16-D31 if they don't exist. */
1684     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
1685         return false;
1686     }
1687 
1688     if (a->vd & a->q) {
1689         return false;
1690     }
1691 
1692     if (!vfp_access_check(s)) {
1693         return true;
1694     }
1695 
1696     reg_ofs = neon_full_reg_offset(a->vd);
1697     vec_size = a->q ? 16 : 8;
1698     imm = asimd_imm_const(a->imm, a->cmode, a->op);
1699 
1700     fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size);
1701     return true;
1702 }
1703 
/*
 * GVecGen2iFn for VMOV (immediate): replicate the 64-bit immediate c
 * across the destination. The vece, aofs and maxsz-vs-oprsz arguments
 * required by the GVecGen2iFn signature are not needed here: the dup
 * is always done at 64-bit granularity and reads no source register.
 */
static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
}
1709 
1710 static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
1711 {
1712     /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
1713     GVecGen2iFn *fn;
1714 
1715     if ((a->cmode & 1) && a->cmode < 12) {
1716         /* for op=1, the imm will be inverted, so BIC becomes AND. */
1717         fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
1718     } else {
1719         /* There is one unallocated cmode/op combination in this space */
1720         if (a->cmode == 15 && a->op == 1) {
1721             return false;
1722         }
1723         fn = gen_VMOV_1r;
1724     }
1725     return do_1reg_imm(s, a, fn);
1726 }
1727 
static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
                           NeonGenWidenFn *widenfn,
                           NeonGenTwo64OpFn *opfn,
                           int src1_mop, int src2_mop)
{
    /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
    /*
     * Each source is either read directly as a 64-bit element (when its
     * src*_mop argument is a valid MemOp, i.e. >= 0) or read as a 32-bit
     * element and widened with widenfn (when src*_mop is -1).  The 64-bit
     * op opfn then combines the two widened values for each pass.
     */
    TCGv_i64 rn0_64, rn1_64, rm_64;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    /* Q-sized destination (and source 1, for the W forms) must be even. */
    if ((a->vd & 1) || (src1_mop == MO_UQ && (a->vn & 1))) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rn0_64 = tcg_temp_new_i64();
    rn1_64 = tcg_temp_new_i64();
    rm_64 = tcg_temp_new_i64();

    /* First pass: element 0 of each input */
    if (src1_mop >= 0) {
        read_neon_element64(rn0_64, a->vn, 0, src1_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vn, 0, MO_32);
        widenfn(rn0_64, tmp);
    }
    if (src2_mop >= 0) {
        read_neon_element64(rm_64, a->vm, 0, src2_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vm, 0, MO_32);
        widenfn(rm_64, tmp);
    }

    opfn(rn0_64, rn0_64, rm_64);

    /*
     * Load second pass inputs before storing the first pass result, to
     * avoid incorrect results if a narrow input overlaps with the result.
     */
    if (src1_mop >= 0) {
        read_neon_element64(rn1_64, a->vn, 1, src1_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vn, 1, MO_32);
        widenfn(rn1_64, tmp);
    }
    if (src2_mop >= 0) {
        read_neon_element64(rm_64, a->vm, 1, src2_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vm, 1, MO_32);
        widenfn(rm_64, tmp);
    }

    write_neon_element64(rn0_64, a->vd, 0, MO_64);

    opfn(rn1_64, rn1_64, rm_64);
    write_neon_element64(rn1_64, a->vd, 1, MO_64);

    return true;
}
1806 
/*
 * Expand a trans function for the prewidening 3-reg-different-length
 * insns.  S is the signedness letter for the widen helpers (s/u), OP is
 * the 64-bit combining op (add/sub), SRC1WIDE selects the W forms (first
 * source already double-width), and SIGN is MO_SIGN for the signed
 * variants.  For size == MO_32 the narrow source can be read directly
 * as a (sign-)extended 64-bit load; otherwise (-1) a widen helper is used.
 */
#define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN)                        \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
    {                                                                   \
        static NeonGenWidenFn * const widenfn[] = {                     \
            gen_helper_neon_widen_##S##8,                               \
            gen_helper_neon_widen_##S##16,                              \
            NULL, NULL,                                                 \
        };                                                              \
        static NeonGenTwo64OpFn * const addfn[] = {                     \
            gen_helper_neon_##OP##l_u16,                                \
            gen_helper_neon_##OP##l_u32,                                \
            tcg_gen_##OP##_i64,                                         \
            NULL,                                                       \
        };                                                              \
        int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1;          \
        return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size],   \
                              SRC1WIDE ? MO_UQ : narrow_mop,             \
                              narrow_mop);                              \
    }

DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
DO_PREWIDEN(VADDL_U, u, add, false, 0)
DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
DO_PREWIDEN(VADDW_U, u, add, true, 0)
DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
DO_PREWIDEN(VSUBW_U, u, sub, true, 0)
1835 
static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
                         NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
{
    /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
    /*
     * Both sources are Q-sized; opfn combines a 64-bit element from each,
     * narrowfn extracts the (possibly rounded) high half to 32 bits, and
     * the two narrowed results form the D-sized destination.
     */
    TCGv_i64 rn_64, rm_64;
    TCGv_i32 rd0, rd1;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn || !narrowfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    /* The Q-sized sources must be even Dreg pairs. */
    if ((a->vn | a->vm) & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rn_64 = tcg_temp_new_i64();
    rm_64 = tcg_temp_new_i64();
    rd0 = tcg_temp_new_i32();
    rd1 = tcg_temp_new_i32();

    read_neon_element64(rn_64, a->vn, 0, MO_64);
    read_neon_element64(rm_64, a->vm, 0, MO_64);

    opfn(rn_64, rn_64, rm_64);

    narrowfn(rd0, rn_64);

    read_neon_element64(rn_64, a->vn, 1, MO_64);
    read_neon_element64(rm_64, a->vm, 1, MO_64);

    opfn(rn_64, rn_64, rm_64);

    narrowfn(rd1, rn_64);

    /* Write the destination only after all source reads, in case of overlap */
    write_neon_element32(rd0, a->vd, 0, MO_32);
    write_neon_element32(rd1, a->vd, 1, MO_32);

    return true;
}
1890 
/*
 * Expand a trans function for the narrowing 3-reg-different-length insns.
 * OP is the wide combining op (add/sub), NARROWTYPE selects the plain or
 * rounding narrow helpers for the 8/16-bit element sizes, and EXTOP is
 * the function used for the 32-bit element size (where the "narrow high
 * half" operation is open-coded rather than a helper).
 */
#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP)                       \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
    {                                                                   \
        static NeonGenTwo64OpFn * const addfn[] = {                     \
            gen_helper_neon_##OP##l_u16,                                \
            gen_helper_neon_##OP##l_u32,                                \
            tcg_gen_##OP##_i64,                                         \
            NULL,                                                       \
        };                                                              \
        static NeonGenNarrowFn * const narrowfn[] = {                   \
            gen_helper_neon_##NARROWTYPE##_high_u8,                     \
            gen_helper_neon_##NARROWTYPE##_high_u16,                    \
            EXTOP,                                                      \
            NULL,                                                       \
        };                                                              \
        return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]);   \
    }
1908 
/*
 * Rounding narrow-high for 32-bit elements: add the rounding constant
 * (half of the weight of the high part, i.e. bit 31) before taking the
 * high 32 bits.  Note this clobbers the input rn.
 */
static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
{
    tcg_gen_addi_i64(rn, rn, 1u << 31);
    tcg_gen_extrh_i64_i32(rd, rn);
}

DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
1919 
static bool do_long_3d(DisasContext *s, arg_3diff *a,
                       NeonGenTwoOpWidenFn *opfn,
                       NeonGenTwo64OpFn *accfn)
{
    /*
     * 3-regs different lengths, long operations.
     * These perform an operation on two inputs that returns a double-width
     * result, and then possibly perform an accumulation operation of
     * that result into the double-width destination.
     * accfn may be NULL, in which case the widened result is stored
     * directly without accumulation.
     */
    TCGv_i64 rd0, rd1, tmp;
    TCGv_i32 rn, rm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    /* The Q-sized destination must be an even Dreg pair. */
    if (a->vd & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rd0 = tcg_temp_new_i64();
    rd1 = tcg_temp_new_i64();

    rn = tcg_temp_new_i32();
    rm = tcg_temp_new_i32();
    read_neon_element32(rn, a->vn, 0, MO_32);
    read_neon_element32(rm, a->vm, 0, MO_32);
    opfn(rd0, rn, rm);

    read_neon_element32(rn, a->vn, 1, MO_32);
    read_neon_element32(rm, a->vm, 1, MO_32);
    opfn(rd1, rn, rm);

    /* Don't store results until after all loads: they might overlap */
    if (accfn) {
        tmp = tcg_temp_new_i64();
        read_neon_element64(tmp, a->vd, 0, MO_64);
        accfn(rd0, tmp, rd0);
        read_neon_element64(tmp, a->vd, 1, MO_64);
        accfn(rd1, tmp, rd1);
    }

    write_neon_element64(rd0, a->vd, 0, MO_64);
    write_neon_element64(rd1, a->vd, 1, MO_64);

    return true;
}
1983 
1984 static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
1985 {
1986     static NeonGenTwoOpWidenFn * const opfn[] = {
1987         gen_helper_neon_abdl_s16,
1988         gen_helper_neon_abdl_s32,
1989         gen_helper_neon_abdl_s64,
1990         NULL,
1991     };
1992 
1993     return do_long_3d(s, a, opfn[a->size], NULL);
1994 }
1995 
1996 static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
1997 {
1998     static NeonGenTwoOpWidenFn * const opfn[] = {
1999         gen_helper_neon_abdl_u16,
2000         gen_helper_neon_abdl_u32,
2001         gen_helper_neon_abdl_u64,
2002         NULL,
2003     };
2004 
2005     return do_long_3d(s, a, opfn[a->size], NULL);
2006 }
2007 
2008 static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
2009 {
2010     static NeonGenTwoOpWidenFn * const opfn[] = {
2011         gen_helper_neon_abdl_s16,
2012         gen_helper_neon_abdl_s32,
2013         gen_helper_neon_abdl_s64,
2014         NULL,
2015     };
2016     static NeonGenTwo64OpFn * const addfn[] = {
2017         gen_helper_neon_addl_u16,
2018         gen_helper_neon_addl_u32,
2019         tcg_gen_add_i64,
2020         NULL,
2021     };
2022 
2023     return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
2024 }
2025 
2026 static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
2027 {
2028     static NeonGenTwoOpWidenFn * const opfn[] = {
2029         gen_helper_neon_abdl_u16,
2030         gen_helper_neon_abdl_u32,
2031         gen_helper_neon_abdl_u64,
2032         NULL,
2033     };
2034     static NeonGenTwo64OpFn * const addfn[] = {
2035         gen_helper_neon_addl_u16,
2036         gen_helper_neon_addl_u32,
2037         tcg_gen_add_i64,
2038         NULL,
2039     };
2040 
2041     return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
2042 }
2043 
2044 static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2045 {
2046     TCGv_i32 lo = tcg_temp_new_i32();
2047     TCGv_i32 hi = tcg_temp_new_i32();
2048 
2049     tcg_gen_muls2_i32(lo, hi, rn, rm);
2050     tcg_gen_concat_i32_i64(rd, lo, hi);
2051 }
2052 
2053 static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2054 {
2055     TCGv_i32 lo = tcg_temp_new_i32();
2056     TCGv_i32 hi = tcg_temp_new_i32();
2057 
2058     tcg_gen_mulu2_i32(lo, hi, rn, rm);
2059     tcg_gen_concat_i32_i64(rd, lo, hi);
2060 }
2061 
2062 static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
2063 {
2064     static NeonGenTwoOpWidenFn * const opfn[] = {
2065         gen_helper_neon_mull_s8,
2066         gen_helper_neon_mull_s16,
2067         gen_mull_s32,
2068         NULL,
2069     };
2070 
2071     return do_long_3d(s, a, opfn[a->size], NULL);
2072 }
2073 
2074 static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
2075 {
2076     static NeonGenTwoOpWidenFn * const opfn[] = {
2077         gen_helper_neon_mull_u8,
2078         gen_helper_neon_mull_u16,
2079         gen_mull_u32,
2080         NULL,
2081     };
2082 
2083     return do_long_3d(s, a, opfn[a->size], NULL);
2084 }
2085 
/*
 * Expand a trans function for the widening multiply-accumulate insns
 * VMLAL/VMLSL: MULL selects the signed/unsigned widening multiply and
 * ACC the 64-bit accumulate op (add for MLA, sub for MLS).
 */
#define DO_VMLAL(INSN,MULL,ACC)                                         \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
    {                                                                   \
        static NeonGenTwoOpWidenFn * const opfn[] = {                   \
            gen_helper_neon_##MULL##8,                                  \
            gen_helper_neon_##MULL##16,                                 \
            gen_##MULL##32,                                             \
            NULL,                                                       \
        };                                                              \
        static NeonGenTwo64OpFn * const accfn[] = {                     \
            gen_helper_neon_##ACC##l_u16,                               \
            gen_helper_neon_##ACC##l_u32,                               \
            tcg_gen_##ACC##_i64,                                        \
            NULL,                                                       \
        };                                                              \
        return do_long_3d(s, a, opfn[a->size], accfn[a->size]);         \
    }

DO_VMLAL(VMLAL_S,mull_s,add)
DO_VMLAL(VMLAL_U,mull_u,add)
DO_VMLAL(VMLSL_S,mull_s,sub)
DO_VMLAL(VMLSL_U,mull_u,sub)
2108 
/*
 * VQDMULL, 16-bit elements: widening multiply, then the doubling is
 * done as a saturating add of the result to itself.
 */
static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    gen_helper_neon_mull_s16(rd, rn, rm);
    gen_helper_neon_addl_saturate_s32(rd, tcg_env, rd, rd);
}
2114 
/*
 * VQDMULL, 32-bit elements: widening multiply, then the doubling is
 * done as a saturating add of the result to itself.
 */
static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    gen_mull_s32(rd, rn, rm);
    gen_helper_neon_addl_saturate_s64(rd, tcg_env, rd, rd);
}
2120 
2121 static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
2122 {
2123     static NeonGenTwoOpWidenFn * const opfn[] = {
2124         NULL,
2125         gen_VQDMULL_16,
2126         gen_VQDMULL_32,
2127         NULL,
2128     };
2129 
2130     return do_long_3d(s, a, opfn[a->size], NULL);
2131 }
2132 
/* VQDMLAL accumulate step, 16-bit elements: saturating 32-bit lane add. */
static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_addl_saturate_s32(rd, tcg_env, rn, rm);
}

/* VQDMLAL accumulate step, 32-bit elements: saturating 64-bit add. */
static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_addl_saturate_s64(rd, tcg_env, rn, rm);
}
2142 
2143 static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
2144 {
2145     static NeonGenTwoOpWidenFn * const opfn[] = {
2146         NULL,
2147         gen_VQDMULL_16,
2148         gen_VQDMULL_32,
2149         NULL,
2150     };
2151     static NeonGenTwo64OpFn * const accfn[] = {
2152         NULL,
2153         gen_VQDMLAL_acc_16,
2154         gen_VQDMLAL_acc_32,
2155         NULL,
2156     };
2157 
2158     return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
2159 }
2160 
/*
 * VQDMLSL accumulate step, 16-bit elements: negate the product lanes,
 * then do a saturating lane add.  Note this clobbers rm.
 */
static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_negl_u32(rm, rm);
    gen_helper_neon_addl_saturate_s32(rd, tcg_env, rn, rm);
}

/*
 * VQDMLSL accumulate step, 32-bit elements: negate the product, then
 * do a saturating 64-bit add.  Note this clobbers rm.
 */
static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    tcg_gen_neg_i64(rm, rm);
    gen_helper_neon_addl_saturate_s64(rd, tcg_env, rn, rm);
}
2172 
2173 static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
2174 {
2175     static NeonGenTwoOpWidenFn * const opfn[] = {
2176         NULL,
2177         gen_VQDMULL_16,
2178         gen_VQDMULL_32,
2179         NULL,
2180     };
2181     static NeonGenTwo64OpFn * const accfn[] = {
2182         NULL,
2183         gen_VQDMLSL_acc_16,
2184         gen_VQDMLSL_acc_32,
2185         NULL,
2186     };
2187 
2188     return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
2189 }
2190 
/*
 * VMULL (polynomial): size 0 is the 8x8->16 polynomial multiply; size 2
 * is the 64x64->128 form, which additionally requires the PMULL
 * (crypto) extension.  Expanded out-of-line over the full 128-bit
 * vector in both cases.
 */
static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
{
    gen_helper_gvec_3 *fn_gvec;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    /* The Q-sized destination must be an even Dreg pair. */
    if (a->vd & 1) {
        return false;
    }

    switch (a->size) {
    case 0:
        fn_gvec = gen_helper_neon_pmull_h;
        break;
    case 2:
        if (!dc_isar_feature(aa32_pmull, s)) {
            return false;
        }
        fn_gvec = gen_helper_gvec_pmull_q;
        break;
    default:
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd),
                       neon_full_reg_offset(a->vn),
                       neon_full_reg_offset(a->vm),
                       16, 16, 0, fn_gvec);
    return true;
}
2233 
/* Duplicate the low 16 bits of var into both halves, in place. */
static void gen_neon_dup_low16(TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_ext16u_i32(var, var);
    tcg_gen_shli_i32(tmp, var, 16);
    tcg_gen_or_i32(var, var, tmp);
}

/* Duplicate the high 16 bits of var into both halves, in place. */
static void gen_neon_dup_high16(TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_andi_i32(var, var, 0xffff0000);
    tcg_gen_shri_i32(tmp, var, 16);
    tcg_gen_or_i32(var, var, tmp);
}
2249 
/*
 * Return a new i32 holding the scalar operand, duplicated across the
 * 32-bit value for 16-bit element size.  reg here is the raw M:Vm
 * scalar encoding: for MO_16 the Dreg is bits [2:0], bit 3 picks the
 * high or low 16 bits of the 32-bit element; for MO_32 the Dreg is
 * bits [3:0].  Bit 4 selects the 32-bit element within the Dreg.
 */
static inline TCGv_i32 neon_get_scalar(int size, int reg)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    if (size == MO_16) {
        read_neon_element32(tmp, reg & 7, reg >> 4, MO_32);
        if (reg & 8) {
            gen_neon_dup_high16(tmp);
        } else {
            gen_neon_dup_low16(tmp);
        }
    } else {
        read_neon_element32(tmp, reg & 15, reg >> 4, MO_32);
    }
    return tmp;
}
2265 
static bool do_2scalar(DisasContext *s, arg_2scalar *a,
                       NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
{
    /*
     * Two registers and a scalar: perform an operation between
     * the input elements and the scalar, and then possibly
     * perform an accumulation operation of that result into the
     * destination.
     * accfn may be NULL, in which case the result is stored without
     * accumulation.
     */
    TCGv_i32 scalar, tmp;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    /* Q-sized operands must be even Dreg pairs. */
    if (a->q && ((a->vd | a->vn) & 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    scalar = neon_get_scalar(a->size, a->vm);
    tmp = tcg_temp_new_i32();

    /* One 32-bit pass per element group: 2 for Dreg, 4 for Qreg */
    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        read_neon_element32(tmp, a->vn, pass, MO_32);
        opfn(tmp, tmp, scalar);
        if (accfn) {
            TCGv_i32 rd = tcg_temp_new_i32();
            read_neon_element32(rd, a->vd, pass, MO_32);
            accfn(tmp, rd, tmp);
        }
        write_neon_element32(tmp, a->vd, pass, MO_32);
    }
    return true;
}
2316 
2317 static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
2318 {
2319     static NeonGenTwoOpFn * const opfn[] = {
2320         NULL,
2321         gen_helper_neon_mul_u16,
2322         tcg_gen_mul_i32,
2323         NULL,
2324     };
2325 
2326     return do_2scalar(s, a, opfn[a->size], NULL);
2327 }
2328 
2329 static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
2330 {
2331     static NeonGenTwoOpFn * const opfn[] = {
2332         NULL,
2333         gen_helper_neon_mul_u16,
2334         tcg_gen_mul_i32,
2335         NULL,
2336     };
2337     static NeonGenTwoOpFn * const accfn[] = {
2338         NULL,
2339         gen_helper_neon_add_u16,
2340         tcg_gen_add_i32,
2341         NULL,
2342     };
2343 
2344     return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
2345 }
2346 
2347 static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
2348 {
2349     static NeonGenTwoOpFn * const opfn[] = {
2350         NULL,
2351         gen_helper_neon_mul_u16,
2352         tcg_gen_mul_i32,
2353         NULL,
2354     };
2355     static NeonGenTwoOpFn * const accfn[] = {
2356         NULL,
2357         gen_helper_neon_sub_u16,
2358         tcg_gen_sub_i32,
2359         NULL,
2360     };
2361 
2362     return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
2363 }
2364 
static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
                              gen_helper_gvec_3_ptr *fn)
{
    /* Two registers and a scalar, using gvec */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rn_ofs = neon_full_reg_offset(a->vn);
    int rm_ofs;
    int idx;
    TCGv_ptr fpstatus;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!fn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    /* Q-sized operands must be even Dreg pairs. */
    if (a->q && ((a->vd | a->vn) & 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * a->vm is M:Vm, which encodes both register and index.
     * Note this rewrites a->vm in place to hold just the register part.
     */
    idx = extract32(a->vm, a->size + 2, 2);
    a->vm = extract32(a->vm, 0, a->size + 2);
    rm_ofs = neon_full_reg_offset(a->vm);

    /* size == 1 is the half-precision form */
    fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
                       vec_size, vec_size, idx, fn);
    return true;
}
2409 
/*
 * Expand a trans function for the floating-point two-reg-and-scalar
 * insns; the half-precision variants additionally require FEAT_FP16.
 */
#define DO_VMUL_F_2sc(NAME, FUNC)                                       \
    static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a)   \
    {                                                                   \
        static gen_helper_gvec_3_ptr * const opfn[] = {                 \
            NULL,                                                       \
            gen_helper_##FUNC##_h,                                      \
            gen_helper_##FUNC##_s,                                      \
            NULL,                                                       \
        };                                                              \
        if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \
            return false;                                               \
        }                                                               \
        return do_2scalar_fp_vec(s, a, opfn[a->size]);                  \
    }

DO_VMUL_F_2sc(VMUL, gvec_fmul_idx)
DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx)
DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx)
2428 
/*
 * Wrap the saturating qdmulh/qrdmulh helpers, which take tcg_env, so
 * they can be used where a plain two-operand fn is expected
 * (WRAP_ENV_FN is defined elsewhere -- presumably it supplies tcg_env;
 * NOTE(review): confirm against its definition).
 */
WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)
2433 
2434 static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
2435 {
2436     static NeonGenTwoOpFn * const opfn[] = {
2437         NULL,
2438         gen_VQDMULH_16,
2439         gen_VQDMULH_32,
2440         NULL,
2441     };
2442 
2443     return do_2scalar(s, a, opfn[a->size], NULL);
2444 }
2445 
2446 static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
2447 {
2448     static NeonGenTwoOpFn * const opfn[] = {
2449         NULL,
2450         gen_VQRDMULH_16,
2451         gen_VQRDMULH_32,
2452         NULL,
2453     };
2454 
2455     return do_2scalar(s, a, opfn[a->size], NULL);
2456 }
2457 
static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
                            NeonGenThreeOpEnvFn *opfn)
{
    /*
     * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
     * performs a kind of fused op-then-accumulate using a helper
     * function that takes all of rd, rn and the scalar at once.
     * These insns require the RDM (FEAT_RDM) extension.
     */
    TCGv_i32 scalar, rn, rd;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    if (!dc_isar_feature(aa32_rdm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    /* Q-sized operands must be even Dreg pairs. */
    if (a->q && ((a->vd | a->vn) & 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    scalar = neon_get_scalar(a->size, a->vm);
    rn = tcg_temp_new_i32();
    rd = tcg_temp_new_i32();

    /* One 32-bit pass per element group: 2 for Dreg, 4 for Qreg */
    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        read_neon_element32(rn, a->vn, pass, MO_32);
        read_neon_element32(rd, a->vd, pass, MO_32);
        opfn(rd, tcg_env, rn, scalar, rd);
        write_neon_element32(rd, a->vd, pass, MO_32);
    }
    return true;
}
2508 
2509 static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
2510 {
2511     static NeonGenThreeOpEnvFn *opfn[] = {
2512         NULL,
2513         gen_helper_neon_qrdmlah_s16,
2514         gen_helper_neon_qrdmlah_s32,
2515         NULL,
2516     };
2517     return do_vqrdmlah_2sc(s, a, opfn[a->size]);
2518 }
2519 
2520 static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
2521 {
2522     static NeonGenThreeOpEnvFn *opfn[] = {
2523         NULL,
2524         gen_helper_neon_qrdmlsh_s16,
2525         gen_helper_neon_qrdmlsh_s32,
2526         NULL,
2527     };
2528     return do_vqrdmlah_2sc(s, a, opfn[a->size]);
2529 }
2530 
static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
                            NeonGenTwoOpWidenFn *opfn,
                            NeonGenTwo64OpFn *accfn)
{
    /*
     * Two registers and a scalar, long operations: perform an
     * operation on the input elements and the scalar which produces
     * a double-width result, and then possibly perform an accumulation
     * operation of that result into the destination.
     * accfn may be NULL, in which case the widened result is stored
     * without accumulation.
     */
    TCGv_i32 scalar, rn;
    TCGv_i64 rn0_64, rn1_64;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    /* The Q-sized destination must be an even Dreg pair. */
    if (a->vd & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    scalar = neon_get_scalar(a->size, a->vm);

    /* Load all inputs before writing any outputs, in case of overlap */
    rn = tcg_temp_new_i32();
    read_neon_element32(rn, a->vn, 0, MO_32);
    rn0_64 = tcg_temp_new_i64();
    opfn(rn0_64, rn, scalar);

    read_neon_element32(rn, a->vn, 1, MO_32);
    rn1_64 = tcg_temp_new_i64();
    opfn(rn1_64, rn, scalar);

    if (accfn) {
        TCGv_i64 t64 = tcg_temp_new_i64();
        read_neon_element64(t64, a->vd, 0, MO_64);
        accfn(rn0_64, t64, rn0_64);
        read_neon_element64(t64, a->vd, 1, MO_64);
        accfn(rn1_64, t64, rn1_64);
    }

    write_neon_element64(rn0_64, a->vd, 0, MO_64);
    write_neon_element64(rn1_64, a->vd, 1, MO_64);
    return true;
}
2591 
2592 static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
2593 {
2594     static NeonGenTwoOpWidenFn * const opfn[] = {
2595         NULL,
2596         gen_helper_neon_mull_s16,
2597         gen_mull_s32,
2598         NULL,
2599     };
2600 
2601     return do_2scalar_long(s, a, opfn[a->size], NULL);
2602 }
2603 
2604 static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
2605 {
2606     static NeonGenTwoOpWidenFn * const opfn[] = {
2607         NULL,
2608         gen_helper_neon_mull_u16,
2609         gen_mull_u32,
2610         NULL,
2611     };
2612 
2613     return do_2scalar_long(s, a, opfn[a->size], NULL);
2614 }
2615 
/*
 * Expand a trans function for the widening multiply-accumulate
 * two-reg-and-scalar insns: MULL selects the signed/unsigned widening
 * multiply and ACC the 64-bit accumulate op (add for MLAL, sub for MLSL).
 */
#define DO_VMLAL_2SC(INSN, MULL, ACC)                                   \
    static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a)     \
    {                                                                   \
        static NeonGenTwoOpWidenFn * const opfn[] = {                   \
            NULL,                                                       \
            gen_helper_neon_##MULL##16,                                 \
            gen_##MULL##32,                                             \
            NULL,                                                       \
        };                                                              \
        static NeonGenTwo64OpFn * const accfn[] = {                     \
            NULL,                                                       \
            gen_helper_neon_##ACC##l_u32,                               \
            tcg_gen_##ACC##_i64,                                        \
            NULL,                                                       \
        };                                                              \
        return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);    \
    }

DO_VMLAL_2SC(VMLAL_S, mull_s, add)
DO_VMLAL_2SC(VMLAL_U, mull_u, add)
DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
DO_VMLAL_2SC(VMLSL_U, mull_u, sub)
2638 
2639 static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
2640 {
2641     static NeonGenTwoOpWidenFn * const opfn[] = {
2642         NULL,
2643         gen_VQDMULL_16,
2644         gen_VQDMULL_32,
2645         NULL,
2646     };
2647 
2648     return do_2scalar_long(s, a, opfn[a->size], NULL);
2649 }
2650 
2651 static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
2652 {
2653     static NeonGenTwoOpWidenFn * const opfn[] = {
2654         NULL,
2655         gen_VQDMULL_16,
2656         gen_VQDMULL_32,
2657         NULL,
2658     };
2659     static NeonGenTwo64OpFn * const accfn[] = {
2660         NULL,
2661         gen_VQDMLAL_acc_16,
2662         gen_VQDMLAL_acc_32,
2663         NULL,
2664     };
2665 
2666     return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
2667 }
2668 
2669 static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
2670 {
2671     static NeonGenTwoOpWidenFn * const opfn[] = {
2672         NULL,
2673         gen_VQDMULL_16,
2674         gen_VQDMULL_32,
2675         NULL,
2676     };
2677     static NeonGenTwo64OpFn * const accfn[] = {
2678         NULL,
2679         gen_VQDMLSL_acc_16,
2680         gen_VQDMLSL_acc_32,
2681         NULL,
2682     };
2683 
2684     return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
2685 }
2686 
/*
 * VEXT: extract a byte-aligned window from the concatenation <Vm:Vn>
 * into Vd, starting a->imm bytes into Vn.
 */
static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    /* UNDEF if Q form names an odd register (Q regs are even D pairs) */
    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    /* imm > 7 would index past the 8-byte source in the 64-bit form */
    if (a->imm > 7 && !a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (!a->q) {
        /* Extract 64 bits from <Vm:Vn> */
        TCGv_i64 left, right, dest;

        left = tcg_temp_new_i64();
        right = tcg_temp_new_i64();
        dest = tcg_temp_new_i64();

        read_neon_element64(right, a->vn, 0, MO_64);
        read_neon_element64(left, a->vm, 0, MO_64);
        tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
        write_neon_element64(dest, a->vd, 0, MO_64);
    } else {
        /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
        TCGv_i64 left, middle, right, destleft, destright;

        left = tcg_temp_new_i64();
        middle = tcg_temp_new_i64();
        right = tcg_temp_new_i64();
        destleft = tcg_temp_new_i64();
        destright = tcg_temp_new_i64();

        if (a->imm < 8) {
            /* Window starts in Vn: result drawn from Vn, Vn+1 and Vm */
            read_neon_element64(right, a->vn, 0, MO_64);
            read_neon_element64(middle, a->vn, 1, MO_64);
            tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
            read_neon_element64(left, a->vm, 0, MO_64);
            tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
        } else {
            /* Window starts in Vn+1: result drawn from Vn+1, Vm and Vm+1 */
            read_neon_element64(right, a->vn, 1, MO_64);
            read_neon_element64(middle, a->vm, 0, MO_64);
            tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
            read_neon_element64(left, a->vm, 1, MO_64);
            tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
        }

        /* Both halves are computed before any write, in case Vd overlaps */
        write_neon_element64(destright, a->vd, 0, MO_64);
        write_neon_element64(destleft, a->vd, 1, MO_64);
    }
    return true;
}
2752 
/*
 * VTBL/VTBX: table lookup.  Registers Vn..Vn+len form a byte table;
 * each byte of Vm indexes that table to produce the corresponding byte
 * of Vd.  Out-of-range indexes give zero (VTBL) or leave the old Vd
 * byte unchanged (VTBX, a->op set).
 */
static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
{
    TCGv_i64 val, def;
    TCGv_i32 desc;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn + a->len + 1) > 32) {
        /*
         * This is UNPREDICTABLE; we choose to UNDEF to avoid the
         * helper function running off the end of the register file.
         */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Pack the table base register and length into one descriptor word */
    desc = tcg_constant_i32((a->vn << 2) | a->len);
    def = tcg_temp_new_i64();
    if (a->op) {
        /* VTBX: default value for out-of-range indexes is the old Vd */
        read_neon_element64(def, a->vd, 0, MO_64);
    } else {
        /* VTBL: default value for out-of-range indexes is zero */
        tcg_gen_movi_i64(def, 0);
    }
    val = tcg_temp_new_i64();
    read_neon_element64(val, a->vm, 0, MO_64);

    gen_helper_neon_tbl(val, tcg_env, desc, val, def);
    write_neon_element64(val, a->vd, 0, MO_64);
    return true;
}
2794 
2795 static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
2796 {
2797     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2798         return false;
2799     }
2800 
2801     /* UNDEF accesses to D16-D31 if they don't exist. */
2802     if (!dc_isar_feature(aa32_simd_r32, s) &&
2803         ((a->vd | a->vm) & 0x10)) {
2804         return false;
2805     }
2806 
2807     if (a->vd & a->q) {
2808         return false;
2809     }
2810 
2811     if (!vfp_access_check(s)) {
2812         return true;
2813     }
2814 
2815     tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd),
2816                          neon_element_offset(a->vm, a->index, a->size),
2817                          a->q ? 16 : 8, a->q ? 16 : 8);
2818     return true;
2819 }
2820 
/*
 * VREV64: reverse the order of the size-sized elements within each
 * 64-bit doubleword of the vector.
 */
static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
{
    int pass, half;
    TCGv_i32 tmp[2];

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* UNDEF if the Q form names an odd register */
    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    /* No 64-bit element form */
    if (a->size == 3) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp[0] = tcg_temp_new_i32();
    tmp[1] = tcg_temp_new_i32();

    /* One pass per 64-bit doubleword, handled as two 32-bit halves */
    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
        for (half = 0; half < 2; half++) {
            read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32);
            switch (a->size) {
            case 0:
                /* byte elements: byte-reverse each 32-bit half */
                tcg_gen_bswap32_i32(tmp[half], tmp[half]);
                break;
            case 1:
                /* halfword elements: swap the halfwords within each half */
                gen_swap_half(tmp[half], tmp[half]);
                break;
            case 2:
                /* word elements: swapping the halves below is sufficient */
                break;
            default:
                g_assert_not_reached();
            }
        }
        /* Write the two halves back in swapped order */
        write_neon_element32(tmp[1], a->vd, pass * 2, MO_32);
        write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32);
    }
    return true;
}
2872 
/*
 * Common expander for pairwise-long 2-reg-misc ops (VPADDL, VPADAL).
 *
 * @widenfn: widens one 32-bit source word to 64 bits (NULL = bad size)
 * @opfn:    combines the two widened halves of a pair
 * @accfn:   if non-NULL, accumulates the result into the old Vd element
 *
 * Returns false to UNDEF, true otherwise (including when the fp access
 * check itself raised an exception).
 */
static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
                              NeonGenWidenFn *widenfn,
                              NeonGenTwo64OpFn *opfn,
                              NeonGenTwo64OpFn *accfn)
{
    /*
     * Pairwise long operations: widen both halves of the pair,
     * combine the pairs with the opfn, and then possibly accumulate
     * into the destination with the accfn.
     */
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* UNDEF if the Q form names an odd register */
    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    /* NULL widenfn means the decoded size has no valid operation */
    if (!widenfn) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* One pass per 64-bit destination element */
    for (pass = 0; pass < a->q + 1; pass++) {
        TCGv_i32 tmp;
        TCGv_i64 rm0_64, rm1_64, rd_64;

        rm0_64 = tcg_temp_new_i64();
        rm1_64 = tcg_temp_new_i64();
        rd_64 = tcg_temp_new_i64();

        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vm, pass * 2, MO_32);
        widenfn(rm0_64, tmp);
        read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32);
        widenfn(rm1_64, tmp);

        opfn(rd_64, rm0_64, rm1_64);

        if (accfn) {
            /* Accumulating form: fold in the previous Vd value */
            TCGv_i64 tmp64 = tcg_temp_new_i64();
            read_neon_element64(tmp64, a->vd, pass, MO_64);
            accfn(rd_64, tmp64, rd_64);
        }
        write_neon_element64(rd_64, a->vd, pass, MO_64);
    }
    return true;
}
2932 
2933 static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
2934 {
2935     static NeonGenWidenFn * const widenfn[] = {
2936         gen_helper_neon_widen_s8,
2937         gen_helper_neon_widen_s16,
2938         tcg_gen_ext_i32_i64,
2939         NULL,
2940     };
2941     static NeonGenTwo64OpFn * const opfn[] = {
2942         gen_helper_neon_paddl_u16,
2943         gen_helper_neon_paddl_u32,
2944         tcg_gen_add_i64,
2945         NULL,
2946     };
2947 
2948     return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
2949 }
2950 
2951 static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
2952 {
2953     static NeonGenWidenFn * const widenfn[] = {
2954         gen_helper_neon_widen_u8,
2955         gen_helper_neon_widen_u16,
2956         tcg_gen_extu_i32_i64,
2957         NULL,
2958     };
2959     static NeonGenTwo64OpFn * const opfn[] = {
2960         gen_helper_neon_paddl_u16,
2961         gen_helper_neon_paddl_u32,
2962         tcg_gen_add_i64,
2963         NULL,
2964     };
2965 
2966     return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
2967 }
2968 
2969 static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
2970 {
2971     static NeonGenWidenFn * const widenfn[] = {
2972         gen_helper_neon_widen_s8,
2973         gen_helper_neon_widen_s16,
2974         tcg_gen_ext_i32_i64,
2975         NULL,
2976     };
2977     static NeonGenTwo64OpFn * const opfn[] = {
2978         gen_helper_neon_paddl_u16,
2979         gen_helper_neon_paddl_u32,
2980         tcg_gen_add_i64,
2981         NULL,
2982     };
2983     static NeonGenTwo64OpFn * const accfn[] = {
2984         gen_helper_neon_addl_u16,
2985         gen_helper_neon_addl_u32,
2986         tcg_gen_add_i64,
2987         NULL,
2988     };
2989 
2990     return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
2991                              accfn[a->size]);
2992 }
2993 
2994 static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
2995 {
2996     static NeonGenWidenFn * const widenfn[] = {
2997         gen_helper_neon_widen_u8,
2998         gen_helper_neon_widen_u16,
2999         tcg_gen_extu_i32_i64,
3000         NULL,
3001     };
3002     static NeonGenTwo64OpFn * const opfn[] = {
3003         gen_helper_neon_paddl_u16,
3004         gen_helper_neon_paddl_u32,
3005         tcg_gen_add_i64,
3006         NULL,
3007     };
3008     static NeonGenTwo64OpFn * const accfn[] = {
3009         gen_helper_neon_addl_u16,
3010         gen_helper_neon_addl_u32,
3011         tcg_gen_add_i64,
3012         NULL,
3013     };
3014 
3015     return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
3016                              accfn[a->size]);
3017 }
3018 
3019 typedef void ZipFn(TCGv_ptr, TCGv_ptr);
3020 
/*
 * Common expander for VZIP/VUZP: hand pointers to the Vd and Vm
 * registers to an out-of-line helper that does the whole permute.
 *
 * @fn: helper for the decoded size/q combination; NULL means the
 *      combination is invalid and we UNDEF.
 */
static bool do_zip_uzp(DisasContext *s, arg_2misc *a,
                       ZipFn *fn)
{
    TCGv_ptr pd, pm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* UNDEF if the Q form names an odd register */
    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!fn) {
        /* Bad size or size/q combination */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    pd = vfp_reg_ptr(true, a->vd);
    pm = vfp_reg_ptr(true, a->vm);
    fn(pd, pm);
    return true;
}
3054 
3055 static bool trans_VUZP(DisasContext *s, arg_2misc *a)
3056 {
3057     static ZipFn * const fn[2][4] = {
3058         {
3059             gen_helper_neon_unzip8,
3060             gen_helper_neon_unzip16,
3061             NULL,
3062             NULL,
3063         }, {
3064             gen_helper_neon_qunzip8,
3065             gen_helper_neon_qunzip16,
3066             gen_helper_neon_qunzip32,
3067             NULL,
3068         }
3069     };
3070     return do_zip_uzp(s, a, fn[a->q][a->size]);
3071 }
3072 
3073 static bool trans_VZIP(DisasContext *s, arg_2misc *a)
3074 {
3075     static ZipFn * const fn[2][4] = {
3076         {
3077             gen_helper_neon_zip8,
3078             gen_helper_neon_zip16,
3079             NULL,
3080             NULL,
3081         }, {
3082             gen_helper_neon_qzip8,
3083             gen_helper_neon_qzip16,
3084             gen_helper_neon_qzip32,
3085             NULL,
3086         }
3087     };
3088     return do_zip_uzp(s, a, fn[a->q][a->size]);
3089 }
3090 
/*
 * Common expander for VMOVN-family ops: narrow the two 64-bit halves
 * of the Q register Vm into the two 32-bit halves of the D register Vd.
 *
 * @narrowfn: env-taking narrowing helper (may saturate via env state);
 *            NULL means the decoded size is invalid.
 */
static bool do_vmovn(DisasContext *s, arg_2misc *a,
                     NeonGenNarrowEnvFn *narrowfn)
{
    TCGv_i64 rm;
    TCGv_i32 rd0, rd1;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* Source is a Q register, so Vm must be even */
    if (a->vm & 1) {
        return false;
    }

    if (!narrowfn) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rm = tcg_temp_new_i64();
    rd0 = tcg_temp_new_i32();
    rd1 = tcg_temp_new_i32();

    /* Narrow both halves before writing Vd, in case Vd overlaps Vm */
    read_neon_element64(rm, a->vm, 0, MO_64);
    narrowfn(rd0, tcg_env, rm);
    read_neon_element64(rm, a->vm, 1, MO_64);
    narrowfn(rd1, tcg_env, rm);
    write_neon_element32(rd0, a->vd, 0, MO_32);
    write_neon_element32(rd1, a->vd, 1, MO_32);
    return true;
}
3131 
/*
 * Narrowing moves: plain (VMOVN), signed-to-unsigned saturating
 * (VQMOVUN) and saturating (VQMOVN).  FUNC##8/16/32 name the narrowing
 * helper selected by a->size; size 3 is invalid (NULL, rejected by
 * do_vmovn).
 */
#define DO_VMOVN(INSN, FUNC)                                    \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        static NeonGenNarrowEnvFn * const narrowfn[] = {        \
            FUNC##8,                                            \
            FUNC##16,                                           \
            FUNC##32,                                           \
            NULL,                                               \
        };                                                      \
        return do_vmovn(s, a, narrowfn[a->size]);               \
    }

DO_VMOVN(VMOVN, gen_neon_narrow_u)
DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat)
DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s)
DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u)
3148 
/*
 * VSHLL: widen each element of the D register Vm into the Q register
 * Vd, shifting left by the element size (the "VSHLL by maximum shift"
 * encoding from the 2-reg-misc group).
 */
static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
{
    TCGv_i32 rm0, rm1;
    TCGv_i64 rd;
    static NeonGenWidenFn * const widenfns[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
        NULL,
    };
    NeonGenWidenFn *widenfn = widenfns[a->size];

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* Destination is a Q register, so Vd must be even */
    if (a->vd & 1) {
        return false;
    }

    /* NULL widenfn means size 3, which is invalid */
    if (!widenfn) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rd = tcg_temp_new_i64();
    rm0 = tcg_temp_new_i32();
    rm1 = tcg_temp_new_i32();

    /* Read both source words before any write, in case Vd overlaps Vm */
    read_neon_element32(rm0, a->vm, 0, MO_32);
    read_neon_element32(rm1, a->vm, 1, MO_32);

    widenfn(rd, rm0);
    /* Shift by the element width in bits (8 << size) */
    tcg_gen_shli_i64(rd, rd, 8 << a->size);
    write_neon_element64(rd, a->vd, 0, MO_64);
    widenfn(rd, rm1);
    tcg_gen_shli_i64(rd, rd, 8 << a->size);
    write_neon_element64(rd, a->vd, 1, MO_64);
    return true;
}
3198 
/*
 * VCVT (from single to bfloat16): convert the four f32 elements of the
 * Q register Vm, pairwise, into four bf16 elements in the D register Vd.
 */
static bool trans_VCVT_B16_F32(DisasContext *s, arg_2misc *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;
    TCGv_i32 dst0, dst1;

    if (!dc_isar_feature(aa32_bf16, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* Vm must be even (Q register) and the size field fixed at 1 */
    if ((a->vm & 1) || (a->size != 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_STD);
    tmp = tcg_temp_new_i64();
    dst0 = tcg_temp_new_i32();
    dst1 = tcg_temp_new_i32();

    /* Convert both source halves before writing Vd (it may overlap Vm) */
    read_neon_element64(tmp, a->vm, 0, MO_64);
    gen_helper_bfcvt_pair(dst0, tmp, fpst);

    read_neon_element64(tmp, a->vm, 1, MO_64);
    gen_helper_bfcvt_pair(dst1, tmp, fpst);

    write_neon_element32(dst0, a->vd, 0, MO_32);
    write_neon_element32(dst1, a->vd, 1, MO_32);
    return true;
}
3238 
/*
 * VCVT (from single to half): narrow the four f32 elements of the Q
 * register Vm into four f16 elements in the D register Vd, packing
 * two halves into each 32-bit destination word.
 */
static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp, tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* Vm must be even (Q register) and the size field fixed at 1 */
    if ((a->vm & 1) || (a->size != 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_STD);
    /* AHP flag selects IEEE vs Alternative half-precision format */
    ahp = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    read_neon_element32(tmp, a->vm, 0, MO_32);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
    tmp2 = tcg_temp_new_i32();
    read_neon_element32(tmp2, a->vm, 1, MO_32);
    gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
    /* Pack elements 0 and 1 into the low destination word */
    tcg_gen_shli_i32(tmp2, tmp2, 16);
    tcg_gen_or_i32(tmp2, tmp2, tmp);
    read_neon_element32(tmp, a->vm, 2, MO_32);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
    tmp3 = tcg_temp_new_i32();
    /* Read source element 3 before the first Vd write: Vd may overlap Vm */
    read_neon_element32(tmp3, a->vm, 3, MO_32);
    write_neon_element32(tmp2, a->vd, 0, MO_32);
    gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
    /* Pack elements 2 and 3 into the high destination word */
    tcg_gen_shli_i32(tmp3, tmp3, 16);
    tcg_gen_or_i32(tmp3, tmp3, tmp);
    write_neon_element32(tmp3, a->vd, 1, MO_32);
    return true;
}
3284 
/*
 * VCVT (from half to single): widen the four f16 elements packed in
 * the D register Vm into four f32 elements in the Q register Vd.
 */
static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp, tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* Vd must be even (Q register) and the size field fixed at 1 */
    if ((a->vd & 1) || (a->size != 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_STD);
    /* AHP flag selects IEEE vs Alternative half-precision format */
    ahp = get_ahp_flag();
    tmp3 = tcg_temp_new_i32();
    tmp2 = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();
    /* Read both source words before any write: Vd may overlap Vm */
    read_neon_element32(tmp, a->vm, 0, MO_32);
    read_neon_element32(tmp2, a->vm, 1, MO_32);
    /* Low halfword of source word 0 -> Vd element 0 */
    tcg_gen_ext16u_i32(tmp3, tmp);
    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
    write_neon_element32(tmp3, a->vd, 0, MO_32);
    /* High halfword of source word 0 -> Vd element 1 */
    tcg_gen_shri_i32(tmp, tmp, 16);
    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
    write_neon_element32(tmp, a->vd, 1, MO_32);
    /* Low halfword of source word 1 -> Vd element 2 */
    tcg_gen_ext16u_i32(tmp3, tmp2);
    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
    write_neon_element32(tmp3, a->vd, 2, MO_32);
    /* High halfword of source word 1 -> Vd element 3 */
    tcg_gen_shri_i32(tmp2, tmp2, 16);
    gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
    write_neon_element32(tmp2, a->vd, 3, MO_32);
    return true;
}
3330 
/*
 * Common expander for 2-reg-misc ops that map onto a whole-vector
 * gvec expansion function @fn, after the usual Neon decode checks.
 */
static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
{
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rm_ofs = neon_full_reg_offset(a->vm);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* No 64-bit element form in this group */
    if (a->size == 3) {
        return false;
    }

    /* UNDEF if the Q form names an odd register */
    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size);

    return true;
}
3363 
/*
 * 2-reg-misc ops that are a straight gvec expansion with no extra
 * per-insn decode constraints.
 */
#define DO_2MISC_VEC(INSN, FN)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        return do_2misc_vec(s, a, FN);                          \
    }

DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg)
DO_2MISC_VEC(VABS, tcg_gen_gvec_abs)
DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0)
DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
DO_2MISC_VEC(VCLT0, gen_gvec_clt0)
3377 
3378 static bool trans_VMVN(DisasContext *s, arg_2misc *a)
3379 {
3380     if (a->size != 0) {
3381         return false;
3382     }
3383     return do_2misc_vec(s, a, tcg_gen_gvec_not);
3384 }
3385 
/*
 * Adapt out-of-line helpers to the GVecGen2Fn signature expected by
 * do_2misc_vec.  The 3-operand wrapper passes Vd as both an input and
 * the output (used by AESE/AESD, which combine state and round key);
 * the 2-operand wrapper is a plain dest/src expansion.
 */
#define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA)                          \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
                         uint32_t rm_ofs, uint32_t oprsz,               \
                         uint32_t maxsz)                                \
    {                                                                   \
        tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz,        \
                           DATA, FUNC);                                 \
    }

#define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA)                          \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
                         uint32_t rm_ofs, uint32_t oprsz,               \
                         uint32_t maxsz)                                \
    {                                                                   \
        tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC);   \
    }

WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0)
WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aesd, 0)
WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0)
WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesimc, 0)
WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0)
WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0)
WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0)
3410 
/*
 * Crypto 2-reg-misc insns: gate on the ISAR feature bit and on the
 * fixed value the encoding requires in the size field, then expand
 * via the matching gen_ wrapper above.
 */
#define DO_2M_CRYPTO(INSN, FEATURE, SIZE)                       \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) {  \
            return false;                                       \
        }                                                       \
        return do_2misc_vec(s, a, gen_##INSN);                  \
    }

DO_2M_CRYPTO(AESE, aa32_aes, 0)
DO_2M_CRYPTO(AESD, aa32_aes, 0)
DO_2M_CRYPTO(AESMC, aa32_aes, 0)
DO_2M_CRYPTO(AESIMC, aa32_aes, 0)
DO_2M_CRYPTO(SHA1H, aa32_sha1, 2)
DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2)
DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)
3427 
/*
 * Common expander for 2-reg-misc ops implemented as a per-32-bit-word
 * operation @fn applied across the vector.
 *
 * @fn: 32-bit op for the decoded size; NULL means the size is invalid.
 */
static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
{
    TCGv_i32 tmp;
    int pass;

    /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!fn) {
        return false;
    }

    /* UNDEF if the Q form names an odd register */
    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        read_neon_element32(tmp, a->vm, pass, MO_32);
        fn(tmp, tmp);
        write_neon_element32(tmp, a->vd, pass, MO_32);
    }
    return true;
}
3464 
3465 static bool trans_VREV32(DisasContext *s, arg_2misc *a)
3466 {
3467     static NeonGenOneOpFn * const fn[] = {
3468         tcg_gen_bswap32_i32,
3469         gen_swap_half,
3470         NULL,
3471         NULL,
3472     };
3473     return do_2misc(s, a, fn[a->size]);
3474 }
3475 
3476 static bool trans_VREV16(DisasContext *s, arg_2misc *a)
3477 {
3478     if (a->size != 0) {
3479         return false;
3480     }
3481     return do_2misc(s, a, gen_rev16);
3482 }
3483 
3484 static bool trans_VCLS(DisasContext *s, arg_2misc *a)
3485 {
3486     static NeonGenOneOpFn * const fn[] = {
3487         gen_helper_neon_cls_s8,
3488         gen_helper_neon_cls_s16,
3489         gen_helper_neon_cls_s32,
3490         NULL,
3491     };
3492     return do_2misc(s, a, fn[a->size]);
3493 }
3494 
/* 32-bit count-leading-zeros; an all-zero input yields 32. */
static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
{
    tcg_gen_clzi_i32(rd, rm, 32);
}
3499 
3500 static bool trans_VCLZ(DisasContext *s, arg_2misc *a)
3501 {
3502     static NeonGenOneOpFn * const fn[] = {
3503         gen_helper_neon_clz_u8,
3504         gen_helper_neon_clz_u16,
3505         do_VCLZ_32,
3506         NULL,
3507     };
3508     return do_2misc(s, a, fn[a->size]);
3509 }
3510 
3511 static bool trans_VCNT(DisasContext *s, arg_2misc *a)
3512 {
3513     if (a->size != 0) {
3514         return false;
3515     }
3516     return do_2misc(s, a, gen_helper_neon_cnt_u8);
3517 }
3518 
3519 static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3520                        uint32_t oprsz, uint32_t maxsz)
3521 {
3522     tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs,
3523                       vece == MO_16 ? 0x7fff : 0x7fffffff,
3524                       oprsz, maxsz);
3525 }
3526 
3527 static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
3528 {
3529     if (a->size == MO_16) {
3530         if (!dc_isar_feature(aa32_fp16_arith, s)) {
3531             return false;
3532         }
3533     } else if (a->size != MO_32) {
3534         return false;
3535     }
3536     return do_2misc_vec(s, a, gen_VABS_F);
3537 }
3538 
3539 static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3540                        uint32_t oprsz, uint32_t maxsz)
3541 {
3542     tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs,
3543                       vece == MO_16 ? 0x8000 : 0x80000000,
3544                       oprsz, maxsz);
3545 }
3546 
3547 static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
3548 {
3549     if (a->size == MO_16) {
3550         if (!dc_isar_feature(aa32_fp16_arith, s)) {
3551             return false;
3552         }
3553     } else if (a->size != MO_32) {
3554         return false;
3555     }
3556     return do_2misc_vec(s, a, gen_VNEG_F);
3557 }
3558 
3559 static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
3560 {
3561     if (a->size != 2) {
3562         return false;
3563     }
3564     return do_2misc(s, a, gen_helper_recpe_u32);
3565 }
3566 
3567 static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
3568 {
3569     if (a->size != 2) {
3570         return false;
3571     }
3572     return do_2misc(s, a, gen_helper_rsqrte_u32);
3573 }
3574 
/*
 * Adapt helpers that take tcg_env (needed to set QC on saturation)
 * to the NeonGenOneOpFn signature used by do_2misc.
 */
#define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 m)        \
    {                                                   \
        FUNC(d, tcg_env, m);                            \
    }

WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8)
WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16)
WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32)
WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8)
WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16)
WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32)
3587 
3588 static bool trans_VQABS(DisasContext *s, arg_2misc *a)
3589 {
3590     static NeonGenOneOpFn * const fn[] = {
3591         gen_VQABS_s8,
3592         gen_VQABS_s16,
3593         gen_VQABS_s32,
3594         NULL,
3595     };
3596     return do_2misc(s, a, fn[a->size]);
3597 }
3598 
3599 static bool trans_VQNEG(DisasContext *s, arg_2misc *a)
3600 {
3601     static NeonGenOneOpFn * const fn[] = {
3602         gen_VQNEG_s8,
3603         gen_VQNEG_s16,
3604         gen_VQNEG_s32,
3605         NULL,
3606     };
3607     return do_2misc(s, a, fn[a->size]);
3608 }
3609 
/*
 * Expand a 2-reg-misc fp operation via tcg_gen_gvec_2_ptr: MO_16 uses
 * HFUNC (gated on aa32_fp16_arith) with the fp16 standard FP status,
 * MO_32 uses SFUNC; all other element sizes return false (UNDEF).
 */
#define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC)                             \
    static void gen_##INSN(unsigned vece, uint32_t rd_ofs,              \
                           uint32_t rm_ofs,                             \
                           uint32_t oprsz, uint32_t maxsz)              \
    {                                                                   \
        static gen_helper_gvec_2_ptr * const fns[4] = {                 \
            NULL, HFUNC, SFUNC, NULL,                                   \
        };                                                              \
        TCGv_ptr fpst;                                                  \
        fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD);   \
        tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0,       \
                           fns[vece]);                                  \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
    {                                                                   \
        if (a->size == MO_16) {                                         \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
                return false;                                           \
            }                                                           \
        } else if (a->size != MO_32) {                                  \
            return false;                                               \
        }                                                               \
        return do_2misc_vec(s, a, gen_##INSN);                          \
    }
3634 
/* fp 2-reg-misc operations expanded via DO_2MISC_FP_VEC */
DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s)
DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s)
DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s)
DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s)
DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s)
DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s)
DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s)
DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos)
DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos)
DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs)
DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs)

/* VRINTX additionally requires v8; the extra check lives in trans_VRINTX. */
DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s)
3648 
3649 static bool trans_VRINTX(DisasContext *s, arg_2misc *a)
3650 {
3651     if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
3652         return false;
3653     }
3654     return trans_VRINTX_impl(s, a);
3655 }
3656 
/*
 * Expand a 2-reg-misc fp operation that takes an explicit rounding
 * mode (passed to the helper via arm_rmode_to_sf(RMODE)): MO_16 uses
 * the ##h helper (gated on aa32_fp16_arith), MO_32 the ##s helper;
 * other element sizes and non-v8 return false (UNDEF).
 */
#define DO_VEC_RMODE(INSN, RMODE, OP)                                   \
    static void gen_##INSN(unsigned vece, uint32_t rd_ofs,              \
                           uint32_t rm_ofs,                             \
                           uint32_t oprsz, uint32_t maxsz)              \
    {                                                                   \
        static gen_helper_gvec_2_ptr * const fns[4] = {                 \
            NULL,                                                       \
            gen_helper_gvec_##OP##h,                                    \
            gen_helper_gvec_##OP##s,                                    \
            NULL,                                                       \
        };                                                              \
        TCGv_ptr fpst;                                                  \
        /* MO_16 (== 1), spelled out for consistency w/ DO_2MISC_FP_VEC */ \
        fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD);   \
        tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz,          \
                           arm_rmode_to_sf(RMODE), fns[vece]);          \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
    {                                                                   \
        if (!arm_dc_feature(s, ARM_FEATURE_V8)) {                       \
            return false;                                               \
        }                                                               \
        if (a->size == MO_16) {                                         \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
                return false;                                           \
            }                                                           \
        } else if (a->size != MO_32) {                                  \
            return false;                                               \
        }                                                               \
        return do_2misc_vec(s, a, gen_##INSN);                          \
    }
3687 
/* VCVT{A,N,P,M}{U,S}: expanded with the matching FPROUNDING_* mode */
DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u)
DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s)
DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u)
DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s)
DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u)
DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s)
DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u)
DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s)

/* VRINT{N,A,Z,M,P}: expanded with the matching FPROUNDING_* mode */
DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_)
DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_)
DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_)
DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_)
DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_)
3702 
3703 static bool trans_VSWP(DisasContext *s, arg_2misc *a)
3704 {
3705     TCGv_i64 rm, rd;
3706     int pass;
3707 
3708     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3709         return false;
3710     }
3711 
3712     /* UNDEF accesses to D16-D31 if they don't exist. */
3713     if (!dc_isar_feature(aa32_simd_r32, s) &&
3714         ((a->vd | a->vm) & 0x10)) {
3715         return false;
3716     }
3717 
3718     if (a->size != 0) {
3719         return false;
3720     }
3721 
3722     if ((a->vd | a->vm) & a->q) {
3723         return false;
3724     }
3725 
3726     if (!vfp_access_check(s)) {
3727         return true;
3728     }
3729 
3730     rm = tcg_temp_new_i64();
3731     rd = tcg_temp_new_i64();
3732     for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
3733         read_neon_element64(rm, a->vm, pass, MO_64);
3734         read_neon_element64(rd, a->vd, pass, MO_64);
3735         write_neon_element64(rm, a->vd, pass, MO_64);
3736         write_neon_element64(rd, a->vm, pass, MO_64);
3737     }
3738     return true;
3739 }
3740 
3741 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3742 {
3743     TCGv_i32 rd, tmp;
3744 
3745     rd = tcg_temp_new_i32();
3746     tmp = tcg_temp_new_i32();
3747 
3748     tcg_gen_shli_i32(rd, t0, 8);
3749     tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3750     tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3751     tcg_gen_or_i32(rd, rd, tmp);
3752 
3753     tcg_gen_shri_i32(t1, t1, 8);
3754     tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3755     tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3756     tcg_gen_or_i32(t1, t1, tmp);
3757     tcg_gen_mov_i32(t0, rd);
3758 }
3759 
3760 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3761 {
3762     TCGv_i32 rd, tmp;
3763 
3764     rd = tcg_temp_new_i32();
3765     tmp = tcg_temp_new_i32();
3766 
3767     tcg_gen_shli_i32(rd, t0, 16);
3768     tcg_gen_andi_i32(tmp, t1, 0xffff);
3769     tcg_gen_or_i32(rd, rd, tmp);
3770     tcg_gen_shri_i32(t1, t1, 16);
3771     tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3772     tcg_gen_or_i32(t1, t1, tmp);
3773     tcg_gen_mov_i32(t0, rd);
3774 }
3775 
3776 static bool trans_VTRN(DisasContext *s, arg_2misc *a)
3777 {
3778     TCGv_i32 tmp, tmp2;
3779     int pass;
3780 
3781     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3782         return false;
3783     }
3784 
3785     /* UNDEF accesses to D16-D31 if they don't exist. */
3786     if (!dc_isar_feature(aa32_simd_r32, s) &&
3787         ((a->vd | a->vm) & 0x10)) {
3788         return false;
3789     }
3790 
3791     if ((a->vd | a->vm) & a->q) {
3792         return false;
3793     }
3794 
3795     if (a->size == 3) {
3796         return false;
3797     }
3798 
3799     if (!vfp_access_check(s)) {
3800         return true;
3801     }
3802 
3803     tmp = tcg_temp_new_i32();
3804     tmp2 = tcg_temp_new_i32();
3805     if (a->size == MO_32) {
3806         for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
3807             read_neon_element32(tmp, a->vm, pass, MO_32);
3808             read_neon_element32(tmp2, a->vd, pass + 1, MO_32);
3809             write_neon_element32(tmp2, a->vm, pass, MO_32);
3810             write_neon_element32(tmp, a->vd, pass + 1, MO_32);
3811         }
3812     } else {
3813         for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
3814             read_neon_element32(tmp, a->vm, pass, MO_32);
3815             read_neon_element32(tmp2, a->vd, pass, MO_32);
3816             if (a->size == MO_8) {
3817                 gen_neon_trn_u8(tmp, tmp2);
3818             } else {
3819                 gen_neon_trn_u16(tmp, tmp2);
3820             }
3821             write_neon_element32(tmp2, a->vm, pass, MO_32);
3822             write_neon_element32(tmp, a->vd, pass, MO_32);
3823         }
3824     }
3825     return true;
3826 }
3827 
3828 static bool trans_VSMMLA(DisasContext *s, arg_VSMMLA *a)
3829 {
3830     if (!dc_isar_feature(aa32_i8mm, s)) {
3831         return false;
3832     }
3833     return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
3834                         gen_helper_gvec_smmla_b);
3835 }
3836 
3837 static bool trans_VUMMLA(DisasContext *s, arg_VUMMLA *a)
3838 {
3839     if (!dc_isar_feature(aa32_i8mm, s)) {
3840         return false;
3841     }
3842     return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
3843                         gen_helper_gvec_ummla_b);
3844 }
3845 
3846 static bool trans_VUSMMLA(DisasContext *s, arg_VUSMMLA *a)
3847 {
3848     if (!dc_isar_feature(aa32_i8mm, s)) {
3849         return false;
3850     }
3851     return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
3852                         gen_helper_gvec_usmmla_b);
3853 }
3854 
3855 static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a)
3856 {
3857     if (!dc_isar_feature(aa32_bf16, s)) {
3858         return false;
3859     }
3860     return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
3861                         gen_helper_gvec_bfmmla);
3862 }
3863 
3864 static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a)
3865 {
3866     if (!dc_isar_feature(aa32_bf16, s)) {
3867         return false;
3868     }
3869     return do_neon_ddda_fpst(s, 7, a->vd, a->vn, a->vm, a->q, FPST_STD,
3870                              gen_helper_gvec_bfmlal);
3871 }
3872 
3873 static bool trans_VFMA_b16_scal(DisasContext *s, arg_VFMA_b16_scal *a)
3874 {
3875     if (!dc_isar_feature(aa32_bf16, s)) {
3876         return false;
3877     }
3878     return do_neon_ddda_fpst(s, 6, a->vd, a->vn, a->vm,
3879                              (a->index << 1) | a->q, FPST_STD,
3880                              gen_helper_gvec_bfmlal_idx);
3881 }
3882