xref: /openbmc/qemu/tcg/tcg-op-gvec.c (revision d0ec9796)
1 /*
2  * Generic vector operation expansion
3  *
4  * Copyright (c) 2018 Linaro
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu-common.h"
22 #include "tcg.h"
23 #include "tcg-op.h"
24 #include "tcg-op-gvec.h"
25 #include "tcg-gvec-desc.h"
26 
27 #define MAX_UNROLL  4
28 
29 /* Verify vector size and alignment rules.  OFS should be the OR of all
30    of the operand offsets so that we can check them all at once.  */
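/* In practice this means an 8-byte operation must be 8-byte aligned, and
   anything of 16 bytes or more must be a multiple of 16 bytes and 16-byte
   aligned; e.g. oprsz == 24 is rejected, while 8, 16, 32 and 48 pass.  */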
31 static void check_size_align(uint32_t oprsz, uint32_t maxsz, uint32_t ofs)
32 {
33     uint32_t opr_align = oprsz >= 16 ? 15 : 7;
34     uint32_t max_align = maxsz >= 16 || oprsz >= 16 ? 15 : 7;
35     tcg_debug_assert(oprsz > 0);
36     tcg_debug_assert(oprsz <= maxsz);
37     tcg_debug_assert((oprsz & opr_align) == 0);
38     tcg_debug_assert((maxsz & max_align) == 0);
39     tcg_debug_assert((ofs & max_align) == 0);
40 }
41 
42 /* Verify vector overlap rules for two operands.  */
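/* Operands may alias exactly (d == a) or not at all; partial overlap is
   rejected because the expansion may read and write the lanes in chunks of
   any size and in any order.  */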
43 static void check_overlap_2(uint32_t d, uint32_t a, uint32_t s)
44 {
45     tcg_debug_assert(d == a || d + s <= a || a + s <= d);
46 }
47 
48 /* Verify vector overlap rules for three operands.  */
49 static void check_overlap_3(uint32_t d, uint32_t a, uint32_t b, uint32_t s)
50 {
51     check_overlap_2(d, a, s);
52     check_overlap_2(d, b, s);
53     check_overlap_2(a, b, s);
54 }
55 
56 /* Verify vector overlap rules for four operands.  */
57 static void check_overlap_4(uint32_t d, uint32_t a, uint32_t b,
58                             uint32_t c, uint32_t s)
59 {
60     check_overlap_2(d, a, s);
61     check_overlap_2(d, b, s);
62     check_overlap_2(d, c, s);
63     check_overlap_2(a, b, s);
64     check_overlap_2(a, c, s);
65     check_overlap_2(b, c, s);
66 }
67 
68 /* Create a descriptor from components.  */
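/* OPRSZ and MAXSZ are encoded as (bytes / 8) - 1 in their fields, and DATA
   must fit in SIMD_DATA_BITS as a signed value; e.g. simd_desc(16, 32, 1)
   stores 1, 3 and 1 respectively.  The out-of-line helpers recover the
   values with simd_oprsz(), simd_maxsz() and simd_data() from
   tcg-gvec-desc.h.  */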
69 uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data)
70 {
71     uint32_t desc = 0;
72 
73     assert(oprsz % 8 == 0 && oprsz <= (8 << SIMD_OPRSZ_BITS));
74     assert(maxsz % 8 == 0 && maxsz <= (8 << SIMD_MAXSZ_BITS));
75     assert(data == sextract32(data, 0, SIMD_DATA_BITS));
76 
77     oprsz = (oprsz / 8) - 1;
78     maxsz = (maxsz / 8) - 1;
79     desc = deposit32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS, oprsz);
80     desc = deposit32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS, maxsz);
81     desc = deposit32(desc, SIMD_DATA_SHIFT, SIMD_DATA_BITS, data);
82 
83     return desc;
84 }
85 
86 /* Generate a call to a gvec-style helper with two vector operands.  */
87 void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs,
88                         uint32_t oprsz, uint32_t maxsz, int32_t data,
89                         gen_helper_gvec_2 *fn)
90 {
91     TCGv_ptr a0, a1;
92     TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
93 
94     a0 = tcg_temp_new_ptr();
95     a1 = tcg_temp_new_ptr();
96 
97     tcg_gen_addi_ptr(a0, cpu_env, dofs);
98     tcg_gen_addi_ptr(a1, cpu_env, aofs);
99 
100     fn(a0, a1, desc);
101 
102     tcg_temp_free_ptr(a0);
103     tcg_temp_free_ptr(a1);
104     tcg_temp_free_i32(desc);
105 }
106 
107 /* Generate a call to a gvec-style helper with three vector operands.  */
108 void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
109                         uint32_t oprsz, uint32_t maxsz, int32_t data,
110                         gen_helper_gvec_3 *fn)
111 {
112     TCGv_ptr a0, a1, a2;
113     TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
114 
115     a0 = tcg_temp_new_ptr();
116     a1 = tcg_temp_new_ptr();
117     a2 = tcg_temp_new_ptr();
118 
119     tcg_gen_addi_ptr(a0, cpu_env, dofs);
120     tcg_gen_addi_ptr(a1, cpu_env, aofs);
121     tcg_gen_addi_ptr(a2, cpu_env, bofs);
122 
123     fn(a0, a1, a2, desc);
124 
125     tcg_temp_free_ptr(a0);
126     tcg_temp_free_ptr(a1);
127     tcg_temp_free_ptr(a2);
128     tcg_temp_free_i32(desc);
129 }
130 
131 /* Generate a call to a gvec-style helper with four vector operands.  */
132 void tcg_gen_gvec_4_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
133                         uint32_t cofs, uint32_t oprsz, uint32_t maxsz,
134                         int32_t data, gen_helper_gvec_4 *fn)
135 {
136     TCGv_ptr a0, a1, a2, a3;
137     TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
138 
139     a0 = tcg_temp_new_ptr();
140     a1 = tcg_temp_new_ptr();
141     a2 = tcg_temp_new_ptr();
142     a3 = tcg_temp_new_ptr();
143 
144     tcg_gen_addi_ptr(a0, cpu_env, dofs);
145     tcg_gen_addi_ptr(a1, cpu_env, aofs);
146     tcg_gen_addi_ptr(a2, cpu_env, bofs);
147     tcg_gen_addi_ptr(a3, cpu_env, cofs);
148 
149     fn(a0, a1, a2, a3, desc);
150 
151     tcg_temp_free_ptr(a0);
152     tcg_temp_free_ptr(a1);
153     tcg_temp_free_ptr(a2);
154     tcg_temp_free_ptr(a3);
155     tcg_temp_free_i32(desc);
156 }
157 
158 /* Generate a call to a gvec-style helper with five vector operands.  */
159 void tcg_gen_gvec_5_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
160                         uint32_t cofs, uint32_t xofs, uint32_t oprsz,
161                         uint32_t maxsz, int32_t data, gen_helper_gvec_5 *fn)
162 {
163     TCGv_ptr a0, a1, a2, a3, a4;
164     TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
165 
166     a0 = tcg_temp_new_ptr();
167     a1 = tcg_temp_new_ptr();
168     a2 = tcg_temp_new_ptr();
169     a3 = tcg_temp_new_ptr();
170     a4 = tcg_temp_new_ptr();
171 
172     tcg_gen_addi_ptr(a0, cpu_env, dofs);
173     tcg_gen_addi_ptr(a1, cpu_env, aofs);
174     tcg_gen_addi_ptr(a2, cpu_env, bofs);
175     tcg_gen_addi_ptr(a3, cpu_env, cofs);
176     tcg_gen_addi_ptr(a4, cpu_env, xofs);
177 
178     fn(a0, a1, a2, a3, a4, desc);
179 
180     tcg_temp_free_ptr(a0);
181     tcg_temp_free_ptr(a1);
182     tcg_temp_free_ptr(a2);
183     tcg_temp_free_ptr(a3);
184     tcg_temp_free_ptr(a4);
185     tcg_temp_free_i32(desc);
186 }
187 
188 /* Generate a call to a gvec-style helper with two vector operands
189    and an extra pointer operand.  */
190 void tcg_gen_gvec_2_ptr(uint32_t dofs, uint32_t aofs,
191                         TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz,
192                         int32_t data, gen_helper_gvec_2_ptr *fn)
193 {
194     TCGv_ptr a0, a1;
195     TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
196 
197     a0 = tcg_temp_new_ptr();
198     a1 = tcg_temp_new_ptr();
199 
200     tcg_gen_addi_ptr(a0, cpu_env, dofs);
201     tcg_gen_addi_ptr(a1, cpu_env, aofs);
202 
203     fn(a0, a1, ptr, desc);
204 
205     tcg_temp_free_ptr(a0);
206     tcg_temp_free_ptr(a1);
207     tcg_temp_free_i32(desc);
208 }
209 
210 /* Generate a call to a gvec-style helper with three vector operands
211    and an extra pointer operand.  */
212 void tcg_gen_gvec_3_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
213                         TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz,
214                         int32_t data, gen_helper_gvec_3_ptr *fn)
215 {
216     TCGv_ptr a0, a1, a2;
217     TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
218 
219     a0 = tcg_temp_new_ptr();
220     a1 = tcg_temp_new_ptr();
221     a2 = tcg_temp_new_ptr();
222 
223     tcg_gen_addi_ptr(a0, cpu_env, dofs);
224     tcg_gen_addi_ptr(a1, cpu_env, aofs);
225     tcg_gen_addi_ptr(a2, cpu_env, bofs);
226 
227     fn(a0, a1, a2, ptr, desc);
228 
229     tcg_temp_free_ptr(a0);
230     tcg_temp_free_ptr(a1);
231     tcg_temp_free_ptr(a2);
232     tcg_temp_free_i32(desc);
233 }
234 
235 /* Generate a call to a gvec-style helper with four vector operands
236    and an extra pointer operand.  */
237 void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
238                         uint32_t cofs, TCGv_ptr ptr, uint32_t oprsz,
239                         uint32_t maxsz, int32_t data,
240                         gen_helper_gvec_4_ptr *fn)
241 {
242     TCGv_ptr a0, a1, a2, a3;
243     TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
244 
245     a0 = tcg_temp_new_ptr();
246     a1 = tcg_temp_new_ptr();
247     a2 = tcg_temp_new_ptr();
248     a3 = tcg_temp_new_ptr();
249 
250     tcg_gen_addi_ptr(a0, cpu_env, dofs);
251     tcg_gen_addi_ptr(a1, cpu_env, aofs);
252     tcg_gen_addi_ptr(a2, cpu_env, bofs);
253     tcg_gen_addi_ptr(a3, cpu_env, cofs);
254 
255     fn(a0, a1, a2, a3, ptr, desc);
256 
257     tcg_temp_free_ptr(a0);
258     tcg_temp_free_ptr(a1);
259     tcg_temp_free_ptr(a2);
260     tcg_temp_free_ptr(a3);
261     tcg_temp_free_i32(desc);
262 }
263 
264 /* Return true if we want to implement something of OPRSZ bytes
265    in units of LNSZ.  This limits the expansion of inline code.  */
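/* E.g. with MAX_UNROLL == 4, an oprsz of 128 bytes in 32-byte units gives
   4 iterations and is accepted, while 256 bytes would need 8 and is
   rejected, pushing the caller towards the out-of-line helper.  */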
266 static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz)
267 {
268     uint32_t lnct = oprsz / lnsz;
269     return lnct >= 1 && lnct <= MAX_UNROLL;
270 }
271 
272 static void expand_clr(uint32_t dofs, uint32_t maxsz);
273 
274 /* Duplicate C as per VECE.  */
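/* E.g. dup_const(MO_16, 0x1234) returns 0x1234123412341234ull.  The
   parentheses around the name below are presumably there to keep a
   constant-folding macro of the same name from expanding in this
   definition.  */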
275 uint64_t (dup_const)(unsigned vece, uint64_t c)
276 {
277     switch (vece) {
278     case MO_8:
279         return 0x0101010101010101ull * (uint8_t)c;
280     case MO_16:
281         return 0x0001000100010001ull * (uint16_t)c;
282     case MO_32:
283         return 0x0000000100000001ull * (uint32_t)c;
284     case MO_64:
285         return c;
286     default:
287         g_assert_not_reached();
288     }
289 }
290 
291 /* Duplicate IN into OUT as per VECE.  */
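/* The narrow cases replicate by multiplying with a 0x..0101 pattern; where
   a single doubling suffices, a deposit of the value into the upper half is
   used instead.  */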
292 static void gen_dup_i32(unsigned vece, TCGv_i32 out, TCGv_i32 in)
293 {
294     switch (vece) {
295     case MO_8:
296         tcg_gen_ext8u_i32(out, in);
297         tcg_gen_muli_i32(out, out, 0x01010101);
298         break;
299     case MO_16:
300         tcg_gen_deposit_i32(out, in, in, 16, 16);
301         break;
302     case MO_32:
303         tcg_gen_mov_i32(out, in);
304         break;
305     default:
306         g_assert_not_reached();
307     }
308 }
309 
310 static void gen_dup_i64(unsigned vece, TCGv_i64 out, TCGv_i64 in)
311 {
312     switch (vece) {
313     case MO_8:
314         tcg_gen_ext8u_i64(out, in);
315         tcg_gen_muli_i64(out, out, 0x0101010101010101ull);
316         break;
317     case MO_16:
318         tcg_gen_ext16u_i64(out, in);
319         tcg_gen_muli_i64(out, out, 0x0001000100010001ull);
320         break;
321     case MO_32:
322         tcg_gen_deposit_i64(out, in, in, 32, 32);
323         break;
324     case MO_64:
325         tcg_gen_mov_i64(out, in);
326         break;
327     default:
328         g_assert_not_reached();
329     }
330 }
331 
332 /* Set OPRSZ bytes at DOFS to replications of IN_32, IN_64 or IN_C.
333  * Only one of IN_32 or IN_64 may be set;
334  * IN_C is used if IN_32 and IN_64 are unset.
335  */
336 static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
337                    uint32_t maxsz, TCGv_i32 in_32, TCGv_i64 in_64,
338                    uint64_t in_c)
339 {
340     TCGType type;
341     TCGv_i64 t_64;
342     TCGv_i32 t_32, t_desc;
343     TCGv_ptr t_ptr;
344     uint32_t i;
345 
346     assert(vece <= (in_32 ? MO_32 : MO_64));
347     assert(in_32 == NULL || in_64 == NULL);
348 
349     /* If we're storing 0, expand oprsz to maxsz.  */
350     if (in_32 == NULL && in_64 == NULL) {
351         in_c = dup_const(vece, in_c);
352         if (in_c == 0) {
353             oprsz = maxsz;
354         }
355     }
356 
357     type = 0;
358     if (TCG_TARGET_HAS_v256 && check_size_impl(oprsz, 32)) {
359         type = TCG_TYPE_V256;
360     } else if (TCG_TARGET_HAS_v128 && check_size_impl(oprsz, 16)) {
361         type = TCG_TYPE_V128;
362     } else if (TCG_TARGET_HAS_v64 && check_size_impl(oprsz, 8)
363                /* Prefer integer when 64-bit host and no variable dup.  */
364                && !(TCG_TARGET_REG_BITS == 64 && in_32 == NULL
365                     && (in_64 == NULL || vece == MO_64))) {
366         type = TCG_TYPE_V64;
367     }
368 
369     /* Implement inline with a vector type, if possible.  */
370     if (type != 0) {
371         TCGv_vec t_vec = tcg_temp_new_vec(type);
372 
373         if (in_32) {
374             tcg_gen_dup_i32_vec(vece, t_vec, in_32);
375         } else if (in_64) {
376             tcg_gen_dup_i64_vec(vece, t_vec, in_64);
377         } else {
378             switch (vece) {
379             case MO_8:
380                 tcg_gen_dup8i_vec(t_vec, in_c);
381                 break;
382             case MO_16:
383                 tcg_gen_dup16i_vec(t_vec, in_c);
384                 break;
385             case MO_32:
386                 tcg_gen_dup32i_vec(t_vec, in_c);
387                 break;
388             default:
389                 tcg_gen_dup64i_vec(t_vec, in_c);
390                 break;
391             }
392         }
393 
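        /* Store the duplicated value using the widest available store for
           each chunk, so that e.g. oprsz == 48 becomes one 32-byte and one
           16-byte store on a host with 256-bit vectors.  */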
394         i = 0;
395         if (TCG_TARGET_HAS_v256) {
396             for (; i + 32 <= oprsz; i += 32) {
397                 tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V256);
398             }
399         }
400         if (TCG_TARGET_HAS_v128) {
401             for (; i + 16 <= oprsz; i += 16) {
402                 tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V128);
403             }
404         }
405         if (TCG_TARGET_HAS_v64) {
406             for (; i < oprsz; i += 8) {
407                 tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64);
408             }
409         }
410         tcg_temp_free_vec(t_vec);
411         goto done;
412     }
413 
414     /* Otherwise, inline with an integer type, unless "large".  */
415     if (check_size_impl(oprsz, TCG_TARGET_REG_BITS / 8)) {
416         t_64 = NULL;
417         t_32 = NULL;
418 
419         if (in_32) {
420             /* We are given a 32-bit variable input.  For a 64-bit host,
421                use a 64-bit operation unless the 32-bit operation would
422                be simple enough.  */
423             if (TCG_TARGET_REG_BITS == 64
424                 && (vece != MO_32 || !check_size_impl(oprsz, 4))) {
425                 t_64 = tcg_temp_new_i64();
426                 tcg_gen_extu_i32_i64(t_64, in_32);
427                 gen_dup_i64(vece, t_64, t_64);
428             } else {
429                 t_32 = tcg_temp_new_i32();
430                 gen_dup_i32(vece, t_32, in_32);
431             }
432         } else if (in_64) {
433             /* We are given a 64-bit variable input.  */
434             t_64 = tcg_temp_new_i64();
435             gen_dup_i64(vece, t_64, in_64);
436         } else {
437             /* We are given a constant input.  */
438             /* For 64-bit hosts, use 64-bit constants for "simple" constants
439                or when we'd need too many 32-bit stores, or when a 64-bit
440                constant is really required.  */
441             if (vece == MO_64
442                 || (TCG_TARGET_REG_BITS == 64
443                     && (in_c == 0 || in_c == -1
444                         || !check_size_impl(oprsz, 4)))) {
445                 t_64 = tcg_const_i64(in_c);
446             } else {
447                 t_32 = tcg_const_i32(in_c);
448             }
449         }
450 
451         /* Implement inline if we picked an implementation size above.  */
452         if (t_32) {
453             for (i = 0; i < oprsz; i += 4) {
454                 tcg_gen_st_i32(t_32, cpu_env, dofs + i);
455             }
456             tcg_temp_free_i32(t_32);
457             goto done;
458         }
459         if (t_64) {
460             for (i = 0; i < oprsz; i += 8) {
461                 tcg_gen_st_i64(t_64, cpu_env, dofs + i);
462             }
463             tcg_temp_free_i64(t_64);
464             goto done;
465         }
466     }
467 
468     /* Otherwise implement out of line.  */
469     t_ptr = tcg_temp_new_ptr();
470     tcg_gen_addi_ptr(t_ptr, cpu_env, dofs);
471     t_desc = tcg_const_i32(simd_desc(oprsz, maxsz, 0));
472 
473     if (vece == MO_64) {
474         if (in_64) {
475             gen_helper_gvec_dup64(t_ptr, t_desc, in_64);
476         } else {
477             t_64 = tcg_const_i64(in_c);
478             gen_helper_gvec_dup64(t_ptr, t_desc, t_64);
479             tcg_temp_free_i64(t_64);
480         }
481     } else {
482         typedef void dup_fn(TCGv_ptr, TCGv_i32, TCGv_i32);
483         static dup_fn * const fns[3] = {
484             gen_helper_gvec_dup8,
485             gen_helper_gvec_dup16,
486             gen_helper_gvec_dup32
487         };
488 
489         if (in_32) {
490             fns[vece](t_ptr, t_desc, in_32);
491         } else {
492             t_32 = tcg_temp_new_i32();
493             if (in_64) {
494                 tcg_gen_extrl_i64_i32(t_32, in_64);
495             } else if (vece == MO_8) {
496                 tcg_gen_movi_i32(t_32, in_c & 0xff);
497             } else if (vece == MO_16) {
498                 tcg_gen_movi_i32(t_32, in_c & 0xffff);
499             } else {
500                 tcg_gen_movi_i32(t_32, in_c);
501             }
502             fns[vece](t_ptr, t_desc, t_32);
503             tcg_temp_free_i32(t_32);
504         }
505     }
506 
507     tcg_temp_free_ptr(t_ptr);
508     tcg_temp_free_i32(t_desc);
509     return;
510 
511  done:
512     if (oprsz < maxsz) {
513         expand_clr(dofs + oprsz, maxsz - oprsz);
514     }
515 }
516 
517 /* Likewise, but with zero.  */
518 static void expand_clr(uint32_t dofs, uint32_t maxsz)
519 {
520     do_dup(MO_8, dofs, maxsz, maxsz, NULL, NULL, 0);
521 }
522 
523 /* Expand OPRSZ bytes worth of two-operand operations using i32 elements.  */
524 static void expand_2_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
525                          void (*fni)(TCGv_i32, TCGv_i32))
526 {
527     TCGv_i32 t0 = tcg_temp_new_i32();
528     uint32_t i;
529 
530     for (i = 0; i < oprsz; i += 4) {
531         tcg_gen_ld_i32(t0, cpu_env, aofs + i);
532         fni(t0, t0);
533         tcg_gen_st_i32(t0, cpu_env, dofs + i);
534     }
535     tcg_temp_free_i32(t0);
536 }
537 
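/* Expand OPRSZ bytes worth of two-operand operations with an immediate
   using i32 elements.  When LOAD_DEST is set, the old destination lane is
   loaded into the output temporary first so that FNI can merge with it.  */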
538 static void expand_2i_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
539                           int32_t c, bool load_dest,
540                           void (*fni)(TCGv_i32, TCGv_i32, int32_t))
541 {
542     TCGv_i32 t0 = tcg_temp_new_i32();
543     TCGv_i32 t1 = tcg_temp_new_i32();
544     uint32_t i;
545 
546     for (i = 0; i < oprsz; i += 4) {
547         tcg_gen_ld_i32(t0, cpu_env, aofs + i);
548         if (load_dest) {
549             tcg_gen_ld_i32(t1, cpu_env, dofs + i);
550         }
551         fni(t1, t0, c);
552         tcg_gen_st_i32(t1, cpu_env, dofs + i);
553     }
554     tcg_temp_free_i32(t0);
555     tcg_temp_free_i32(t1);
556 }
557 
558 /* Expand OPRSZ bytes worth of three-operand operations using i32 elements.  */
559 static void expand_3_i32(uint32_t dofs, uint32_t aofs,
560                          uint32_t bofs, uint32_t oprsz, bool load_dest,
561                          void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32))
562 {
563     TCGv_i32 t0 = tcg_temp_new_i32();
564     TCGv_i32 t1 = tcg_temp_new_i32();
565     TCGv_i32 t2 = tcg_temp_new_i32();
566     uint32_t i;
567 
568     for (i = 0; i < oprsz; i += 4) {
569         tcg_gen_ld_i32(t0, cpu_env, aofs + i);
570         tcg_gen_ld_i32(t1, cpu_env, bofs + i);
571         if (load_dest) {
572             tcg_gen_ld_i32(t2, cpu_env, dofs + i);
573         }
574         fni(t2, t0, t1);
575         tcg_gen_st_i32(t2, cpu_env, dofs + i);
576     }
577     tcg_temp_free_i32(t2);
578     tcg_temp_free_i32(t1);
579     tcg_temp_free_i32(t0);
580 }
581 
582 /* Expand OPRSZ bytes worth of four-operand operations using i32 elements.  */
583 static void expand_4_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
584                          uint32_t cofs, uint32_t oprsz,
585                          void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32))
586 {
587     TCGv_i32 t0 = tcg_temp_new_i32();
588     TCGv_i32 t1 = tcg_temp_new_i32();
589     TCGv_i32 t2 = tcg_temp_new_i32();
590     TCGv_i32 t3 = tcg_temp_new_i32();
591     uint32_t i;
592 
593     for (i = 0; i < oprsz; i += 4) {
594         tcg_gen_ld_i32(t1, cpu_env, aofs + i);
595         tcg_gen_ld_i32(t2, cpu_env, bofs + i);
596         tcg_gen_ld_i32(t3, cpu_env, cofs + i);
597         fni(t0, t1, t2, t3);
598         tcg_gen_st_i32(t0, cpu_env, dofs + i);
599     }
600     tcg_temp_free_i32(t3);
601     tcg_temp_free_i32(t2);
602     tcg_temp_free_i32(t1);
603     tcg_temp_free_i32(t0);
604 }
605 
606 /* Expand OPRSZ bytes worth of two-operand operations using i64 elements.  */
607 static void expand_2_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
608                          void (*fni)(TCGv_i64, TCGv_i64))
609 {
610     TCGv_i64 t0 = tcg_temp_new_i64();
611     uint32_t i;
612 
613     for (i = 0; i < oprsz; i += 8) {
614         tcg_gen_ld_i64(t0, cpu_env, aofs + i);
615         fni(t0, t0);
616         tcg_gen_st_i64(t0, cpu_env, dofs + i);
617     }
618     tcg_temp_free_i64(t0);
619 }
620 
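/* Likewise, but with an immediate operand, using i64 elements.  */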
621 static void expand_2i_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
622                           int64_t c, bool load_dest,
623                           void (*fni)(TCGv_i64, TCGv_i64, int64_t))
624 {
625     TCGv_i64 t0 = tcg_temp_new_i64();
626     TCGv_i64 t1 = tcg_temp_new_i64();
627     uint32_t i;
628 
629     for (i = 0; i < oprsz; i += 8) {
630         tcg_gen_ld_i64(t0, cpu_env, aofs + i);
631         if (load_dest) {
632             tcg_gen_ld_i64(t1, cpu_env, dofs + i);
633         }
634         fni(t1, t0, c);
635         tcg_gen_st_i64(t1, cpu_env, dofs + i);
636     }
637     tcg_temp_free_i64(t0);
638     tcg_temp_free_i64(t1);
639 }
640 
641 /* Expand OPRSZ bytes worth of three-operand operations using i64 elements.  */
642 static void expand_3_i64(uint32_t dofs, uint32_t aofs,
643                          uint32_t bofs, uint32_t oprsz, bool load_dest,
644                          void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64))
645 {
646     TCGv_i64 t0 = tcg_temp_new_i64();
647     TCGv_i64 t1 = tcg_temp_new_i64();
648     TCGv_i64 t2 = tcg_temp_new_i64();
649     uint32_t i;
650 
651     for (i = 0; i < oprsz; i += 8) {
652         tcg_gen_ld_i64(t0, cpu_env, aofs + i);
653         tcg_gen_ld_i64(t1, cpu_env, bofs + i);
654         if (load_dest) {
655             tcg_gen_ld_i64(t2, cpu_env, dofs + i);
656         }
657         fni(t2, t0, t1);
658         tcg_gen_st_i64(t2, cpu_env, dofs + i);
659     }
660     tcg_temp_free_i64(t2);
661     tcg_temp_free_i64(t1);
662     tcg_temp_free_i64(t0);
663 }
664 
665 /* Expand OPRSZ bytes worth of four-operand operations using i64 elements.  */
666 static void expand_4_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
667                          uint32_t cofs, uint32_t oprsz,
668                          void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
669 {
670     TCGv_i64 t0 = tcg_temp_new_i64();
671     TCGv_i64 t1 = tcg_temp_new_i64();
672     TCGv_i64 t2 = tcg_temp_new_i64();
673     TCGv_i64 t3 = tcg_temp_new_i64();
674     uint32_t i;
675 
676     for (i = 0; i < oprsz; i += 8) {
677         tcg_gen_ld_i64(t1, cpu_env, aofs + i);
678         tcg_gen_ld_i64(t2, cpu_env, bofs + i);
679         tcg_gen_ld_i64(t3, cpu_env, cofs + i);
680         fni(t0, t1, t2, t3);
681         tcg_gen_st_i64(t0, cpu_env, dofs + i);
682     }
683     tcg_temp_free_i64(t3);
684     tcg_temp_free_i64(t2);
685     tcg_temp_free_i64(t1);
686     tcg_temp_free_i64(t0);
687 }
688 
689 /* Expand OPRSZ bytes worth of two-operand operations using host vectors.  */
690 static void expand_2_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
691                          uint32_t oprsz, uint32_t tysz, TCGType type,
692                          void (*fni)(unsigned, TCGv_vec, TCGv_vec))
693 {
694     TCGv_vec t0 = tcg_temp_new_vec(type);
695     uint32_t i;
696 
697     for (i = 0; i < oprsz; i += tysz) {
698         tcg_gen_ld_vec(t0, cpu_env, aofs + i);
699         fni(vece, t0, t0);
700         tcg_gen_st_vec(t0, cpu_env, dofs + i);
701     }
702     tcg_temp_free_vec(t0);
703 }
704 
705 /* Expand OPRSZ bytes worth of two-vector operands and an immediate operand
706    using host vectors.  */
707 static void expand_2i_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
708                           uint32_t oprsz, uint32_t tysz, TCGType type,
709                           int64_t c, bool load_dest,
710                           void (*fni)(unsigned, TCGv_vec, TCGv_vec, int64_t))
711 {
712     TCGv_vec t0 = tcg_temp_new_vec(type);
713     TCGv_vec t1 = tcg_temp_new_vec(type);
714     uint32_t i;
715 
716     for (i = 0; i < oprsz; i += tysz) {
717         tcg_gen_ld_vec(t0, cpu_env, aofs + i);
718         if (load_dest) {
719             tcg_gen_ld_vec(t1, cpu_env, dofs + i);
720         }
721         fni(vece, t1, t0, c);
722         tcg_gen_st_vec(t1, cpu_env, dofs + i);
723     }
724     tcg_temp_free_vec(t0);
725     tcg_temp_free_vec(t1);
726 }
727 
728 /* Expand OPRSZ bytes worth of three-operand operations using host vectors.  */
729 static void expand_3_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
730                          uint32_t bofs, uint32_t oprsz,
731                          uint32_t tysz, TCGType type, bool load_dest,
732                          void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
733 {
734     TCGv_vec t0 = tcg_temp_new_vec(type);
735     TCGv_vec t1 = tcg_temp_new_vec(type);
736     TCGv_vec t2 = tcg_temp_new_vec(type);
737     uint32_t i;
738 
739     for (i = 0; i < oprsz; i += tysz) {
740         tcg_gen_ld_vec(t0, cpu_env, aofs + i);
741         tcg_gen_ld_vec(t1, cpu_env, bofs + i);
742         if (load_dest) {
743             tcg_gen_ld_vec(t2, cpu_env, dofs + i);
744         }
745         fni(vece, t2, t0, t1);
746         tcg_gen_st_vec(t2, cpu_env, dofs + i);
747     }
748     tcg_temp_free_vec(t2);
749     tcg_temp_free_vec(t1);
750     tcg_temp_free_vec(t0);
751 }
752 
753 /* Expand OPRSZ bytes worth of four-operand operations using host vectors.  */
754 static void expand_4_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
755                          uint32_t bofs, uint32_t cofs, uint32_t oprsz,
756                          uint32_t tysz, TCGType type,
757                          void (*fni)(unsigned, TCGv_vec, TCGv_vec,
758                                      TCGv_vec, TCGv_vec))
759 {
760     TCGv_vec t0 = tcg_temp_new_vec(type);
761     TCGv_vec t1 = tcg_temp_new_vec(type);
762     TCGv_vec t2 = tcg_temp_new_vec(type);
763     TCGv_vec t3 = tcg_temp_new_vec(type);
764     uint32_t i;
765 
766     for (i = 0; i < oprsz; i += tysz) {
767         tcg_gen_ld_vec(t1, cpu_env, aofs + i);
768         tcg_gen_ld_vec(t2, cpu_env, bofs + i);
769         tcg_gen_ld_vec(t3, cpu_env, cofs + i);
770         fni(vece, t0, t1, t2, t3);
771         tcg_gen_st_vec(t0, cpu_env, dofs + i);
772     }
773     tcg_temp_free_vec(t3);
774     tcg_temp_free_vec(t2);
775     tcg_temp_free_vec(t1);
776     tcg_temp_free_vec(t0);
777 }
778 
779 /* Expand a vector two-operand operation.  */
780 void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
781                     uint32_t oprsz, uint32_t maxsz, const GVecGen2 *g)
782 {
783     check_size_align(oprsz, maxsz, dofs | aofs);
784     check_overlap_2(dofs, aofs, maxsz);
785 
786     /* Recall that ARM SVE allows vector sizes that are not a power of 2.
787        Expand with successively smaller host vector sizes.  The intent is
788        that e.g. oprsz == 80 would be expanded with 2x32 + 1x16.  */
789     /* ??? For maxsz > oprsz, the host may be able to use an opr-sized
790        operation, zeroing the balance of the register.  We can then
791        use a max-sized store to implement the clearing without an extra
792        store operation.  This is true for aarch64 and x86_64 hosts.  */
793 
794     if (TCG_TARGET_HAS_v256 && g->fniv && check_size_impl(oprsz, 32)
795         && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V256, g->vece))) {
796         uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32);
797         expand_2_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, g->fniv);
798         if (some == oprsz) {
799             goto done;
800         }
801         dofs += some;
802         aofs += some;
803         oprsz -= some;
804         maxsz -= some;
805     }
806 
807     if (TCG_TARGET_HAS_v128 && g->fniv && check_size_impl(oprsz, 16)
808         && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V128, g->vece))) {
809         expand_2_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, g->fniv);
810     } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64
811                && g->fniv && check_size_impl(oprsz, 8)
812                && (!g->opc
813                    || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V64, g->vece))) {
814         expand_2_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, g->fniv);
815     } else if (g->fni8 && check_size_impl(oprsz, 8)) {
816         expand_2_i64(dofs, aofs, oprsz, g->fni8);
817     } else if (g->fni4 && check_size_impl(oprsz, 4)) {
818         expand_2_i32(dofs, aofs, oprsz, g->fni4);
819     } else {
820         assert(g->fno != NULL);
821         tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, g->data, g->fno);
822         return;
823     }
824 
825  done:
826     if (oprsz < maxsz) {
827         expand_clr(dofs + oprsz, maxsz - oprsz);
828     }
829 }
830 
831 void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
832                      uint32_t maxsz, int64_t c, const GVecGen2i *g)
833 {
834     check_size_align(oprsz, maxsz, dofs | aofs);
835     check_overlap_2(dofs, aofs, maxsz);
836 
837     /* Recall that ARM SVE allows vector sizes that are not a power of 2.
838        Expand with successively smaller host vector sizes.  The intent is
839        that e.g. oprsz == 80 would be expanded with 2x32 + 1x16.  */
840 
841     if (TCG_TARGET_HAS_v256 && g->fniv && check_size_impl(oprsz, 32)
842         && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V256, g->vece))) {
843         uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32);
844         expand_2i_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256,
845                       c, g->load_dest, g->fniv);
846         if (some == oprsz) {
847             goto done;
848         }
849         dofs += some;
850         aofs += some;
851         oprsz -= some;
852         maxsz -= some;
853     }
854 
855     if (TCG_TARGET_HAS_v128 && g->fniv && check_size_impl(oprsz, 16)
856         && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V128, g->vece))) {
857         expand_2i_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128,
858                       c, g->load_dest, g->fniv);
859     } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64
860                && g->fniv && check_size_impl(oprsz, 8)
861                && (!g->opc
862                    || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V64, g->vece))) {
863         expand_2i_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64,
864                       c, g->load_dest, g->fniv);
865     } else if (g->fni8 && check_size_impl(oprsz, 8)) {
866         expand_2i_i64(dofs, aofs, oprsz, c, g->load_dest, g->fni8);
867     } else if (g->fni4 && check_size_impl(oprsz, 4)) {
868         expand_2i_i32(dofs, aofs, oprsz, c, g->load_dest, g->fni4);
869     } else {
870         tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, c, g->fno);
871         return;
872     }
873 
874  done:
875     if (oprsz < maxsz) {
876         expand_clr(dofs + oprsz, maxsz - oprsz);
877     }
878 }
879 
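/* Note that the out-of-line fallback above passes the immediate C through
   the descriptor's DATA field, so it must fit in SIMD_DATA_BITS as a signed
   value (simd_desc asserts this).  */
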
880 /* Expand a vector three-operand operation.  */
881 void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
882                     uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g)
883 {
884     check_size_align(oprsz, maxsz, dofs | aofs | bofs);
885     check_overlap_3(dofs, aofs, bofs, maxsz);
886 
887     /* Recall that ARM SVE allows vector sizes that are not a power of 2.
888        Expand with successively smaller host vector sizes.  The intent is
889        that e.g. oprsz == 80 would be expanded with 2x32 + 1x16.  */
890 
891     if (TCG_TARGET_HAS_v256 && g->fniv && check_size_impl(oprsz, 32)
892         && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V256, g->vece))) {
893         uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32);
894         expand_3_vec(g->vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256,
895                      g->load_dest, g->fniv);
896         if (some == oprsz) {
897             goto done;
898         }
899         dofs += some;
900         aofs += some;
901         bofs += some;
902         oprsz -= some;
903         maxsz -= some;
904     }
905 
906     if (TCG_TARGET_HAS_v128 && g->fniv && check_size_impl(oprsz, 16)
907         && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V128, g->vece))) {
908         expand_3_vec(g->vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128,
909                      g->load_dest, g->fniv);
910     } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64
911                && g->fniv && check_size_impl(oprsz, 8)
912                && (!g->opc
913                    || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V64, g->vece))) {
914         expand_3_vec(g->vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64,
915                      g->load_dest, g->fniv);
916     } else if (g->fni8 && check_size_impl(oprsz, 8)) {
917         expand_3_i64(dofs, aofs, bofs, oprsz, g->load_dest, g->fni8);
918     } else if (g->fni4 && check_size_impl(oprsz, 4)) {
919         expand_3_i32(dofs, aofs, bofs, oprsz, g->load_dest, g->fni4);
920     } else {
921         assert(g->fno != NULL);
922         tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, g->data, g->fno);
            return;
923     }
924 
925  done:
926     if (oprsz < maxsz) {
927         expand_clr(dofs + oprsz, maxsz - oprsz);
928     }
929 }
930 
931 /* Expand a vector four-operand operation.  */
932 void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
933                     uint32_t oprsz, uint32_t maxsz, const GVecGen4 *g)
934 {
935     check_size_align(oprsz, maxsz, dofs | aofs | bofs | cofs);
936     check_overlap_4(dofs, aofs, bofs, cofs, maxsz);
937 
938     /* Recall that ARM SVE allows vector sizes that are not a power of 2.
939        Expand with successively smaller host vector sizes.  The intent is
940        that e.g. oprsz == 80 would be expanded with 2x32 + 1x16.  */
941 
942     if (TCG_TARGET_HAS_v256 && g->fniv && check_size_impl(oprsz, 32)
943         && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V256, g->vece))) {
944         uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32);
945         expand_4_vec(g->vece, dofs, aofs, bofs, cofs, some,
946                      32, TCG_TYPE_V256, g->fniv);
947         if (some == oprsz) {
948             goto done;
949         }
950         dofs += some;
951         aofs += some;
952         bofs += some;
953         cofs += some;
954         oprsz -= some;
955         maxsz -= some;
956     }
957 
958     if (TCG_TARGET_HAS_v128 && g->fniv && check_size_impl(oprsz, 16)
959         && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V128, g->vece))) {
960         expand_4_vec(g->vece, dofs, aofs, bofs, cofs, oprsz,
961                      16, TCG_TYPE_V128, g->fniv);
962     } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64
963                && g->fniv && check_size_impl(oprsz, 8)
964                 && (!g->opc
965                     || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V64, g->vece))) {
966         expand_4_vec(g->vece, dofs, aofs, bofs, cofs, oprsz,
967                      8, TCG_TYPE_V64, g->fniv);
968     } else if (g->fni8 && check_size_impl(oprsz, 8)) {
969         expand_4_i64(dofs, aofs, bofs, cofs, oprsz, g->fni8);
970     } else if (g->fni4 && check_size_impl(oprsz, 4)) {
971         expand_4_i32(dofs, aofs, bofs, cofs, oprsz, g->fni4);
972     } else {
973         assert(g->fno != NULL);
974         tcg_gen_gvec_4_ool(dofs, aofs, bofs, cofs,
975                            oprsz, maxsz, g->data, g->fno);
976         return;
977     }
978 
979  done:
980     if (oprsz < maxsz) {
981         expand_clr(dofs + oprsz, maxsz - oprsz);
982     }
983 }
984 
985 /*
986  * Expand specific vector operations.
987  */
988 
989 static void vec_mov2(unsigned vece, TCGv_vec a, TCGv_vec b)
990 {
991     tcg_gen_mov_vec(a, b);
992 }
993 
994 void tcg_gen_gvec_mov(unsigned vece, uint32_t dofs, uint32_t aofs,
995                       uint32_t oprsz, uint32_t maxsz)
996 {
997     static const GVecGen2 g = {
998         .fni8 = tcg_gen_mov_i64,
999         .fniv = vec_mov2,
1000         .fno = gen_helper_gvec_mov,
1001         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1002     };
1003     if (dofs != aofs) {
1004         tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g);
1005     } else {
1006         check_size_align(oprsz, maxsz, dofs);
1007         if (oprsz < maxsz) {
1008             expand_clr(dofs + oprsz, maxsz - oprsz);
1009         }
1010     }
1011 }
1012 
1013 void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t oprsz,
1014                           uint32_t maxsz, TCGv_i32 in)
1015 {
1016     check_size_align(oprsz, maxsz, dofs);
1017     tcg_debug_assert(vece <= MO_32);
1018     do_dup(vece, dofs, oprsz, maxsz, in, NULL, 0);
1019 }
1020 
1021 void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t oprsz,
1022                           uint32_t maxsz, TCGv_i64 in)
1023 {
1024     check_size_align(oprsz, maxsz, dofs);
1025     tcg_debug_assert(vece <= MO_64);
1026     do_dup(vece, dofs, oprsz, maxsz, NULL, in, 0);
1027 }
1028 
1029 void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
1030                           uint32_t oprsz, uint32_t maxsz)
1031 {
1032     if (vece <= MO_32) {
1033         TCGv_i32 in = tcg_temp_new_i32();
1034         switch (vece) {
1035         case MO_8:
1036             tcg_gen_ld8u_i32(in, cpu_env, aofs);
1037             break;
1038         case MO_16:
1039             tcg_gen_ld16u_i32(in, cpu_env, aofs);
1040             break;
1041         case MO_32:
1042             tcg_gen_ld_i32(in, cpu_env, aofs);
1043             break;
1044         }
1045         tcg_gen_gvec_dup_i32(vece, dofs, oprsz, maxsz, in);
1046         tcg_temp_free_i32(in);
1047     } else if (vece == MO_64) {
1048         TCGv_i64 in = tcg_temp_new_i64();
1049         tcg_gen_ld_i64(in, cpu_env, aofs);
1050         tcg_gen_gvec_dup_i64(MO_64, dofs, oprsz, maxsz, in);
1051         tcg_temp_free_i64(in);
1052     } else {
1053         /* 128-bit duplicate.  */
1054         /* ??? Dup to 256-bit vector.  */
1055         int i;
1056 
1057         tcg_debug_assert(vece == 4);
1058         tcg_debug_assert(oprsz >= 16);
1059         if (TCG_TARGET_HAS_v128) {
1060             TCGv_vec in = tcg_temp_new_vec(TCG_TYPE_V128);
1061 
1062             tcg_gen_ld_vec(in, cpu_env, aofs);
1063             for (i = 0; i < oprsz; i += 16) {
1064                 tcg_gen_st_vec(in, cpu_env, dofs + i);
1065             }
1066             tcg_temp_free_vec(in);
1067         } else {
1068             TCGv_i64 in0 = tcg_temp_new_i64();
1069             TCGv_i64 in1 = tcg_temp_new_i64();
1070 
1071             tcg_gen_ld_i64(in0, cpu_env, aofs);
1072             tcg_gen_ld_i64(in1, cpu_env, aofs + 8);
1073             for (i = 0; i < oprsz; i += 16) {
1074                 tcg_gen_st_i64(in0, cpu_env, dofs + i);
1075                 tcg_gen_st_i64(in1, cpu_env, dofs + i + 8);
1076             }
1077             tcg_temp_free_i64(in0);
1078             tcg_temp_free_i64(in1);
1079         }
1080     }
1081 }
1082 
1083 void tcg_gen_gvec_dup64i(uint32_t dofs, uint32_t oprsz,
1084                          uint32_t maxsz, uint64_t x)
1085 {
1086     check_size_align(oprsz, maxsz, dofs);
1087     do_dup(MO_64, dofs, oprsz, maxsz, NULL, NULL, x);
1088 }
1089 
1090 void tcg_gen_gvec_dup32i(uint32_t dofs, uint32_t oprsz,
1091                          uint32_t maxsz, uint32_t x)
1092 {
1093     check_size_align(oprsz, maxsz, dofs);
1094     do_dup(MO_32, dofs, oprsz, maxsz, NULL, NULL, x);
1095 }
1096 
1097 void tcg_gen_gvec_dup16i(uint32_t dofs, uint32_t oprsz,
1098                          uint32_t maxsz, uint16_t x)
1099 {
1100     check_size_align(oprsz, maxsz, dofs);
1101     do_dup(MO_16, dofs, oprsz, maxsz, NULL, NULL, x);
1102 }
1103 
1104 void tcg_gen_gvec_dup8i(uint32_t dofs, uint32_t oprsz,
1105                          uint32_t maxsz, uint8_t x)
1106 {
1107     check_size_align(oprsz, maxsz, dofs);
1108     do_dup(MO_8, dofs, oprsz, maxsz, NULL, NULL, x);
1109 }
1110 
1111 void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs,
1112                       uint32_t oprsz, uint32_t maxsz)
1113 {
1114     static const GVecGen2 g = {
1115         .fni8 = tcg_gen_not_i64,
1116         .fniv = tcg_gen_not_vec,
1117         .fno = gen_helper_gvec_not,
1118         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1119     };
1120     tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g);
1121 }
1122 
1123 /* Perform a vector addition using normal addition and a mask.  The mask
1124    should be the sign bit of each lane.  This 6-operation form is more
1125    efficient than separate additions when there are 4 or more lanes in
1126    the 64-bit operation.  */
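/* In detail: clearing the sign bit of every lane in both inputs guarantees
   that the 64-bit addition below cannot carry across a lane boundary; each
   lane's sign-bit position in the sum then holds only the carry-in from
   the bits below it, and xoring in (a ^ b) & m restores the true top bit
   of each lane.  */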
1127 static void gen_addv_mask(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 m)
1128 {
1129     TCGv_i64 t1 = tcg_temp_new_i64();
1130     TCGv_i64 t2 = tcg_temp_new_i64();
1131     TCGv_i64 t3 = tcg_temp_new_i64();
1132 
1133     tcg_gen_andc_i64(t1, a, m);
1134     tcg_gen_andc_i64(t2, b, m);
1135     tcg_gen_xor_i64(t3, a, b);
1136     tcg_gen_add_i64(d, t1, t2);
1137     tcg_gen_and_i64(t3, t3, m);
1138     tcg_gen_xor_i64(d, d, t3);
1139 
1140     tcg_temp_free_i64(t1);
1141     tcg_temp_free_i64(t2);
1142     tcg_temp_free_i64(t3);
1143 }
1144 
1145 void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1146 {
1147     TCGv_i64 m = tcg_const_i64(dup_const(MO_8, 0x80));
1148     gen_addv_mask(d, a, b, m);
1149     tcg_temp_free_i64(m);
1150 }
1151 
1152 void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1153 {
1154     TCGv_i64 m = tcg_const_i64(dup_const(MO_16, 0x8000));
1155     gen_addv_mask(d, a, b, m);
1156     tcg_temp_free_i64(m);
1157 }
1158 
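/* Handle the two 32-bit lanes with plain 64-bit arithmetic: T2 computes the
   correct low lane (its high half may be corrupted by a carry-out), while
   T1 adds B to A with A's low lane cleared so that the high lane sees no
   stray carry; the deposit merges the two correct halves.  The sub32 and
   neg32 helpers below use the same idea.  */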
1159 void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1160 {
1161     TCGv_i64 t1 = tcg_temp_new_i64();
1162     TCGv_i64 t2 = tcg_temp_new_i64();
1163 
1164     tcg_gen_andi_i64(t1, a, ~0xffffffffull);
1165     tcg_gen_add_i64(t2, a, b);
1166     tcg_gen_add_i64(t1, t1, b);
1167     tcg_gen_deposit_i64(d, t1, t2, 0, 32);
1168 
1169     tcg_temp_free_i64(t1);
1170     tcg_temp_free_i64(t2);
1171 }
1172 
1173 void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs,
1174                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
1175 {
1176     static const GVecGen3 g[4] = {
1177         { .fni8 = tcg_gen_vec_add8_i64,
1178           .fniv = tcg_gen_add_vec,
1179           .fno = gen_helper_gvec_add8,
1180           .opc = INDEX_op_add_vec,
1181           .vece = MO_8 },
1182         { .fni8 = tcg_gen_vec_add16_i64,
1183           .fniv = tcg_gen_add_vec,
1184           .fno = gen_helper_gvec_add16,
1185           .opc = INDEX_op_add_vec,
1186           .vece = MO_16 },
1187         { .fni4 = tcg_gen_add_i32,
1188           .fniv = tcg_gen_add_vec,
1189           .fno = gen_helper_gvec_add32,
1190           .opc = INDEX_op_add_vec,
1191           .vece = MO_32 },
1192         { .fni8 = tcg_gen_add_i64,
1193           .fniv = tcg_gen_add_vec,
1194           .fno = gen_helper_gvec_add64,
1195           .opc = INDEX_op_add_vec,
1196           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1197           .vece = MO_64 },
1198     };
1199 
1200     tcg_debug_assert(vece <= MO_64);
1201     tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
1202 }
1203 
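/* Typical front-end usage (a sketch only; CPUFooState and its "vreg" array
 * are hypothetical, not part of this file):
 *
 *     uint32_t dofs = offsetof(CPUFooState, vreg[rd]);
 *     uint32_t aofs = offsetof(CPUFooState, vreg[rn]);
 *     uint32_t bofs = offsetof(CPUFooState, vreg[rm]);
 *     tcg_gen_gvec_add(MO_32, dofs, aofs, bofs, 16, 16);
 *
 * This adds four 32-bit lanes of two 16-byte env-resident registers,
 * choosing a vector, integer or out-of-line expansion according to what
 * the host supports.
 */
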
1204 /* Perform a vector subtraction using normal subtraction and a mask.
1205    Compare gen_addv_mask above.  */
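/* In detail: forcing the sign bit of every lane in A keeps a borrow from
   the low bits of a lane from propagating into the next lane; the sign-bit
   position of the difference is then 1 ^ borrow-in, and xoring in the eqv
   mask ~(a ^ b) & m restores the true top bit of each lane.  */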
1206 static void gen_subv_mask(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 m)
1207 {
1208     TCGv_i64 t1 = tcg_temp_new_i64();
1209     TCGv_i64 t2 = tcg_temp_new_i64();
1210     TCGv_i64 t3 = tcg_temp_new_i64();
1211 
1212     tcg_gen_or_i64(t1, a, m);
1213     tcg_gen_andc_i64(t2, b, m);
1214     tcg_gen_eqv_i64(t3, a, b);
1215     tcg_gen_sub_i64(d, t1, t2);
1216     tcg_gen_and_i64(t3, t3, m);
1217     tcg_gen_xor_i64(d, d, t3);
1218 
1219     tcg_temp_free_i64(t1);
1220     tcg_temp_free_i64(t2);
1221     tcg_temp_free_i64(t3);
1222 }
1223 
1224 void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1225 {
1226     TCGv_i64 m = tcg_const_i64(dup_const(MO_8, 0x80));
1227     gen_subv_mask(d, a, b, m);
1228     tcg_temp_free_i64(m);
1229 }
1230 
1231 void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1232 {
1233     TCGv_i64 m = tcg_const_i64(dup_const(MO_16, 0x8000));
1234     gen_subv_mask(d, a, b, m);
1235     tcg_temp_free_i64(m);
1236 }
1237 
1238 void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1239 {
1240     TCGv_i64 t1 = tcg_temp_new_i64();
1241     TCGv_i64 t2 = tcg_temp_new_i64();
1242 
1243     tcg_gen_andi_i64(t1, b, ~0xffffffffull);
1244     tcg_gen_sub_i64(t2, a, b);
1245     tcg_gen_sub_i64(t1, a, t1);
1246     tcg_gen_deposit_i64(d, t1, t2, 0, 32);
1247 
1248     tcg_temp_free_i64(t1);
1249     tcg_temp_free_i64(t2);
1250 }
1251 
1252 void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs,
1253                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
1254 {
1255     static const GVecGen3 g[4] = {
1256         { .fni8 = tcg_gen_vec_sub8_i64,
1257           .fniv = tcg_gen_sub_vec,
1258           .fno = gen_helper_gvec_sub8,
1259           .opc = INDEX_op_sub_vec,
1260           .vece = MO_8 },
1261         { .fni8 = tcg_gen_vec_sub16_i64,
1262           .fniv = tcg_gen_sub_vec,
1263           .fno = gen_helper_gvec_sub16,
1264           .opc = INDEX_op_sub_vec,
1265           .vece = MO_16 },
1266         { .fni4 = tcg_gen_sub_i32,
1267           .fniv = tcg_gen_sub_vec,
1268           .fno = gen_helper_gvec_sub32,
1269           .opc = INDEX_op_sub_vec,
1270           .vece = MO_32 },
1271         { .fni8 = tcg_gen_sub_i64,
1272           .fniv = tcg_gen_sub_vec,
1273           .fno = gen_helper_gvec_sub64,
1274           .opc = INDEX_op_sub_vec,
1275           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1276           .vece = MO_64 },
1277     };
1278 
1279     tcg_debug_assert(vece <= MO_64);
1280     tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
1281 }
1282 
1283 /* Perform a vector negation using normal negation and a mask.
1284    Compare gen_subv_mask above.  */
1285 static void gen_negv_mask(TCGv_i64 d, TCGv_i64 b, TCGv_i64 m)
1286 {
1287     TCGv_i64 t2 = tcg_temp_new_i64();
1288     TCGv_i64 t3 = tcg_temp_new_i64();
1289 
1290     tcg_gen_andc_i64(t3, m, b);
1291     tcg_gen_andc_i64(t2, b, m);
1292     tcg_gen_sub_i64(d, m, t2);
1293     tcg_gen_xor_i64(d, d, t3);
1294 
1295     tcg_temp_free_i64(t2);
1296     tcg_temp_free_i64(t3);
1297 }
1298 
1299 void tcg_gen_vec_neg8_i64(TCGv_i64 d, TCGv_i64 b)
1300 {
1301     TCGv_i64 m = tcg_const_i64(dup_const(MO_8, 0x80));
1302     gen_negv_mask(d, b, m);
1303     tcg_temp_free_i64(m);
1304 }
1305 
1306 void tcg_gen_vec_neg16_i64(TCGv_i64 d, TCGv_i64 b)
1307 {
1308     TCGv_i64 m = tcg_const_i64(dup_const(MO_16, 0x8000));
1309     gen_negv_mask(d, b, m);
1310     tcg_temp_free_i64(m);
1311 }
1312 
1313 void tcg_gen_vec_neg32_i64(TCGv_i64 d, TCGv_i64 b)
1314 {
1315     TCGv_i64 t1 = tcg_temp_new_i64();
1316     TCGv_i64 t2 = tcg_temp_new_i64();
1317 
1318     tcg_gen_andi_i64(t1, b, ~0xffffffffull);
1319     tcg_gen_neg_i64(t2, b);
1320     tcg_gen_neg_i64(t1, t1);
1321     tcg_gen_deposit_i64(d, t1, t2, 0, 32);
1322 
1323     tcg_temp_free_i64(t1);
1324     tcg_temp_free_i64(t2);
1325 }
1326 
1327 void tcg_gen_gvec_neg(unsigned vece, uint32_t dofs, uint32_t aofs,
1328                       uint32_t oprsz, uint32_t maxsz)
1329 {
1330     static const GVecGen2 g[4] = {
1331         { .fni8 = tcg_gen_vec_neg8_i64,
1332           .fniv = tcg_gen_neg_vec,
1333           .fno = gen_helper_gvec_neg8,
1334           .opc = INDEX_op_neg_vec,
1335           .vece = MO_8 },
1336         { .fni8 = tcg_gen_vec_neg16_i64,
1337           .fniv = tcg_gen_neg_vec,
1338           .fno = gen_helper_gvec_neg16,
1339           .opc = INDEX_op_neg_vec,
1340           .vece = MO_16 },
1341         { .fni4 = tcg_gen_neg_i32,
1342           .fniv = tcg_gen_neg_vec,
1343           .fno = gen_helper_gvec_neg32,
1344           .opc = INDEX_op_neg_vec,
1345           .vece = MO_32 },
1346         { .fni8 = tcg_gen_neg_i64,
1347           .fniv = tcg_gen_neg_vec,
1348           .fno = gen_helper_gvec_neg64,
1349           .opc = INDEX_op_neg_vec,
1350           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1351           .vece = MO_64 },
1352     };
1353 
1354     tcg_debug_assert(vece <= MO_64);
1355     tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g[vece]);
1356 }
1357 
1358 void tcg_gen_gvec_and(unsigned vece, uint32_t dofs, uint32_t aofs,
1359                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
1360 {
1361     static const GVecGen3 g = {
1362         .fni8 = tcg_gen_and_i64,
1363         .fniv = tcg_gen_and_vec,
1364         .fno = gen_helper_gvec_and,
1365         .opc = INDEX_op_and_vec,
1366         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1367     };
1368     tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
1369 }
1370 
1371 void tcg_gen_gvec_or(unsigned vece, uint32_t dofs, uint32_t aofs,
1372                      uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
1373 {
1374     static const GVecGen3 g = {
1375         .fni8 = tcg_gen_or_i64,
1376         .fniv = tcg_gen_or_vec,
1377         .fno = gen_helper_gvec_or,
1378         .opc = INDEX_op_or_vec,
1379         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1380     };
1381     tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
1382 }
1383 
1384 void tcg_gen_gvec_xor(unsigned vece, uint32_t dofs, uint32_t aofs,
1385                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
1386 {
1387     static const GVecGen3 g = {
1388         .fni8 = tcg_gen_xor_i64,
1389         .fniv = tcg_gen_xor_vec,
1390         .fno = gen_helper_gvec_xor,
1391         .opc = INDEX_op_xor_vec,
1392         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1393     };
1394     tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
1395 }
1396 
1397 void tcg_gen_gvec_andc(unsigned vece, uint32_t dofs, uint32_t aofs,
1398                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
1399 {
1400     static const GVecGen3 g = {
1401         .fni8 = tcg_gen_andc_i64,
1402         .fniv = tcg_gen_andc_vec,
1403         .fno = gen_helper_gvec_andc,
1404         .opc = INDEX_op_andc_vec,
1405         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1406     };
1407     tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
1408 }
1409 
1410 void tcg_gen_gvec_orc(unsigned vece, uint32_t dofs, uint32_t aofs,
1411                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
1412 {
1413     static const GVecGen3 g = {
1414         .fni8 = tcg_gen_orc_i64,
1415         .fniv = tcg_gen_orc_vec,
1416         .fno = gen_helper_gvec_orc,
1417         .opc = INDEX_op_orc_vec,
1418         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1419     };
1420     tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
1421 }
1422 
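/* Per-lane shifts by an immediate, built from shifts of the whole 64-bit
   value: shift everything, then mask away the bits that crossed a lane
   boundary (0xff << c, resp. 0xff >> c, replicated to every lane).  */
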
1423 void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
1424 {
1425     uint64_t mask = dup_const(MO_8, 0xff << c);
1426     tcg_gen_shli_i64(d, a, c);
1427     tcg_gen_andi_i64(d, d, mask);
1428 }
1429 
1430 void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
1431 {
1432     uint64_t mask = dup_const(MO_16, 0xffff << c);
1433     tcg_gen_shli_i64(d, a, c);
1434     tcg_gen_andi_i64(d, d, mask);
1435 }
1436 
1437 void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs,
1438                        int64_t shift, uint32_t oprsz, uint32_t maxsz)
1439 {
1440     static const GVecGen2i g[4] = {
1441         { .fni8 = tcg_gen_vec_shl8i_i64,
1442           .fniv = tcg_gen_shli_vec,
1443           .fno = gen_helper_gvec_shl8i,
1444           .opc = INDEX_op_shli_vec,
1445           .vece = MO_8 },
1446         { .fni8 = tcg_gen_vec_shl16i_i64,
1447           .fniv = tcg_gen_shli_vec,
1448           .fno = gen_helper_gvec_shl16i,
1449           .opc = INDEX_op_shli_vec,
1450           .vece = MO_16 },
1451         { .fni4 = tcg_gen_shli_i32,
1452           .fniv = tcg_gen_shli_vec,
1453           .fno = gen_helper_gvec_shl32i,
1454           .opc = INDEX_op_shli_vec,
1455           .vece = MO_32 },
1456         { .fni8 = tcg_gen_shli_i64,
1457           .fniv = tcg_gen_shli_vec,
1458           .fno = gen_helper_gvec_shl64i,
1459           .opc = INDEX_op_shli_vec,
1460           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1461           .vece = MO_64 },
1462     };
1463 
1464     tcg_debug_assert(vece <= MO_64);
1465     tcg_debug_assert(shift >= 0 && shift < (8 << vece));
1466     if (shift == 0) {
1467         tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
1468     } else {
1469         tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
1470     }
1471 }
1472 
1473 void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
1474 {
1475     uint64_t mask = dup_const(MO_8, 0xff >> c);
1476     tcg_gen_shri_i64(d, a, c);
1477     tcg_gen_andi_i64(d, d, mask);
1478 }
1479 
1480 void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
1481 {
1482     uint64_t mask = dup_const(MO_16, 0xffff >> c);
1483     tcg_gen_shri_i64(d, a, c);
1484     tcg_gen_andi_i64(d, d, mask);
1485 }
1486 
1487 void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
1488                        int64_t shift, uint32_t oprsz, uint32_t maxsz)
1489 {
1490     static const GVecGen2i g[4] = {
1491         { .fni8 = tcg_gen_vec_shr8i_i64,
1492           .fniv = tcg_gen_shri_vec,
1493           .fno = gen_helper_gvec_shr8i,
1494           .opc = INDEX_op_shri_vec,
1495           .vece = MO_8 },
1496         { .fni8 = tcg_gen_vec_shr16i_i64,
1497           .fniv = tcg_gen_shri_vec,
1498           .fno = gen_helper_gvec_shr16i,
1499           .opc = INDEX_op_shri_vec,
1500           .vece = MO_16 },
1501         { .fni4 = tcg_gen_shri_i32,
1502           .fniv = tcg_gen_shri_vec,
1503           .fno = gen_helper_gvec_shr32i,
1504           .opc = INDEX_op_shri_vec,
1505           .vece = MO_32 },
1506         { .fni8 = tcg_gen_shri_i64,
1507           .fniv = tcg_gen_shri_vec,
1508           .fno = gen_helper_gvec_shr64i,
1509           .opc = INDEX_op_shri_vec,
1510           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1511           .vece = MO_64 },
1512     };
1513 
1514     tcg_debug_assert(vece <= MO_64);
1515     tcg_debug_assert(shift >= 0 && shift < (8 << vece));
1516     if (shift == 0) {
1517         tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
1518     } else {
1519         tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
1520     }
1521 }
1522 
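/* Per-lane arithmetic shifts built from a logical shift of the whole
   64-bit value: isolate each lane's shifted-down sign bit, multiply by
   (2 << c) - 2 to replicate it into the C bit positions above, clear the
   bits that leaked in from the neighbouring lane, and OR the sign
   extension back in.  */
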
1523 void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
1524 {
1525     uint64_t s_mask = dup_const(MO_8, 0x80 >> c);
1526     uint64_t c_mask = dup_const(MO_8, 0xff >> c);
1527     TCGv_i64 s = tcg_temp_new_i64();
1528 
1529     tcg_gen_shri_i64(d, a, c);
1530     tcg_gen_andi_i64(s, d, s_mask);  /* isolate (shifted) sign bit */
1531     tcg_gen_muli_i64(s, s, (2 << c) - 2); /* replicate isolated signs */
1532     tcg_gen_andi_i64(d, d, c_mask);  /* clear out bits above sign  */
1533     tcg_gen_or_i64(d, d, s);         /* include sign extension */
1534     tcg_temp_free_i64(s);
1535 }
1536 
1537 void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
1538 {
1539     uint64_t s_mask = dup_const(MO_16, 0x8000 >> c);
1540     uint64_t c_mask = dup_const(MO_16, 0xffff >> c);
1541     TCGv_i64 s = tcg_temp_new_i64();
1542 
1543     tcg_gen_shri_i64(d, a, c);
1544     tcg_gen_andi_i64(s, d, s_mask);  /* isolate (shifted) sign bit */
1545     tcg_gen_andi_i64(d, d, c_mask);  /* clear out bits above sign  */
1546     tcg_gen_muli_i64(s, s, (2 << c) - 2); /* replicate isolated signs */
1547     tcg_gen_or_i64(d, d, s);         /* include sign extension */
1548     tcg_temp_free_i64(s);
1549 }
1550 
1551 void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
1552                        int64_t shift, uint32_t oprsz, uint32_t maxsz)
1553 {
1554     static const GVecGen2i g[4] = {
1555         { .fni8 = tcg_gen_vec_sar8i_i64,
1556           .fniv = tcg_gen_sari_vec,
1557           .fno = gen_helper_gvec_sar8i,
1558           .opc = INDEX_op_sari_vec,
1559           .vece = MO_8 },
1560         { .fni8 = tcg_gen_vec_sar16i_i64,
1561           .fniv = tcg_gen_sari_vec,
1562           .fno = gen_helper_gvec_sar16i,
1563           .opc = INDEX_op_sari_vec,
1564           .vece = MO_16 },
1565         { .fni4 = tcg_gen_sari_i32,
1566           .fniv = tcg_gen_sari_vec,
1567           .fno = gen_helper_gvec_sar32i,
1568           .opc = INDEX_op_sari_vec,
1569           .vece = MO_32 },
1570         { .fni8 = tcg_gen_sari_i64,
1571           .fniv = tcg_gen_sari_vec,
1572           .fno = gen_helper_gvec_sar64i,
1573           .opc = INDEX_op_sari_vec,
1574           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1575           .vece = MO_64 },
1576     };
1577 
1578     tcg_debug_assert(vece <= MO_64);
1579     tcg_debug_assert(shift >= 0 && shift < (8 << vece));
1580     if (shift == 0) {
1581         tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
1582     } else {
1583         tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
1584     }
1585 }
1586