xref: /openbmc/qemu/target/s390x/tcg/translate_vx.c.inc (revision e6a19a6477407e57b4deb61aaa497a14d7db9626)
1/*
2 * QEMU TCG support -- s390x vector instruction translation functions
3 *
4 * Copyright (C) 2019 Red Hat Inc
5 *
6 * Authors:
7 *   David Hildenbrand <david@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12
13/*
14 * For most instructions that use the same element size for reads and
 * writes, we can use real gvec vector expansion, which potentially uses
16 * real host vector instructions. As they only work up to 64 bit elements,
17 * 128 bit elements (vector is a single element) have to be handled
18 * differently. Operations that are too complicated to encode via TCG ops
19 * are handled via gvec ool (out-of-line) handlers.
20 *
21 * As soon as instructions use different element sizes for reads and writes
22 * or access elements "out of their element scope" we expand them manually
23 * in fancy loops, as gvec expansion does not deal with actual element
24 * numbers and does also not support access to other elements.
25 *
26 * 128 bit elements:
27 *  As we only have i32/i64, such elements have to be loaded into two
28 *  i64 values and can then be processed e.g. by tcg_gen_add2_i64.
29 *
30 * Sizes:
31 *  On s390x, the operand size (oprsz) and the maximum size (maxsz) are
32 *  always 16 (128 bit). What gvec code calls "vece", s390x calls "es",
33 *  a.k.a. "element size". These values nicely map to MO_8 ... MO_64. Only
34 *  128 bit element size has to be treated in a special way (MO_64 + 1).
35 *  We will use ES_* instead of MO_* for this reason in this file.
36 *
37 * CC handling:
38 *  As gvec ool-helpers can currently not return values (besides via
39 *  pointers like vectors or cpu_env), whenever we have to set the CC and
40 *  can't conclude the value from the result vector, we will directly
 *  set it in "env->cc_op" and mark it as static via "set_cc_static()".
42 *  Whenever this is done, the helper writes globals (cc_op).
43 */
44
/*
 * Geometry of a 16 byte vector for a given element size "es", where "es"
 * is the log2 of the element size in bytes.
 */
/* Number of bytes in one element. */
#define NUM_VEC_ELEMENT_BYTES(es) (1 << (es))
/* Number of elements that fit into the 16 byte vector. */
#define NUM_VEC_ELEMENTS(es) (16 / NUM_VEC_ELEMENT_BYTES(es))
/* Number of bits in one element. */
#define NUM_VEC_ELEMENT_BITS(es) (NUM_VEC_ELEMENT_BYTES(es) * BITS_PER_BYTE)

/*
 * Element sizes. ES_8 ... ES_64 are aliases of MO_8 ... MO_64; ES_128 is
 * defined numerically in this file.
 */
#define ES_8    MO_8
#define ES_16   MO_16
#define ES_32   MO_32
#define ES_64   MO_64
#define ES_128  4

/* Floating-Point Format */
#define FPF_SHORT       2
#define FPF_LONG        3
#define FPF_EXT         4
59
60static inline bool valid_vec_element(uint16_t enr, MemOp es)
61{
62    return !(enr & ~(NUM_VEC_ELEMENTS(es) - 1));
63}
64
65static void read_vec_element_i64(TCGv_i64 dst, uint8_t reg, uint8_t enr,
66                                 MemOp memop)
67{
68    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
69
70    switch ((unsigned)memop) {
71    case ES_8:
72        tcg_gen_ld8u_i64(dst, cpu_env, offs);
73        break;
74    case ES_16:
75        tcg_gen_ld16u_i64(dst, cpu_env, offs);
76        break;
77    case ES_32:
78        tcg_gen_ld32u_i64(dst, cpu_env, offs);
79        break;
80    case ES_8 | MO_SIGN:
81        tcg_gen_ld8s_i64(dst, cpu_env, offs);
82        break;
83    case ES_16 | MO_SIGN:
84        tcg_gen_ld16s_i64(dst, cpu_env, offs);
85        break;
86    case ES_32 | MO_SIGN:
87        tcg_gen_ld32s_i64(dst, cpu_env, offs);
88        break;
89    case ES_64:
90    case ES_64 | MO_SIGN:
91        tcg_gen_ld_i64(dst, cpu_env, offs);
92        break;
93    default:
94        g_assert_not_reached();
95    }
96}
97
98static void read_vec_element_i32(TCGv_i32 dst, uint8_t reg, uint8_t enr,
99                                 MemOp memop)
100{
101    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
102
103    switch (memop) {
104    case ES_8:
105        tcg_gen_ld8u_i32(dst, cpu_env, offs);
106        break;
107    case ES_16:
108        tcg_gen_ld16u_i32(dst, cpu_env, offs);
109        break;
110    case ES_8 | MO_SIGN:
111        tcg_gen_ld8s_i32(dst, cpu_env, offs);
112        break;
113    case ES_16 | MO_SIGN:
114        tcg_gen_ld16s_i32(dst, cpu_env, offs);
115        break;
116    case ES_32:
117    case ES_32 | MO_SIGN:
118        tcg_gen_ld_i32(dst, cpu_env, offs);
119        break;
120    default:
121        g_assert_not_reached();
122    }
123}
124
125static void write_vec_element_i64(TCGv_i64 src, int reg, uint8_t enr,
126                                  MemOp memop)
127{
128    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
129
130    switch (memop) {
131    case ES_8:
132        tcg_gen_st8_i64(src, cpu_env, offs);
133        break;
134    case ES_16:
135        tcg_gen_st16_i64(src, cpu_env, offs);
136        break;
137    case ES_32:
138        tcg_gen_st32_i64(src, cpu_env, offs);
139        break;
140    case ES_64:
141        tcg_gen_st_i64(src, cpu_env, offs);
142        break;
143    default:
144        g_assert_not_reached();
145    }
146}
147
148static void write_vec_element_i32(TCGv_i32 src, int reg, uint8_t enr,
149                                  MemOp memop)
150{
151    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
152
153    switch (memop) {
154    case ES_8:
155        tcg_gen_st8_i32(src, cpu_env, offs);
156        break;
157    case ES_16:
158        tcg_gen_st16_i32(src, cpu_env, offs);
159        break;
160    case ES_32:
161        tcg_gen_st_i32(src, cpu_env, offs);
162        break;
163    default:
164        g_assert_not_reached();
165    }
166}
167
168static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
169                                    uint8_t es)
170{
171    TCGv_i64 tmp = tcg_temp_new_i64();
172
173    /* mask off invalid parts from the element nr */
174    tcg_gen_andi_i64(tmp, enr, NUM_VEC_ELEMENTS(es) - 1);
175
176    /* convert it to an element offset relative to cpu_env (vec_reg_offset() */
177    tcg_gen_shli_i64(tmp, tmp, es);
178#if !HOST_BIG_ENDIAN
179    tcg_gen_xori_i64(tmp, tmp, 8 - NUM_VEC_ELEMENT_BYTES(es));
180#endif
181    tcg_gen_addi_i64(tmp, tmp, vec_full_reg_offset(reg));
182
183    /* generate the final ptr by adding cpu_env */
184    tcg_gen_trunc_i64_ptr(ptr, tmp);
185    tcg_gen_add_ptr(ptr, ptr, cpu_env);
186}
187
/*
 * Thin wrappers around the tcg_gen_gvec_*() expanders. They take vector
 * register numbers instead of offsets and always pass 16 for both size
 * arguments (oprsz and maxsz).
 */

/* two vector operands */
#define gen_gvec_2(v1, v2, gen) \
    tcg_gen_gvec_2(vec_full_reg_offset(v1), \
                   vec_full_reg_offset(v2), \
                   16, 16, gen)
#define gen_gvec_2s(v1, v2, c, gen) \
    tcg_gen_gvec_2s(vec_full_reg_offset(v1), \
                    vec_full_reg_offset(v2), \
                    16, 16, c, gen)
#define gen_gvec_2_ool(v1, v2, data, fn) \
    tcg_gen_gvec_2_ool(vec_full_reg_offset(v1), \
                       vec_full_reg_offset(v2), \
                       16, 16, data, fn)
#define gen_gvec_2i_ool(v1, v2, c, data, fn) \
    tcg_gen_gvec_2i_ool(vec_full_reg_offset(v1), \
                        vec_full_reg_offset(v2), \
                        c, 16, 16, data, fn)
#define gen_gvec_2_ptr(v1, v2, ptr, data, fn) \
    tcg_gen_gvec_2_ptr(vec_full_reg_offset(v1), \
                       vec_full_reg_offset(v2), \
                       ptr, 16, 16, data, fn)

/* three vector operands */
#define gen_gvec_3(v1, v2, v3, gen) \
    tcg_gen_gvec_3(vec_full_reg_offset(v1), \
                   vec_full_reg_offset(v2), \
                   vec_full_reg_offset(v3), \
                   16, 16, gen)
#define gen_gvec_3_ool(v1, v2, v3, data, fn) \
    tcg_gen_gvec_3_ool(vec_full_reg_offset(v1), \
                       vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), \
                       16, 16, data, fn)
#define gen_gvec_3_ptr(v1, v2, v3, ptr, data, fn) \
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(v1), \
                       vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), \
                       ptr, 16, 16, data, fn)
#define gen_gvec_3i(v1, v2, v3, c, gen) \
    tcg_gen_gvec_3i(vec_full_reg_offset(v1), \
                    vec_full_reg_offset(v2), \
                    vec_full_reg_offset(v3), \
                    16, 16, c, gen)

/* four vector operands */
#define gen_gvec_4(v1, v2, v3, v4, gen) \
    tcg_gen_gvec_4(vec_full_reg_offset(v1), \
                   vec_full_reg_offset(v2), \
                   vec_full_reg_offset(v3), \
                   vec_full_reg_offset(v4), \
                   16, 16, gen)
#define gen_gvec_4_ool(v1, v2, v3, v4, data, fn) \
    tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), \
                       vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), \
                       vec_full_reg_offset(v4), \
                       16, 16, data, fn)
#define gen_gvec_4_ptr(v1, v2, v3, v4, ptr, data, fn) \
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(v1), \
                       vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), \
                       vec_full_reg_offset(v4), \
                       ptr, 16, 16, data, fn)

/* duplicate a 64 bit value / move a whole vector register */
#define gen_gvec_dup_i64(es, v1, c) \
    tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c)
#define gen_gvec_mov(v1, v2) \
    tcg_gen_gvec_mov(0, \
                     vec_full_reg_offset(v1), \
                     vec_full_reg_offset(v2), \
                     16, 16)
/*
 * Replicate the immediate @c into every element of size @es of vector
 * register @v1 (both size arguments are fixed to 16).
 *
 * Like the other gen_gvec_*() wrappers, this expands to a plain call
 * expression without a trailing semicolon; the caller terminates the
 * statement. This keeps the macro usable in unbraced if/else bodies.
 */
#define gen_gvec_dup_imm(es, v1, c) \
    tcg_gen_gvec_dup_imm(es, vec_full_reg_offset(v1), 16, 16, c)
/*
 * Wrappers for the named expanders tcg_gen_gvec_<fn>(): @fn selects the
 * expander, @es is the element size, and vector register numbers are
 * converted to offsets. Both size arguments are always 16.
 */
#define gen_gvec_fn_2(fn, es, v1, v2) \
    tcg_gen_gvec_##fn(es, \
                      vec_full_reg_offset(v1), \
                      vec_full_reg_offset(v2), \
                      16, 16)
#define gen_gvec_fn_2i(fn, es, v1, v2, c) \
    tcg_gen_gvec_##fn(es, \
                      vec_full_reg_offset(v1), \
                      vec_full_reg_offset(v2), \
                      c, 16, 16)
#define gen_gvec_fn_2s(fn, es, v1, v2, s) \
    tcg_gen_gvec_##fn(es, \
                      vec_full_reg_offset(v1), \
                      vec_full_reg_offset(v2), \
                      s, 16, 16)
#define gen_gvec_fn_3(fn, es, v1, v2, v3) \
    tcg_gen_gvec_##fn(es, \
                      vec_full_reg_offset(v1), \
                      vec_full_reg_offset(v2), \
                      vec_full_reg_offset(v3), \
                      16, 16)
#define gen_gvec_fn_4(fn, es, v1, v2, v3, v4) \
    tcg_gen_gvec_##fn(es, \
                      vec_full_reg_offset(v1), \
                      vec_full_reg_offset(v2), \
                      vec_full_reg_offset(v3), \
                      vec_full_reg_offset(v4), \
                      16, 16)
249
250/*
251 * Helper to carry out a 128 bit vector computation using 2 i64 values per
252 * vector.
253 */
254typedef void (*gen_gvec128_3_i64_fn)(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
255                                     TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh);
256static void gen_gvec128_3_i64(gen_gvec128_3_i64_fn fn, uint8_t d, uint8_t a,
257                              uint8_t b)
258{
259        TCGv_i64 dh = tcg_temp_new_i64();
260        TCGv_i64 dl = tcg_temp_new_i64();
261        TCGv_i64 ah = tcg_temp_new_i64();
262        TCGv_i64 al = tcg_temp_new_i64();
263        TCGv_i64 bh = tcg_temp_new_i64();
264        TCGv_i64 bl = tcg_temp_new_i64();
265
266        read_vec_element_i64(ah, a, 0, ES_64);
267        read_vec_element_i64(al, a, 1, ES_64);
268        read_vec_element_i64(bh, b, 0, ES_64);
269        read_vec_element_i64(bl, b, 1, ES_64);
270        fn(dl, dh, al, ah, bl, bh);
271        write_vec_element_i64(dh, d, 0, ES_64);
272        write_vec_element_i64(dl, d, 1, ES_64);
273}
274
275typedef void (*gen_gvec128_4_i64_fn)(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
276                                     TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh,
277                                     TCGv_i64 cl, TCGv_i64 ch);
278static void gen_gvec128_4_i64(gen_gvec128_4_i64_fn fn, uint8_t d, uint8_t a,
279                              uint8_t b, uint8_t c)
280{
281        TCGv_i64 dh = tcg_temp_new_i64();
282        TCGv_i64 dl = tcg_temp_new_i64();
283        TCGv_i64 ah = tcg_temp_new_i64();
284        TCGv_i64 al = tcg_temp_new_i64();
285        TCGv_i64 bh = tcg_temp_new_i64();
286        TCGv_i64 bl = tcg_temp_new_i64();
287        TCGv_i64 ch = tcg_temp_new_i64();
288        TCGv_i64 cl = tcg_temp_new_i64();
289
290        read_vec_element_i64(ah, a, 0, ES_64);
291        read_vec_element_i64(al, a, 1, ES_64);
292        read_vec_element_i64(bh, b, 0, ES_64);
293        read_vec_element_i64(bl, b, 1, ES_64);
294        read_vec_element_i64(ch, c, 0, ES_64);
295        read_vec_element_i64(cl, c, 1, ES_64);
296        fn(dl, dh, al, ah, bl, bh, cl, ch);
297        write_vec_element_i64(dh, d, 0, ES_64);
298        write_vec_element_i64(dl, d, 1, ES_64);
299}
300
301static void gen_addi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
302                          uint64_t b)
303{
304    TCGv_i64 bl = tcg_constant_i64(b);
305    TCGv_i64 bh = tcg_constant_i64(0);
306
307    tcg_gen_add2_i64(dl, dh, al, ah, bl, bh);
308}
309
310static DisasJumpType op_vbperm(DisasContext *s, DisasOps *o)
311{
312    gen_gvec_3_ool(get_field(s, v1), get_field(s, v2), get_field(s, v3), 0,
313                   gen_helper_gvec_vbperm);
314
315    return DISAS_NEXT;
316}
317
318static DisasJumpType op_vge(DisasContext *s, DisasOps *o)
319{
320    const uint8_t es = s->insn->data;
321    const uint8_t enr = get_field(s, m3);
322    TCGv_i64 tmp;
323
324    if (!valid_vec_element(enr, es)) {
325        gen_program_exception(s, PGM_SPECIFICATION);
326        return DISAS_NORETURN;
327    }
328
329    tmp = tcg_temp_new_i64();
330    read_vec_element_i64(tmp, get_field(s, v2), enr, es);
331    tcg_gen_add_i64(o->addr1, o->addr1, tmp);
332    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0);
333
334    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
335    write_vec_element_i64(tmp, get_field(s, v1), enr, es);
336    return DISAS_NEXT;
337}
338
/*
 * Expand an 8 bit mask into a 64 bit byte mask: every set bit i of @mask
 * turns byte i (bits i * 8 ... i * 8 + 7) of the result into 0xff, every
 * clear bit leaves the corresponding byte 0.
 */
static uint64_t generate_byte_mask(uint8_t mask)
{
    uint64_t expanded = 0;
    unsigned int byte = 0;

    /* consume the mask bit by bit, starting with the least significant */
    while (mask) {
        if (mask & 1) {
            expanded |= (uint64_t)0xff << (byte * 8);
        }
        mask >>= 1;
        byte++;
    }
    return expanded;
}
351
352static DisasJumpType op_vgbm(DisasContext *s, DisasOps *o)
353{
354    const uint16_t i2 = get_field(s, i2);
355
356    if (i2 == (i2 & 0xff) * 0x0101) {
357        /*
358         * Masks for both 64 bit elements of the vector are the same.
359         * Trust tcg to produce a good constant loading.
360         */
361        gen_gvec_dup_imm(ES_64, get_field(s, v1),
362                         generate_byte_mask(i2 & 0xff));
363    } else {
364        TCGv_i64 t = tcg_temp_new_i64();
365
366        tcg_gen_movi_i64(t, generate_byte_mask(i2 >> 8));
367        write_vec_element_i64(t, get_field(s, v1), 0, ES_64);
368        tcg_gen_movi_i64(t, generate_byte_mask(i2));
369        write_vec_element_i64(t, get_field(s, v1), 1, ES_64);
370    }
371    return DISAS_NEXT;
372}
373
374static DisasJumpType op_vgm(DisasContext *s, DisasOps *o)
375{
376    const uint8_t es = get_field(s, m4);
377    const uint8_t bits = NUM_VEC_ELEMENT_BITS(es);
378    const uint8_t i2 = get_field(s, i2) & (bits - 1);
379    const uint8_t i3 = get_field(s, i3) & (bits - 1);
380    uint64_t mask = 0;
381    int i;
382
383    if (es > ES_64) {
384        gen_program_exception(s, PGM_SPECIFICATION);
385        return DISAS_NORETURN;
386    }
387
388    /* generate the mask - take care of wrapping */
389    for (i = i2; ; i = (i + 1) % bits) {
390        mask |= 1ull << (bits - i - 1);
391        if (i == i3) {
392            break;
393        }
394    }
395
396    gen_gvec_dup_imm(es, get_field(s, v1), mask);
397    return DISAS_NEXT;
398}
399
400static DisasJumpType op_vl(DisasContext *s, DisasOps *o)
401{
402    TCGv_i64 t0 = tcg_temp_new_i64();
403    TCGv_i64 t1 = tcg_temp_new_i64();
404
405    tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_TEUQ);
406    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
407    tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ);
408    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
409    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
410    return DISAS_NEXT;
411}
412
413static DisasJumpType op_vlr(DisasContext *s, DisasOps *o)
414{
415    gen_gvec_mov(get_field(s, v1), get_field(s, v2));
416    return DISAS_NEXT;
417}
418
419static DisasJumpType op_vlrep(DisasContext *s, DisasOps *o)
420{
421    const uint8_t es = get_field(s, m3);
422    TCGv_i64 tmp;
423
424    if (es > ES_64) {
425        gen_program_exception(s, PGM_SPECIFICATION);
426        return DISAS_NORETURN;
427    }
428
429    tmp = tcg_temp_new_i64();
430    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
431    gen_gvec_dup_i64(es, get_field(s, v1), tmp);
432    return DISAS_NEXT;
433}
434
435static DisasJumpType op_vlebr(DisasContext *s, DisasOps *o)
436{
437    const uint8_t es = s->insn->data;
438    const uint8_t enr = get_field(s, m3);
439    TCGv_i64 tmp;
440
441    if (!valid_vec_element(enr, es)) {
442        gen_program_exception(s, PGM_SPECIFICATION);
443        return DISAS_NORETURN;
444    }
445
446    tmp = tcg_temp_new_i64();
447    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es);
448    write_vec_element_i64(tmp, get_field(s, v1), enr, es);
449    return DISAS_NEXT;
450}
451
452static DisasJumpType op_vlbrrep(DisasContext *s, DisasOps *o)
453{
454    const uint8_t es = get_field(s, m3);
455    TCGv_i64 tmp;
456
457    if (es < ES_16 || es > ES_64) {
458        gen_program_exception(s, PGM_SPECIFICATION);
459        return DISAS_NORETURN;
460    }
461
462    tmp = tcg_temp_new_i64();
463    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es);
464    gen_gvec_dup_i64(es, get_field(s, v1), tmp);
465    return DISAS_NEXT;
466}
467
468static DisasJumpType op_vllebrz(DisasContext *s, DisasOps *o)
469{
470    const uint8_t m3 = get_field(s, m3);
471    TCGv_i64 tmp;
472    int es, lshift;
473
474    switch (m3) {
475    case ES_16:
476    case ES_32:
477    case ES_64:
478        es = m3;
479        lshift = 0;
480        break;
481    case 6:
482        es = ES_32;
483        lshift = 32;
484        break;
485    default:
486        gen_program_exception(s, PGM_SPECIFICATION);
487        return DISAS_NORETURN;
488    }
489
490    tmp = tcg_temp_new_i64();
491    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es);
492    tcg_gen_shli_i64(tmp, tmp, lshift);
493
494    write_vec_element_i64(tmp, get_field(s, v1), 0, ES_64);
495    write_vec_element_i64(tcg_constant_i64(0), get_field(s, v1), 1, ES_64);
496    return DISAS_NEXT;
497}
498
499static DisasJumpType op_vlbr(DisasContext *s, DisasOps *o)
500{
501    const uint8_t es = get_field(s, m3);
502    TCGv_i64 t0, t1;
503
504    if (es < ES_16 || es > ES_128) {
505        gen_program_exception(s, PGM_SPECIFICATION);
506        return DISAS_NORETURN;
507    }
508
509    t0 = tcg_temp_new_i64();
510    t1 = tcg_temp_new_i64();
511
512
513    if (es == ES_128) {
514        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
515        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
516        tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
517        goto write;
518    }
519
520    /* Begin with byte reversed doublewords... */
521    tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
522    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
523    tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
524
525    /*
526     * For 16 and 32-bit elements, the doubleword bswap also reversed
527     * the order of the elements.  Perform a larger order swap to put
528     * them back into place.  For the 128-bit "element", finish the
529     * bswap by swapping the doublewords.
530     */
531    switch (es) {
532    case ES_16:
533        tcg_gen_hswap_i64(t0, t0);
534        tcg_gen_hswap_i64(t1, t1);
535        break;
536    case ES_32:
537        tcg_gen_wswap_i64(t0, t0);
538        tcg_gen_wswap_i64(t1, t1);
539        break;
540    case ES_64:
541        break;
542    default:
543        g_assert_not_reached();
544    }
545
546write:
547    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
548    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
549    return DISAS_NEXT;
550}
551
552static DisasJumpType op_vle(DisasContext *s, DisasOps *o)
553{
554    const uint8_t es = s->insn->data;
555    const uint8_t enr = get_field(s, m3);
556    TCGv_i64 tmp;
557
558    if (!valid_vec_element(enr, es)) {
559        gen_program_exception(s, PGM_SPECIFICATION);
560        return DISAS_NORETURN;
561    }
562
563    tmp = tcg_temp_new_i64();
564    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
565    write_vec_element_i64(tmp, get_field(s, v1), enr, es);
566    return DISAS_NEXT;
567}
568
569static DisasJumpType op_vlei(DisasContext *s, DisasOps *o)
570{
571    const uint8_t es = s->insn->data;
572    const uint8_t enr = get_field(s, m3);
573    TCGv_i64 tmp;
574
575    if (!valid_vec_element(enr, es)) {
576        gen_program_exception(s, PGM_SPECIFICATION);
577        return DISAS_NORETURN;
578    }
579
580    tmp = tcg_constant_i64((int16_t)get_field(s, i2));
581    write_vec_element_i64(tmp, get_field(s, v1), enr, es);
582    return DISAS_NEXT;
583}
584
585static DisasJumpType op_vler(DisasContext *s, DisasOps *o)
586{
587    const uint8_t es = get_field(s, m3);
588
589    if (es < ES_16 || es > ES_64) {
590        gen_program_exception(s, PGM_SPECIFICATION);
591        return DISAS_NORETURN;
592    }
593
594    TCGv_i64 t0 = tcg_temp_new_i64();
595    TCGv_i64 t1 = tcg_temp_new_i64();
596
597    /* Begin with the two doublewords swapped... */
598    tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ);
599    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
600    tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_TEUQ);
601
602    /* ... then swap smaller elements within the doublewords as required. */
603    switch (es) {
604    case MO_16:
605        tcg_gen_hswap_i64(t1, t1);
606        tcg_gen_hswap_i64(t0, t0);
607        break;
608    case MO_32:
609        tcg_gen_wswap_i64(t1, t1);
610        tcg_gen_wswap_i64(t0, t0);
611        break;
612    case MO_64:
613        break;
614    default:
615        g_assert_not_reached();
616    }
617
618    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
619    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
620    return DISAS_NEXT;
621}
622
623static DisasJumpType op_vlgv(DisasContext *s, DisasOps *o)
624{
625    const uint8_t es = get_field(s, m4);
626    TCGv_ptr ptr;
627
628    if (es > ES_64) {
629        gen_program_exception(s, PGM_SPECIFICATION);
630        return DISAS_NORETURN;
631    }
632
633    /* fast path if we don't need the register content */
634    if (!get_field(s, b2)) {
635        uint8_t enr = get_field(s, d2) & (NUM_VEC_ELEMENTS(es) - 1);
636
637        read_vec_element_i64(o->out, get_field(s, v3), enr, es);
638        return DISAS_NEXT;
639    }
640
641    ptr = tcg_temp_new_ptr();
642    get_vec_element_ptr_i64(ptr, get_field(s, v3), o->addr1, es);
643    switch (es) {
644    case ES_8:
645        tcg_gen_ld8u_i64(o->out, ptr, 0);
646        break;
647    case ES_16:
648        tcg_gen_ld16u_i64(o->out, ptr, 0);
649        break;
650    case ES_32:
651        tcg_gen_ld32u_i64(o->out, ptr, 0);
652        break;
653    case ES_64:
654        tcg_gen_ld_i64(o->out, ptr, 0);
655        break;
656    default:
657        g_assert_not_reached();
658    }
659    return DISAS_NEXT;
660}
661
662static DisasJumpType op_vllez(DisasContext *s, DisasOps *o)
663{
664    uint8_t es = get_field(s, m3);
665    uint8_t enr;
666    TCGv_i64 t;
667
668    switch (es) {
669    /* rightmost sub-element of leftmost doubleword */
670    case ES_8:
671        enr = 7;
672        break;
673    case ES_16:
674        enr = 3;
675        break;
676    case ES_32:
677        enr = 1;
678        break;
679    case ES_64:
680        enr = 0;
681        break;
682    /* leftmost sub-element of leftmost doubleword */
683    case 6:
684        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
685            es = ES_32;
686            enr = 0;
687            break;
688        }
689        /* fallthrough */
690    default:
691        gen_program_exception(s, PGM_SPECIFICATION);
692        return DISAS_NORETURN;
693    }
694
695    t = tcg_temp_new_i64();
696    tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TE | es);
697    gen_gvec_dup_imm(es, get_field(s, v1), 0);
698    write_vec_element_i64(t, get_field(s, v1), enr, es);
699    return DISAS_NEXT;
700}
701
702static DisasJumpType op_vlm(DisasContext *s, DisasOps *o)
703{
704    const uint8_t v3 = get_field(s, v3);
705    uint8_t v1 = get_field(s, v1);
706    TCGv_i64 t0, t1;
707
708    if (v3 < v1 || (v3 - v1 + 1) > 16) {
709        gen_program_exception(s, PGM_SPECIFICATION);
710        return DISAS_NORETURN;
711    }
712
713    /*
714     * Check for possible access exceptions by trying to load the last
715     * element. The first element will be checked first next.
716     */
717    t0 = tcg_temp_new_i64();
718    t1 = tcg_temp_new_i64();
719    gen_addi_and_wrap_i64(s, t0, o->addr1, (v3 - v1) * 16 + 8);
720    tcg_gen_qemu_ld_i64(t0, t0, get_mem_index(s), MO_TEUQ);
721
722    for (;; v1++) {
723        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ);
724        write_vec_element_i64(t1, v1, 0, ES_64);
725        if (v1 == v3) {
726            break;
727        }
728        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
729        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ);
730        write_vec_element_i64(t1, v1, 1, ES_64);
731        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
732    }
733
734    /* Store the last element, loaded first */
735    write_vec_element_i64(t0, v1, 1, ES_64);
736    return DISAS_NEXT;
737}
738
739static DisasJumpType op_vlbb(DisasContext *s, DisasOps *o)
740{
741    const int64_t block_size = (1ull << (get_field(s, m3) + 6));
742    const int v1_offs = vec_full_reg_offset(get_field(s, v1));
743    TCGv_ptr a0;
744    TCGv_i64 bytes;
745
746    if (get_field(s, m3) > 6) {
747        gen_program_exception(s, PGM_SPECIFICATION);
748        return DISAS_NORETURN;
749    }
750
751    bytes = tcg_temp_new_i64();
752    a0 = tcg_temp_new_ptr();
753    /* calculate the number of bytes until the next block boundary */
754    tcg_gen_ori_i64(bytes, o->addr1, -block_size);
755    tcg_gen_neg_i64(bytes, bytes);
756
757    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
758    gen_helper_vll(cpu_env, a0, o->addr1, bytes);
759    return DISAS_NEXT;
760}
761
762static DisasJumpType op_vlvg(DisasContext *s, DisasOps *o)
763{
764    const uint8_t es = get_field(s, m4);
765    TCGv_ptr ptr;
766
767    if (es > ES_64) {
768        gen_program_exception(s, PGM_SPECIFICATION);
769        return DISAS_NORETURN;
770    }
771
772    /* fast path if we don't need the register content */
773    if (!get_field(s, b2)) {
774        uint8_t enr = get_field(s, d2) & (NUM_VEC_ELEMENTS(es) - 1);
775
776        write_vec_element_i64(o->in2, get_field(s, v1), enr, es);
777        return DISAS_NEXT;
778    }
779
780    ptr = tcg_temp_new_ptr();
781    get_vec_element_ptr_i64(ptr, get_field(s, v1), o->addr1, es);
782    switch (es) {
783    case ES_8:
784        tcg_gen_st8_i64(o->in2, ptr, 0);
785        break;
786    case ES_16:
787        tcg_gen_st16_i64(o->in2, ptr, 0);
788        break;
789    case ES_32:
790        tcg_gen_st32_i64(o->in2, ptr, 0);
791        break;
792    case ES_64:
793        tcg_gen_st_i64(o->in2, ptr, 0);
794        break;
795    default:
796        g_assert_not_reached();
797    }
798    return DISAS_NEXT;
799}
800
801static DisasJumpType op_vlvgp(DisasContext *s, DisasOps *o)
802{
803    write_vec_element_i64(o->in1, get_field(s, v1), 0, ES_64);
804    write_vec_element_i64(o->in2, get_field(s, v1), 1, ES_64);
805    return DISAS_NEXT;
806}
807
808static DisasJumpType op_vll(DisasContext *s, DisasOps *o)
809{
810    const int v1_offs = vec_full_reg_offset(get_field(s, v1));
811    TCGv_ptr a0 = tcg_temp_new_ptr();
812
813    /* convert highest index into an actual length */
814    tcg_gen_addi_i64(o->in2, o->in2, 1);
815    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
816    gen_helper_vll(cpu_env, a0, o->addr1, o->in2);
817    return DISAS_NEXT;
818}
819
820static DisasJumpType op_vmr(DisasContext *s, DisasOps *o)
821{
822    const uint8_t v1 = get_field(s, v1);
823    const uint8_t v2 = get_field(s, v2);
824    const uint8_t v3 = get_field(s, v3);
825    const uint8_t es = get_field(s, m4);
826    int dst_idx, src_idx;
827    TCGv_i64 tmp;
828
829    if (es > ES_64) {
830        gen_program_exception(s, PGM_SPECIFICATION);
831        return DISAS_NORETURN;
832    }
833
834    tmp = tcg_temp_new_i64();
835    if (s->fields.op2 == 0x61) {
836        /* iterate backwards to avoid overwriting data we might need later */
837        for (dst_idx = NUM_VEC_ELEMENTS(es) - 1; dst_idx >= 0; dst_idx--) {
838            src_idx = dst_idx / 2;
839            if (dst_idx % 2 == 0) {
840                read_vec_element_i64(tmp, v2, src_idx, es);
841            } else {
842                read_vec_element_i64(tmp, v3, src_idx, es);
843            }
844            write_vec_element_i64(tmp, v1, dst_idx, es);
845        }
846    } else {
847        /* iterate forward to avoid overwriting data we might need later */
848        for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(es); dst_idx++) {
849            src_idx = (dst_idx + NUM_VEC_ELEMENTS(es)) / 2;
850            if (dst_idx % 2 == 0) {
851                read_vec_element_i64(tmp, v2, src_idx, es);
852            } else {
853                read_vec_element_i64(tmp, v3, src_idx, es);
854            }
855            write_vec_element_i64(tmp, v1, dst_idx, es);
856        }
857    }
858    return DISAS_NEXT;
859}
860
861static DisasJumpType op_vpk(DisasContext *s, DisasOps *o)
862{
863    const uint8_t v1 = get_field(s, v1);
864    const uint8_t v2 = get_field(s, v2);
865    const uint8_t v3 = get_field(s, v3);
866    const uint8_t es = get_field(s, m4);
867    static gen_helper_gvec_3 * const vpk[3] = {
868        gen_helper_gvec_vpk16,
869        gen_helper_gvec_vpk32,
870        gen_helper_gvec_vpk64,
871    };
872     static gen_helper_gvec_3 * const vpks[3] = {
873        gen_helper_gvec_vpks16,
874        gen_helper_gvec_vpks32,
875        gen_helper_gvec_vpks64,
876    };
877    static gen_helper_gvec_3_ptr * const vpks_cc[3] = {
878        gen_helper_gvec_vpks_cc16,
879        gen_helper_gvec_vpks_cc32,
880        gen_helper_gvec_vpks_cc64,
881    };
882    static gen_helper_gvec_3 * const vpkls[3] = {
883        gen_helper_gvec_vpkls16,
884        gen_helper_gvec_vpkls32,
885        gen_helper_gvec_vpkls64,
886    };
887    static gen_helper_gvec_3_ptr * const vpkls_cc[3] = {
888        gen_helper_gvec_vpkls_cc16,
889        gen_helper_gvec_vpkls_cc32,
890        gen_helper_gvec_vpkls_cc64,
891    };
892
893    if (es == ES_8 || es > ES_64) {
894        gen_program_exception(s, PGM_SPECIFICATION);
895        return DISAS_NORETURN;
896    }
897
898    switch (s->fields.op2) {
899    case 0x97:
900        if (get_field(s, m5) & 0x1) {
901            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpks_cc[es - 1]);
902            set_cc_static(s);
903        } else {
904            gen_gvec_3_ool(v1, v2, v3, 0, vpks[es - 1]);
905        }
906        break;
907    case 0x95:
908        if (get_field(s, m5) & 0x1) {
909            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpkls_cc[es - 1]);
910            set_cc_static(s);
911        } else {
912            gen_gvec_3_ool(v1, v2, v3, 0, vpkls[es - 1]);
913        }
914        break;
915    case 0x94:
916        /* If sources and destination don't overlap -> fast path */
917        if (v1 != v2 && v1 != v3) {
918            const uint8_t src_es = get_field(s, m4);
919            const uint8_t dst_es = src_es - 1;
920            TCGv_i64 tmp = tcg_temp_new_i64();
921            int dst_idx, src_idx;
922
923            for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) {
924                src_idx = dst_idx;
925                if (src_idx < NUM_VEC_ELEMENTS(src_es)) {
926                    read_vec_element_i64(tmp, v2, src_idx, src_es);
927                } else {
928                    src_idx -= NUM_VEC_ELEMENTS(src_es);
929                    read_vec_element_i64(tmp, v3, src_idx, src_es);
930                }
931                write_vec_element_i64(tmp, v1, dst_idx, dst_es);
932            }
933        } else {
934            gen_gvec_3_ool(v1, v2, v3, 0, vpk[es - 1]);
935        }
936        break;
937    default:
938        g_assert_not_reached();
939    }
940    return DISAS_NEXT;
941}
942
943static DisasJumpType op_vperm(DisasContext *s, DisasOps *o)
944{
945    gen_gvec_4_ool(get_field(s, v1), get_field(s, v2),
946                   get_field(s, v3), get_field(s, v4),
947                   0, gen_helper_gvec_vperm);
948    return DISAS_NEXT;
949}
950
951static DisasJumpType op_vpdi(DisasContext *s, DisasOps *o)
952{
953    const uint8_t i2 = extract32(get_field(s, m4), 2, 1);
954    const uint8_t i3 = extract32(get_field(s, m4), 0, 1);
955    TCGv_i64 t0 = tcg_temp_new_i64();
956    TCGv_i64 t1 = tcg_temp_new_i64();
957
958    read_vec_element_i64(t0, get_field(s, v2), i2, ES_64);
959    read_vec_element_i64(t1, get_field(s, v3), i3, ES_64);
960    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
961    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
962    return DISAS_NEXT;
963}
964
965static DisasJumpType op_vrep(DisasContext *s, DisasOps *o)
966{
967    const uint16_t enr = get_field(s, i2);
968    const uint8_t es = get_field(s, m4);
969
970    if (es > ES_64 || !valid_vec_element(enr, es)) {
971        gen_program_exception(s, PGM_SPECIFICATION);
972        return DISAS_NORETURN;
973    }
974
975    tcg_gen_gvec_dup_mem(es, vec_full_reg_offset(get_field(s, v1)),
976                         vec_reg_offset(get_field(s, v3), enr, es),
977                         16, 16);
978    return DISAS_NEXT;
979}
980
981static DisasJumpType op_vrepi(DisasContext *s, DisasOps *o)
982{
983    const int64_t data = (int16_t)get_field(s, i2);
984    const uint8_t es = get_field(s, m3);
985
986    if (es > ES_64) {
987        gen_program_exception(s, PGM_SPECIFICATION);
988        return DISAS_NORETURN;
989    }
990
991    gen_gvec_dup_imm(es, get_field(s, v1), data);
992    return DISAS_NEXT;
993}
994
995static DisasJumpType op_vsce(DisasContext *s, DisasOps *o)
996{
997    const uint8_t es = s->insn->data;
998    const uint8_t enr = get_field(s, m3);
999    TCGv_i64 tmp;
1000
1001    if (!valid_vec_element(enr, es)) {
1002        gen_program_exception(s, PGM_SPECIFICATION);
1003        return DISAS_NORETURN;
1004    }
1005
1006    tmp = tcg_temp_new_i64();
1007    read_vec_element_i64(tmp, get_field(s, v2), enr, es);
1008    tcg_gen_add_i64(o->addr1, o->addr1, tmp);
1009    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0);
1010
1011    read_vec_element_i64(tmp, get_field(s, v1), enr, es);
1012    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
1013    return DISAS_NEXT;
1014}
1015
1016static DisasJumpType op_vsel(DisasContext *s, DisasOps *o)
1017{
1018    gen_gvec_fn_4(bitsel, ES_8, get_field(s, v1),
1019                  get_field(s, v4), get_field(s, v2),
1020                  get_field(s, v3));
1021    return DISAS_NEXT;
1022}
1023
1024static DisasJumpType op_vseg(DisasContext *s, DisasOps *o)
1025{
1026    const uint8_t es = get_field(s, m3);
1027    int idx1, idx2;
1028    TCGv_i64 tmp;
1029
1030    switch (es) {
1031    case ES_8:
1032        idx1 = 7;
1033        idx2 = 15;
1034        break;
1035    case ES_16:
1036        idx1 = 3;
1037        idx2 = 7;
1038        break;
1039    case ES_32:
1040        idx1 = 1;
1041        idx2 = 3;
1042        break;
1043    default:
1044        gen_program_exception(s, PGM_SPECIFICATION);
1045        return DISAS_NORETURN;
1046    }
1047
1048    tmp = tcg_temp_new_i64();
1049    read_vec_element_i64(tmp, get_field(s, v2), idx1, es | MO_SIGN);
1050    write_vec_element_i64(tmp, get_field(s, v1), 0, ES_64);
1051    read_vec_element_i64(tmp, get_field(s, v2), idx2, es | MO_SIGN);
1052    write_vec_element_i64(tmp, get_field(s, v1), 1, ES_64);
1053    return DISAS_NEXT;
1054}
1055
1056static DisasJumpType op_vst(DisasContext *s, DisasOps *o)
1057{
1058    TCGv_i64 tmp;
1059
1060    /* Probe write access before actually modifying memory */
1061    gen_helper_probe_write_access(cpu_env, o->addr1,
1062                                  tcg_constant_i64(16));
1063
1064    tmp = tcg_temp_new_i64();
1065    read_vec_element_i64(tmp,  get_field(s, v1), 0, ES_64);
1066    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEUQ);
1067    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
1068    read_vec_element_i64(tmp,  get_field(s, v1), 1, ES_64);
1069    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEUQ);
1070    return DISAS_NEXT;
1071}
1072
1073static DisasJumpType op_vstebr(DisasContext *s, DisasOps *o)
1074{
1075    const uint8_t es = s->insn->data;
1076    const uint8_t enr = get_field(s, m3);
1077    TCGv_i64 tmp;
1078
1079    if (!valid_vec_element(enr, es)) {
1080        gen_program_exception(s, PGM_SPECIFICATION);
1081        return DISAS_NORETURN;
1082    }
1083
1084    tmp = tcg_temp_new_i64();
1085    read_vec_element_i64(tmp, get_field(s, v1), enr, es);
1086    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es);
1087    return DISAS_NEXT;
1088}
1089
1090static DisasJumpType op_vstbr(DisasContext *s, DisasOps *o)
1091{
1092    const uint8_t es = get_field(s, m3);
1093    TCGv_i64 t0, t1;
1094
1095    if (es < ES_16 || es > ES_128) {
1096        gen_program_exception(s, PGM_SPECIFICATION);
1097        return DISAS_NORETURN;
1098    }
1099
1100    /* Probe write access before actually modifying memory */
1101    gen_helper_probe_write_access(cpu_env, o->addr1, tcg_constant_i64(16));
1102
1103    t0 = tcg_temp_new_i64();
1104    t1 = tcg_temp_new_i64();
1105
1106
1107    if (es == ES_128) {
1108        read_vec_element_i64(t1, get_field(s, v1), 0, ES_64);
1109        read_vec_element_i64(t0, get_field(s, v1), 1, ES_64);
1110        goto write;
1111    }
1112
1113    read_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
1114    read_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
1115
1116    /*
1117     * For 16 and 32-bit elements, the doubleword bswap below will
1118     * reverse the order of the elements.  Perform a larger order
1119     * swap to put them back into place.  For the 128-bit "element",
1120     * finish the bswap by swapping the doublewords.
1121     */
1122    switch (es) {
1123    case MO_16:
1124        tcg_gen_hswap_i64(t0, t0);
1125        tcg_gen_hswap_i64(t1, t1);
1126        break;
1127    case MO_32:
1128        tcg_gen_wswap_i64(t0, t0);
1129        tcg_gen_wswap_i64(t1, t1);
1130        break;
1131    case MO_64:
1132        break;
1133    default:
1134        g_assert_not_reached();
1135    }
1136
1137write:
1138    tcg_gen_qemu_st_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
1139    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
1140    tcg_gen_qemu_st_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
1141    return DISAS_NEXT;
1142}
1143
1144static DisasJumpType op_vste(DisasContext *s, DisasOps *o)
1145{
1146    const uint8_t es = s->insn->data;
1147    const uint8_t enr = get_field(s, m3);
1148    TCGv_i64 tmp;
1149
1150    if (!valid_vec_element(enr, es)) {
1151        gen_program_exception(s, PGM_SPECIFICATION);
1152        return DISAS_NORETURN;
1153    }
1154
1155    tmp = tcg_temp_new_i64();
1156    read_vec_element_i64(tmp, get_field(s, v1), enr, es);
1157    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
1158    return DISAS_NEXT;
1159}
1160
1161static DisasJumpType op_vster(DisasContext *s, DisasOps *o)
1162{
1163    const uint8_t es = get_field(s, m3);
1164    TCGv_i64 t0, t1;
1165
1166    if (es < ES_16 || es > ES_64) {
1167        gen_program_exception(s, PGM_SPECIFICATION);
1168        return DISAS_NORETURN;
1169    }
1170
1171    /* Probe write access before actually modifying memory */
1172    gen_helper_probe_write_access(cpu_env, o->addr1, tcg_constant_i64(16));
1173
1174    /* Begin with the two doublewords swapped... */
1175    t0 = tcg_temp_new_i64();
1176    t1 = tcg_temp_new_i64();
1177    read_vec_element_i64(t1,  get_field(s, v1), 0, ES_64);
1178    read_vec_element_i64(t0,  get_field(s, v1), 1, ES_64);
1179
1180    /* ... then swap smaller elements within the doublewords as required. */
1181    switch (es) {
1182    case MO_16:
1183        tcg_gen_hswap_i64(t1, t1);
1184        tcg_gen_hswap_i64(t0, t0);
1185        break;
1186    case MO_32:
1187        tcg_gen_wswap_i64(t1, t1);
1188        tcg_gen_wswap_i64(t0, t0);
1189        break;
1190    case MO_64:
1191        break;
1192    default:
1193        g_assert_not_reached();
1194    }
1195
1196    tcg_gen_qemu_st_i64(t0, o->addr1, get_mem_index(s), MO_TEUQ);
1197    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
1198    tcg_gen_qemu_st_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ);
1199    return DISAS_NEXT;
1200}
1201
1202static DisasJumpType op_vstm(DisasContext *s, DisasOps *o)
1203{
1204    const uint8_t v3 = get_field(s, v3);
1205    uint8_t v1 = get_field(s, v1);
1206    TCGv_i64 tmp;
1207
1208    while (v3 < v1 || (v3 - v1 + 1) > 16) {
1209        gen_program_exception(s, PGM_SPECIFICATION);
1210        return DISAS_NORETURN;
1211    }
1212
1213    /* Probe write access before actually modifying memory */
1214    gen_helper_probe_write_access(cpu_env, o->addr1,
1215                                  tcg_constant_i64((v3 - v1 + 1) * 16));
1216
1217    tmp = tcg_temp_new_i64();
1218    for (;; v1++) {
1219        read_vec_element_i64(tmp, v1, 0, ES_64);
1220        tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEUQ);
1221        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
1222        read_vec_element_i64(tmp, v1, 1, ES_64);
1223        tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEUQ);
1224        if (v1 == v3) {
1225            break;
1226        }
1227        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
1228    }
1229    return DISAS_NEXT;
1230}
1231
1232static DisasJumpType op_vstl(DisasContext *s, DisasOps *o)
1233{
1234    const int v1_offs = vec_full_reg_offset(get_field(s, v1));
1235    TCGv_ptr a0 = tcg_temp_new_ptr();
1236
1237    /* convert highest index into an actual length */
1238    tcg_gen_addi_i64(o->in2, o->in2, 1);
1239    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
1240    gen_helper_vstl(cpu_env, a0, o->addr1, o->in2);
1241    return DISAS_NEXT;
1242}
1243
1244static DisasJumpType op_vup(DisasContext *s, DisasOps *o)
1245{
1246    const bool logical = s->fields.op2 == 0xd4 || s->fields.op2 == 0xd5;
1247    const uint8_t v1 = get_field(s, v1);
1248    const uint8_t v2 = get_field(s, v2);
1249    const uint8_t src_es = get_field(s, m3);
1250    const uint8_t dst_es = src_es + 1;
1251    int dst_idx, src_idx;
1252    TCGv_i64 tmp;
1253
1254    if (src_es > ES_32) {
1255        gen_program_exception(s, PGM_SPECIFICATION);
1256        return DISAS_NORETURN;
1257    }
1258
1259    tmp = tcg_temp_new_i64();
1260    if (s->fields.op2 == 0xd7 || s->fields.op2 == 0xd5) {
1261        /* iterate backwards to avoid overwriting data we might need later */
1262        for (dst_idx = NUM_VEC_ELEMENTS(dst_es) - 1; dst_idx >= 0; dst_idx--) {
1263            src_idx = dst_idx;
1264            read_vec_element_i64(tmp, v2, src_idx,
1265                                 src_es | (logical ? 0 : MO_SIGN));
1266            write_vec_element_i64(tmp, v1, dst_idx, dst_es);
1267        }
1268
1269    } else {
1270        /* iterate forward to avoid overwriting data we might need later */
1271        for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) {
1272            src_idx = dst_idx + NUM_VEC_ELEMENTS(src_es) / 2;
1273            read_vec_element_i64(tmp, v2, src_idx,
1274                                 src_es | (logical ? 0 : MO_SIGN));
1275            write_vec_element_i64(tmp, v1, dst_idx, dst_es);
1276        }
1277    }
1278    return DISAS_NEXT;
1279}
1280
1281static DisasJumpType op_va(DisasContext *s, DisasOps *o)
1282{
1283    const uint8_t es = get_field(s, m4);
1284
1285    if (es > ES_128) {
1286        gen_program_exception(s, PGM_SPECIFICATION);
1287        return DISAS_NORETURN;
1288    } else if (es == ES_128) {
1289        gen_gvec128_3_i64(tcg_gen_add2_i64, get_field(s, v1),
1290                          get_field(s, v2), get_field(s, v3));
1291        return DISAS_NEXT;
1292    }
1293    gen_gvec_fn_3(add, es, get_field(s, v1), get_field(s, v2),
1294                  get_field(s, v3));
1295    return DISAS_NEXT;
1296}
1297
1298static void gen_acc(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, uint8_t es)
1299{
1300    const uint8_t msb_bit_nr = NUM_VEC_ELEMENT_BITS(es) - 1;
1301    TCGv_i64 msb_mask = tcg_constant_i64(dup_const(es, 1ull << msb_bit_nr));
1302    TCGv_i64 t1 = tcg_temp_new_i64();
1303    TCGv_i64 t2 = tcg_temp_new_i64();
1304    TCGv_i64 t3 = tcg_temp_new_i64();
1305
1306    /* Calculate the carry into the MSB, ignoring the old MSBs */
1307    tcg_gen_andc_i64(t1, a, msb_mask);
1308    tcg_gen_andc_i64(t2, b, msb_mask);
1309    tcg_gen_add_i64(t1, t1, t2);
1310    /* Calculate the MSB without any carry into it */
1311    tcg_gen_xor_i64(t3, a, b);
1312    /* Calculate the carry out of the MSB in the MSB bit position */
1313    tcg_gen_and_i64(d, a, b);
1314    tcg_gen_and_i64(t1, t1, t3);
1315    tcg_gen_or_i64(d, d, t1);
1316    /* Isolate and shift the carry into position */
1317    tcg_gen_and_i64(d, d, msb_mask);
1318    tcg_gen_shri_i64(d, d, msb_bit_nr);
1319}
1320
1321static void gen_acc8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1322{
1323    gen_acc(d, a, b, ES_8);
1324}
1325
1326static void gen_acc16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1327{
1328    gen_acc(d, a, b, ES_16);
1329}
1330
1331static void gen_acc_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1332{
1333    TCGv_i32 t = tcg_temp_new_i32();
1334
1335    tcg_gen_add_i32(t, a, b);
1336    tcg_gen_setcond_i32(TCG_COND_LTU, d, t, b);
1337}
1338
1339static void gen_acc_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1340{
1341    TCGv_i64 t = tcg_temp_new_i64();
1342
1343    tcg_gen_add_i64(t, a, b);
1344    tcg_gen_setcond_i64(TCG_COND_LTU, d, t, b);
1345}
1346
1347static void gen_acc2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
1348                         TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
1349{
1350    TCGv_i64 th = tcg_temp_new_i64();
1351    TCGv_i64 tl = tcg_temp_new_i64();
1352    TCGv_i64 zero = tcg_constant_i64(0);
1353
1354    tcg_gen_add2_i64(tl, th, al, zero, bl, zero);
1355    tcg_gen_add2_i64(tl, th, th, zero, ah, zero);
1356    tcg_gen_add2_i64(tl, dl, tl, th, bh, zero);
1357    tcg_gen_mov_i64(dh, zero);
1358}
1359
1360static DisasJumpType op_vacc(DisasContext *s, DisasOps *o)
1361{
1362    const uint8_t es = get_field(s, m4);
1363    static const GVecGen3 g[4] = {
1364        { .fni8 = gen_acc8_i64, },
1365        { .fni8 = gen_acc16_i64, },
1366        { .fni4 = gen_acc_i32, },
1367        { .fni8 = gen_acc_i64, },
1368    };
1369
1370    if (es > ES_128) {
1371        gen_program_exception(s, PGM_SPECIFICATION);
1372        return DISAS_NORETURN;
1373    } else if (es == ES_128) {
1374        gen_gvec128_3_i64(gen_acc2_i64, get_field(s, v1),
1375                          get_field(s, v2), get_field(s, v3));
1376        return DISAS_NEXT;
1377    }
1378    gen_gvec_3(get_field(s, v1), get_field(s, v2),
1379               get_field(s, v3), &g[es]);
1380    return DISAS_NEXT;
1381}
1382
1383static void gen_ac2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
1384                        TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
1385{
1386    TCGv_i64 tl = tcg_temp_new_i64();
1387    TCGv_i64 zero = tcg_constant_i64(0);
1388
1389    /* extract the carry only */
1390    tcg_gen_extract_i64(tl, cl, 0, 1);
1391    tcg_gen_add2_i64(dl, dh, al, ah, bl, bh);
1392    tcg_gen_add2_i64(dl, dh, dl, dh, tl, zero);
1393}
1394
1395static DisasJumpType op_vac(DisasContext *s, DisasOps *o)
1396{
1397    if (get_field(s, m5) != ES_128) {
1398        gen_program_exception(s, PGM_SPECIFICATION);
1399        return DISAS_NORETURN;
1400    }
1401
1402    gen_gvec128_4_i64(gen_ac2_i64, get_field(s, v1),
1403                      get_field(s, v2), get_field(s, v3),
1404                      get_field(s, v4));
1405    return DISAS_NEXT;
1406}
1407
1408static void gen_accc2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
1409                          TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
1410{
1411    TCGv_i64 tl = tcg_temp_new_i64();
1412    TCGv_i64 th = tcg_temp_new_i64();
1413    TCGv_i64 zero = tcg_constant_i64(0);
1414
1415    tcg_gen_andi_i64(tl, cl, 1);
1416    tcg_gen_add2_i64(tl, th, tl, zero, al, zero);
1417    tcg_gen_add2_i64(tl, th, tl, th, bl, zero);
1418    tcg_gen_add2_i64(tl, th, th, zero, ah, zero);
1419    tcg_gen_add2_i64(tl, dl, tl, th, bh, zero);
1420    tcg_gen_mov_i64(dh, zero);
1421}
1422
1423static DisasJumpType op_vaccc(DisasContext *s, DisasOps *o)
1424{
1425    if (get_field(s, m5) != ES_128) {
1426        gen_program_exception(s, PGM_SPECIFICATION);
1427        return DISAS_NORETURN;
1428    }
1429
1430    gen_gvec128_4_i64(gen_accc2_i64, get_field(s, v1),
1431                      get_field(s, v2), get_field(s, v3),
1432                      get_field(s, v4));
1433    return DISAS_NEXT;
1434}
1435
1436static DisasJumpType op_vn(DisasContext *s, DisasOps *o)
1437{
1438    gen_gvec_fn_3(and, ES_8, get_field(s, v1), get_field(s, v2),
1439                  get_field(s, v3));
1440    return DISAS_NEXT;
1441}
1442
1443static DisasJumpType op_vnc(DisasContext *s, DisasOps *o)
1444{
1445    gen_gvec_fn_3(andc, ES_8, get_field(s, v1),
1446                  get_field(s, v2), get_field(s, v3));
1447    return DISAS_NEXT;
1448}
1449
1450static void gen_avg_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1451{
1452    TCGv_i64 t0 = tcg_temp_new_i64();
1453    TCGv_i64 t1 = tcg_temp_new_i64();
1454
1455    tcg_gen_ext_i32_i64(t0, a);
1456    tcg_gen_ext_i32_i64(t1, b);
1457    tcg_gen_add_i64(t0, t0, t1);
1458    tcg_gen_addi_i64(t0, t0, 1);
1459    tcg_gen_shri_i64(t0, t0, 1);
1460    tcg_gen_extrl_i64_i32(d, t0);
1461}
1462
1463static void gen_avg_i64(TCGv_i64 dl, TCGv_i64 al, TCGv_i64 bl)
1464{
1465    TCGv_i64 dh = tcg_temp_new_i64();
1466    TCGv_i64 ah = tcg_temp_new_i64();
1467    TCGv_i64 bh = tcg_temp_new_i64();
1468
1469    /* extending the sign by one bit is sufficient */
1470    tcg_gen_extract_i64(ah, al, 63, 1);
1471    tcg_gen_extract_i64(bh, bl, 63, 1);
1472    tcg_gen_add2_i64(dl, dh, al, ah, bl, bh);
1473    gen_addi2_i64(dl, dh, dl, dh, 1);
1474    tcg_gen_extract2_i64(dl, dl, dh, 1);
1475}
1476
1477static DisasJumpType op_vavg(DisasContext *s, DisasOps *o)
1478{
1479    const uint8_t es = get_field(s, m4);
1480    static const GVecGen3 g[4] = {
1481        { .fno = gen_helper_gvec_vavg8, },
1482        { .fno = gen_helper_gvec_vavg16, },
1483        { .fni4 = gen_avg_i32, },
1484        { .fni8 = gen_avg_i64, },
1485    };
1486
1487    if (es > ES_64) {
1488        gen_program_exception(s, PGM_SPECIFICATION);
1489        return DISAS_NORETURN;
1490    }
1491    gen_gvec_3(get_field(s, v1), get_field(s, v2),
1492               get_field(s, v3), &g[es]);
1493    return DISAS_NEXT;
1494}
1495
1496static void gen_avgl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1497{
1498    TCGv_i64 t0 = tcg_temp_new_i64();
1499    TCGv_i64 t1 = tcg_temp_new_i64();
1500
1501    tcg_gen_extu_i32_i64(t0, a);
1502    tcg_gen_extu_i32_i64(t1, b);
1503    tcg_gen_add_i64(t0, t0, t1);
1504    tcg_gen_addi_i64(t0, t0, 1);
1505    tcg_gen_shri_i64(t0, t0, 1);
1506    tcg_gen_extrl_i64_i32(d, t0);
1507}
1508
1509static void gen_avgl_i64(TCGv_i64 dl, TCGv_i64 al, TCGv_i64 bl)
1510{
1511    TCGv_i64 dh = tcg_temp_new_i64();
1512    TCGv_i64 zero = tcg_constant_i64(0);
1513
1514    tcg_gen_add2_i64(dl, dh, al, zero, bl, zero);
1515    gen_addi2_i64(dl, dh, dl, dh, 1);
1516    tcg_gen_extract2_i64(dl, dl, dh, 1);
1517}
1518
1519static DisasJumpType op_vavgl(DisasContext *s, DisasOps *o)
1520{
1521    const uint8_t es = get_field(s, m4);
1522    static const GVecGen3 g[4] = {
1523        { .fno = gen_helper_gvec_vavgl8, },
1524        { .fno = gen_helper_gvec_vavgl16, },
1525        { .fni4 = gen_avgl_i32, },
1526        { .fni8 = gen_avgl_i64, },
1527    };
1528
1529    if (es > ES_64) {
1530        gen_program_exception(s, PGM_SPECIFICATION);
1531        return DISAS_NORETURN;
1532    }
1533    gen_gvec_3(get_field(s, v1), get_field(s, v2),
1534               get_field(s, v3), &g[es]);
1535    return DISAS_NEXT;
1536}
1537
1538static DisasJumpType op_vcksm(DisasContext *s, DisasOps *o)
1539{
1540    TCGv_i32 tmp = tcg_temp_new_i32();
1541    TCGv_i32 sum = tcg_temp_new_i32();
1542    int i;
1543
1544    read_vec_element_i32(sum, get_field(s, v3), 1, ES_32);
1545    for (i = 0; i < 4; i++) {
1546        read_vec_element_i32(tmp, get_field(s, v2), i, ES_32);
1547        tcg_gen_add2_i32(tmp, sum, sum, sum, tmp, tmp);
1548    }
1549    gen_gvec_dup_imm(ES_32, get_field(s, v1), 0);
1550    write_vec_element_i32(sum, get_field(s, v1), 1, ES_32);
1551    return DISAS_NEXT;
1552}
1553
1554static DisasJumpType op_vec(DisasContext *s, DisasOps *o)
1555{
1556    uint8_t es = get_field(s, m3);
1557    const uint8_t enr = NUM_VEC_ELEMENTS(es) / 2 - 1;
1558
1559    if (es > ES_64) {
1560        gen_program_exception(s, PGM_SPECIFICATION);
1561        return DISAS_NORETURN;
1562    }
1563    if (s->fields.op2 == 0xdb) {
1564        es |= MO_SIGN;
1565    }
1566
1567    o->in1 = tcg_temp_new_i64();
1568    o->in2 = tcg_temp_new_i64();
1569    read_vec_element_i64(o->in1, get_field(s, v1), enr, es);
1570    read_vec_element_i64(o->in2, get_field(s, v2), enr, es);
1571    return DISAS_NEXT;
1572}
1573
1574static DisasJumpType op_vc(DisasContext *s, DisasOps *o)
1575{
1576    const uint8_t es = get_field(s, m4);
1577    TCGCond cond = s->insn->data;
1578
1579    if (es > ES_64) {
1580        gen_program_exception(s, PGM_SPECIFICATION);
1581        return DISAS_NORETURN;
1582    }
1583
1584    tcg_gen_gvec_cmp(cond, es,
1585                     vec_full_reg_offset(get_field(s, v1)),
1586                     vec_full_reg_offset(get_field(s, v2)),
1587                     vec_full_reg_offset(get_field(s, v3)), 16, 16);
1588    if (get_field(s, m5) & 0x1) {
1589        TCGv_i64 low = tcg_temp_new_i64();
1590        TCGv_i64 high = tcg_temp_new_i64();
1591
1592        read_vec_element_i64(high, get_field(s, v1), 0, ES_64);
1593        read_vec_element_i64(low, get_field(s, v1), 1, ES_64);
1594        gen_op_update2_cc_i64(s, CC_OP_VC, low, high);
1595    }
1596    return DISAS_NEXT;
1597}
1598
1599static void gen_clz_i32(TCGv_i32 d, TCGv_i32 a)
1600{
1601    tcg_gen_clzi_i32(d, a, 32);
1602}
1603
1604static void gen_clz_i64(TCGv_i64 d, TCGv_i64 a)
1605{
1606    tcg_gen_clzi_i64(d, a, 64);
1607}
1608
1609static DisasJumpType op_vclz(DisasContext *s, DisasOps *o)
1610{
1611    const uint8_t es = get_field(s, m3);
1612    static const GVecGen2 g[4] = {
1613        { .fno = gen_helper_gvec_vclz8, },
1614        { .fno = gen_helper_gvec_vclz16, },
1615        { .fni4 = gen_clz_i32, },
1616        { .fni8 = gen_clz_i64, },
1617    };
1618
1619    if (es > ES_64) {
1620        gen_program_exception(s, PGM_SPECIFICATION);
1621        return DISAS_NORETURN;
1622    }
1623    gen_gvec_2(get_field(s, v1), get_field(s, v2), &g[es]);
1624    return DISAS_NEXT;
1625}
1626
1627static void gen_ctz_i32(TCGv_i32 d, TCGv_i32 a)
1628{
1629    tcg_gen_ctzi_i32(d, a, 32);
1630}
1631
1632static void gen_ctz_i64(TCGv_i64 d, TCGv_i64 a)
1633{
1634    tcg_gen_ctzi_i64(d, a, 64);
1635}
1636
1637static DisasJumpType op_vctz(DisasContext *s, DisasOps *o)
1638{
1639    const uint8_t es = get_field(s, m3);
1640    static const GVecGen2 g[4] = {
1641        { .fno = gen_helper_gvec_vctz8, },
1642        { .fno = gen_helper_gvec_vctz16, },
1643        { .fni4 = gen_ctz_i32, },
1644        { .fni8 = gen_ctz_i64, },
1645    };
1646
1647    if (es > ES_64) {
1648        gen_program_exception(s, PGM_SPECIFICATION);
1649        return DISAS_NORETURN;
1650    }
1651    gen_gvec_2(get_field(s, v1), get_field(s, v2), &g[es]);
1652    return DISAS_NEXT;
1653}
1654
1655static DisasJumpType op_vx(DisasContext *s, DisasOps *o)
1656{
1657    gen_gvec_fn_3(xor, ES_8, get_field(s, v1), get_field(s, v2),
1658                 get_field(s, v3));
1659    return DISAS_NEXT;
1660}
1661
1662static DisasJumpType op_vgfm(DisasContext *s, DisasOps *o)
1663{
1664    const uint8_t es = get_field(s, m4);
1665    static const GVecGen3 g[4] = {
1666        { .fno = gen_helper_gvec_vgfm8, },
1667        { .fno = gen_helper_gvec_vgfm16, },
1668        { .fno = gen_helper_gvec_vgfm32, },
1669        { .fno = gen_helper_gvec_vgfm64, },
1670    };
1671
1672    if (es > ES_64) {
1673        gen_program_exception(s, PGM_SPECIFICATION);
1674        return DISAS_NORETURN;
1675    }
1676    gen_gvec_3(get_field(s, v1), get_field(s, v2),
1677               get_field(s, v3), &g[es]);
1678    return DISAS_NEXT;
1679}
1680
1681static DisasJumpType op_vgfma(DisasContext *s, DisasOps *o)
1682{
1683    const uint8_t es = get_field(s, m5);
1684    static const GVecGen4 g[4] = {
1685        { .fno = gen_helper_gvec_vgfma8, },
1686        { .fno = gen_helper_gvec_vgfma16, },
1687        { .fno = gen_helper_gvec_vgfma32, },
1688        { .fno = gen_helper_gvec_vgfma64, },
1689    };
1690
1691    if (es > ES_64) {
1692        gen_program_exception(s, PGM_SPECIFICATION);
1693        return DISAS_NORETURN;
1694    }
1695    gen_gvec_4(get_field(s, v1), get_field(s, v2),
1696               get_field(s, v3), get_field(s, v4), &g[es]);
1697    return DISAS_NEXT;
1698}
1699
1700static DisasJumpType op_vlc(DisasContext *s, DisasOps *o)
1701{
1702    const uint8_t es = get_field(s, m3);
1703
1704    if (es > ES_64) {
1705        gen_program_exception(s, PGM_SPECIFICATION);
1706        return DISAS_NORETURN;
1707    }
1708
1709    gen_gvec_fn_2(neg, es, get_field(s, v1), get_field(s, v2));
1710    return DISAS_NEXT;
1711}
1712
1713static DisasJumpType op_vlp(DisasContext *s, DisasOps *o)
1714{
1715    const uint8_t es = get_field(s, m3);
1716
1717    if (es > ES_64) {
1718        gen_program_exception(s, PGM_SPECIFICATION);
1719        return DISAS_NORETURN;
1720    }
1721
1722    gen_gvec_fn_2(abs, es, get_field(s, v1), get_field(s, v2));
1723    return DISAS_NEXT;
1724}
1725
1726static DisasJumpType op_vmx(DisasContext *s, DisasOps *o)
1727{
1728    const uint8_t v1 = get_field(s, v1);
1729    const uint8_t v2 = get_field(s, v2);
1730    const uint8_t v3 = get_field(s, v3);
1731    const uint8_t es = get_field(s, m4);
1732
1733    if (es > ES_64) {
1734        gen_program_exception(s, PGM_SPECIFICATION);
1735        return DISAS_NORETURN;
1736    }
1737
1738    switch (s->fields.op2) {
1739    case 0xff:
1740        gen_gvec_fn_3(smax, es, v1, v2, v3);
1741        break;
1742    case 0xfd:
1743        gen_gvec_fn_3(umax, es, v1, v2, v3);
1744        break;
1745    case 0xfe:
1746        gen_gvec_fn_3(smin, es, v1, v2, v3);
1747        break;
1748    case 0xfc:
1749        gen_gvec_fn_3(umin, es, v1, v2, v3);
1750        break;
1751    default:
1752        g_assert_not_reached();
1753    }
1754    return DISAS_NEXT;
1755}
1756
1757static void gen_mal_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c)
1758{
1759    TCGv_i32 t0 = tcg_temp_new_i32();
1760
1761    tcg_gen_mul_i32(t0, a, b);
1762    tcg_gen_add_i32(d, t0, c);
1763}
1764
1765static void gen_mah_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c)
1766{
1767    TCGv_i64 t0 = tcg_temp_new_i64();
1768    TCGv_i64 t1 = tcg_temp_new_i64();
1769    TCGv_i64 t2 = tcg_temp_new_i64();
1770
1771    tcg_gen_ext_i32_i64(t0, a);
1772    tcg_gen_ext_i32_i64(t1, b);
1773    tcg_gen_ext_i32_i64(t2, c);
1774    tcg_gen_mul_i64(t0, t0, t1);
1775    tcg_gen_add_i64(t0, t0, t2);
1776    tcg_gen_extrh_i64_i32(d, t0);
1777}
1778
1779static void gen_malh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c)
1780{
1781    TCGv_i64 t0 = tcg_temp_new_i64();
1782    TCGv_i64 t1 = tcg_temp_new_i64();
1783    TCGv_i64 t2 = tcg_temp_new_i64();
1784
1785    tcg_gen_extu_i32_i64(t0, a);
1786    tcg_gen_extu_i32_i64(t1, b);
1787    tcg_gen_extu_i32_i64(t2, c);
1788    tcg_gen_mul_i64(t0, t0, t1);
1789    tcg_gen_add_i64(t0, t0, t2);
1790    tcg_gen_extrh_i64_i32(d, t0);
1791}
1792
1793static DisasJumpType op_vma(DisasContext *s, DisasOps *o)
1794{
1795    const uint8_t es = get_field(s, m5);
1796    static const GVecGen4 g_vmal[3] = {
1797        { .fno = gen_helper_gvec_vmal8, },
1798        { .fno = gen_helper_gvec_vmal16, },
1799        { .fni4 = gen_mal_i32, },
1800    };
1801    static const GVecGen4 g_vmah[3] = {
1802        { .fno = gen_helper_gvec_vmah8, },
1803        { .fno = gen_helper_gvec_vmah16, },
1804        { .fni4 = gen_mah_i32, },
1805    };
1806    static const GVecGen4 g_vmalh[3] = {
1807        { .fno = gen_helper_gvec_vmalh8, },
1808        { .fno = gen_helper_gvec_vmalh16, },
1809        { .fni4 = gen_malh_i32, },
1810    };
1811    static const GVecGen4 g_vmae[3] = {
1812        { .fno = gen_helper_gvec_vmae8, },
1813        { .fno = gen_helper_gvec_vmae16, },
1814        { .fno = gen_helper_gvec_vmae32, },
1815    };
1816    static const GVecGen4 g_vmale[3] = {
1817        { .fno = gen_helper_gvec_vmale8, },
1818        { .fno = gen_helper_gvec_vmale16, },
1819        { .fno = gen_helper_gvec_vmale32, },
1820    };
1821    static const GVecGen4 g_vmao[3] = {
1822        { .fno = gen_helper_gvec_vmao8, },
1823        { .fno = gen_helper_gvec_vmao16, },
1824        { .fno = gen_helper_gvec_vmao32, },
1825    };
1826    static const GVecGen4 g_vmalo[3] = {
1827        { .fno = gen_helper_gvec_vmalo8, },
1828        { .fno = gen_helper_gvec_vmalo16, },
1829        { .fno = gen_helper_gvec_vmalo32, },
1830    };
1831    const GVecGen4 *fn;
1832
1833    if (es > ES_32) {
1834        gen_program_exception(s, PGM_SPECIFICATION);
1835        return DISAS_NORETURN;
1836    }
1837
1838    switch (s->fields.op2) {
1839    case 0xaa:
1840        fn = &g_vmal[es];
1841        break;
1842    case 0xab:
1843        fn = &g_vmah[es];
1844        break;
1845    case 0xa9:
1846        fn = &g_vmalh[es];
1847        break;
1848    case 0xae:
1849        fn = &g_vmae[es];
1850        break;
1851    case 0xac:
1852        fn = &g_vmale[es];
1853        break;
1854    case 0xaf:
1855        fn = &g_vmao[es];
1856        break;
1857    case 0xad:
1858        fn = &g_vmalo[es];
1859        break;
1860    default:
1861        g_assert_not_reached();
1862    }
1863
1864    gen_gvec_4(get_field(s, v1), get_field(s, v2),
1865               get_field(s, v3), get_field(s, v4), fn);
1866    return DISAS_NEXT;
1867}
1868
1869static void gen_mh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1870{
1871    TCGv_i32 t = tcg_temp_new_i32();
1872
1873    tcg_gen_muls2_i32(t, d, a, b);
1874}
1875
1876static void gen_mlh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1877{
1878    TCGv_i32 t = tcg_temp_new_i32();
1879
1880    tcg_gen_mulu2_i32(t, d, a, b);
1881}
1882
1883static DisasJumpType op_vm(DisasContext *s, DisasOps *o)
1884{
1885    const uint8_t es = get_field(s, m4);
1886    static const GVecGen3 g_vmh[3] = {
1887        { .fno = gen_helper_gvec_vmh8, },
1888        { .fno = gen_helper_gvec_vmh16, },
1889        { .fni4 = gen_mh_i32, },
1890    };
1891    static const GVecGen3 g_vmlh[3] = {
1892        { .fno = gen_helper_gvec_vmlh8, },
1893        { .fno = gen_helper_gvec_vmlh16, },
1894        { .fni4 = gen_mlh_i32, },
1895    };
1896    static const GVecGen3 g_vme[3] = {
1897        { .fno = gen_helper_gvec_vme8, },
1898        { .fno = gen_helper_gvec_vme16, },
1899        { .fno = gen_helper_gvec_vme32, },
1900    };
1901    static const GVecGen3 g_vmle[3] = {
1902        { .fno = gen_helper_gvec_vmle8, },
1903        { .fno = gen_helper_gvec_vmle16, },
1904        { .fno = gen_helper_gvec_vmle32, },
1905    };
1906    static const GVecGen3 g_vmo[3] = {
1907        { .fno = gen_helper_gvec_vmo8, },
1908        { .fno = gen_helper_gvec_vmo16, },
1909        { .fno = gen_helper_gvec_vmo32, },
1910    };
1911    static const GVecGen3 g_vmlo[3] = {
1912        { .fno = gen_helper_gvec_vmlo8, },
1913        { .fno = gen_helper_gvec_vmlo16, },
1914        { .fno = gen_helper_gvec_vmlo32, },
1915    };
1916    const GVecGen3 *fn;
1917
1918    if (es > ES_32) {
1919        gen_program_exception(s, PGM_SPECIFICATION);
1920        return DISAS_NORETURN;
1921    }
1922
1923    switch (s->fields.op2) {
1924    case 0xa2:
1925        gen_gvec_fn_3(mul, es, get_field(s, v1),
1926                      get_field(s, v2), get_field(s, v3));
1927        return DISAS_NEXT;
1928    case 0xa3:
1929        fn = &g_vmh[es];
1930        break;
1931    case 0xa1:
1932        fn = &g_vmlh[es];
1933        break;
1934    case 0xa6:
1935        fn = &g_vme[es];
1936        break;
1937    case 0xa4:
1938        fn = &g_vmle[es];
1939        break;
1940    case 0xa7:
1941        fn = &g_vmo[es];
1942        break;
1943    case 0xa5:
1944        fn = &g_vmlo[es];
1945        break;
1946    default:
1947        g_assert_not_reached();
1948    }
1949
1950    gen_gvec_3(get_field(s, v1), get_field(s, v2),
1951               get_field(s, v3), fn);
1952    return DISAS_NEXT;
1953}
1954
1955static DisasJumpType op_vmsl(DisasContext *s, DisasOps *o)
1956{
1957    TCGv_i64 l1, h1, l2, h2;
1958
1959    if (get_field(s, m5) != ES_64) {
1960        gen_program_exception(s, PGM_SPECIFICATION);
1961        return DISAS_NORETURN;
1962    }
1963
1964    l1 = tcg_temp_new_i64();
1965    h1 = tcg_temp_new_i64();
1966    l2 = tcg_temp_new_i64();
1967    h2 = tcg_temp_new_i64();
1968
1969    /* Multiply both even elements from v2 and v3 */
1970    read_vec_element_i64(l1, get_field(s, v2), 0, ES_64);
1971    read_vec_element_i64(h1, get_field(s, v3), 0, ES_64);
1972    tcg_gen_mulu2_i64(l1, h1, l1, h1);
1973    /* Shift result left by one (x2) if requested */
1974    if (extract32(get_field(s, m6), 3, 1)) {
1975        tcg_gen_add2_i64(l1, h1, l1, h1, l1, h1);
1976    }
1977
1978    /* Multiply both odd elements from v2 and v3 */
1979    read_vec_element_i64(l2, get_field(s, v2), 1, ES_64);
1980    read_vec_element_i64(h2, get_field(s, v3), 1, ES_64);
1981    tcg_gen_mulu2_i64(l2, h2, l2, h2);
1982    /* Shift result left by one (x2) if requested */
1983    if (extract32(get_field(s, m6), 2, 1)) {
1984        tcg_gen_add2_i64(l2, h2, l2, h2, l2, h2);
1985    }
1986
1987    /* Add both intermediate results */
1988    tcg_gen_add2_i64(l1, h1, l1, h1, l2, h2);
1989    /* Add whole v4 */
1990    read_vec_element_i64(h2, get_field(s, v4), 0, ES_64);
1991    read_vec_element_i64(l2, get_field(s, v4), 1, ES_64);
1992    tcg_gen_add2_i64(l1, h1, l1, h1, l2, h2);
1993
1994    /* Store final result into v1. */
1995    write_vec_element_i64(h1, get_field(s, v1), 0, ES_64);
1996    write_vec_element_i64(l1, get_field(s, v1), 1, ES_64);
1997    return DISAS_NEXT;
1998}
1999
2000static DisasJumpType op_vnn(DisasContext *s, DisasOps *o)
2001{
2002    gen_gvec_fn_3(nand, ES_8, get_field(s, v1),
2003                  get_field(s, v2), get_field(s, v3));
2004    return DISAS_NEXT;
2005}
2006
2007static DisasJumpType op_vno(DisasContext *s, DisasOps *o)
2008{
2009    gen_gvec_fn_3(nor, ES_8, get_field(s, v1), get_field(s, v2),
2010                  get_field(s, v3));
2011    return DISAS_NEXT;
2012}
2013
2014static DisasJumpType op_vnx(DisasContext *s, DisasOps *o)
2015{
2016    gen_gvec_fn_3(eqv, ES_8, get_field(s, v1), get_field(s, v2),
2017                  get_field(s, v3));
2018    return DISAS_NEXT;
2019}
2020
2021static DisasJumpType op_vo(DisasContext *s, DisasOps *o)
2022{
2023    gen_gvec_fn_3(or, ES_8, get_field(s, v1), get_field(s, v2),
2024                  get_field(s, v3));
2025    return DISAS_NEXT;
2026}
2027
2028static DisasJumpType op_voc(DisasContext *s, DisasOps *o)
2029{
2030    gen_gvec_fn_3(orc, ES_8, get_field(s, v1), get_field(s, v2),
2031                  get_field(s, v3));
2032    return DISAS_NEXT;
2033}
2034
2035static DisasJumpType op_vpopct(DisasContext *s, DisasOps *o)
2036{
2037    const uint8_t es = get_field(s, m3);
2038    static const GVecGen2 g[4] = {
2039        { .fno = gen_helper_gvec_vpopct8, },
2040        { .fno = gen_helper_gvec_vpopct16, },
2041        { .fni4 = tcg_gen_ctpop_i32, },
2042        { .fni8 = tcg_gen_ctpop_i64, },
2043    };
2044
2045    if (es > ES_64 || (es != ES_8 && !s390_has_feat(S390_FEAT_VECTOR_ENH))) {
2046        gen_program_exception(s, PGM_SPECIFICATION);
2047        return DISAS_NORETURN;
2048    }
2049
2050    gen_gvec_2(get_field(s, v1), get_field(s, v2), &g[es]);
2051    return DISAS_NEXT;
2052}
2053
2054static void gen_rim_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, int32_t c)
2055{
2056    TCGv_i32 t = tcg_temp_new_i32();
2057
2058    tcg_gen_rotli_i32(t, a, c & 31);
2059    tcg_gen_and_i32(t, t, b);
2060    tcg_gen_andc_i32(d, d, b);
2061    tcg_gen_or_i32(d, d, t);
2062}
2063
2064static void gen_rim_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, int64_t c)
2065{
2066    TCGv_i64 t = tcg_temp_new_i64();
2067
2068    tcg_gen_rotli_i64(t, a, c & 63);
2069    tcg_gen_and_i64(t, t, b);
2070    tcg_gen_andc_i64(d, d, b);
2071    tcg_gen_or_i64(d, d, t);
2072}
2073
2074static DisasJumpType op_verim(DisasContext *s, DisasOps *o)
2075{
2076    const uint8_t es = get_field(s, m5);
2077    const uint8_t i4 = get_field(s, i4) &
2078                       (NUM_VEC_ELEMENT_BITS(es) - 1);
2079    static const GVecGen3i g[4] = {
2080        { .fno = gen_helper_gvec_verim8, },
2081        { .fno = gen_helper_gvec_verim16, },
2082        { .fni4 = gen_rim_i32,
2083          .load_dest = true, },
2084        { .fni8 = gen_rim_i64,
2085          .load_dest = true, },
2086    };
2087
2088    if (es > ES_64) {
2089        gen_program_exception(s, PGM_SPECIFICATION);
2090        return DISAS_NORETURN;
2091    }
2092
2093    gen_gvec_3i(get_field(s, v1), get_field(s, v2),
2094                get_field(s, v3), i4, &g[es]);
2095    return DISAS_NEXT;
2096}
2097
2098static DisasJumpType op_vesv(DisasContext *s, DisasOps *o)
2099{
2100    const uint8_t es = get_field(s, m4);
2101    const uint8_t v1 = get_field(s, v1);
2102    const uint8_t v2 = get_field(s, v2);
2103    const uint8_t v3 = get_field(s, v3);
2104
2105    if (es > ES_64) {
2106        gen_program_exception(s, PGM_SPECIFICATION);
2107        return DISAS_NORETURN;
2108    }
2109
2110    switch (s->fields.op2) {
2111    case 0x70:
2112        gen_gvec_fn_3(shlv, es, v1, v2, v3);
2113        break;
2114    case 0x73:
2115        gen_gvec_fn_3(rotlv, es, v1, v2, v3);
2116        break;
2117    case 0x7a:
2118        gen_gvec_fn_3(sarv, es, v1, v2, v3);
2119        break;
2120    case 0x78:
2121        gen_gvec_fn_3(shrv, es, v1, v2, v3);
2122        break;
2123    default:
2124        g_assert_not_reached();
2125    }
2126    return DISAS_NEXT;
2127}
2128
2129static DisasJumpType op_ves(DisasContext *s, DisasOps *o)
2130{
2131    const uint8_t es = get_field(s, m4);
2132    const uint8_t d2 = get_field(s, d2) &
2133                       (NUM_VEC_ELEMENT_BITS(es) - 1);
2134    const uint8_t v1 = get_field(s, v1);
2135    const uint8_t v3 = get_field(s, v3);
2136    TCGv_i32 shift;
2137
2138    if (es > ES_64) {
2139        gen_program_exception(s, PGM_SPECIFICATION);
2140        return DISAS_NORETURN;
2141    }
2142
2143    if (likely(!get_field(s, b2))) {
2144        switch (s->fields.op2) {
2145        case 0x30:
2146            gen_gvec_fn_2i(shli, es, v1, v3, d2);
2147            break;
2148        case 0x33:
2149            gen_gvec_fn_2i(rotli, es, v1, v3, d2);
2150            break;
2151        case 0x3a:
2152            gen_gvec_fn_2i(sari, es, v1, v3, d2);
2153            break;
2154        case 0x38:
2155            gen_gvec_fn_2i(shri, es, v1, v3, d2);
2156            break;
2157        default:
2158            g_assert_not_reached();
2159        }
2160    } else {
2161        shift = tcg_temp_new_i32();
2162        tcg_gen_extrl_i64_i32(shift, o->addr1);
2163        tcg_gen_andi_i32(shift, shift, NUM_VEC_ELEMENT_BITS(es) - 1);
2164        switch (s->fields.op2) {
2165        case 0x30:
2166            gen_gvec_fn_2s(shls, es, v1, v3, shift);
2167            break;
2168        case 0x33:
2169            gen_gvec_fn_2s(rotls, es, v1, v3, shift);
2170            break;
2171        case 0x3a:
2172            gen_gvec_fn_2s(sars, es, v1, v3, shift);
2173            break;
2174        case 0x38:
2175            gen_gvec_fn_2s(shrs, es, v1, v3, shift);
2176            break;
2177        default:
2178            g_assert_not_reached();
2179        }
2180    }
2181    return DISAS_NEXT;
2182}
2183
2184static DisasJumpType gen_vsh_by_byte(DisasContext *s, DisasOps *o,
2185                                      gen_helper_gvec_2i *gen,
2186                                      gen_helper_gvec_3 *gen_ve2)
2187{
2188    bool byte = s->insn->data;
2189
2190    if (!byte && s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
2191        gen_gvec_3_ool(get_field(s, v1), get_field(s, v2),
2192                       get_field(s, v3), 0, gen_ve2);
2193    } else {
2194        TCGv_i64 shift = tcg_temp_new_i64();
2195
2196        read_vec_element_i64(shift, get_field(s, v3), 7, ES_8);
2197        tcg_gen_andi_i64(shift, shift, byte ? 0x78 : 7);
2198        gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2), shift, 0, gen);
2199    }
2200    return DISAS_NEXT;
2201}
2202
2203static DisasJumpType op_vsl(DisasContext *s, DisasOps *o)
2204{
2205    return gen_vsh_by_byte(s, o, gen_helper_gvec_vsl,
2206                            gen_helper_gvec_vsl_ve2);
2207}
2208
2209static DisasJumpType op_vsra(DisasContext *s, DisasOps *o)
2210{
2211    return gen_vsh_by_byte(s, o, gen_helper_gvec_vsra,
2212                            gen_helper_gvec_vsra_ve2);
2213}
2214
2215static DisasJumpType op_vsrl(DisasContext *s, DisasOps *o)
2216{
2217    return gen_vsh_by_byte(s, o, gen_helper_gvec_vsrl,
2218                            gen_helper_gvec_vsrl_ve2);
2219}
2220
2221static DisasJumpType op_vsld(DisasContext *s, DisasOps *o)
2222{
2223    const bool byte = s->insn->data;
2224    const uint8_t mask = byte ? 15 : 7;
2225    const uint8_t mul  = byte ?  8 : 1;
2226    const uint8_t i4   = get_field(s, i4);
2227    const int right_shift = 64 - (i4 & 7) * mul;
2228    TCGv_i64 t0, t1, t2;
2229
2230    if (i4 & ~mask) {
2231        gen_program_exception(s, PGM_SPECIFICATION);
2232        return DISAS_NORETURN;
2233    }
2234
2235    t0 = tcg_temp_new_i64();
2236    t1 = tcg_temp_new_i64();
2237    t2 = tcg_temp_new_i64();
2238
2239    if ((i4 & 8) == 0) {
2240        read_vec_element_i64(t0, get_field(s, v2), 0, ES_64);
2241        read_vec_element_i64(t1, get_field(s, v2), 1, ES_64);
2242        read_vec_element_i64(t2, get_field(s, v3), 0, ES_64);
2243    } else {
2244        read_vec_element_i64(t0, get_field(s, v2), 1, ES_64);
2245        read_vec_element_i64(t1, get_field(s, v3), 0, ES_64);
2246        read_vec_element_i64(t2, get_field(s, v3), 1, ES_64);
2247    }
2248
2249    tcg_gen_extract2_i64(t0, t1, t0, right_shift);
2250    tcg_gen_extract2_i64(t1, t2, t1, right_shift);
2251
2252    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
2253    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
2254    return DISAS_NEXT;
2255}
2256
2257static DisasJumpType op_vsrd(DisasContext *s, DisasOps *o)
2258{
2259    const uint8_t i4 = get_field(s, i4);
2260    TCGv_i64 t0, t1, t2;
2261
2262    if (i4 & ~7) {
2263        gen_program_exception(s, PGM_SPECIFICATION);
2264        return DISAS_NORETURN;
2265    }
2266
2267    t0 = tcg_temp_new_i64();
2268    t1 = tcg_temp_new_i64();
2269    t2 = tcg_temp_new_i64();
2270
2271    read_vec_element_i64(t0, get_field(s, v2), 1, ES_64);
2272    read_vec_element_i64(t1, get_field(s, v3), 0, ES_64);
2273    read_vec_element_i64(t2, get_field(s, v3), 1, ES_64);
2274
2275    tcg_gen_extract2_i64(t0, t1, t0, i4);
2276    tcg_gen_extract2_i64(t1, t2, t1, i4);
2277
2278    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
2279    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
2280    return DISAS_NEXT;
2281}
2282
2283static DisasJumpType op_vs(DisasContext *s, DisasOps *o)
2284{
2285    const uint8_t es = get_field(s, m4);
2286
2287    if (es > ES_128) {
2288        gen_program_exception(s, PGM_SPECIFICATION);
2289        return DISAS_NORETURN;
2290    } else if (es == ES_128) {
2291        gen_gvec128_3_i64(tcg_gen_sub2_i64, get_field(s, v1),
2292                          get_field(s, v2), get_field(s, v3));
2293        return DISAS_NEXT;
2294    }
2295    gen_gvec_fn_3(sub, es, get_field(s, v1), get_field(s, v2),
2296                  get_field(s, v3));
2297    return DISAS_NEXT;
2298}
2299
2300static void gen_scbi_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
2301{
2302    tcg_gen_setcond_i32(TCG_COND_GEU, d, a, b);
2303}
2304
2305static void gen_scbi_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
2306{
2307    tcg_gen_setcond_i64(TCG_COND_GEU, d, a, b);
2308}
2309
2310static void gen_scbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
2311                          TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
2312{
2313    TCGv_i64 th = tcg_temp_new_i64();
2314    TCGv_i64 tl = tcg_temp_new_i64();
2315    TCGv_i64 zero = tcg_constant_i64(0);
2316
2317    tcg_gen_sub2_i64(tl, th, al, zero, bl, zero);
2318    tcg_gen_andi_i64(th, th, 1);
2319    tcg_gen_sub2_i64(tl, th, ah, zero, th, zero);
2320    tcg_gen_sub2_i64(tl, th, tl, th, bh, zero);
2321    /* "invert" the result: -1 -> 0; 0 -> 1 */
2322    tcg_gen_addi_i64(dl, th, 1);
2323    tcg_gen_mov_i64(dh, zero);
2324}
2325
2326static DisasJumpType op_vscbi(DisasContext *s, DisasOps *o)
2327{
2328    const uint8_t es = get_field(s, m4);
2329    static const GVecGen3 g[4] = {
2330        { .fno = gen_helper_gvec_vscbi8, },
2331        { .fno = gen_helper_gvec_vscbi16, },
2332        { .fni4 = gen_scbi_i32, },
2333        { .fni8 = gen_scbi_i64, },
2334    };
2335
2336    if (es > ES_128) {
2337        gen_program_exception(s, PGM_SPECIFICATION);
2338        return DISAS_NORETURN;
2339    } else if (es == ES_128) {
2340        gen_gvec128_3_i64(gen_scbi2_i64, get_field(s, v1),
2341                          get_field(s, v2), get_field(s, v3));
2342        return DISAS_NEXT;
2343    }
2344    gen_gvec_3(get_field(s, v1), get_field(s, v2),
2345               get_field(s, v3), &g[es]);
2346    return DISAS_NEXT;
2347}
2348
2349static void gen_sbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
2350                         TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
2351{
2352    TCGv_i64 tl = tcg_temp_new_i64();
2353    TCGv_i64 th = tcg_temp_new_i64();
2354
2355    tcg_gen_not_i64(tl, bl);
2356    tcg_gen_not_i64(th, bh);
2357    gen_ac2_i64(dl, dh, al, ah, tl, th, cl, ch);
2358}
2359
2360static DisasJumpType op_vsbi(DisasContext *s, DisasOps *o)
2361{
2362    if (get_field(s, m5) != ES_128) {
2363        gen_program_exception(s, PGM_SPECIFICATION);
2364        return DISAS_NORETURN;
2365    }
2366
2367    gen_gvec128_4_i64(gen_sbi2_i64, get_field(s, v1),
2368                      get_field(s, v2), get_field(s, v3),
2369                      get_field(s, v4));
2370    return DISAS_NEXT;
2371}
2372
2373static void gen_sbcbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
2374                           TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
2375{
2376    TCGv_i64 th = tcg_temp_new_i64();
2377    TCGv_i64 tl = tcg_temp_new_i64();
2378
2379    tcg_gen_not_i64(tl, bl);
2380    tcg_gen_not_i64(th, bh);
2381    gen_accc2_i64(dl, dh, al, ah, tl, th, cl, ch);
2382}
2383
2384static DisasJumpType op_vsbcbi(DisasContext *s, DisasOps *o)
2385{
2386    if (get_field(s, m5) != ES_128) {
2387        gen_program_exception(s, PGM_SPECIFICATION);
2388        return DISAS_NORETURN;
2389    }
2390
2391    gen_gvec128_4_i64(gen_sbcbi2_i64, get_field(s, v1),
2392                      get_field(s, v2), get_field(s, v3),
2393                      get_field(s, v4));
2394    return DISAS_NEXT;
2395}
2396
2397static DisasJumpType op_vsumg(DisasContext *s, DisasOps *o)
2398{
2399    const uint8_t es = get_field(s, m4);
2400    TCGv_i64 sum, tmp;
2401    uint8_t dst_idx;
2402
2403    if (es == ES_8 || es > ES_32) {
2404        gen_program_exception(s, PGM_SPECIFICATION);
2405        return DISAS_NORETURN;
2406    }
2407
2408    sum = tcg_temp_new_i64();
2409    tmp = tcg_temp_new_i64();
2410    for (dst_idx = 0; dst_idx < 2; dst_idx++) {
2411        uint8_t idx = dst_idx * NUM_VEC_ELEMENTS(es) / 2;
2412        const uint8_t max_idx = idx + NUM_VEC_ELEMENTS(es) / 2 - 1;
2413
2414        read_vec_element_i64(sum, get_field(s, v3), max_idx, es);
2415        for (; idx <= max_idx; idx++) {
2416            read_vec_element_i64(tmp, get_field(s, v2), idx, es);
2417            tcg_gen_add_i64(sum, sum, tmp);
2418        }
2419        write_vec_element_i64(sum, get_field(s, v1), dst_idx, ES_64);
2420    }
2421    return DISAS_NEXT;
2422}
2423
2424static DisasJumpType op_vsumq(DisasContext *s, DisasOps *o)
2425{
2426    const uint8_t es = get_field(s, m4);
2427    const uint8_t max_idx = NUM_VEC_ELEMENTS(es) - 1;
2428    TCGv_i64 sumh, suml, zero, tmpl;
2429    uint8_t idx;
2430
2431    if (es < ES_32 || es > ES_64) {
2432        gen_program_exception(s, PGM_SPECIFICATION);
2433        return DISAS_NORETURN;
2434    }
2435
2436    sumh = tcg_temp_new_i64();
2437    suml = tcg_temp_new_i64();
2438    zero = tcg_constant_i64(0);
2439    tmpl = tcg_temp_new_i64();
2440
2441    tcg_gen_mov_i64(sumh, zero);
2442    read_vec_element_i64(suml, get_field(s, v3), max_idx, es);
2443    for (idx = 0; idx <= max_idx; idx++) {
2444        read_vec_element_i64(tmpl, get_field(s, v2), idx, es);
2445        tcg_gen_add2_i64(suml, sumh, suml, sumh, tmpl, zero);
2446    }
2447    write_vec_element_i64(sumh, get_field(s, v1), 0, ES_64);
2448    write_vec_element_i64(suml, get_field(s, v1), 1, ES_64);
2449    return DISAS_NEXT;
2450}
2451
2452static DisasJumpType op_vsum(DisasContext *s, DisasOps *o)
2453{
2454    const uint8_t es = get_field(s, m4);
2455    TCGv_i32 sum, tmp;
2456    uint8_t dst_idx;
2457
2458    if (es > ES_16) {
2459        gen_program_exception(s, PGM_SPECIFICATION);
2460        return DISAS_NORETURN;
2461    }
2462
2463    sum = tcg_temp_new_i32();
2464    tmp = tcg_temp_new_i32();
2465    for (dst_idx = 0; dst_idx < 4; dst_idx++) {
2466        uint8_t idx = dst_idx * NUM_VEC_ELEMENTS(es) / 4;
2467        const uint8_t max_idx = idx + NUM_VEC_ELEMENTS(es) / 4 - 1;
2468
2469        read_vec_element_i32(sum, get_field(s, v3), max_idx, es);
2470        for (; idx <= max_idx; idx++) {
2471            read_vec_element_i32(tmp, get_field(s, v2), idx, es);
2472            tcg_gen_add_i32(sum, sum, tmp);
2473        }
2474        write_vec_element_i32(sum, get_field(s, v1), dst_idx, ES_32);
2475    }
2476    return DISAS_NEXT;
2477}
2478
2479static DisasJumpType op_vtm(DisasContext *s, DisasOps *o)
2480{
2481    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2),
2482                   cpu_env, 0, gen_helper_gvec_vtm);
2483    set_cc_static(s);
2484    return DISAS_NEXT;
2485}
2486
2487static DisasJumpType op_vfae(DisasContext *s, DisasOps *o)
2488{
2489    const uint8_t es = get_field(s, m4);
2490    const uint8_t m5 = get_field(s, m5);
2491    static gen_helper_gvec_3 * const g[3] = {
2492        gen_helper_gvec_vfae8,
2493        gen_helper_gvec_vfae16,
2494        gen_helper_gvec_vfae32,
2495    };
2496    static gen_helper_gvec_3_ptr * const g_cc[3] = {
2497        gen_helper_gvec_vfae_cc8,
2498        gen_helper_gvec_vfae_cc16,
2499        gen_helper_gvec_vfae_cc32,
2500    };
2501    if (es > ES_32) {
2502        gen_program_exception(s, PGM_SPECIFICATION);
2503        return DISAS_NORETURN;
2504    }
2505
2506    if (extract32(m5, 0, 1)) {
2507        gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2),
2508                       get_field(s, v3), cpu_env, m5, g_cc[es]);
2509        set_cc_static(s);
2510    } else {
2511        gen_gvec_3_ool(get_field(s, v1), get_field(s, v2),
2512                       get_field(s, v3), m5, g[es]);
2513    }
2514    return DISAS_NEXT;
2515}
2516
2517static DisasJumpType op_vfee(DisasContext *s, DisasOps *o)
2518{
2519    const uint8_t es = get_field(s, m4);
2520    const uint8_t m5 = get_field(s, m5);
2521    static gen_helper_gvec_3 * const g[3] = {
2522        gen_helper_gvec_vfee8,
2523        gen_helper_gvec_vfee16,
2524        gen_helper_gvec_vfee32,
2525    };
2526    static gen_helper_gvec_3_ptr * const g_cc[3] = {
2527        gen_helper_gvec_vfee_cc8,
2528        gen_helper_gvec_vfee_cc16,
2529        gen_helper_gvec_vfee_cc32,
2530    };
2531
2532    if (es > ES_32 || m5 & ~0x3) {
2533        gen_program_exception(s, PGM_SPECIFICATION);
2534        return DISAS_NORETURN;
2535    }
2536
2537    if (extract32(m5, 0, 1)) {
2538        gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2),
2539                       get_field(s, v3), cpu_env, m5, g_cc[es]);
2540        set_cc_static(s);
2541    } else {
2542        gen_gvec_3_ool(get_field(s, v1), get_field(s, v2),
2543                       get_field(s, v3), m5, g[es]);
2544    }
2545    return DISAS_NEXT;
2546}
2547
2548static DisasJumpType op_vfene(DisasContext *s, DisasOps *o)
2549{
2550    const uint8_t es = get_field(s, m4);
2551    const uint8_t m5 = get_field(s, m5);
2552    static gen_helper_gvec_3 * const g[3] = {
2553        gen_helper_gvec_vfene8,
2554        gen_helper_gvec_vfene16,
2555        gen_helper_gvec_vfene32,
2556    };
2557    static gen_helper_gvec_3_ptr * const g_cc[3] = {
2558        gen_helper_gvec_vfene_cc8,
2559        gen_helper_gvec_vfene_cc16,
2560        gen_helper_gvec_vfene_cc32,
2561    };
2562
2563    if (es > ES_32 || m5 & ~0x3) {
2564        gen_program_exception(s, PGM_SPECIFICATION);
2565        return DISAS_NORETURN;
2566    }
2567
2568    if (extract32(m5, 0, 1)) {
2569        gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2),
2570                       get_field(s, v3), cpu_env, m5, g_cc[es]);
2571        set_cc_static(s);
2572    } else {
2573        gen_gvec_3_ool(get_field(s, v1), get_field(s, v2),
2574                       get_field(s, v3), m5, g[es]);
2575    }
2576    return DISAS_NEXT;
2577}
2578
2579static DisasJumpType op_vistr(DisasContext *s, DisasOps *o)
2580{
2581    const uint8_t es = get_field(s, m3);
2582    const uint8_t m5 = get_field(s, m5);
2583    static gen_helper_gvec_2 * const g[3] = {
2584        gen_helper_gvec_vistr8,
2585        gen_helper_gvec_vistr16,
2586        gen_helper_gvec_vistr32,
2587    };
2588    static gen_helper_gvec_2_ptr * const g_cc[3] = {
2589        gen_helper_gvec_vistr_cc8,
2590        gen_helper_gvec_vistr_cc16,
2591        gen_helper_gvec_vistr_cc32,
2592    };
2593
2594    if (es > ES_32 || m5 & ~0x1) {
2595        gen_program_exception(s, PGM_SPECIFICATION);
2596        return DISAS_NORETURN;
2597    }
2598
2599    if (extract32(m5, 0, 1)) {
2600        gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2),
2601                       cpu_env, 0, g_cc[es]);
2602        set_cc_static(s);
2603    } else {
2604        gen_gvec_2_ool(get_field(s, v1), get_field(s, v2), 0,
2605                       g[es]);
2606    }
2607    return DISAS_NEXT;
2608}
2609
2610static DisasJumpType op_vstrc(DisasContext *s, DisasOps *o)
2611{
2612    const uint8_t es = get_field(s, m5);
2613    const uint8_t m6 = get_field(s, m6);
2614    static gen_helper_gvec_4 * const g[3] = {
2615        gen_helper_gvec_vstrc8,
2616        gen_helper_gvec_vstrc16,
2617        gen_helper_gvec_vstrc32,
2618    };
2619    static gen_helper_gvec_4 * const g_rt[3] = {
2620        gen_helper_gvec_vstrc_rt8,
2621        gen_helper_gvec_vstrc_rt16,
2622        gen_helper_gvec_vstrc_rt32,
2623    };
2624    static gen_helper_gvec_4_ptr * const g_cc[3] = {
2625        gen_helper_gvec_vstrc_cc8,
2626        gen_helper_gvec_vstrc_cc16,
2627        gen_helper_gvec_vstrc_cc32,
2628    };
2629    static gen_helper_gvec_4_ptr * const g_cc_rt[3] = {
2630        gen_helper_gvec_vstrc_cc_rt8,
2631        gen_helper_gvec_vstrc_cc_rt16,
2632        gen_helper_gvec_vstrc_cc_rt32,
2633    };
2634
2635    if (es > ES_32) {
2636        gen_program_exception(s, PGM_SPECIFICATION);
2637        return DISAS_NORETURN;
2638    }
2639
2640    if (extract32(m6, 0, 1)) {
2641        if (extract32(m6, 2, 1)) {
2642            gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2),
2643                           get_field(s, v3), get_field(s, v4),
2644                           cpu_env, m6, g_cc_rt[es]);
2645        } else {
2646            gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2),
2647                           get_field(s, v3), get_field(s, v4),
2648                           cpu_env, m6, g_cc[es]);
2649        }
2650        set_cc_static(s);
2651    } else {
2652        if (extract32(m6, 2, 1)) {
2653            gen_gvec_4_ool(get_field(s, v1), get_field(s, v2),
2654                           get_field(s, v3), get_field(s, v4),
2655                           m6, g_rt[es]);
2656        } else {
2657            gen_gvec_4_ool(get_field(s, v1), get_field(s, v2),
2658                           get_field(s, v3), get_field(s, v4),
2659                           m6, g[es]);
2660        }
2661    }
2662    return DISAS_NEXT;
2663}
2664
2665static DisasJumpType op_vstrs(DisasContext *s, DisasOps *o)
2666{
2667    typedef void (*helper_vstrs)(TCGv_ptr, TCGv_ptr, TCGv_ptr,
2668                                 TCGv_ptr, TCGv_ptr, TCGv_i32);
2669    static const helper_vstrs fns[3][2] = {
2670        { gen_helper_gvec_vstrs_8, gen_helper_gvec_vstrs_zs8 },
2671        { gen_helper_gvec_vstrs_16, gen_helper_gvec_vstrs_zs16 },
2672        { gen_helper_gvec_vstrs_32, gen_helper_gvec_vstrs_zs32 },
2673    };
2674    const uint8_t es = get_field(s, m5);
2675    const uint8_t m6 = get_field(s, m6);
2676    const bool zs = extract32(m6, 1, 1);
2677
2678    if (es > ES_32 || m6 & ~2) {
2679        gen_program_exception(s, PGM_SPECIFICATION);
2680        return DISAS_NORETURN;
2681    }
2682
2683    gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2),
2684                   get_field(s, v3), get_field(s, v4),
2685                   cpu_env, 0, fns[es][zs]);
2686    set_cc_static(s);
2687    return DISAS_NEXT;
2688}
2689
2690static DisasJumpType op_vfa(DisasContext *s, DisasOps *o)
2691{
2692    const uint8_t fpf = get_field(s, m4);
2693    const uint8_t m5 = get_field(s, m5);
2694    gen_helper_gvec_3_ptr *fn = NULL;
2695
2696    switch (s->fields.op2) {
2697    case 0xe3:
2698        switch (fpf) {
2699        case FPF_SHORT:
2700            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
2701                fn = gen_helper_gvec_vfa32;
2702            }
2703            break;
2704        case FPF_LONG:
2705            fn = gen_helper_gvec_vfa64;
2706            break;
2707        case FPF_EXT:
2708            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
2709                fn = gen_helper_gvec_vfa128;
2710            }
2711            break;
2712        default:
2713            break;
2714        }
2715        break;
2716    case 0xe5:
2717        switch (fpf) {
2718        case FPF_SHORT:
2719            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
2720                fn = gen_helper_gvec_vfd32;
2721            }
2722            break;
2723        case FPF_LONG:
2724            fn = gen_helper_gvec_vfd64;
2725            break;
2726        case FPF_EXT:
2727            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
2728                fn = gen_helper_gvec_vfd128;
2729            }
2730            break;
2731        default:
2732            break;
2733        }
2734        break;
2735    case 0xe7:
2736        switch (fpf) {
2737        case FPF_SHORT:
2738            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
2739                fn = gen_helper_gvec_vfm32;
2740            }
2741            break;
2742        case FPF_LONG:
2743            fn = gen_helper_gvec_vfm64;
2744            break;
2745        case FPF_EXT:
2746            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
2747                fn = gen_helper_gvec_vfm128;
2748            }
2749            break;
2750        default:
2751            break;
2752        }
2753        break;
2754    case 0xe2:
2755        switch (fpf) {
2756        case FPF_SHORT:
2757            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
2758                fn = gen_helper_gvec_vfs32;
2759            }
2760            break;
2761        case FPF_LONG:
2762            fn = gen_helper_gvec_vfs64;
2763            break;
2764        case FPF_EXT:
2765            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
2766                fn = gen_helper_gvec_vfs128;
2767            }
2768            break;
2769        default:
2770            break;
2771        }
2772        break;
2773    default:
2774        g_assert_not_reached();
2775    }
2776
2777    if (!fn || extract32(m5, 0, 3)) {
2778        gen_program_exception(s, PGM_SPECIFICATION);
2779        return DISAS_NORETURN;
2780    }
2781
2782    gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2),
2783                   get_field(s, v3), cpu_env, m5, fn);
2784    return DISAS_NEXT;
2785}
2786
2787static DisasJumpType op_wfc(DisasContext *s, DisasOps *o)
2788{
2789    const uint8_t fpf = get_field(s, m3);
2790    const uint8_t m4 = get_field(s, m4);
2791    gen_helper_gvec_2_ptr *fn = NULL;
2792
2793    switch (fpf) {
2794    case FPF_SHORT:
2795        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
2796            fn = gen_helper_gvec_wfk32;
2797            if (s->fields.op2 == 0xcb) {
2798                fn = gen_helper_gvec_wfc32;
2799            }
2800        }
2801        break;
2802    case FPF_LONG:
2803        fn = gen_helper_gvec_wfk64;
2804        if (s->fields.op2 == 0xcb) {
2805            fn = gen_helper_gvec_wfc64;
2806        }
2807        break;
2808    case FPF_EXT:
2809        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
2810            fn = gen_helper_gvec_wfk128;
2811            if (s->fields.op2 == 0xcb) {
2812                fn = gen_helper_gvec_wfc128;
2813            }
2814        }
2815        break;
2816    default:
2817        break;
2818    };
2819
2820    if (!fn || m4) {
2821        gen_program_exception(s, PGM_SPECIFICATION);
2822        return DISAS_NORETURN;
2823    }
2824
2825    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, 0, fn);
2826    set_cc_static(s);
2827    return DISAS_NEXT;
2828}
2829
2830static DisasJumpType op_vfc(DisasContext *s, DisasOps *o)
2831{
2832    const uint8_t fpf = get_field(s, m4);
2833    const uint8_t m5 = get_field(s, m5);
2834    const uint8_t m6 = get_field(s, m6);
2835    const bool cs = extract32(m6, 0, 1);
2836    const bool sq = extract32(m5, 2, 1);
2837    gen_helper_gvec_3_ptr *fn = NULL;
2838
2839    switch (s->fields.op2) {
2840    case 0xe8:
2841        switch (fpf) {
2842        case FPF_SHORT:
2843            fn = cs ? gen_helper_gvec_vfce32_cc : gen_helper_gvec_vfce32;
2844            break;
2845        case FPF_LONG:
2846            fn = cs ? gen_helper_gvec_vfce64_cc : gen_helper_gvec_vfce64;
2847            break;
2848        case FPF_EXT:
2849            fn = cs ? gen_helper_gvec_vfce128_cc : gen_helper_gvec_vfce128;
2850            break;
2851        default:
2852            break;
2853        }
2854        break;
2855    case 0xeb:
2856        switch (fpf) {
2857        case FPF_SHORT:
2858            fn = cs ? gen_helper_gvec_vfch32_cc : gen_helper_gvec_vfch32;
2859            break;
2860        case FPF_LONG:
2861            fn = cs ? gen_helper_gvec_vfch64_cc : gen_helper_gvec_vfch64;
2862            break;
2863        case FPF_EXT:
2864            fn = cs ? gen_helper_gvec_vfch128_cc : gen_helper_gvec_vfch128;
2865            break;
2866        default:
2867            break;
2868        }
2869        break;
2870    case 0xea:
2871        switch (fpf) {
2872        case FPF_SHORT:
2873            fn = cs ? gen_helper_gvec_vfche32_cc : gen_helper_gvec_vfche32;
2874            break;
2875        case FPF_LONG:
2876            fn = cs ? gen_helper_gvec_vfche64_cc : gen_helper_gvec_vfche64;
2877            break;
2878        case FPF_EXT:
2879            fn = cs ? gen_helper_gvec_vfche128_cc : gen_helper_gvec_vfche128;
2880            break;
2881        default:
2882            break;
2883        }
2884        break;
2885    default:
2886        g_assert_not_reached();
2887    }
2888
2889    if (!fn || extract32(m5, 0, 2) || extract32(m6, 1, 3) ||
2890        (!s390_has_feat(S390_FEAT_VECTOR_ENH) && (fpf != FPF_LONG || sq))) {
2891        gen_program_exception(s, PGM_SPECIFICATION);
2892        return DISAS_NORETURN;
2893    }
2894
2895    gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2), get_field(s, v3),
2896                   cpu_env, m5, fn);
2897    if (cs) {
2898        set_cc_static(s);
2899    }
2900    return DISAS_NEXT;
2901}
2902
2903static DisasJumpType op_vcdg(DisasContext *s, DisasOps *o)
2904{
2905    const uint8_t fpf = get_field(s, m3);
2906    const uint8_t m4 = get_field(s, m4);
2907    const uint8_t erm = get_field(s, m5);
2908    gen_helper_gvec_2_ptr *fn = NULL;
2909
2910
2911    switch (s->fields.op2) {
2912    case 0xc3:
2913        switch (fpf) {
2914        case FPF_LONG:
2915            fn = gen_helper_gvec_vcdg64;
2916            break;
2917        case FPF_SHORT:
2918            if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
2919                fn = gen_helper_gvec_vcdg32;
2920            }
2921            break;
2922        default:
2923            break;
2924        }
2925        break;
2926    case 0xc1:
2927        switch (fpf) {
2928        case FPF_LONG:
2929            fn = gen_helper_gvec_vcdlg64;
2930            break;
2931        case FPF_SHORT:
2932            if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
2933                fn = gen_helper_gvec_vcdlg32;
2934            }
2935            break;
2936        default:
2937            break;
2938        }
2939        break;
2940    case 0xc2:
2941        switch (fpf) {
2942        case FPF_LONG:
2943            fn = gen_helper_gvec_vcgd64;
2944            break;
2945        case FPF_SHORT:
2946            if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
2947                fn = gen_helper_gvec_vcgd32;
2948            }
2949            break;
2950        default:
2951            break;
2952        }
2953        break;
2954    case 0xc0:
2955        switch (fpf) {
2956        case FPF_LONG:
2957            fn = gen_helper_gvec_vclgd64;
2958            break;
2959        case FPF_SHORT:
2960            if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
2961                fn = gen_helper_gvec_vclgd32;
2962            }
2963            break;
2964        default:
2965            break;
2966        }
2967        break;
2968    case 0xc7:
2969        switch (fpf) {
2970        case FPF_SHORT:
2971            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
2972                fn = gen_helper_gvec_vfi32;
2973            }
2974            break;
2975        case FPF_LONG:
2976            fn = gen_helper_gvec_vfi64;
2977            break;
2978        case FPF_EXT:
2979            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
2980                fn = gen_helper_gvec_vfi128;
2981            }
2982            break;
2983        default:
2984            break;
2985        }
2986        break;
2987    case 0xc5:
2988        switch (fpf) {
2989        case FPF_LONG:
2990            fn = gen_helper_gvec_vflr64;
2991            break;
2992        case FPF_EXT:
2993            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
2994                fn = gen_helper_gvec_vflr128;
2995            }
2996            break;
2997        default:
2998            break;
2999        }
3000        break;
3001    default:
3002        g_assert_not_reached();
3003    }
3004
3005    if (!fn || extract32(m4, 0, 2) || erm > 7 || erm == 2) {
3006        gen_program_exception(s, PGM_SPECIFICATION);
3007        return DISAS_NORETURN;
3008    }
3009
3010    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env,
3011                   deposit32(m4, 4, 4, erm), fn);
3012    return DISAS_NEXT;
3013}
3014
3015static DisasJumpType op_vfll(DisasContext *s, DisasOps *o)
3016{
3017    const uint8_t fpf = get_field(s, m3);
3018    const uint8_t m4 = get_field(s, m4);
3019    gen_helper_gvec_2_ptr *fn = NULL;
3020
3021    switch (fpf) {
3022    case FPF_SHORT:
3023        fn = gen_helper_gvec_vfll32;
3024        break;
3025    case FPF_LONG:
3026        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3027            fn = gen_helper_gvec_vfll64;
3028        }
3029        break;
3030    default:
3031        break;
3032    }
3033
3034    if (!fn || extract32(m4, 0, 3)) {
3035        gen_program_exception(s, PGM_SPECIFICATION);
3036        return DISAS_NORETURN;
3037    }
3038
3039    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, m4, fn);
3040    return DISAS_NEXT;
3041}
3042
3043static DisasJumpType op_vfmax(DisasContext *s, DisasOps *o)
3044{
3045    const uint8_t fpf = get_field(s, m4);
3046    const uint8_t m6 = get_field(s, m6);
3047    const uint8_t m5 = get_field(s, m5);
3048    gen_helper_gvec_3_ptr *fn;
3049
3050    if (m6 == 5 || m6 == 6 || m6 == 7 || m6 >= 13 || (m5 & 7)) {
3051        gen_program_exception(s, PGM_SPECIFICATION);
3052        return DISAS_NORETURN;
3053    }
3054
3055    switch (fpf) {
3056    case FPF_SHORT:
3057        if (s->fields.op2 == 0xef) {
3058            fn = gen_helper_gvec_vfmax32;
3059        } else {
3060            fn = gen_helper_gvec_vfmin32;
3061        }
3062        break;
3063    case FPF_LONG:
3064        if (s->fields.op2 == 0xef) {
3065            fn = gen_helper_gvec_vfmax64;
3066        } else {
3067            fn = gen_helper_gvec_vfmin64;
3068        }
3069        break;
3070    case FPF_EXT:
3071        if (s->fields.op2 == 0xef) {
3072            fn = gen_helper_gvec_vfmax128;
3073        } else {
3074            fn = gen_helper_gvec_vfmin128;
3075        }
3076        break;
3077    default:
3078        gen_program_exception(s, PGM_SPECIFICATION);
3079        return DISAS_NORETURN;
3080    }
3081
3082    gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2), get_field(s, v3),
3083                   cpu_env, deposit32(m5, 4, 4, m6), fn);
3084    return DISAS_NEXT;
3085}
3086
3087static DisasJumpType op_vfma(DisasContext *s, DisasOps *o)
3088{
3089    const uint8_t m5 = get_field(s, m5);
3090    const uint8_t fpf = get_field(s, m6);
3091    gen_helper_gvec_4_ptr *fn = NULL;
3092
3093    switch (s->fields.op2) {
3094    case 0x8f:
3095        switch (fpf) {
3096        case FPF_SHORT:
3097            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3098                fn = gen_helper_gvec_vfma32;
3099            }
3100            break;
3101        case FPF_LONG:
3102            fn = gen_helper_gvec_vfma64;
3103            break;
3104        case FPF_EXT:
3105            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3106                fn = gen_helper_gvec_vfma128;
3107            }
3108            break;
3109        default:
3110            break;
3111        }
3112        break;
3113    case 0x8e:
3114        switch (fpf) {
3115        case FPF_SHORT:
3116            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3117                fn = gen_helper_gvec_vfms32;
3118            }
3119            break;
3120        case FPF_LONG:
3121            fn = gen_helper_gvec_vfms64;
3122            break;
3123        case FPF_EXT:
3124            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3125                fn = gen_helper_gvec_vfms128;
3126            }
3127            break;
3128        default:
3129            break;
3130        }
3131        break;
3132    case 0x9f:
3133        switch (fpf) {
3134        case FPF_SHORT:
3135            fn = gen_helper_gvec_vfnma32;
3136            break;
3137        case FPF_LONG:
3138            fn = gen_helper_gvec_vfnma64;
3139            break;
3140        case FPF_EXT:
3141            fn = gen_helper_gvec_vfnma128;
3142            break;
3143        default:
3144            break;
3145        }
3146        break;
3147    case 0x9e:
3148        switch (fpf) {
3149        case FPF_SHORT:
3150            fn = gen_helper_gvec_vfnms32;
3151            break;
3152        case FPF_LONG:
3153            fn = gen_helper_gvec_vfnms64;
3154            break;
3155        case FPF_EXT:
3156            fn = gen_helper_gvec_vfnms128;
3157            break;
3158        default:
3159            break;
3160        }
3161        break;
3162    default:
3163        g_assert_not_reached();
3164    }
3165
3166    if (!fn || extract32(m5, 0, 3)) {
3167        gen_program_exception(s, PGM_SPECIFICATION);
3168        return DISAS_NORETURN;
3169    }
3170
3171    gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2),
3172                   get_field(s, v3), get_field(s, v4), cpu_env, m5, fn);
3173    return DISAS_NEXT;
3174}
3175
3176static DisasJumpType op_vfpso(DisasContext *s, DisasOps *o)
3177{
3178    const uint8_t v1 = get_field(s, v1);
3179    const uint8_t v2 = get_field(s, v2);
3180    const uint8_t fpf = get_field(s, m3);
3181    const uint8_t m4 = get_field(s, m4);
3182    const uint8_t m5 = get_field(s, m5);
3183    const bool se = extract32(m4, 3, 1);
3184    TCGv_i64 tmp;
3185
3186    if ((fpf != FPF_LONG && !s390_has_feat(S390_FEAT_VECTOR_ENH)) ||
3187        extract32(m4, 0, 3) || m5 > 2) {
3188        gen_program_exception(s, PGM_SPECIFICATION);
3189        return DISAS_NORETURN;
3190    }
3191
3192    switch (fpf) {
3193    case FPF_SHORT:
3194        if (!se) {
3195            switch (m5) {
3196            case 0:
3197                /* sign bit is inverted (complement) */
3198                gen_gvec_fn_2i(xori, ES_32, v1, v2, 1ull << 31);
3199                break;
3200            case 1:
3201                /* sign bit is set to one (negative) */
3202                gen_gvec_fn_2i(ori, ES_32, v1, v2, 1ull << 31);
3203                break;
3204            case 2:
3205                /* sign bit is set to zero (positive) */
3206                gen_gvec_fn_2i(andi, ES_32, v1, v2, (1ull << 31) - 1);
3207                break;
3208            }
3209            return DISAS_NEXT;
3210        }
3211        break;
3212    case FPF_LONG:
3213        if (!se) {
3214            switch (m5) {
3215            case 0:
3216                /* sign bit is inverted (complement) */
3217                gen_gvec_fn_2i(xori, ES_64, v1, v2, 1ull << 63);
3218                break;
3219            case 1:
3220                /* sign bit is set to one (negative) */
3221                gen_gvec_fn_2i(ori, ES_64, v1, v2, 1ull << 63);
3222                break;
3223            case 2:
3224                /* sign bit is set to zero (positive) */
3225                gen_gvec_fn_2i(andi, ES_64, v1, v2, (1ull << 63) - 1);
3226                break;
3227            }
3228            return DISAS_NEXT;
3229        }
3230        break;
3231    case FPF_EXT:
3232        /* Only a single element. */
3233        break;
3234    default:
3235        gen_program_exception(s, PGM_SPECIFICATION);
3236        return DISAS_NORETURN;
3237    }
3238
3239    /* With a single element, we are only interested in bit 0. */
3240    tmp = tcg_temp_new_i64();
3241    read_vec_element_i64(tmp, v2, 0, ES_64);
3242    switch (m5) {
3243    case 0:
3244        /* sign bit is inverted (complement) */
3245        tcg_gen_xori_i64(tmp, tmp, 1ull << 63);
3246        break;
3247    case 1:
3248        /* sign bit is set to one (negative) */
3249        tcg_gen_ori_i64(tmp, tmp, 1ull << 63);
3250        break;
3251    case 2:
3252        /* sign bit is set to zero (positive) */
3253        tcg_gen_andi_i64(tmp, tmp, (1ull << 63) - 1);
3254        break;
3255    }
3256    write_vec_element_i64(tmp, v1, 0, ES_64);
3257
3258    if (fpf == FPF_EXT) {
3259        read_vec_element_i64(tmp, v2, 1, ES_64);
3260        write_vec_element_i64(tmp, v1, 1, ES_64);
3261    }
3262    return DISAS_NEXT;
3263}
3264
3265static DisasJumpType op_vfsq(DisasContext *s, DisasOps *o)
3266{
3267    const uint8_t fpf = get_field(s, m3);
3268    const uint8_t m4 = get_field(s, m4);
3269    gen_helper_gvec_2_ptr *fn = NULL;
3270
3271    switch (fpf) {
3272    case FPF_SHORT:
3273        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3274            fn = gen_helper_gvec_vfsq32;
3275        }
3276        break;
3277    case FPF_LONG:
3278        fn = gen_helper_gvec_vfsq64;
3279        break;
3280    case FPF_EXT:
3281        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3282            fn = gen_helper_gvec_vfsq128;
3283        }
3284        break;
3285    default:
3286        break;
3287    }
3288
3289    if (!fn || extract32(m4, 0, 3)) {
3290        gen_program_exception(s, PGM_SPECIFICATION);
3291        return DISAS_NORETURN;
3292    }
3293
3294    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, m4, fn);
3295    return DISAS_NEXT;
3296}
3297
3298static DisasJumpType op_vftci(DisasContext *s, DisasOps *o)
3299{
3300    const uint16_t i3 = get_field(s, i3);
3301    const uint8_t fpf = get_field(s, m4);
3302    const uint8_t m5 = get_field(s, m5);
3303    gen_helper_gvec_2_ptr *fn = NULL;
3304
3305    switch (fpf) {
3306    case FPF_SHORT:
3307        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3308            fn = gen_helper_gvec_vftci32;
3309        }
3310        break;
3311    case FPF_LONG:
3312        fn = gen_helper_gvec_vftci64;
3313        break;
3314    case FPF_EXT:
3315        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3316            fn = gen_helper_gvec_vftci128;
3317        }
3318        break;
3319    default:
3320        break;
3321    }
3322
3323    if (!fn || extract32(m5, 0, 3)) {
3324        gen_program_exception(s, PGM_SPECIFICATION);
3325        return DISAS_NORETURN;
3326    }
3327
3328    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env,
3329                   deposit32(m5, 4, 12, i3), fn);
3330    set_cc_static(s);
3331    return DISAS_NEXT;
3332}
3333