1/*
2 * QEMU TCG support -- s390x vector instruction translation functions
3 *
4 * Copyright (C) 2019 Red Hat Inc
5 *
6 * Authors:
7 *   David Hildenbrand <david@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12
13/*
14 * For most instructions that use the same element size for reads and
 * writes, we can use real gvec vector expansion, which potentially uses
16 * real host vector instructions. As they only work up to 64 bit elements,
17 * 128 bit elements (vector is a single element) have to be handled
18 * differently. Operations that are too complicated to encode via TCG ops
19 * are handled via gvec ool (out-of-line) handlers.
20 *
21 * As soon as instructions use different element sizes for reads and writes
22 * or access elements "out of their element scope" we expand them manually
23 * in fancy loops, as gvec expansion does not deal with actual element
24 * numbers and does also not support access to other elements.
25 *
26 * 128 bit elements:
27 *  As we only have i32/i64, such elements have to be loaded into two
28 *  i64 values and can then be processed e.g. by tcg_gen_add2_i64.
29 *
30 * Sizes:
31 *  On s390x, the operand size (oprsz) and the maximum size (maxsz) are
32 *  always 16 (128 bit). What gvec code calls "vece", s390x calls "es",
33 *  a.k.a. "element size". These values nicely map to MO_8 ... MO_64. Only
34 *  128 bit element size has to be treated in a special way (MO_64 + 1).
35 *  We will use ES_* instead of MO_* for this reason in this file.
36 *
37 * CC handling:
38 *  As gvec ool-helpers can currently not return values (besides via
39 *  pointers like vectors or cpu_env), whenever we have to set the CC and
40 *  can't conclude the value from the result vector, we will directly
 *  set it in "env->cc_op" and mark it as static via set_cc_static().
42 *  Whenever this is done, the helper writes globals (cc_op).
43 */
44
/* Number of bytes per vector element of size "es" (ES_8 -> 1 ... ES_128 -> 16) */
#define NUM_VEC_ELEMENT_BYTES(es) (1 << (es))
/* Number of elements in the 16-byte vector register for element size "es" */
#define NUM_VEC_ELEMENTS(es) (16 / NUM_VEC_ELEMENT_BYTES(es))
/* Number of bits per vector element of size "es" */
#define NUM_VEC_ELEMENT_BITS(es) (NUM_VEC_ELEMENT_BYTES(es) * BITS_PER_BYTE)

/* Element sizes; identical to MO_* up to 64 bit (see file comment above) */
#define ES_8    MO_8
#define ES_16   MO_16
#define ES_32   MO_32
#define ES_64   MO_64
#define ES_128  4

/* Floating-Point Format */
#define FPF_SHORT       2
#define FPF_LONG        3
#define FPF_EXT         4
59
60static inline bool valid_vec_element(uint8_t enr, MemOp es)
61{
62    return !(enr & ~(NUM_VEC_ELEMENTS(es) - 1));
63}
64
/*
 * Load vector element number "enr" of register "reg" into "dst".
 * "memop" selects the element size (ES_8 ... ES_64); with MO_SIGN set,
 * the value is sign-extended to 64 bit, otherwise zero-extended.
 */
static void read_vec_element_i64(TCGv_i64 dst, uint8_t reg, uint8_t enr,
                                 MemOp memop)
{
    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);

    /* the cast avoids warnings for values outside the plain MemOp range */
    switch ((unsigned)memop) {
    case ES_8:
        tcg_gen_ld8u_i64(dst, cpu_env, offs);
        break;
    case ES_16:
        tcg_gen_ld16u_i64(dst, cpu_env, offs);
        break;
    case ES_32:
        tcg_gen_ld32u_i64(dst, cpu_env, offs);
        break;
    case ES_8 | MO_SIGN:
        tcg_gen_ld8s_i64(dst, cpu_env, offs);
        break;
    case ES_16 | MO_SIGN:
        tcg_gen_ld16s_i64(dst, cpu_env, offs);
        break;
    case ES_32 | MO_SIGN:
        tcg_gen_ld32s_i64(dst, cpu_env, offs);
        break;
    case ES_64:
    case ES_64 | MO_SIGN:
        /* sign makes no difference for a full 64-bit load */
        tcg_gen_ld_i64(dst, cpu_env, offs);
        break;
    default:
        g_assert_not_reached();
    }
}
97
98static void read_vec_element_i32(TCGv_i32 dst, uint8_t reg, uint8_t enr,
99                                 MemOp memop)
100{
101    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
102
103    switch (memop) {
104    case ES_8:
105        tcg_gen_ld8u_i32(dst, cpu_env, offs);
106        break;
107    case ES_16:
108        tcg_gen_ld16u_i32(dst, cpu_env, offs);
109        break;
110    case ES_8 | MO_SIGN:
111        tcg_gen_ld8s_i32(dst, cpu_env, offs);
112        break;
113    case ES_16 | MO_SIGN:
114        tcg_gen_ld16s_i32(dst, cpu_env, offs);
115        break;
116    case ES_32:
117    case ES_32 | MO_SIGN:
118        tcg_gen_ld_i32(dst, cpu_env, offs);
119        break;
120    default:
121        g_assert_not_reached();
122    }
123}
124
/*
 * Store the low "memop" sized part of "src" into vector element number
 * "enr" of register "reg". Only plain ES_8 ... ES_64 sizes are accepted
 * (MO_SIGN makes no sense for stores).
 */
static void write_vec_element_i64(TCGv_i64 src, int reg, uint8_t enr,
                                  MemOp memop)
{
    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);

    switch (memop) {
    case ES_8:
        tcg_gen_st8_i64(src, cpu_env, offs);
        break;
    case ES_16:
        tcg_gen_st16_i64(src, cpu_env, offs);
        break;
    case ES_32:
        tcg_gen_st32_i64(src, cpu_env, offs);
        break;
    case ES_64:
        tcg_gen_st_i64(src, cpu_env, offs);
        break;
    default:
        g_assert_not_reached();
    }
}
147
148static void write_vec_element_i32(TCGv_i32 src, int reg, uint8_t enr,
149                                  MemOp memop)
150{
151    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
152
153    switch (memop) {
154    case ES_8:
155        tcg_gen_st8_i32(src, cpu_env, offs);
156        break;
157    case ES_16:
158        tcg_gen_st16_i32(src, cpu_env, offs);
159        break;
160    case ES_32:
161        tcg_gen_st_i32(src, cpu_env, offs);
162        break;
163    default:
164        g_assert_not_reached();
165    }
166}
167
/*
 * Compute a host pointer ("ptr") to vector element "enr" -- a runtime
 * value -- of size "es" within register "reg". The element number is
 * masked to the valid range first, so out-of-range values wrap around.
 */
static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
                                    uint8_t es)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    /* mask off invalid parts from the element nr */
    tcg_gen_andi_i64(tmp, enr, NUM_VEC_ELEMENTS(es) - 1);

    /* convert it to an element offset relative to cpu_env (see vec_reg_offset()) */
    tcg_gen_shli_i64(tmp, tmp, es);
#if !HOST_BIG_ENDIAN
    /* on little-endian hosts, mirror the element offset within each doubleword */
    tcg_gen_xori_i64(tmp, tmp, 8 - NUM_VEC_ELEMENT_BYTES(es));
#endif
    tcg_gen_addi_i64(tmp, tmp, vec_full_reg_offset(reg));

    /* generate the final ptr by adding cpu_env */
    tcg_gen_trunc_i64_ptr(ptr, tmp);
    tcg_gen_add_ptr(ptr, ptr, cpu_env);

    tcg_temp_free_i64(tmp);
}
189
/*
 * Convenience wrappers around the tcg_gen_gvec_* expanders: on s390x the
 * operand size (oprsz) and the maximum size (maxsz) are always 16 bytes.
 * Note: none of these macros may end in a semicolon, so that they behave
 * like ordinary function calls at the use site.
 */
#define gen_gvec_2(v1, v2, gen) \
    tcg_gen_gvec_2(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                   16, 16, gen)
#define gen_gvec_2s(v1, v2, c, gen) \
    tcg_gen_gvec_2s(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                    16, 16, c, gen)
#define gen_gvec_2_ool(v1, v2, data, fn) \
    tcg_gen_gvec_2_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       16, 16, data, fn)
#define gen_gvec_2i_ool(v1, v2, c, data, fn) \
    tcg_gen_gvec_2i_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                        c, 16, 16, data, fn)
#define gen_gvec_2_ptr(v1, v2, ptr, data, fn) \
    tcg_gen_gvec_2_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       ptr, 16, 16, data, fn)
#define gen_gvec_3(v1, v2, v3, gen) \
    tcg_gen_gvec_3(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                   vec_full_reg_offset(v3), 16, 16, gen)
#define gen_gvec_3_ool(v1, v2, v3, data, fn) \
    tcg_gen_gvec_3_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), 16, 16, data, fn)
#define gen_gvec_3_ptr(v1, v2, v3, ptr, data, fn) \
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), ptr, 16, 16, data, fn)
#define gen_gvec_3i(v1, v2, v3, c, gen) \
    tcg_gen_gvec_3i(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                    vec_full_reg_offset(v3), 16, 16, c, gen)
#define gen_gvec_4(v1, v2, v3, v4, gen) \
    tcg_gen_gvec_4(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                   vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                   16, 16, gen)
#define gen_gvec_4_ool(v1, v2, v3, v4, data, fn) \
    tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                       16, 16, data, fn)
#define gen_gvec_4_ptr(v1, v2, v3, v4, ptr, data, fn) \
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                       ptr, 16, 16, data, fn)
#define gen_gvec_dup_i64(es, v1, c) \
    tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c)
#define gen_gvec_mov(v1, v2) \
    tcg_gen_gvec_mov(0, vec_full_reg_offset(v1), vec_full_reg_offset(v2), 16, \
                     16)
/* fix: drop the superfluous trailing semicolon inside the macro body */
#define gen_gvec_dup_imm(es, v1, c) \
    tcg_gen_gvec_dup_imm(es, vec_full_reg_offset(v1), 16, 16, c)
#define gen_gvec_fn_2(fn, es, v1, v2) \
    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                      16, 16)
#define gen_gvec_fn_2i(fn, es, v1, v2, c) \
    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                      c, 16, 16)
#define gen_gvec_fn_2s(fn, es, v1, v2, s) \
    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                      s, 16, 16)
#define gen_gvec_fn_3(fn, es, v1, v2, v3) \
    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                      vec_full_reg_offset(v3), 16, 16)
#define gen_gvec_fn_4(fn, es, v1, v2, v3, v4) \
    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                      vec_full_reg_offset(v3), vec_full_reg_offset(v4), 16, 16)
251
252/*
253 * Helper to carry out a 128 bit vector computation using 2 i64 values per
254 * vector.
255 */
256typedef void (*gen_gvec128_3_i64_fn)(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
257                                     TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh);
258static void gen_gvec128_3_i64(gen_gvec128_3_i64_fn fn, uint8_t d, uint8_t a,
259                              uint8_t b)
260{
261        TCGv_i64 dh = tcg_temp_new_i64();
262        TCGv_i64 dl = tcg_temp_new_i64();
263        TCGv_i64 ah = tcg_temp_new_i64();
264        TCGv_i64 al = tcg_temp_new_i64();
265        TCGv_i64 bh = tcg_temp_new_i64();
266        TCGv_i64 bl = tcg_temp_new_i64();
267
268        read_vec_element_i64(ah, a, 0, ES_64);
269        read_vec_element_i64(al, a, 1, ES_64);
270        read_vec_element_i64(bh, b, 0, ES_64);
271        read_vec_element_i64(bl, b, 1, ES_64);
272        fn(dl, dh, al, ah, bl, bh);
273        write_vec_element_i64(dh, d, 0, ES_64);
274        write_vec_element_i64(dl, d, 1, ES_64);
275
276        tcg_temp_free_i64(dh);
277        tcg_temp_free_i64(dl);
278        tcg_temp_free_i64(ah);
279        tcg_temp_free_i64(al);
280        tcg_temp_free_i64(bh);
281        tcg_temp_free_i64(bl);
282}
283
284typedef void (*gen_gvec128_4_i64_fn)(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
285                                     TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh,
286                                     TCGv_i64 cl, TCGv_i64 ch);
287static void gen_gvec128_4_i64(gen_gvec128_4_i64_fn fn, uint8_t d, uint8_t a,
288                              uint8_t b, uint8_t c)
289{
290        TCGv_i64 dh = tcg_temp_new_i64();
291        TCGv_i64 dl = tcg_temp_new_i64();
292        TCGv_i64 ah = tcg_temp_new_i64();
293        TCGv_i64 al = tcg_temp_new_i64();
294        TCGv_i64 bh = tcg_temp_new_i64();
295        TCGv_i64 bl = tcg_temp_new_i64();
296        TCGv_i64 ch = tcg_temp_new_i64();
297        TCGv_i64 cl = tcg_temp_new_i64();
298
299        read_vec_element_i64(ah, a, 0, ES_64);
300        read_vec_element_i64(al, a, 1, ES_64);
301        read_vec_element_i64(bh, b, 0, ES_64);
302        read_vec_element_i64(bl, b, 1, ES_64);
303        read_vec_element_i64(ch, c, 0, ES_64);
304        read_vec_element_i64(cl, c, 1, ES_64);
305        fn(dl, dh, al, ah, bl, bh, cl, ch);
306        write_vec_element_i64(dh, d, 0, ES_64);
307        write_vec_element_i64(dl, d, 1, ES_64);
308
309        tcg_temp_free_i64(dh);
310        tcg_temp_free_i64(dl);
311        tcg_temp_free_i64(ah);
312        tcg_temp_free_i64(al);
313        tcg_temp_free_i64(bh);
314        tcg_temp_free_i64(bl);
315        tcg_temp_free_i64(ch);
316        tcg_temp_free_i64(cl);
317}
318
319static void gen_addi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
320                          uint64_t b)
321{
322    TCGv_i64 bl = tcg_constant_i64(b);
323    TCGv_i64 bh = tcg_constant_i64(0);
324
325    tcg_gen_add2_i64(dl, dh, al, ah, bl, bh);
326}
327
328static DisasJumpType op_vbperm(DisasContext *s, DisasOps *o)
329{
330    gen_gvec_3_ool(get_field(s, v1), get_field(s, v2), get_field(s, v3), 0,
331                   gen_helper_gvec_vbperm);
332
333    return DISAS_NEXT;
334}
335
/*
 * VECTOR GATHER ELEMENT: element "m3" of v2 provides an additional
 * address offset; the element loaded from the resulting address is
 * inserted into element "m3" of v1.
 */
static DisasJumpType op_vge(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s, v2), enr, es);
    tcg_gen_add_i64(o->addr1, o->addr1, tmp);
    /* wrap the resulting address for the current addressing mode */
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0);

    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    write_vec_element_i64(tmp, get_field(s, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
357
/*
 * Expand an 8-bit mask into a 64-bit byte mask: bit i of "mask" selects
 * whether byte i (counting from the least significant byte) is 0xff.
 */
static uint64_t generate_byte_mask(uint8_t mask)
{
    uint64_t result = 0;

    for (int bit = 0; bit < 8; bit++) {
        if (mask & (1u << bit)) {
            result |= 0xffull << (bit * 8);
        }
    }
    return result;
}
370
/*
 * VECTOR GENERATE BYTE MASK: each bit of the 16-bit immediate i2 selects
 * whether the corresponding byte of v1 becomes 0xff or 0x00.
 */
static DisasJumpType op_vgbm(DisasContext *s, DisasOps *o)
{
    const uint16_t i2 = get_field(s, i2);

    /* true iff the upper and lower byte of i2 are identical */
    if (i2 == (i2 & 0xff) * 0x0101) {
        /*
         * Masks for both 64 bit elements of the vector are the same.
         * Trust tcg to produce a good constant loading.
         */
        gen_gvec_dup_imm(ES_64, get_field(s, v1),
                         generate_byte_mask(i2 & 0xff));
    } else {
        TCGv_i64 t = tcg_temp_new_i64();

        /* upper byte of i2 -> leftmost doubleword, lower byte -> rightmost */
        tcg_gen_movi_i64(t, generate_byte_mask(i2 >> 8));
        write_vec_element_i64(t, get_field(s, v1), 0, ES_64);
        tcg_gen_movi_i64(t, generate_byte_mask(i2));
        write_vec_element_i64(t, get_field(s, v1), 1, ES_64);
        tcg_temp_free_i64(t);
    }
    return DISAS_NEXT;
}
393
/*
 * VECTOR GENERATE MASK: in every element of v1, set the bits from
 * position i2 through position i3 (inclusive, wrapping around if
 * i3 < i2), where bit 0 is the leftmost bit of the element.
 */
static DisasJumpType op_vgm(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m4);
    const uint8_t bits = NUM_VEC_ELEMENT_BITS(es);
    /* bit positions are interpreted modulo the element size */
    const uint8_t i2 = get_field(s, i2) & (bits - 1);
    const uint8_t i3 = get_field(s, i3) & (bits - 1);
    uint64_t mask = 0;
    int i;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* generate the mask - take care of wrapping */
    for (i = i2; ; i = (i + 1) % bits) {
        /* bit 0 is the most significant bit of the element */
        mask |= 1ull << (bits - i - 1);
        if (i == i3) {
            break;
        }
    }

    gen_gvec_dup_imm(es, get_field(s, v1), mask);
    return DISAS_NEXT;
}
419
420static DisasJumpType op_vl(DisasContext *s, DisasOps *o)
421{
422    TCGv_i64 t0 = tcg_temp_new_i64();
423    TCGv_i64 t1 = tcg_temp_new_i64();
424
425    tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_TEUQ);
426    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
427    tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ);
428    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
429    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
430    tcg_temp_free(t0);
431    tcg_temp_free(t1);
432    return DISAS_NEXT;
433}
434
435static DisasJumpType op_vlr(DisasContext *s, DisasOps *o)
436{
437    gen_gvec_mov(get_field(s, v1), get_field(s, v2));
438    return DISAS_NEXT;
439}
440
/*
 * VECTOR LOAD AND REPLICATE: load a single element from memory and
 * replicate it across all elements of v1.
 */
static DisasJumpType op_vlrep(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m3);
    TCGv_i64 tmp;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    gen_gvec_dup_i64(es, get_field(s, v1), tmp);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
457
/*
 * VECTOR LOAD BYTE REVERSED ELEMENT: load one element from memory with
 * byte-reversed (little-endian) layout and insert it into element "m3"
 * of v1.
 */
static DisasJumpType op_vlebr(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    /* MO_LE performs the byte reversal as part of the load */
    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es);
    write_vec_element_i64(tmp, get_field(s, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
475
/*
 * VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE: load one element with
 * byte-reversed layout and replicate it across all elements of v1.
 * Byte reversal is meaningless for single bytes, so only 16..64 bit
 * element sizes are valid.
 */
static DisasJumpType op_vlbrrep(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m3);
    TCGv_i64 tmp;

    if (es < ES_16 || es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es);
    gen_gvec_dup_i64(es, get_field(s, v1), tmp);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
492
/*
 * VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO: load one byte-reversed
 * element into the leftmost doubleword of v1 (left-aligned into the
 * leftmost word for m3 == 6) and zero out all other bytes.
 */
static DisasJumpType op_vllebrz(DisasContext *s, DisasOps *o)
{
    const uint8_t m3 = get_field(s, m3);
    TCGv_i64 tmp;
    int es, lshift;

    switch (m3) {
    case ES_16:
    case ES_32:
    case ES_64:
        /* value ends up right-aligned within the leftmost doubleword */
        es = m3;
        lshift = 0;
        break;
    case 6:
        /* 32-bit value placed left-aligned (in the leftmost word) */
        es = ES_32;
        lshift = 32;
        break;
    default:
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es);
    tcg_gen_shli_i64(tmp, tmp, lshift);

    write_vec_element_i64(tmp, get_field(s, v1), 0, ES_64);
    /* the rightmost doubleword is always zeroed */
    write_vec_element_i64(tcg_constant_i64(0), get_field(s, v1), 1, ES_64);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
524
525static DisasJumpType op_vlbr(DisasContext *s, DisasOps *o)
526{
527    const uint8_t es = get_field(s, m3);
528    TCGv_i64 t0, t1;
529
530    if (es < ES_16 || es > ES_128) {
531        gen_program_exception(s, PGM_SPECIFICATION);
532        return DISAS_NORETURN;
533    }
534
535    t0 = tcg_temp_new_i64();
536    t1 = tcg_temp_new_i64();
537
538
539    if (es == ES_128) {
540        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
541        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
542        tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
543        goto write;
544    }
545
546    /* Begin with byte reversed doublewords... */
547    tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
548    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
549    tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
550
551    /*
552     * For 16 and 32-bit elements, the doubleword bswap also reversed
553     * the order of the elements.  Perform a larger order swap to put
554     * them back into place.  For the 128-bit "element", finish the
555     * bswap by swapping the doublewords.
556     */
557    switch (es) {
558    case ES_16:
559        tcg_gen_hswap_i64(t0, t0);
560        tcg_gen_hswap_i64(t1, t1);
561        break;
562    case ES_32:
563        tcg_gen_wswap_i64(t0, t0);
564        tcg_gen_wswap_i64(t1, t1);
565        break;
566    case ES_64:
567        break;
568    default:
569        g_assert_not_reached();
570    }
571
572write:
573    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
574    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
575
576    tcg_temp_free(t0);
577    tcg_temp_free(t1);
578    return DISAS_NEXT;
579}
580
/*
 * VECTOR LOAD ELEMENT: load one element (size from insn->data) from
 * memory and insert it into element "m3" of v1.
 */
static DisasJumpType op_vle(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    write_vec_element_i64(tmp, get_field(s, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
598
/*
 * VECTOR LOAD ELEMENT IMMEDIATE: write the sign-extended 16-bit
 * immediate i2 into element "m3" of v1.
 */
static DisasJumpType op_vlei(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* constants are managed by TCG and must not be freed */
    tmp = tcg_constant_i64((int16_t)get_field(s, i2));
    write_vec_element_i64(tmp, get_field(s, v1), enr, es);
    return DISAS_NEXT;
}
614
615static DisasJumpType op_vler(DisasContext *s, DisasOps *o)
616{
617    const uint8_t es = get_field(s, m3);
618
619    if (es < ES_16 || es > ES_64) {
620        gen_program_exception(s, PGM_SPECIFICATION);
621        return DISAS_NORETURN;
622    }
623
624    TCGv_i64 t0 = tcg_temp_new_i64();
625    TCGv_i64 t1 = tcg_temp_new_i64();
626
627    /* Begin with the two doublewords swapped... */
628    tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ);
629    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
630    tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_TEUQ);
631
632    /* ... then swap smaller elements within the doublewords as required. */
633    switch (es) {
634    case MO_16:
635        tcg_gen_hswap_i64(t1, t1);
636        tcg_gen_hswap_i64(t0, t0);
637        break;
638    case MO_32:
639        tcg_gen_wswap_i64(t1, t1);
640        tcg_gen_wswap_i64(t0, t0);
641        break;
642    case MO_64:
643        break;
644    default:
645        g_assert_not_reached();
646    }
647
648    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
649    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
650    tcg_temp_free(t0);
651    tcg_temp_free(t1);
652    return DISAS_NEXT;
653}
654
/*
 * VECTOR LOAD GR FROM VR ELEMENT: read the element of v3 selected by
 * the effective address (b2/d2) into the output general purpose
 * register, zero-extended to 64 bit.
 */
static DisasJumpType op_vlgv(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m4);
    TCGv_ptr ptr;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* fast path if we don't need the register content */
    if (!get_field(s, b2)) {
        /* the element number is known at translation time */
        uint8_t enr = get_field(s, d2) & (NUM_VEC_ELEMENTS(es) - 1);

        read_vec_element_i64(o->out, get_field(s, v3), enr, es);
        return DISAS_NEXT;
    }

    /* otherwise compute a pointer to the element at runtime */
    ptr = tcg_temp_new_ptr();
    get_vec_element_ptr_i64(ptr, get_field(s, v3), o->addr1, es);
    switch (es) {
    case ES_8:
        tcg_gen_ld8u_i64(o->out, ptr, 0);
        break;
    case ES_16:
        tcg_gen_ld16u_i64(o->out, ptr, 0);
        break;
    case ES_32:
        tcg_gen_ld32u_i64(o->out, ptr, 0);
        break;
    case ES_64:
        tcg_gen_ld_i64(o->out, ptr, 0);
        break;
    default:
        g_assert_not_reached();
    }
    tcg_temp_free_ptr(ptr);

    return DISAS_NEXT;
}
695
/*
 * VECTOR LOAD LOGICAL ELEMENT AND ZERO: load one element from memory
 * into a fixed position of v1 and zero out all other bytes.
 */
static DisasJumpType op_vllez(DisasContext *s, DisasOps *o)
{
    uint8_t es = get_field(s, m3);
    uint8_t enr;
    TCGv_i64 t;

    switch (es) {
    /* rightmost sub-element of leftmost doubleword */
    case ES_8:
        enr = 7;
        break;
    case ES_16:
        enr = 3;
        break;
    case ES_32:
        enr = 1;
        break;
    case ES_64:
        enr = 0;
        break;
    /* leftmost sub-element of leftmost doubleword */
    case 6:
        /* m3 == 6 requires the vector-enhancements facility */
        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
            es = ES_32;
            enr = 0;
            break;
        }
        /* fallthrough */
    default:
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    t = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TE | es);
    /* zero the whole register first, then insert the loaded element */
    gen_gvec_dup_imm(es, get_field(s, v1), 0);
    write_vec_element_i64(t, get_field(s, v1), enr, es);
    tcg_temp_free_i64(t);
    return DISAS_NEXT;
}
736
/*
 * VECTOR LOAD MULTIPLE: load registers v1 through v3 (16 bytes each)
 * from consecutive storage.
 */
static DisasJumpType op_vlm(DisasContext *s, DisasOps *o)
{
    const uint8_t v3 = get_field(s, v3);
    uint8_t v1 = get_field(s, v1);
    TCGv_i64 t0, t1;

    /* at most 16 registers; v3 must not be below v1 */
    if (v3 < v1 || (v3 - v1 + 1) > 16) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /*
     * Check for possible access exceptions by trying to load the last
     * element first; the first element is then checked by the first
     * load in the loop below.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();
    gen_addi_and_wrap_i64(s, t0, o->addr1, (v3 - v1) * 16 + 8);
    tcg_gen_qemu_ld_i64(t0, t0, get_mem_index(s), MO_TEUQ);

    for (;; v1++) {
        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ);
        write_vec_element_i64(t1, v1, 0, ES_64);
        if (v1 == v3) {
            /* the low half of the last register was already loaded (t0) */
            break;
        }
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ);
        write_vec_element_i64(t1, v1, 1, ES_64);
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    }

    /* Store the last element, loaded first */
    write_vec_element_i64(t0, v1, 1, ES_64);

    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}
776
/*
 * VECTOR LOAD TO BLOCK BOUNDARY: load into v1 without crossing the
 * block boundary selected by m3 (block size is 64 << m3 bytes); the
 * actual loading is done by the vll helper.
 */
static DisasJumpType op_vlbb(DisasContext *s, DisasOps *o)
{
    const int64_t block_size = (1ull << (get_field(s, m3) + 6));
    const int v1_offs = vec_full_reg_offset(get_field(s, v1));
    TCGv_ptr a0;
    TCGv_i64 bytes;

    if (get_field(s, m3) > 6) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    bytes = tcg_temp_new_i64();
    a0 = tcg_temp_new_ptr();
    /*
     * calculate the number of bytes until the next block boundary:
     * addr | -block_size == (addr % block_size) - block_size, so the
     * negation yields block_size - (addr % block_size).
     */
    tcg_gen_ori_i64(bytes, o->addr1, -block_size);
    tcg_gen_neg_i64(bytes, bytes);

    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vll(cpu_env, a0, o->addr1, bytes);
    tcg_temp_free_i64(bytes);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}
801
/*
 * VECTOR LOAD VR ELEMENT FROM GR: store the general purpose register
 * (o->in2) into the element of v1 selected by the effective address
 * (b2/d2).
 */
static DisasJumpType op_vlvg(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m4);
    TCGv_ptr ptr;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* fast path if we don't need the register content */
    if (!get_field(s, b2)) {
        /* the element number is known at translation time */
        uint8_t enr = get_field(s, d2) & (NUM_VEC_ELEMENTS(es) - 1);

        write_vec_element_i64(o->in2, get_field(s, v1), enr, es);
        return DISAS_NEXT;
    }

    /* otherwise compute a pointer to the element at runtime */
    ptr = tcg_temp_new_ptr();
    get_vec_element_ptr_i64(ptr, get_field(s, v1), o->addr1, es);
    switch (es) {
    case ES_8:
        tcg_gen_st8_i64(o->in2, ptr, 0);
        break;
    case ES_16:
        tcg_gen_st16_i64(o->in2, ptr, 0);
        break;
    case ES_32:
        tcg_gen_st32_i64(o->in2, ptr, 0);
        break;
    case ES_64:
        tcg_gen_st_i64(o->in2, ptr, 0);
        break;
    default:
        g_assert_not_reached();
    }
    tcg_temp_free_ptr(ptr);

    return DISAS_NEXT;
}
842
843static DisasJumpType op_vlvgp(DisasContext *s, DisasOps *o)
844{
845    write_vec_element_i64(o->in1, get_field(s, v1), 0, ES_64);
846    write_vec_element_i64(o->in2, get_field(s, v1), 1, ES_64);
847    return DISAS_NEXT;
848}
849
/*
 * VECTOR LOAD WITH LENGTH: the register operand holds the highest byte
 * index to load; the actual loading is done by the vll helper.
 */
static DisasJumpType op_vll(DisasContext *s, DisasOps *o)
{
    const int v1_offs = vec_full_reg_offset(get_field(s, v1));
    TCGv_ptr a0 = tcg_temp_new_ptr();

    /* convert highest index into an actual length */
    tcg_gen_addi_i64(o->in2, o->in2, 1);
    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vll(cpu_env, a0, o->addr1, o->in2);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}
862
/*
 * VECTOR MERGE (HIGH/LOW): interleave elements of v2 and v3 into v1.
 * For op2 == 0x61 (presumably merge high) the leftmost halves of the
 * sources are used, otherwise the rightmost halves. Since v1 may
 * overlap v2/v3, the iteration order is chosen so that every source
 * element is read before it can be overwritten.
 */
static DisasJumpType op_vmr(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s, v1);
    const uint8_t v2 = get_field(s, v2);
    const uint8_t v3 = get_field(s, v3);
    const uint8_t es = get_field(s, m4);
    int dst_idx, src_idx;
    TCGv_i64 tmp;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    if (s->fields.op2 == 0x61) {
        /* iterate backwards to avoid overwriting data we might need later */
        for (dst_idx = NUM_VEC_ELEMENTS(es) - 1; dst_idx >= 0; dst_idx--) {
            src_idx = dst_idx / 2;
            /* even destination elements come from v2, odd ones from v3 */
            if (dst_idx % 2 == 0) {
                read_vec_element_i64(tmp, v2, src_idx, es);
            } else {
                read_vec_element_i64(tmp, v3, src_idx, es);
            }
            write_vec_element_i64(tmp, v1, dst_idx, es);
        }
    } else {
        /* iterate forward to avoid overwriting data we might need later */
        for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(es); dst_idx++) {
            /* sources come from the rightmost halves of v2/v3 */
            src_idx = (dst_idx + NUM_VEC_ELEMENTS(es)) / 2;
            if (dst_idx % 2 == 0) {
                read_vec_element_i64(tmp, v2, src_idx, es);
            } else {
                read_vec_element_i64(tmp, v3, src_idx, es);
            }
            write_vec_element_i64(tmp, v1, dst_idx, es);
        }
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
904
905static DisasJumpType op_vpk(DisasContext *s, DisasOps *o)
906{
907    const uint8_t v1 = get_field(s, v1);
908    const uint8_t v2 = get_field(s, v2);
909    const uint8_t v3 = get_field(s, v3);
910    const uint8_t es = get_field(s, m4);
911    static gen_helper_gvec_3 * const vpk[3] = {
912        gen_helper_gvec_vpk16,
913        gen_helper_gvec_vpk32,
914        gen_helper_gvec_vpk64,
915    };
916     static gen_helper_gvec_3 * const vpks[3] = {
917        gen_helper_gvec_vpks16,
918        gen_helper_gvec_vpks32,
919        gen_helper_gvec_vpks64,
920    };
921    static gen_helper_gvec_3_ptr * const vpks_cc[3] = {
922        gen_helper_gvec_vpks_cc16,
923        gen_helper_gvec_vpks_cc32,
924        gen_helper_gvec_vpks_cc64,
925    };
926    static gen_helper_gvec_3 * const vpkls[3] = {
927        gen_helper_gvec_vpkls16,
928        gen_helper_gvec_vpkls32,
929        gen_helper_gvec_vpkls64,
930    };
931    static gen_helper_gvec_3_ptr * const vpkls_cc[3] = {
932        gen_helper_gvec_vpkls_cc16,
933        gen_helper_gvec_vpkls_cc32,
934        gen_helper_gvec_vpkls_cc64,
935    };
936
937    if (es == ES_8 || es > ES_64) {
938        gen_program_exception(s, PGM_SPECIFICATION);
939        return DISAS_NORETURN;
940    }
941
942    switch (s->fields.op2) {
943    case 0x97:
944        if (get_field(s, m5) & 0x1) {
945            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpks_cc[es - 1]);
946            set_cc_static(s);
947        } else {
948            gen_gvec_3_ool(v1, v2, v3, 0, vpks[es - 1]);
949        }
950        break;
951    case 0x95:
952        if (get_field(s, m5) & 0x1) {
953            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpkls_cc[es - 1]);
954            set_cc_static(s);
955        } else {
956            gen_gvec_3_ool(v1, v2, v3, 0, vpkls[es - 1]);
957        }
958        break;
959    case 0x94:
960        /* If sources and destination don't overlap -> fast path */
961        if (v1 != v2 && v1 != v3) {
962            const uint8_t src_es = get_field(s, m4);
963            const uint8_t dst_es = src_es - 1;
964            TCGv_i64 tmp = tcg_temp_new_i64();
965            int dst_idx, src_idx;
966
967            for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) {
968                src_idx = dst_idx;
969                if (src_idx < NUM_VEC_ELEMENTS(src_es)) {
970                    read_vec_element_i64(tmp, v2, src_idx, src_es);
971                } else {
972                    src_idx -= NUM_VEC_ELEMENTS(src_es);
973                    read_vec_element_i64(tmp, v3, src_idx, src_es);
974                }
975                write_vec_element_i64(tmp, v1, dst_idx, dst_es);
976            }
977            tcg_temp_free_i64(tmp);
978        } else {
979            gen_gvec_3_ool(v1, v2, v3, 0, vpk[es - 1]);
980        }
981        break;
982    default:
983        g_assert_not_reached();
984    }
985    return DISAS_NEXT;
986}
987
988static DisasJumpType op_vperm(DisasContext *s, DisasOps *o)
989{
990    gen_gvec_4_ool(get_field(s, v1), get_field(s, v2),
991                   get_field(s, v3), get_field(s, v4),
992                   0, gen_helper_gvec_vperm);
993    return DISAS_NEXT;
994}
995
996static DisasJumpType op_vpdi(DisasContext *s, DisasOps *o)
997{
998    const uint8_t i2 = extract32(get_field(s, m4), 2, 1);
999    const uint8_t i3 = extract32(get_field(s, m4), 0, 1);
1000    TCGv_i64 t0 = tcg_temp_new_i64();
1001    TCGv_i64 t1 = tcg_temp_new_i64();
1002
1003    read_vec_element_i64(t0, get_field(s, v2), i2, ES_64);
1004    read_vec_element_i64(t1, get_field(s, v3), i3, ES_64);
1005    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
1006    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
1007    tcg_temp_free_i64(t0);
1008    tcg_temp_free_i64(t1);
1009    return DISAS_NEXT;
1010}
1011
1012static DisasJumpType op_vrep(DisasContext *s, DisasOps *o)
1013{
1014    const uint8_t enr = get_field(s, i2);
1015    const uint8_t es = get_field(s, m4);
1016
1017    if (es > ES_64 || !valid_vec_element(enr, es)) {
1018        gen_program_exception(s, PGM_SPECIFICATION);
1019        return DISAS_NORETURN;
1020    }
1021
1022    tcg_gen_gvec_dup_mem(es, vec_full_reg_offset(get_field(s, v1)),
1023                         vec_reg_offset(get_field(s, v3), enr, es),
1024                         16, 16);
1025    return DISAS_NEXT;
1026}
1027
1028static DisasJumpType op_vrepi(DisasContext *s, DisasOps *o)
1029{
1030    const int64_t data = (int16_t)get_field(s, i2);
1031    const uint8_t es = get_field(s, m3);
1032
1033    if (es > ES_64) {
1034        gen_program_exception(s, PGM_SPECIFICATION);
1035        return DISAS_NORETURN;
1036    }
1037
1038    gen_gvec_dup_imm(es, get_field(s, v1), data);
1039    return DISAS_NEXT;
1040}
1041
/*
 * VECTOR SCATTER ELEMENT: store element enr of v1 to the address formed
 * by adding element enr of v2 to the base/index address in addr1.
 */
static DisasJumpType op_vsce(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    /* Add the vector offset, then wrap to the current addressing mode. */
    read_vec_element_i64(tmp, get_field(s, v2), enr, es);
    tcg_gen_add_i64(o->addr1, o->addr1, tmp);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0);

    read_vec_element_i64(tmp, get_field(s, v1), enr, es);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
1063
1064static DisasJumpType op_vsel(DisasContext *s, DisasOps *o)
1065{
1066    gen_gvec_fn_4(bitsel, ES_8, get_field(s, v1),
1067                  get_field(s, v4), get_field(s, v2),
1068                  get_field(s, v3));
1069    return DISAS_NEXT;
1070}
1071
1072static DisasJumpType op_vseg(DisasContext *s, DisasOps *o)
1073{
1074    const uint8_t es = get_field(s, m3);
1075    int idx1, idx2;
1076    TCGv_i64 tmp;
1077
1078    switch (es) {
1079    case ES_8:
1080        idx1 = 7;
1081        idx2 = 15;
1082        break;
1083    case ES_16:
1084        idx1 = 3;
1085        idx2 = 7;
1086        break;
1087    case ES_32:
1088        idx1 = 1;
1089        idx2 = 3;
1090        break;
1091    default:
1092        gen_program_exception(s, PGM_SPECIFICATION);
1093        return DISAS_NORETURN;
1094    }
1095
1096    tmp = tcg_temp_new_i64();
1097    read_vec_element_i64(tmp, get_field(s, v2), idx1, es | MO_SIGN);
1098    write_vec_element_i64(tmp, get_field(s, v1), 0, ES_64);
1099    read_vec_element_i64(tmp, get_field(s, v2), idx2, es | MO_SIGN);
1100    write_vec_element_i64(tmp, get_field(s, v1), 1, ES_64);
1101    tcg_temp_free_i64(tmp);
1102    return DISAS_NEXT;
1103}
1104
/* VECTOR STORE: store all 16 bytes of v1 to memory at addr1. */
static DisasJumpType op_vst(DisasContext *s, DisasOps *o)
{
    TCGv_i64 tmp;

    /* Probe write access before actually modifying memory */
    gen_helper_probe_write_access(cpu_env, o->addr1,
                                  tcg_constant_i64(16));

    tmp = tcg_temp_new_i64();
    /* Store both doublewords in target byte order, element 0 first. */
    read_vec_element_i64(tmp,  get_field(s, v1), 0, ES_64);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEUQ);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    read_vec_element_i64(tmp,  get_field(s, v1), 1, ES_64);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEUQ);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
1122
1123static DisasJumpType op_vstebr(DisasContext *s, DisasOps *o)
1124{
1125    const uint8_t es = s->insn->data;
1126    const uint8_t enr = get_field(s, m3);
1127    TCGv_i64 tmp;
1128
1129    if (!valid_vec_element(enr, es)) {
1130        gen_program_exception(s, PGM_SPECIFICATION);
1131        return DISAS_NORETURN;
1132    }
1133
1134    tmp = tcg_temp_new_i64();
1135    read_vec_element_i64(tmp, get_field(s, v1), enr, es);
1136    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es);
1137    tcg_temp_free_i64(tmp);
1138    return DISAS_NEXT;
1139}
1140
1141static DisasJumpType op_vstbr(DisasContext *s, DisasOps *o)
1142{
1143    const uint8_t es = get_field(s, m3);
1144    TCGv_i64 t0, t1;
1145
1146    if (es < ES_16 || es > ES_128) {
1147        gen_program_exception(s, PGM_SPECIFICATION);
1148        return DISAS_NORETURN;
1149    }
1150
1151    /* Probe write access before actually modifying memory */
1152    gen_helper_probe_write_access(cpu_env, o->addr1, tcg_constant_i64(16));
1153
1154    t0 = tcg_temp_new_i64();
1155    t1 = tcg_temp_new_i64();
1156
1157
1158    if (es == ES_128) {
1159        read_vec_element_i64(t1, get_field(s, v1), 0, ES_64);
1160        read_vec_element_i64(t0, get_field(s, v1), 1, ES_64);
1161        goto write;
1162    }
1163
1164    read_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
1165    read_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
1166
1167    /*
1168     * For 16 and 32-bit elements, the doubleword bswap below will
1169     * reverse the order of the elements.  Perform a larger order
1170     * swap to put them back into place.  For the 128-bit "element",
1171     * finish the bswap by swapping the doublewords.
1172     */
1173    switch (es) {
1174    case MO_16:
1175        tcg_gen_hswap_i64(t0, t0);
1176        tcg_gen_hswap_i64(t1, t1);
1177        break;
1178    case MO_32:
1179        tcg_gen_wswap_i64(t0, t0);
1180        tcg_gen_wswap_i64(t1, t1);
1181        break;
1182    case MO_64:
1183        break;
1184    default:
1185        g_assert_not_reached();
1186    }
1187
1188write:
1189    tcg_gen_qemu_st_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
1190    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
1191    tcg_gen_qemu_st_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
1192
1193    tcg_temp_free(t0);
1194    tcg_temp_free(t1);
1195    return DISAS_NEXT;
1196}
1197
1198static DisasJumpType op_vste(DisasContext *s, DisasOps *o)
1199{
1200    const uint8_t es = s->insn->data;
1201    const uint8_t enr = get_field(s, m3);
1202    TCGv_i64 tmp;
1203
1204    if (!valid_vec_element(enr, es)) {
1205        gen_program_exception(s, PGM_SPECIFICATION);
1206        return DISAS_NORETURN;
1207    }
1208
1209    tmp = tcg_temp_new_i64();
1210    read_vec_element_i64(tmp, get_field(s, v1), enr, es);
1211    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
1212    tcg_temp_free_i64(tmp);
1213    return DISAS_NEXT;
1214}
1215
1216static DisasJumpType op_vster(DisasContext *s, DisasOps *o)
1217{
1218    const uint8_t es = get_field(s, m3);
1219    TCGv_i64 t0, t1;
1220
1221    if (es < ES_16 || es > ES_64) {
1222        gen_program_exception(s, PGM_SPECIFICATION);
1223        return DISAS_NORETURN;
1224    }
1225
1226    /* Probe write access before actually modifying memory */
1227    gen_helper_probe_write_access(cpu_env, o->addr1, tcg_constant_i64(16));
1228
1229    /* Begin with the two doublewords swapped... */
1230    t0 = tcg_temp_new_i64();
1231    t1 = tcg_temp_new_i64();
1232    read_vec_element_i64(t1,  get_field(s, v1), 0, ES_64);
1233    read_vec_element_i64(t0,  get_field(s, v1), 1, ES_64);
1234
1235    /* ... then swap smaller elements within the doublewords as required. */
1236    switch (es) {
1237    case MO_16:
1238        tcg_gen_hswap_i64(t1, t1);
1239        tcg_gen_hswap_i64(t0, t0);
1240        break;
1241    case MO_32:
1242        tcg_gen_wswap_i64(t1, t1);
1243        tcg_gen_wswap_i64(t0, t0);
1244        break;
1245    case MO_64:
1246        break;
1247    default:
1248        g_assert_not_reached();
1249    }
1250
1251    tcg_gen_qemu_st_i64(t0, o->addr1, get_mem_index(s), MO_TEUQ);
1252    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
1253    tcg_gen_qemu_st_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ);
1254
1255    tcg_temp_free(t0);
1256    tcg_temp_free(t1);
1257    return DISAS_NEXT;
1258}
1259
1260static DisasJumpType op_vstm(DisasContext *s, DisasOps *o)
1261{
1262    const uint8_t v3 = get_field(s, v3);
1263    uint8_t v1 = get_field(s, v1);
1264    TCGv_i64 tmp;
1265
1266    while (v3 < v1 || (v3 - v1 + 1) > 16) {
1267        gen_program_exception(s, PGM_SPECIFICATION);
1268        return DISAS_NORETURN;
1269    }
1270
1271    /* Probe write access before actually modifying memory */
1272    gen_helper_probe_write_access(cpu_env, o->addr1,
1273                                  tcg_constant_i64((v3 - v1 + 1) * 16));
1274
1275    tmp = tcg_temp_new_i64();
1276    for (;; v1++) {
1277        read_vec_element_i64(tmp, v1, 0, ES_64);
1278        tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEUQ);
1279        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
1280        read_vec_element_i64(tmp, v1, 1, ES_64);
1281        tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEUQ);
1282        if (v1 == v3) {
1283            break;
1284        }
1285        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
1286    }
1287    tcg_temp_free_i64(tmp);
1288    return DISAS_NEXT;
1289}
1290
1291static DisasJumpType op_vstl(DisasContext *s, DisasOps *o)
1292{
1293    const int v1_offs = vec_full_reg_offset(get_field(s, v1));
1294    TCGv_ptr a0 = tcg_temp_new_ptr();
1295
1296    /* convert highest index into an actual length */
1297    tcg_gen_addi_i64(o->in2, o->in2, 1);
1298    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
1299    gen_helper_vstl(cpu_env, a0, o->addr1, o->in2);
1300    tcg_temp_free_ptr(a0);
1301    return DISAS_NEXT;
1302}
1303
/*
 * VECTOR UNPACK *: widen each source element to the next larger element
 * size.  The "logical" variants (op2 0xd4/0xd5) zero-extend, the others
 * sign-extend.  Op2 0xd7/0xd5 take the leftmost half of v2 as source,
 * the others the rightmost half.
 */
static DisasJumpType op_vup(DisasContext *s, DisasOps *o)
{
    const bool logical = s->fields.op2 == 0xd4 || s->fields.op2 == 0xd5;
    const uint8_t v1 = get_field(s, v1);
    const uint8_t v2 = get_field(s, v2);
    const uint8_t src_es = get_field(s, m3);
    const uint8_t dst_es = src_es + 1;
    int dst_idx, src_idx;
    TCGv_i64 tmp;

    if (src_es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    if (s->fields.op2 == 0xd7 || s->fields.op2 == 0xd5) {
        /* iterate backwards to avoid overwriting data we might need later */
        for (dst_idx = NUM_VEC_ELEMENTS(dst_es) - 1; dst_idx >= 0; dst_idx--) {
            src_idx = dst_idx;
            read_vec_element_i64(tmp, v2, src_idx,
                                 src_es | (logical ? 0 : MO_SIGN));
            write_vec_element_i64(tmp, v1, dst_idx, dst_es);
        }

    } else {
        /* iterate forward to avoid overwriting data we might need later */
        for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) {
            src_idx = dst_idx + NUM_VEC_ELEMENTS(src_es) / 2;
            read_vec_element_i64(tmp, v2, src_idx,
                                 src_es | (logical ? 0 : MO_SIGN));
            write_vec_element_i64(tmp, v1, dst_idx, dst_es);
        }
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
1341
1342static DisasJumpType op_va(DisasContext *s, DisasOps *o)
1343{
1344    const uint8_t es = get_field(s, m4);
1345
1346    if (es > ES_128) {
1347        gen_program_exception(s, PGM_SPECIFICATION);
1348        return DISAS_NORETURN;
1349    } else if (es == ES_128) {
1350        gen_gvec128_3_i64(tcg_gen_add2_i64, get_field(s, v1),
1351                          get_field(s, v2), get_field(s, v3));
1352        return DISAS_NEXT;
1353    }
1354    gen_gvec_fn_3(add, es, get_field(s, v1), get_field(s, v2),
1355                  get_field(s, v3));
1356    return DISAS_NEXT;
1357}
1358
/*
 * For each element of size "es" packed into the i64 values a and b,
 * compute the carry out of the element-wise addition a + b and place it
 * (0 or 1) into the corresponding element of d.
 */
static void gen_acc(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, uint8_t es)
{
    const uint8_t msb_bit_nr = NUM_VEC_ELEMENT_BITS(es) - 1;
    TCGv_i64 msb_mask = tcg_constant_i64(dup_const(es, 1ull << msb_bit_nr));
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();
    TCGv_i64 t3 = tcg_temp_new_i64();

    /* Calculate the carry into the MSB, ignoring the old MSBs */
    tcg_gen_andc_i64(t1, a, msb_mask);
    tcg_gen_andc_i64(t2, b, msb_mask);
    tcg_gen_add_i64(t1, t1, t2);
    /* Calculate the MSB without any carry into it */
    tcg_gen_xor_i64(t3, a, b);
    /* Calculate the carry out of the MSB in the MSB bit position */
    tcg_gen_and_i64(d, a, b);
    tcg_gen_and_i64(t1, t1, t3);
    tcg_gen_or_i64(d, d, t1);
    /* Isolate and shift the carry into position */
    tcg_gen_and_i64(d, d, msb_mask);
    tcg_gen_shri_i64(d, d, msb_bit_nr);

    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(t2);
    tcg_temp_free_i64(t3);
}
1385
/* Per-byte add-compute-carry on an i64 of packed elements. */
static void gen_acc8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    gen_acc(d, a, b, ES_8);
}
1390
/* Per-halfword add-compute-carry on an i64 of packed elements. */
static void gen_acc16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    gen_acc(d, a, b, ES_16);
}
1395
1396static void gen_acc_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1397{
1398    TCGv_i32 t = tcg_temp_new_i32();
1399
1400    tcg_gen_add_i32(t, a, b);
1401    tcg_gen_setcond_i32(TCG_COND_LTU, d, t, b);
1402    tcg_temp_free_i32(t);
1403}
1404
1405static void gen_acc_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1406{
1407    TCGv_i64 t = tcg_temp_new_i64();
1408
1409    tcg_gen_add_i64(t, a, b);
1410    tcg_gen_setcond_i64(TCG_COND_LTU, d, t, b);
1411    tcg_temp_free_i64(t);
1412}
1413
/*
 * 128-bit add-compute-carry: d = carry out of the 128-bit addition
 * a + b, i.e. dl becomes 0 or 1 and dh becomes 0.  All 128-bit values
 * are split into low/high i64 halves.
 */
static void gen_acc2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
                         TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
{
    TCGv_i64 th = tcg_temp_new_i64();
    TCGv_i64 tl = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);

    /* th = carry out of al + bl */
    tcg_gen_add2_i64(tl, th, al, zero, bl, zero);
    /* propagate the low carry through the high halves... */
    tcg_gen_add2_i64(tl, th, th, zero, ah, zero);
    /* ...dl = final carry out of ah + bh + carry(al + bl) */
    tcg_gen_add2_i64(tl, dl, tl, th, bh, zero);
    tcg_gen_mov_i64(dh, zero);

    tcg_temp_free_i64(th);
    tcg_temp_free_i64(tl);
}
1429
1430static DisasJumpType op_vacc(DisasContext *s, DisasOps *o)
1431{
1432    const uint8_t es = get_field(s, m4);
1433    static const GVecGen3 g[4] = {
1434        { .fni8 = gen_acc8_i64, },
1435        { .fni8 = gen_acc16_i64, },
1436        { .fni4 = gen_acc_i32, },
1437        { .fni8 = gen_acc_i64, },
1438    };
1439
1440    if (es > ES_128) {
1441        gen_program_exception(s, PGM_SPECIFICATION);
1442        return DISAS_NORETURN;
1443    } else if (es == ES_128) {
1444        gen_gvec128_3_i64(gen_acc2_i64, get_field(s, v1),
1445                          get_field(s, v2), get_field(s, v3));
1446        return DISAS_NEXT;
1447    }
1448    gen_gvec_3(get_field(s, v1), get_field(s, v2),
1449               get_field(s, v3), &g[es]);
1450    return DISAS_NEXT;
1451}
1452
/*
 * 128-bit add with carry: d = a + b + (bit 0 of cl).  Only the least
 * significant bit of the carry input participates; ch is ignored.
 */
static void gen_ac2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
                        TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
{
    TCGv_i64 tl = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);

    /* extract the carry only */
    tcg_gen_extract_i64(tl, cl, 0, 1);
    tcg_gen_add2_i64(dl, dh, al, ah, bl, bh);
    tcg_gen_add2_i64(dl, dh, dl, dh, tl, zero);

    tcg_temp_free_i64(tl);
}
1466
1467static DisasJumpType op_vac(DisasContext *s, DisasOps *o)
1468{
1469    if (get_field(s, m5) != ES_128) {
1470        gen_program_exception(s, PGM_SPECIFICATION);
1471        return DISAS_NORETURN;
1472    }
1473
1474    gen_gvec128_4_i64(gen_ac2_i64, get_field(s, v1),
1475                      get_field(s, v2), get_field(s, v3),
1476                      get_field(s, v4));
1477    return DISAS_NEXT;
1478}
1479
/*
 * 128-bit add-with-carry compute-carry: d = carry out of
 * a + b + (bit 0 of cl), i.e. dl becomes 0 or 1 and dh becomes 0.
 */
static void gen_accc2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
                          TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
{
    TCGv_i64 tl = tcg_temp_new_i64();
    TCGv_i64 th = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);

    /* carry-in plus the low halves; th accumulates the low carry */
    tcg_gen_andi_i64(tl, cl, 1);
    tcg_gen_add2_i64(tl, th, tl, zero, al, zero);
    tcg_gen_add2_i64(tl, th, tl, th, bl, zero);
    /* propagate through the high halves; dl = final carry out */
    tcg_gen_add2_i64(tl, th, th, zero, ah, zero);
    tcg_gen_add2_i64(tl, dl, tl, th, bh, zero);
    tcg_gen_mov_i64(dh, zero);

    tcg_temp_free_i64(tl);
    tcg_temp_free_i64(th);
}
1497
1498static DisasJumpType op_vaccc(DisasContext *s, DisasOps *o)
1499{
1500    if (get_field(s, m5) != ES_128) {
1501        gen_program_exception(s, PGM_SPECIFICATION);
1502        return DISAS_NORETURN;
1503    }
1504
1505    gen_gvec128_4_i64(gen_accc2_i64, get_field(s, v1),
1506                      get_field(s, v2), get_field(s, v3),
1507                      get_field(s, v4));
1508    return DISAS_NEXT;
1509}
1510
1511static DisasJumpType op_vn(DisasContext *s, DisasOps *o)
1512{
1513    gen_gvec_fn_3(and, ES_8, get_field(s, v1), get_field(s, v2),
1514                  get_field(s, v3));
1515    return DISAS_NEXT;
1516}
1517
1518static DisasJumpType op_vnc(DisasContext *s, DisasOps *o)
1519{
1520    gen_gvec_fn_3(andc, ES_8, get_field(s, v1),
1521                  get_field(s, v2), get_field(s, v3));
1522    return DISAS_NEXT;
1523}
1524
1525static void gen_avg_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1526{
1527    TCGv_i64 t0 = tcg_temp_new_i64();
1528    TCGv_i64 t1 = tcg_temp_new_i64();
1529
1530    tcg_gen_ext_i32_i64(t0, a);
1531    tcg_gen_ext_i32_i64(t1, b);
1532    tcg_gen_add_i64(t0, t0, t1);
1533    tcg_gen_addi_i64(t0, t0, 1);
1534    tcg_gen_shri_i64(t0, t0, 1);
1535    tcg_gen_extrl_i64_i32(d, t0);
1536
1537    tcg_temp_free(t0);
1538    tcg_temp_free(t1);
1539}
1540
/*
 * Signed 64-bit average with rounding: dl = (al + bl + 1) >> 1, computed
 * with one extra high bit so the intermediate sum cannot overflow.
 */
static void gen_avg_i64(TCGv_i64 dl, TCGv_i64 al, TCGv_i64 bl)
{
    TCGv_i64 dh = tcg_temp_new_i64();
    TCGv_i64 ah = tcg_temp_new_i64();
    TCGv_i64 bh = tcg_temp_new_i64();

    /* extending the sign by one bit is sufficient */
    tcg_gen_extract_i64(ah, al, 63, 1);
    tcg_gen_extract_i64(bh, bl, 63, 1);
    tcg_gen_add2_i64(dl, dh, al, ah, bl, bh);
    gen_addi2_i64(dl, dh, dl, dh, 1);
    /* shift the 65-bit sum right by one: bits 1..64 form the result */
    tcg_gen_extract2_i64(dl, dl, dh, 1);

    tcg_temp_free_i64(dh);
    tcg_temp_free_i64(ah);
    tcg_temp_free_i64(bh);
}
1558
1559static DisasJumpType op_vavg(DisasContext *s, DisasOps *o)
1560{
1561    const uint8_t es = get_field(s, m4);
1562    static const GVecGen3 g[4] = {
1563        { .fno = gen_helper_gvec_vavg8, },
1564        { .fno = gen_helper_gvec_vavg16, },
1565        { .fni4 = gen_avg_i32, },
1566        { .fni8 = gen_avg_i64, },
1567    };
1568
1569    if (es > ES_64) {
1570        gen_program_exception(s, PGM_SPECIFICATION);
1571        return DISAS_NORETURN;
1572    }
1573    gen_gvec_3(get_field(s, v1), get_field(s, v2),
1574               get_field(s, v3), &g[es]);
1575    return DISAS_NEXT;
1576}
1577
1578static void gen_avgl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1579{
1580    TCGv_i64 t0 = tcg_temp_new_i64();
1581    TCGv_i64 t1 = tcg_temp_new_i64();
1582
1583    tcg_gen_extu_i32_i64(t0, a);
1584    tcg_gen_extu_i32_i64(t1, b);
1585    tcg_gen_add_i64(t0, t0, t1);
1586    tcg_gen_addi_i64(t0, t0, 1);
1587    tcg_gen_shri_i64(t0, t0, 1);
1588    tcg_gen_extrl_i64_i32(d, t0);
1589
1590    tcg_temp_free(t0);
1591    tcg_temp_free(t1);
1592}
1593
/*
 * Unsigned 64-bit average with rounding: dl = (al + bl + 1) >> 1,
 * computed with an extra carry bit so the sum cannot overflow.
 */
static void gen_avgl_i64(TCGv_i64 dl, TCGv_i64 al, TCGv_i64 bl)
{
    TCGv_i64 dh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);

    tcg_gen_add2_i64(dl, dh, al, zero, bl, zero);
    gen_addi2_i64(dl, dh, dl, dh, 1);
    /* shift the 65-bit sum right by one: bits 1..64 form the result */
    tcg_gen_extract2_i64(dl, dl, dh, 1);

    tcg_temp_free_i64(dh);
}
1605
1606static DisasJumpType op_vavgl(DisasContext *s, DisasOps *o)
1607{
1608    const uint8_t es = get_field(s, m4);
1609    static const GVecGen3 g[4] = {
1610        { .fno = gen_helper_gvec_vavgl8, },
1611        { .fno = gen_helper_gvec_vavgl16, },
1612        { .fni4 = gen_avgl_i32, },
1613        { .fni8 = gen_avgl_i64, },
1614    };
1615
1616    if (es > ES_64) {
1617        gen_program_exception(s, PGM_SPECIFICATION);
1618        return DISAS_NORETURN;
1619    }
1620    gen_gvec_3(get_field(s, v1), get_field(s, v2),
1621               get_field(s, v3), &g[es]);
1622    return DISAS_NEXT;
1623}
1624
/*
 * VECTOR CHECKSUM: 32-bit end-around-carry sum of the four word
 * elements of v2, seeded with word 1 of v3.  The result is placed into
 * word 1 of v1 and all other elements of v1 are cleared.
 */
static DisasJumpType op_vcksm(DisasContext *s, DisasOps *o)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    TCGv_i32 sum = tcg_temp_new_i32();
    int i;

    read_vec_element_i32(sum, get_field(s, v3), 1, ES_32);
    for (i = 0; i < 4; i++) {
        read_vec_element_i32(tmp, get_field(s, v2), i, ES_32);
        /* sum += tmp, with the carry out added back in (end-around) */
        tcg_gen_add2_i32(tmp, sum, sum, sum, tmp, tmp);
    }
    gen_gvec_dup_imm(ES_32, get_field(s, v1), 0);
    write_vec_element_i32(sum, get_field(s, v1), 1, ES_32);

    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(sum);
    return DISAS_NEXT;
}
1643
/*
 * VECTOR ELEMENT COMPARE (LOGICAL): load the compared elements of v1
 * and v2 into in1/in2; the actual comparison and CC computation are
 * generated by common code.  Op2 0xdb uses signed elements, otherwise
 * the elements are treated as unsigned.
 */
static DisasJumpType op_vec(DisasContext *s, DisasOps *o)
{
    uint8_t es = get_field(s, m3);
    /* the rightmost element of the leftmost doubleword is compared */
    const uint8_t enr = NUM_VEC_ELEMENTS(es) / 2 - 1;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }
    if (s->fields.op2 == 0xdb) {
        es |= MO_SIGN;
    }

    o->in1 = tcg_temp_new_i64();
    o->in2 = tcg_temp_new_i64();
    read_vec_element_i64(o->in1, get_field(s, v1), enr, es);
    read_vec_element_i64(o->in2, get_field(s, v2), enr, es);
    return DISAS_NEXT;
}
1663
/*
 * VECTOR COMPARE *: per-element compare of v2 against v3; elements for
 * which the condition (taken from insn->data) holds become all-ones in
 * v1, the others zero.  If bit 0 of m5 is set, additionally derive the
 * CC from the resulting mask.
 */
static DisasJumpType op_vc(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m4);
    TCGCond cond = s->insn->data;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tcg_gen_gvec_cmp(cond, es,
                     vec_full_reg_offset(get_field(s, v1)),
                     vec_full_reg_offset(get_field(s, v2)),
                     vec_full_reg_offset(get_field(s, v3)), 16, 16);
    if (get_field(s, m5) & 0x1) {
        /* compute the CC from both halves of the result mask */
        TCGv_i64 low = tcg_temp_new_i64();
        TCGv_i64 high = tcg_temp_new_i64();

        read_vec_element_i64(high, get_field(s, v1), 0, ES_64);
        read_vec_element_i64(low, get_field(s, v1), 1, ES_64);
        gen_op_update2_cc_i64(s, CC_OP_VC, low, high);

        tcg_temp_free_i64(low);
        tcg_temp_free_i64(high);
    }
    return DISAS_NEXT;
}
1691
/* 32-bit count-leading-zeros; the third argument is the result for 0. */
static void gen_clz_i32(TCGv_i32 d, TCGv_i32 a)
{
    tcg_gen_clzi_i32(d, a, 32);
}
1696
/* 64-bit count-leading-zeros; the third argument is the result for 0. */
static void gen_clz_i64(TCGv_i64 d, TCGv_i64 a)
{
    tcg_gen_clzi_i64(d, a, 64);
}
1701
1702static DisasJumpType op_vclz(DisasContext *s, DisasOps *o)
1703{
1704    const uint8_t es = get_field(s, m3);
1705    static const GVecGen2 g[4] = {
1706        { .fno = gen_helper_gvec_vclz8, },
1707        { .fno = gen_helper_gvec_vclz16, },
1708        { .fni4 = gen_clz_i32, },
1709        { .fni8 = gen_clz_i64, },
1710    };
1711
1712    if (es > ES_64) {
1713        gen_program_exception(s, PGM_SPECIFICATION);
1714        return DISAS_NORETURN;
1715    }
1716    gen_gvec_2(get_field(s, v1), get_field(s, v2), &g[es]);
1717    return DISAS_NEXT;
1718}
1719
/* 32-bit count-trailing-zeros; the third argument is the result for 0. */
static void gen_ctz_i32(TCGv_i32 d, TCGv_i32 a)
{
    tcg_gen_ctzi_i32(d, a, 32);
}
1724
/* 64-bit count-trailing-zeros; the third argument is the result for 0. */
static void gen_ctz_i64(TCGv_i64 d, TCGv_i64 a)
{
    tcg_gen_ctzi_i64(d, a, 64);
}
1729
1730static DisasJumpType op_vctz(DisasContext *s, DisasOps *o)
1731{
1732    const uint8_t es = get_field(s, m3);
1733    static const GVecGen2 g[4] = {
1734        { .fno = gen_helper_gvec_vctz8, },
1735        { .fno = gen_helper_gvec_vctz16, },
1736        { .fni4 = gen_ctz_i32, },
1737        { .fni8 = gen_ctz_i64, },
1738    };
1739
1740    if (es > ES_64) {
1741        gen_program_exception(s, PGM_SPECIFICATION);
1742        return DISAS_NORETURN;
1743    }
1744    gen_gvec_2(get_field(s, v1), get_field(s, v2), &g[es]);
1745    return DISAS_NEXT;
1746}
1747
1748static DisasJumpType op_vx(DisasContext *s, DisasOps *o)
1749{
1750    gen_gvec_fn_3(xor, ES_8, get_field(s, v1), get_field(s, v2),
1751                 get_field(s, v3));
1752    return DISAS_NEXT;
1753}
1754
1755static DisasJumpType op_vgfm(DisasContext *s, DisasOps *o)
1756{
1757    const uint8_t es = get_field(s, m4);
1758    static const GVecGen3 g[4] = {
1759        { .fno = gen_helper_gvec_vgfm8, },
1760        { .fno = gen_helper_gvec_vgfm16, },
1761        { .fno = gen_helper_gvec_vgfm32, },
1762        { .fno = gen_helper_gvec_vgfm64, },
1763    };
1764
1765    if (es > ES_64) {
1766        gen_program_exception(s, PGM_SPECIFICATION);
1767        return DISAS_NORETURN;
1768    }
1769    gen_gvec_3(get_field(s, v1), get_field(s, v2),
1770               get_field(s, v3), &g[es]);
1771    return DISAS_NEXT;
1772}
1773
1774static DisasJumpType op_vgfma(DisasContext *s, DisasOps *o)
1775{
1776    const uint8_t es = get_field(s, m5);
1777    static const GVecGen4 g[4] = {
1778        { .fno = gen_helper_gvec_vgfma8, },
1779        { .fno = gen_helper_gvec_vgfma16, },
1780        { .fno = gen_helper_gvec_vgfma32, },
1781        { .fno = gen_helper_gvec_vgfma64, },
1782    };
1783
1784    if (es > ES_64) {
1785        gen_program_exception(s, PGM_SPECIFICATION);
1786        return DISAS_NORETURN;
1787    }
1788    gen_gvec_4(get_field(s, v1), get_field(s, v2),
1789               get_field(s, v3), get_field(s, v4), &g[es]);
1790    return DISAS_NEXT;
1791}
1792
1793static DisasJumpType op_vlc(DisasContext *s, DisasOps *o)
1794{
1795    const uint8_t es = get_field(s, m3);
1796
1797    if (es > ES_64) {
1798        gen_program_exception(s, PGM_SPECIFICATION);
1799        return DISAS_NORETURN;
1800    }
1801
1802    gen_gvec_fn_2(neg, es, get_field(s, v1), get_field(s, v2));
1803    return DISAS_NEXT;
1804}
1805
1806static DisasJumpType op_vlp(DisasContext *s, DisasOps *o)
1807{
1808    const uint8_t es = get_field(s, m3);
1809
1810    if (es > ES_64) {
1811        gen_program_exception(s, PGM_SPECIFICATION);
1812        return DISAS_NORETURN;
1813    }
1814
1815    gen_gvec_fn_2(abs, es, get_field(s, v1), get_field(s, v2));
1816    return DISAS_NEXT;
1817}
1818
1819static DisasJumpType op_vmx(DisasContext *s, DisasOps *o)
1820{
1821    const uint8_t v1 = get_field(s, v1);
1822    const uint8_t v2 = get_field(s, v2);
1823    const uint8_t v3 = get_field(s, v3);
1824    const uint8_t es = get_field(s, m4);
1825
1826    if (es > ES_64) {
1827        gen_program_exception(s, PGM_SPECIFICATION);
1828        return DISAS_NORETURN;
1829    }
1830
1831    switch (s->fields.op2) {
1832    case 0xff:
1833        gen_gvec_fn_3(smax, es, v1, v2, v3);
1834        break;
1835    case 0xfd:
1836        gen_gvec_fn_3(umax, es, v1, v2, v3);
1837        break;
1838    case 0xfe:
1839        gen_gvec_fn_3(smin, es, v1, v2, v3);
1840        break;
1841    case 0xfc:
1842        gen_gvec_fn_3(umin, es, v1, v2, v3);
1843        break;
1844    default:
1845        g_assert_not_reached();
1846    }
1847    return DISAS_NEXT;
1848}
1849
1850static void gen_mal_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c)
1851{
1852    TCGv_i32 t0 = tcg_temp_new_i32();
1853
1854    tcg_gen_mul_i32(t0, a, b);
1855    tcg_gen_add_i32(d, t0, c);
1856
1857    tcg_temp_free_i32(t0);
1858}
1859
1860static void gen_mah_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c)
1861{
1862    TCGv_i64 t0 = tcg_temp_new_i64();
1863    TCGv_i64 t1 = tcg_temp_new_i64();
1864    TCGv_i64 t2 = tcg_temp_new_i64();
1865
1866    tcg_gen_ext_i32_i64(t0, a);
1867    tcg_gen_ext_i32_i64(t1, b);
1868    tcg_gen_ext_i32_i64(t2, c);
1869    tcg_gen_mul_i64(t0, t0, t1);
1870    tcg_gen_add_i64(t0, t0, t2);
1871    tcg_gen_extrh_i64_i32(d, t0);
1872
1873    tcg_temp_free(t0);
1874    tcg_temp_free(t1);
1875    tcg_temp_free(t2);
1876}
1877
1878static void gen_malh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c)
1879{
1880    TCGv_i64 t0 = tcg_temp_new_i64();
1881    TCGv_i64 t1 = tcg_temp_new_i64();
1882    TCGv_i64 t2 = tcg_temp_new_i64();
1883
1884    tcg_gen_extu_i32_i64(t0, a);
1885    tcg_gen_extu_i32_i64(t1, b);
1886    tcg_gen_extu_i32_i64(t2, c);
1887    tcg_gen_mul_i64(t0, t0, t1);
1888    tcg_gen_add_i64(t0, t0, t2);
1889    tcg_gen_extrh_i64_i32(d, t0);
1890
1891    tcg_temp_free(t0);
1892    tcg_temp_free(t1);
1893    tcg_temp_free(t2);
1894}
1895
/*
 * VECTOR MULTIPLY AND ADD *: pick the expansion table matching the
 * opcode, then expand for the given element size.  Each table is
 * indexed by the element size (ES_8..ES_32); 32-bit low/high variants
 * are expanded inline, everything else goes out of line.
 */
static DisasJumpType op_vma(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m5);
    static const GVecGen4 g_vmal[3] = {
        { .fno = gen_helper_gvec_vmal8, },
        { .fno = gen_helper_gvec_vmal16, },
        { .fni4 = gen_mal_i32, },
    };
    static const GVecGen4 g_vmah[3] = {
        { .fno = gen_helper_gvec_vmah8, },
        { .fno = gen_helper_gvec_vmah16, },
        { .fni4 = gen_mah_i32, },
    };
    static const GVecGen4 g_vmalh[3] = {
        { .fno = gen_helper_gvec_vmalh8, },
        { .fno = gen_helper_gvec_vmalh16, },
        { .fni4 = gen_malh_i32, },
    };
    static const GVecGen4 g_vmae[3] = {
        { .fno = gen_helper_gvec_vmae8, },
        { .fno = gen_helper_gvec_vmae16, },
        { .fno = gen_helper_gvec_vmae32, },
    };
    static const GVecGen4 g_vmale[3] = {
        { .fno = gen_helper_gvec_vmale8, },
        { .fno = gen_helper_gvec_vmale16, },
        { .fno = gen_helper_gvec_vmale32, },
    };
    static const GVecGen4 g_vmao[3] = {
        { .fno = gen_helper_gvec_vmao8, },
        { .fno = gen_helper_gvec_vmao16, },
        { .fno = gen_helper_gvec_vmao32, },
    };
    static const GVecGen4 g_vmalo[3] = {
        { .fno = gen_helper_gvec_vmalo8, },
        { .fno = gen_helper_gvec_vmalo16, },
        { .fno = gen_helper_gvec_vmalo32, },
    };
    const GVecGen4 *fn;

    if (es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    switch (s->fields.op2) {
    case 0xaa:
        /* multiply and add low */
        fn = &g_vmal[es];
        break;
    case 0xab:
        /* multiply and add high */
        fn = &g_vmah[es];
        break;
    case 0xa9:
        /* multiply and add logical high */
        fn = &g_vmalh[es];
        break;
    case 0xae:
        /* multiply and add even */
        fn = &g_vmae[es];
        break;
    case 0xac:
        /* multiply and add logical even */
        fn = &g_vmale[es];
        break;
    case 0xaf:
        /* multiply and add odd */
        fn = &g_vmao[es];
        break;
    case 0xad:
        /* multiply and add logical odd */
        fn = &g_vmalo[es];
        break;
    default:
        g_assert_not_reached();
    }

    gen_gvec_4(get_field(s, v1), get_field(s, v2),
               get_field(s, v3), get_field(s, v4), fn);
    return DISAS_NEXT;
}
1971
1972static void gen_mh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1973{
1974    TCGv_i32 t = tcg_temp_new_i32();
1975
1976    tcg_gen_muls2_i32(t, d, a, b);
1977    tcg_temp_free_i32(t);
1978}
1979
1980static void gen_mlh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1981{
1982    TCGv_i32 t = tcg_temp_new_i32();
1983
1984    tcg_gen_mulu2_i32(t, d, a, b);
1985    tcg_temp_free_i32(t);
1986}
1987
static DisasJumpType op_vm(DisasContext *s, DisasOps *o)
{
    /*
     * Vector multiply family: dispatch on the opcode to one of the
     * VML/VMH/VMLH/VME/VMLE/VMO/VMLO expanders.  Plain multiply (0xa2)
     * maps directly onto the generic gvec "mul"; the rest use per-size
     * tables, with the 32-bit high variants expanded inline.
     */
    const uint8_t es = get_field(s, m4);
    static const GVecGen3 g_vmh[3] = {
        { .fno = gen_helper_gvec_vmh8, },
        { .fno = gen_helper_gvec_vmh16, },
        { .fni4 = gen_mh_i32, },
    };
    static const GVecGen3 g_vmlh[3] = {
        { .fno = gen_helper_gvec_vmlh8, },
        { .fno = gen_helper_gvec_vmlh16, },
        { .fni4 = gen_mlh_i32, },
    };
    static const GVecGen3 g_vme[3] = {
        { .fno = gen_helper_gvec_vme8, },
        { .fno = gen_helper_gvec_vme16, },
        { .fno = gen_helper_gvec_vme32, },
    };
    static const GVecGen3 g_vmle[3] = {
        { .fno = gen_helper_gvec_vmle8, },
        { .fno = gen_helper_gvec_vmle16, },
        { .fno = gen_helper_gvec_vmle32, },
    };
    static const GVecGen3 g_vmo[3] = {
        { .fno = gen_helper_gvec_vmo8, },
        { .fno = gen_helper_gvec_vmo16, },
        { .fno = gen_helper_gvec_vmo32, },
    };
    static const GVecGen3 g_vmlo[3] = {
        { .fno = gen_helper_gvec_vmlo8, },
        { .fno = gen_helper_gvec_vmlo16, },
        { .fno = gen_helper_gvec_vmlo32, },
    };
    const GVecGen3 *fn;

    /* Only byte, halfword and word element sizes are supported. */
    if (es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    switch (s->fields.op2) {
    case 0xa2:
        /* Low multiply == plain modular multiply; use generic gvec. */
        gen_gvec_fn_3(mul, es, get_field(s, v1),
                      get_field(s, v2), get_field(s, v3));
        return DISAS_NEXT;
    case 0xa3:
        fn = &g_vmh[es];
        break;
    case 0xa1:
        fn = &g_vmlh[es];
        break;
    case 0xa6:
        fn = &g_vme[es];
        break;
    case 0xa4:
        fn = &g_vmle[es];
        break;
    case 0xa7:
        fn = &g_vmo[es];
        break;
    case 0xa5:
        fn = &g_vmlo[es];
        break;
    default:
        g_assert_not_reached();
    }

    gen_gvec_3(get_field(s, v1), get_field(s, v2),
               get_field(s, v3), fn);
    return DISAS_NEXT;
}
2059
static DisasJumpType op_vmsl(DisasContext *s, DisasOps *o)
{
    /*
     * VECTOR MULTIPLY SUM LOGICAL: form two 64x64->128 unsigned products
     * from the even and odd doublewords of v2/v3, optionally double each
     * product (m6 bits), then sum both products and the 128-bit value in
     * v4 into v1.  Only the doubleword element size is valid.
     */
    TCGv_i64 l1, h1, l2, h2;

    if (get_field(s, m5) != ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    l1 = tcg_temp_new_i64();
    h1 = tcg_temp_new_i64();
    l2 = tcg_temp_new_i64();
    h2 = tcg_temp_new_i64();

    /* Multiply both even elements from v2 and v3 */
    read_vec_element_i64(l1, get_field(s, v2), 0, ES_64);
    read_vec_element_i64(h1, get_field(s, v3), 0, ES_64);
    tcg_gen_mulu2_i64(l1, h1, l1, h1);
    /* Shift result left by one (x2) if requested */
    if (extract32(get_field(s, m6), 3, 1)) {
        /* Doubling == adding the 128-bit value to itself. */
        tcg_gen_add2_i64(l1, h1, l1, h1, l1, h1);
    }

    /* Multiply both odd elements from v2 and v3 */
    read_vec_element_i64(l2, get_field(s, v2), 1, ES_64);
    read_vec_element_i64(h2, get_field(s, v3), 1, ES_64);
    tcg_gen_mulu2_i64(l2, h2, l2, h2);
    /* Shift result left by one (x2) if requested */
    if (extract32(get_field(s, m6), 2, 1)) {
        tcg_gen_add2_i64(l2, h2, l2, h2, l2, h2);
    }

    /* Add both intermediate results */
    tcg_gen_add2_i64(l1, h1, l1, h1, l2, h2);
    /* Add whole v4 */
    read_vec_element_i64(h2, get_field(s, v4), 0, ES_64);
    read_vec_element_i64(l2, get_field(s, v4), 1, ES_64);
    tcg_gen_add2_i64(l1, h1, l1, h1, l2, h2);

    /* Store final result into v1. */
    write_vec_element_i64(h1, get_field(s, v1), 0, ES_64);
    write_vec_element_i64(l1, get_field(s, v1), 1, ES_64);

    tcg_temp_free_i64(l1);
    tcg_temp_free_i64(h1);
    tcg_temp_free_i64(l2);
    tcg_temp_free_i64(h2);
    return DISAS_NEXT;
}
2109
2110static DisasJumpType op_vnn(DisasContext *s, DisasOps *o)
2111{
2112    gen_gvec_fn_3(nand, ES_8, get_field(s, v1),
2113                  get_field(s, v2), get_field(s, v3));
2114    return DISAS_NEXT;
2115}
2116
2117static DisasJumpType op_vno(DisasContext *s, DisasOps *o)
2118{
2119    gen_gvec_fn_3(nor, ES_8, get_field(s, v1), get_field(s, v2),
2120                  get_field(s, v3));
2121    return DISAS_NEXT;
2122}
2123
2124static DisasJumpType op_vnx(DisasContext *s, DisasOps *o)
2125{
2126    gen_gvec_fn_3(eqv, ES_8, get_field(s, v1), get_field(s, v2),
2127                  get_field(s, v3));
2128    return DISAS_NEXT;
2129}
2130
2131static DisasJumpType op_vo(DisasContext *s, DisasOps *o)
2132{
2133    gen_gvec_fn_3(or, ES_8, get_field(s, v1), get_field(s, v2),
2134                  get_field(s, v3));
2135    return DISAS_NEXT;
2136}
2137
2138static DisasJumpType op_voc(DisasContext *s, DisasOps *o)
2139{
2140    gen_gvec_fn_3(orc, ES_8, get_field(s, v1), get_field(s, v2),
2141                  get_field(s, v3));
2142    return DISAS_NEXT;
2143}
2144
2145static DisasJumpType op_vpopct(DisasContext *s, DisasOps *o)
2146{
2147    const uint8_t es = get_field(s, m3);
2148    static const GVecGen2 g[4] = {
2149        { .fno = gen_helper_gvec_vpopct8, },
2150        { .fno = gen_helper_gvec_vpopct16, },
2151        { .fni4 = tcg_gen_ctpop_i32, },
2152        { .fni8 = tcg_gen_ctpop_i64, },
2153    };
2154
2155    if (es > ES_64 || (es != ES_8 && !s390_has_feat(S390_FEAT_VECTOR_ENH))) {
2156        gen_program_exception(s, PGM_SPECIFICATION);
2157        return DISAS_NORETURN;
2158    }
2159
2160    gen_gvec_2(get_field(s, v1), get_field(s, v2), &g[es]);
2161    return DISAS_NEXT;
2162}
2163
2164static void gen_rim_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, int32_t c)
2165{
2166    TCGv_i32 t = tcg_temp_new_i32();
2167
2168    tcg_gen_rotli_i32(t, a, c & 31);
2169    tcg_gen_and_i32(t, t, b);
2170    tcg_gen_andc_i32(d, d, b);
2171    tcg_gen_or_i32(d, d, t);
2172
2173    tcg_temp_free_i32(t);
2174}
2175
2176static void gen_rim_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, int64_t c)
2177{
2178    TCGv_i64 t = tcg_temp_new_i64();
2179
2180    tcg_gen_rotli_i64(t, a, c & 63);
2181    tcg_gen_and_i64(t, t, b);
2182    tcg_gen_andc_i64(d, d, b);
2183    tcg_gen_or_i64(d, d, t);
2184
2185    tcg_temp_free_i64(t);
2186}
2187
static DisasJumpType op_verim(DisasContext *s, DisasOps *o)
{
    /*
     * VECTOR ELEMENT ROTATE AND INSERT UNDER MASK: rotate each element of
     * v2 by i4 and insert the bits selected by the mask in v3 into v1.
     * The rotate count is reduced modulo the element width up front.
     */
    const uint8_t es = get_field(s, m5);
    const uint8_t i4 = get_field(s, i4) &
                       (NUM_VEC_ELEMENT_BITS(es) - 1);
    static const GVecGen3i g[4] = {
        { .fno = gen_helper_gvec_verim8, },
        { .fno = gen_helper_gvec_verim16, },
        /* 32/64-bit elements: inline expansion reading the old destination. */
        { .fni4 = gen_rim_i32,
          .load_dest = true, },
        { .fni8 = gen_rim_i64,
          .load_dest = true, },
    };

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    gen_gvec_3i(get_field(s, v1), get_field(s, v2),
                get_field(s, v3), i4, &g[es]);
    return DISAS_NEXT;
}
2211
2212static DisasJumpType op_vesv(DisasContext *s, DisasOps *o)
2213{
2214    const uint8_t es = get_field(s, m4);
2215    const uint8_t v1 = get_field(s, v1);
2216    const uint8_t v2 = get_field(s, v2);
2217    const uint8_t v3 = get_field(s, v3);
2218
2219    if (es > ES_64) {
2220        gen_program_exception(s, PGM_SPECIFICATION);
2221        return DISAS_NORETURN;
2222    }
2223
2224    switch (s->fields.op2) {
2225    case 0x70:
2226        gen_gvec_fn_3(shlv, es, v1, v2, v3);
2227        break;
2228    case 0x73:
2229        gen_gvec_fn_3(rotlv, es, v1, v2, v3);
2230        break;
2231    case 0x7a:
2232        gen_gvec_fn_3(sarv, es, v1, v2, v3);
2233        break;
2234    case 0x78:
2235        gen_gvec_fn_3(shrv, es, v1, v2, v3);
2236        break;
2237    default:
2238        g_assert_not_reached();
2239    }
2240    return DISAS_NEXT;
2241}
2242
static DisasJumpType op_ves(DisasContext *s, DisasOps *o)
{
    /*
     * Vector element shift/rotate with a single count for all elements.
     * When no base register is given, the count is a translation-time
     * constant (d2); otherwise it is computed at runtime from the
     * effective address.  Counts are reduced modulo the element width.
     */
    const uint8_t es = get_field(s, m4);
    const uint8_t d2 = get_field(s, d2) &
                       (NUM_VEC_ELEMENT_BITS(es) - 1);
    const uint8_t v1 = get_field(s, v1);
    const uint8_t v3 = get_field(s, v3);
    TCGv_i32 shift;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    if (likely(!get_field(s, b2))) {
        /* Immediate count: use the *i gvec expanders. */
        switch (s->fields.op2) {
        case 0x30:
            gen_gvec_fn_2i(shli, es, v1, v3, d2);
            break;
        case 0x33:
            gen_gvec_fn_2i(rotli, es, v1, v3, d2);
            break;
        case 0x3a:
            gen_gvec_fn_2i(sari, es, v1, v3, d2);
            break;
        case 0x38:
            gen_gvec_fn_2i(shri, es, v1, v3, d2);
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        /* Runtime count: low bits of the computed address, via *s expanders. */
        shift = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(shift, o->addr1);
        tcg_gen_andi_i32(shift, shift, NUM_VEC_ELEMENT_BITS(es) - 1);
        switch (s->fields.op2) {
        case 0x30:
            gen_gvec_fn_2s(shls, es, v1, v3, shift);
            break;
        case 0x33:
            gen_gvec_fn_2s(rotls, es, v1, v3, shift);
            break;
        case 0x3a:
            gen_gvec_fn_2s(sars, es, v1, v3, shift);
            break;
        case 0x38:
            gen_gvec_fn_2s(shrs, es, v1, v3, shift);
            break;
        default:
            g_assert_not_reached();
        }
        tcg_temp_free_i32(shift);
    }
    return DISAS_NEXT;
}
2298
/*
 * Common expansion for whole-vector shifts (VSL/VSRA/VSRL and their *B
 * by-byte forms).  With vector-enhancements 2 the bit-shift form goes to
 * a dedicated 3-operand helper; otherwise the shift count is extracted
 * from byte 7 of v3 and passed as an immediate-style scalar to the
 * 2-operand helper.  insn->data distinguishes the by-byte variant.
 */
static DisasJumpType gen_vsh_by_byte(DisasContext *s, DisasOps *o,
                                      gen_helper_gvec_2i *gen,
                                      gen_helper_gvec_3 *gen_ve2)
{
    bool byte = s->insn->data;

    if (!byte && s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
        gen_gvec_3_ool(get_field(s, v1), get_field(s, v2),
                       get_field(s, v3), 0, gen_ve2);
    } else {
        TCGv_i64 shift = tcg_temp_new_i64();

        read_vec_element_i64(shift, get_field(s, v3), 7, ES_8);
        /* By-byte: bits 1-4 (count * 8); by-bit: low 3 bits only. */
        tcg_gen_andi_i64(shift, shift, byte ? 0x78 : 7);
        gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2), shift, 0, gen);
        tcg_temp_free_i64(shift);
    }
    return DISAS_NEXT;
}
2318
2319static DisasJumpType op_vsl(DisasContext *s, DisasOps *o)
2320{
2321    return gen_vsh_by_byte(s, o, gen_helper_gvec_vsl,
2322                            gen_helper_gvec_vsl_ve2);
2323}
2324
2325static DisasJumpType op_vsra(DisasContext *s, DisasOps *o)
2326{
2327    return gen_vsh_by_byte(s, o, gen_helper_gvec_vsra,
2328                            gen_helper_gvec_vsra_ve2);
2329}
2330
2331static DisasJumpType op_vsrl(DisasContext *s, DisasOps *o)
2332{
2333    return gen_vsh_by_byte(s, o, gen_helper_gvec_vsrl,
2334                            gen_helper_gvec_vsrl_ve2);
2335}
2336
2337static DisasJumpType op_vsld(DisasContext *s, DisasOps *o)
2338{
2339    const bool byte = s->insn->data;
2340    const uint8_t mask = byte ? 15 : 7;
2341    const uint8_t mul  = byte ?  8 : 1;
2342    const uint8_t i4   = get_field(s, i4);
2343    const int right_shift = 64 - (i4 & 7) * mul;
2344    TCGv_i64 t0, t1, t2;
2345
2346    if (i4 & ~mask) {
2347        gen_program_exception(s, PGM_SPECIFICATION);
2348        return DISAS_NORETURN;
2349    }
2350
2351    t0 = tcg_temp_new_i64();
2352    t1 = tcg_temp_new_i64();
2353    t2 = tcg_temp_new_i64();
2354
2355    if ((i4 & 8) == 0) {
2356        read_vec_element_i64(t0, get_field(s, v2), 0, ES_64);
2357        read_vec_element_i64(t1, get_field(s, v2), 1, ES_64);
2358        read_vec_element_i64(t2, get_field(s, v3), 0, ES_64);
2359    } else {
2360        read_vec_element_i64(t0, get_field(s, v2), 1, ES_64);
2361        read_vec_element_i64(t1, get_field(s, v3), 0, ES_64);
2362        read_vec_element_i64(t2, get_field(s, v3), 1, ES_64);
2363    }
2364
2365    tcg_gen_extract2_i64(t0, t1, t0, right_shift);
2366    tcg_gen_extract2_i64(t1, t2, t1, right_shift);
2367
2368    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
2369    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
2370
2371    tcg_temp_free(t0);
2372    tcg_temp_free(t1);
2373    tcg_temp_free(t2);
2374    return DISAS_NEXT;
2375}
2376
2377static DisasJumpType op_vsrd(DisasContext *s, DisasOps *o)
2378{
2379    const uint8_t i4 = get_field(s, i4);
2380    TCGv_i64 t0, t1, t2;
2381
2382    if (i4 & ~7) {
2383        gen_program_exception(s, PGM_SPECIFICATION);
2384        return DISAS_NORETURN;
2385    }
2386
2387    t0 = tcg_temp_new_i64();
2388    t1 = tcg_temp_new_i64();
2389    t2 = tcg_temp_new_i64();
2390
2391    read_vec_element_i64(t0, get_field(s, v2), 1, ES_64);
2392    read_vec_element_i64(t1, get_field(s, v3), 0, ES_64);
2393    read_vec_element_i64(t2, get_field(s, v3), 1, ES_64);
2394
2395    tcg_gen_extract2_i64(t0, t1, t0, i4);
2396    tcg_gen_extract2_i64(t1, t2, t1, i4);
2397
2398    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
2399    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
2400
2401    tcg_temp_free(t0);
2402    tcg_temp_free(t1);
2403    tcg_temp_free(t2);
2404    return DISAS_NEXT;
2405}
2406
2407static DisasJumpType op_vs(DisasContext *s, DisasOps *o)
2408{
2409    const uint8_t es = get_field(s, m4);
2410
2411    if (es > ES_128) {
2412        gen_program_exception(s, PGM_SPECIFICATION);
2413        return DISAS_NORETURN;
2414    } else if (es == ES_128) {
2415        gen_gvec128_3_i64(tcg_gen_sub2_i64, get_field(s, v1),
2416                          get_field(s, v2), get_field(s, v3));
2417        return DISAS_NEXT;
2418    }
2419    gen_gvec_fn_3(sub, es, get_field(s, v1), get_field(s, v2),
2420                  get_field(s, v3));
2421    return DISAS_NEXT;
2422}
2423
/* Borrow indication for a - b: 1 when a >= b (no borrow), else 0. */
static void gen_scbi_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_setcond_i32(TCG_COND_GEU, d, a, b);
}
2428
/* Borrow indication for a - b: 1 when a >= b (no borrow), else 0. */
static void gen_scbi_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_setcond_i64(TCG_COND_GEU, d, a, b);
}
2433
/*
 * 128-bit borrow indication: set (dh:dl) to 1 when (ah:al) >= (bh:bl),
 * else 0.  The borrow of the low subtraction is propagated manually
 * through the high subtraction via th.
 */
static void gen_scbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
                          TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
{
    TCGv_i64 th = tcg_temp_new_i64();
    TCGv_i64 tl = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);

    /* al - bl; th becomes -1 on borrow, 0 otherwise. */
    tcg_gen_sub2_i64(tl, th, al, zero, bl, zero);
    /* Reduce to a single borrow bit. */
    tcg_gen_andi_i64(th, th, 1);
    /* ah - borrow - bh; th ends up -1 (borrow) or 0 (no borrow). */
    tcg_gen_sub2_i64(tl, th, ah, zero, th, zero);
    tcg_gen_sub2_i64(tl, th, tl, th, bh, zero);
    /* "invert" the result: -1 -> 0; 0 -> 1 */
    tcg_gen_addi_i64(dl, th, 1);
    tcg_gen_mov_i64(dh, zero);

    tcg_temp_free_i64(th);
    tcg_temp_free_i64(tl);
}
2452
static DisasJumpType op_vscbi(DisasContext *s, DisasOps *o)
{
    /*
     * Vector subtract-compute-borrow-indication: per element, store 1 if
     * the subtraction v2 - v3 produces no borrow, 0 otherwise.
     */
    const uint8_t es = get_field(s, m4);
    static const GVecGen3 g[4] = {
        { .fno = gen_helper_gvec_vscbi8, },
        { .fno = gen_helper_gvec_vscbi16, },
        { .fni4 = gen_scbi_i32, },
        { .fni8 = gen_scbi_i64, },
    };

    if (es > ES_128) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    } else if (es == ES_128) {
        /* Single 128-bit element: open-coded borrow chain. */
        gen_gvec128_3_i64(gen_scbi2_i64, get_field(s, v1),
                          get_field(s, v2), get_field(s, v3));
        return DISAS_NEXT;
    }
    gen_gvec_3(get_field(s, v1), get_field(s, v2),
               get_field(s, v3), &g[es]);
    return DISAS_NEXT;
}
2475
2476static void gen_sbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
2477                         TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
2478{
2479    TCGv_i64 tl = tcg_temp_new_i64();
2480    TCGv_i64 th = tcg_temp_new_i64();
2481
2482    tcg_gen_not_i64(tl, bl);
2483    tcg_gen_not_i64(th, bh);
2484    gen_ac2_i64(dl, dh, al, ah, tl, th, cl, ch);
2485    tcg_temp_free_i64(tl);
2486    tcg_temp_free_i64(th);
2487}
2488
2489static DisasJumpType op_vsbi(DisasContext *s, DisasOps *o)
2490{
2491    if (get_field(s, m5) != ES_128) {
2492        gen_program_exception(s, PGM_SPECIFICATION);
2493        return DISAS_NORETURN;
2494    }
2495
2496    gen_gvec128_4_i64(gen_sbi2_i64, get_field(s, v1),
2497                      get_field(s, v2), get_field(s, v3),
2498                      get_field(s, v4));
2499    return DISAS_NEXT;
2500}
2501
2502static void gen_sbcbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
2503                           TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
2504{
2505    TCGv_i64 th = tcg_temp_new_i64();
2506    TCGv_i64 tl = tcg_temp_new_i64();
2507
2508    tcg_gen_not_i64(tl, bl);
2509    tcg_gen_not_i64(th, bh);
2510    gen_accc2_i64(dl, dh, al, ah, tl, th, cl, ch);
2511
2512    tcg_temp_free_i64(tl);
2513    tcg_temp_free_i64(th);
2514}
2515
2516static DisasJumpType op_vsbcbi(DisasContext *s, DisasOps *o)
2517{
2518    if (get_field(s, m5) != ES_128) {
2519        gen_program_exception(s, PGM_SPECIFICATION);
2520        return DISAS_NORETURN;
2521    }
2522
2523    gen_gvec128_4_i64(gen_sbcbi2_i64, get_field(s, v1),
2524                      get_field(s, v2), get_field(s, v3),
2525                      get_field(s, v4));
2526    return DISAS_NEXT;
2527}
2528
static DisasJumpType op_vsumg(DisasContext *s, DisasOps *o)
{
    /*
     * Vector sum across doubleword: for each half of the vector, sum the
     * 16/32-bit elements of v2 plus the last element of v3's half into a
     * 64-bit result element of v1.
     */
    const uint8_t es = get_field(s, m4);
    TCGv_i64 sum, tmp;
    uint8_t dst_idx;

    /* Only halfword and word element sizes are valid. */
    if (es == ES_8 || es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    sum = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();
    for (dst_idx = 0; dst_idx < 2; dst_idx++) {
        /* Elements [idx, max_idx] form the half feeding this result. */
        uint8_t idx = dst_idx * NUM_VEC_ELEMENTS(es) / 2;
        const uint8_t max_idx = idx + NUM_VEC_ELEMENTS(es) / 2 - 1;

        /* Start with the last v3 element of the half, then add v2's. */
        read_vec_element_i64(sum, get_field(s, v3), max_idx, es);
        for (; idx <= max_idx; idx++) {
            read_vec_element_i64(tmp, get_field(s, v2), idx, es);
            tcg_gen_add_i64(sum, sum, tmp);
        }
        write_vec_element_i64(sum, get_field(s, v1), dst_idx, ES_64);
    }
    tcg_temp_free_i64(sum);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
2557
static DisasJumpType op_vsumq(DisasContext *s, DisasOps *o)
{
    /*
     * Vector sum across quadword: sum all 32/64-bit elements of v2 plus
     * the last element of v3 into a single 128-bit result in v1, with
     * explicit carry propagation into the high doubleword.
     */
    const uint8_t es = get_field(s, m4);
    const uint8_t max_idx = NUM_VEC_ELEMENTS(es) - 1;
    TCGv_i64 sumh, suml, zero, tmpl;
    uint8_t idx;

    /* Only word and doubleword element sizes are valid. */
    if (es < ES_32 || es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    sumh = tcg_temp_new_i64();
    suml = tcg_temp_new_i64();
    zero = tcg_constant_i64(0);
    tmpl = tcg_temp_new_i64();

    /* Seed with the last v3 element, then accumulate all of v2. */
    tcg_gen_mov_i64(sumh, zero);
    read_vec_element_i64(suml, get_field(s, v3), max_idx, es);
    for (idx = 0; idx <= max_idx; idx++) {
        read_vec_element_i64(tmpl, get_field(s, v2), idx, es);
        tcg_gen_add2_i64(suml, sumh, suml, sumh, tmpl, zero);
    }
    write_vec_element_i64(sumh, get_field(s, v1), 0, ES_64);
    write_vec_element_i64(suml, get_field(s, v1), 1, ES_64);

    tcg_temp_free_i64(sumh);
    tcg_temp_free_i64(suml);
    tcg_temp_free_i64(tmpl);
    return DISAS_NEXT;
}
2589
static DisasJumpType op_vsum(DisasContext *s, DisasOps *o)
{
    /*
     * Vector sum across word: for each quarter of the vector, sum the
     * 8/16-bit elements of v2 plus the last element of v3's quarter into
     * a 32-bit result element of v1.
     */
    const uint8_t es = get_field(s, m4);
    TCGv_i32 sum, tmp;
    uint8_t dst_idx;

    /* Only byte and halfword element sizes are valid. */
    if (es > ES_16) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    sum = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();
    for (dst_idx = 0; dst_idx < 4; dst_idx++) {
        /* Elements [idx, max_idx] form the quarter feeding this result. */
        uint8_t idx = dst_idx * NUM_VEC_ELEMENTS(es) / 4;
        const uint8_t max_idx = idx + NUM_VEC_ELEMENTS(es) / 4 - 1;

        /* Start with the last v3 element of the quarter, then add v2's. */
        read_vec_element_i32(sum, get_field(s, v3), max_idx, es);
        for (; idx <= max_idx; idx++) {
            read_vec_element_i32(tmp, get_field(s, v2), idx, es);
            tcg_gen_add_i32(sum, sum, tmp);
        }
        write_vec_element_i32(sum, get_field(s, v1), dst_idx, ES_32);
    }
    tcg_temp_free_i32(sum);
    tcg_temp_free_i32(tmp);
    return DISAS_NEXT;
}
2618
2619static DisasJumpType op_vtm(DisasContext *s, DisasOps *o)
2620{
2621    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2),
2622                   cpu_env, 0, gen_helper_gvec_vtm);
2623    set_cc_static(s);
2624    return DISAS_NEXT;
2625}
2626
static DisasJumpType op_vfae(DisasContext *s, DisasOps *o)
{
    /*
     * Vector find any element equal.  If the CS bit (m5 bit 0) is set,
     * use the CC-setting helper variants and mark the CC as static;
     * otherwise use the plain out-of-line helpers.
     */
    const uint8_t es = get_field(s, m4);
    const uint8_t m5 = get_field(s, m5);
    static gen_helper_gvec_3 * const g[3] = {
        gen_helper_gvec_vfae8,
        gen_helper_gvec_vfae16,
        gen_helper_gvec_vfae32,
    };
    static gen_helper_gvec_3_ptr * const g_cc[3] = {
        gen_helper_gvec_vfae_cc8,
        gen_helper_gvec_vfae_cc16,
        gen_helper_gvec_vfae_cc32,
    };
    if (es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    if (extract32(m5, 0, 1)) {
        gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2),
                       get_field(s, v3), cpu_env, m5, g_cc[es]);
        set_cc_static(s);
    } else {
        gen_gvec_3_ool(get_field(s, v1), get_field(s, v2),
                       get_field(s, v3), m5, g[es]);
    }
    return DISAS_NEXT;
}
2656
static DisasJumpType op_vfee(DisasContext *s, DisasOps *o)
{
    /*
     * Vector find element equal.  m5 may only contain the ZS (bit 1) and
     * CS (bit 0) flags; the CS flag selects the CC-setting helpers.
     */
    const uint8_t es = get_field(s, m4);
    const uint8_t m5 = get_field(s, m5);
    static gen_helper_gvec_3 * const g[3] = {
        gen_helper_gvec_vfee8,
        gen_helper_gvec_vfee16,
        gen_helper_gvec_vfee32,
    };
    static gen_helper_gvec_3_ptr * const g_cc[3] = {
        gen_helper_gvec_vfee_cc8,
        gen_helper_gvec_vfee_cc16,
        gen_helper_gvec_vfee_cc32,
    };

    if (es > ES_32 || m5 & ~0x3) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    if (extract32(m5, 0, 1)) {
        gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2),
                       get_field(s, v3), cpu_env, m5, g_cc[es]);
        set_cc_static(s);
    } else {
        gen_gvec_3_ool(get_field(s, v1), get_field(s, v2),
                       get_field(s, v3), m5, g[es]);
    }
    return DISAS_NEXT;
}
2687
static DisasJumpType op_vfene(DisasContext *s, DisasOps *o)
{
    /*
     * Vector find element not equal.  Mirrors op_vfee: m5 may only
     * contain ZS (bit 1) and CS (bit 0); CS selects the CC-setting
     * helpers.
     */
    const uint8_t es = get_field(s, m4);
    const uint8_t m5 = get_field(s, m5);
    static gen_helper_gvec_3 * const g[3] = {
        gen_helper_gvec_vfene8,
        gen_helper_gvec_vfene16,
        gen_helper_gvec_vfene32,
    };
    static gen_helper_gvec_3_ptr * const g_cc[3] = {
        gen_helper_gvec_vfene_cc8,
        gen_helper_gvec_vfene_cc16,
        gen_helper_gvec_vfene_cc32,
    };

    if (es > ES_32 || m5 & ~0x3) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    if (extract32(m5, 0, 1)) {
        gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2),
                       get_field(s, v3), cpu_env, m5, g_cc[es]);
        set_cc_static(s);
    } else {
        gen_gvec_3_ool(get_field(s, v1), get_field(s, v2),
                       get_field(s, v3), m5, g[es]);
    }
    return DISAS_NEXT;
}
2718
static DisasJumpType op_vistr(DisasContext *s, DisasOps *o)
{
    /*
     * Vector isolate string.  m5 may only contain the CS flag (bit 0),
     * which selects the CC-setting helper variants.
     */
    const uint8_t es = get_field(s, m3);
    const uint8_t m5 = get_field(s, m5);
    static gen_helper_gvec_2 * const g[3] = {
        gen_helper_gvec_vistr8,
        gen_helper_gvec_vistr16,
        gen_helper_gvec_vistr32,
    };
    static gen_helper_gvec_2_ptr * const g_cc[3] = {
        gen_helper_gvec_vistr_cc8,
        gen_helper_gvec_vistr_cc16,
        gen_helper_gvec_vistr_cc32,
    };

    if (es > ES_32 || m5 & ~0x1) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    if (extract32(m5, 0, 1)) {
        gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2),
                       cpu_env, 0, g_cc[es]);
        set_cc_static(s);
    } else {
        gen_gvec_2_ool(get_field(s, v1), get_field(s, v2), 0,
                       g[es]);
    }
    return DISAS_NEXT;
}
2749
static DisasJumpType op_vstrc(DisasContext *s, DisasOps *o)
{
    /*
     * Vector string range compare.  Two independent m6 flags pick among
     * four helper tables: CS (bit 0) selects the CC-setting variants,
     * RT (bit 2) selects the "result type" variants.
     */
    const uint8_t es = get_field(s, m5);
    const uint8_t m6 = get_field(s, m6);
    static gen_helper_gvec_4 * const g[3] = {
        gen_helper_gvec_vstrc8,
        gen_helper_gvec_vstrc16,
        gen_helper_gvec_vstrc32,
    };
    static gen_helper_gvec_4 * const g_rt[3] = {
        gen_helper_gvec_vstrc_rt8,
        gen_helper_gvec_vstrc_rt16,
        gen_helper_gvec_vstrc_rt32,
    };
    static gen_helper_gvec_4_ptr * const g_cc[3] = {
        gen_helper_gvec_vstrc_cc8,
        gen_helper_gvec_vstrc_cc16,
        gen_helper_gvec_vstrc_cc32,
    };
    static gen_helper_gvec_4_ptr * const g_cc_rt[3] = {
        gen_helper_gvec_vstrc_cc_rt8,
        gen_helper_gvec_vstrc_cc_rt16,
        gen_helper_gvec_vstrc_cc_rt32,
    };

    if (es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    if (extract32(m6, 0, 1)) {
        /* CS set: helpers write the CC. */
        if (extract32(m6, 2, 1)) {
            gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2),
                           get_field(s, v3), get_field(s, v4),
                           cpu_env, m6, g_cc_rt[es]);
        } else {
            gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2),
                           get_field(s, v3), get_field(s, v4),
                           cpu_env, m6, g_cc[es]);
        }
        set_cc_static(s);
    } else {
        if (extract32(m6, 2, 1)) {
            gen_gvec_4_ool(get_field(s, v1), get_field(s, v2),
                           get_field(s, v3), get_field(s, v4),
                           m6, g_rt[es]);
        } else {
            gen_gvec_4_ool(get_field(s, v1), get_field(s, v2),
                           get_field(s, v3), get_field(s, v4),
                           m6, g[es]);
        }
    }
    return DISAS_NEXT;
}
2804
static DisasJumpType op_vstrs(DisasContext *s, DisasOps *o)
{
    /*
     * Vector string search.  Helpers are indexed by element size and the
     * ZS flag (m6 bit 1); no other m6 bits are allowed.  The helper sets
     * the CC.
     */
    typedef void (*helper_vstrs)(TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                 TCGv_ptr, TCGv_ptr, TCGv_i32);
    static const helper_vstrs fns[3][2] = {
        { gen_helper_gvec_vstrs_8, gen_helper_gvec_vstrs_zs8 },
        { gen_helper_gvec_vstrs_16, gen_helper_gvec_vstrs_zs16 },
        { gen_helper_gvec_vstrs_32, gen_helper_gvec_vstrs_zs32 },
    };
    const uint8_t es = get_field(s, m5);
    const uint8_t m6 = get_field(s, m6);
    const bool zs = extract32(m6, 1, 1);

    if (es > ES_32 || m6 & ~2) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2),
                   get_field(s, v3), get_field(s, v4),
                   cpu_env, 0, fns[es][zs]);
    set_cc_static(s);
    return DISAS_NEXT;
}
2829
static DisasJumpType op_vfa(DisasContext *s, DisasOps *o)
{
    /*
     * Vector FP add/divide/multiply/subtract, dispatched on the opcode
     * and FP format.  LONG is always available; SHORT and EXT require
     * the vector-enhancements facility.  The low 3 bits of m5 are
     * reserved and must be zero.
     */
    const uint8_t fpf = get_field(s, m4);
    const uint8_t m5 = get_field(s, m5);
    gen_helper_gvec_3_ptr *fn = NULL;

    switch (s->fields.op2) {
    case 0xe3:
        /* VFA */
        switch (fpf) {
        case FPF_SHORT:
            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
                fn = gen_helper_gvec_vfa32;
            }
            break;
        case FPF_LONG:
            fn = gen_helper_gvec_vfa64;
            break;
        case FPF_EXT:
            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
                fn = gen_helper_gvec_vfa128;
            }
            break;
        default:
            break;
        }
        break;
    case 0xe5:
        /* VFD */
        switch (fpf) {
        case FPF_SHORT:
            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
                fn = gen_helper_gvec_vfd32;
            }
            break;
        case FPF_LONG:
            fn = gen_helper_gvec_vfd64;
            break;
        case FPF_EXT:
            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
                fn = gen_helper_gvec_vfd128;
            }
            break;
        default:
            break;
        }
        break;
    case 0xe7:
        /* VFM */
        switch (fpf) {
        case FPF_SHORT:
            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
                fn = gen_helper_gvec_vfm32;
            }
            break;
        case FPF_LONG:
            fn = gen_helper_gvec_vfm64;
            break;
        case FPF_EXT:
            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
                fn = gen_helper_gvec_vfm128;
            }
            break;
        default:
            break;
        }
        break;
    case 0xe2:
        /* VFS */
        switch (fpf) {
        case FPF_SHORT:
            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
                fn = gen_helper_gvec_vfs32;
            }
            break;
        case FPF_LONG:
            fn = gen_helper_gvec_vfs64;
            break;
        case FPF_EXT:
            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
                fn = gen_helper_gvec_vfs128;
            }
            break;
        default:
            break;
        }
        break;
    default:
        g_assert_not_reached();
    }

    /* fn == NULL means unsupported format/facility combination. */
    if (!fn || extract32(m5, 0, 3)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2),
                   get_field(s, v3), cpu_env, m5, fn);
    return DISAS_NEXT;
}
2926
2927static DisasJumpType op_wfc(DisasContext *s, DisasOps *o)
2928{
2929    const uint8_t fpf = get_field(s, m3);
2930    const uint8_t m4 = get_field(s, m4);
2931    gen_helper_gvec_2_ptr *fn = NULL;
2932
2933    switch (fpf) {
2934    case FPF_SHORT:
2935        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
2936            fn = gen_helper_gvec_wfk32;
2937            if (s->fields.op2 == 0xcb) {
2938                fn = gen_helper_gvec_wfc32;
2939            }
2940        }
2941        break;
2942    case FPF_LONG:
2943        fn = gen_helper_gvec_wfk64;
2944        if (s->fields.op2 == 0xcb) {
2945            fn = gen_helper_gvec_wfc64;
2946        }
2947        break;
2948    case FPF_EXT:
2949        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
2950            fn = gen_helper_gvec_wfk128;
2951            if (s->fields.op2 == 0xcb) {
2952                fn = gen_helper_gvec_wfc128;
2953            }
2954        }
2955        break;
2956    default:
2957        break;
2958    };
2959
2960    if (!fn || m4) {
2961        gen_program_exception(s, PGM_SPECIFICATION);
2962        return DISAS_NORETURN;
2963    }
2964
2965    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, 0, fn);
2966    set_cc_static(s);
2967    return DISAS_NEXT;
2968}
2969
2970static DisasJumpType op_vfc(DisasContext *s, DisasOps *o)
2971{
2972    const uint8_t fpf = get_field(s, m4);
2973    const uint8_t m5 = get_field(s, m5);
2974    const uint8_t m6 = get_field(s, m6);
2975    const bool cs = extract32(m6, 0, 1);
2976    const bool sq = extract32(m5, 2, 1);
2977    gen_helper_gvec_3_ptr *fn = NULL;
2978
2979    switch (s->fields.op2) {
2980    case 0xe8:
2981        switch (fpf) {
2982        case FPF_SHORT:
2983            fn = cs ? gen_helper_gvec_vfce32_cc : gen_helper_gvec_vfce32;
2984            break;
2985        case FPF_LONG:
2986            fn = cs ? gen_helper_gvec_vfce64_cc : gen_helper_gvec_vfce64;
2987            break;
2988        case FPF_EXT:
2989            fn = cs ? gen_helper_gvec_vfce128_cc : gen_helper_gvec_vfce128;
2990            break;
2991        default:
2992            break;
2993        }
2994        break;
2995    case 0xeb:
2996        switch (fpf) {
2997        case FPF_SHORT:
2998            fn = cs ? gen_helper_gvec_vfch32_cc : gen_helper_gvec_vfch32;
2999            break;
3000        case FPF_LONG:
3001            fn = cs ? gen_helper_gvec_vfch64_cc : gen_helper_gvec_vfch64;
3002            break;
3003        case FPF_EXT:
3004            fn = cs ? gen_helper_gvec_vfch128_cc : gen_helper_gvec_vfch128;
3005            break;
3006        default:
3007            break;
3008        }
3009        break;
3010    case 0xea:
3011        switch (fpf) {
3012        case FPF_SHORT:
3013            fn = cs ? gen_helper_gvec_vfche32_cc : gen_helper_gvec_vfche32;
3014            break;
3015        case FPF_LONG:
3016            fn = cs ? gen_helper_gvec_vfche64_cc : gen_helper_gvec_vfche64;
3017            break;
3018        case FPF_EXT:
3019            fn = cs ? gen_helper_gvec_vfche128_cc : gen_helper_gvec_vfche128;
3020            break;
3021        default:
3022            break;
3023        }
3024        break;
3025    default:
3026        g_assert_not_reached();
3027    }
3028
3029    if (!fn || extract32(m5, 0, 2) || extract32(m6, 1, 3) ||
3030        (!s390_has_feat(S390_FEAT_VECTOR_ENH) && (fpf != FPF_LONG || sq))) {
3031        gen_program_exception(s, PGM_SPECIFICATION);
3032        return DISAS_NORETURN;
3033    }
3034
3035    gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2), get_field(s, v3),
3036                   cpu_env, m5, fn);
3037    if (cs) {
3038        set_cc_static(s);
3039    }
3040    return DISAS_NEXT;
3041}
3042
3043static DisasJumpType op_vcdg(DisasContext *s, DisasOps *o)
3044{
3045    const uint8_t fpf = get_field(s, m3);
3046    const uint8_t m4 = get_field(s, m4);
3047    const uint8_t erm = get_field(s, m5);
3048    gen_helper_gvec_2_ptr *fn = NULL;
3049
3050
3051    switch (s->fields.op2) {
3052    case 0xc3:
3053        switch (fpf) {
3054        case FPF_LONG:
3055            fn = gen_helper_gvec_vcdg64;
3056            break;
3057        case FPF_SHORT:
3058            if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
3059                fn = gen_helper_gvec_vcdg32;
3060            }
3061            break;
3062        default:
3063            break;
3064        }
3065        break;
3066    case 0xc1:
3067        switch (fpf) {
3068        case FPF_LONG:
3069            fn = gen_helper_gvec_vcdlg64;
3070            break;
3071        case FPF_SHORT:
3072            if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
3073                fn = gen_helper_gvec_vcdlg32;
3074            }
3075            break;
3076        default:
3077            break;
3078        }
3079        break;
3080    case 0xc2:
3081        switch (fpf) {
3082        case FPF_LONG:
3083            fn = gen_helper_gvec_vcgd64;
3084            break;
3085        case FPF_SHORT:
3086            if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
3087                fn = gen_helper_gvec_vcgd32;
3088            }
3089            break;
3090        default:
3091            break;
3092        }
3093        break;
3094    case 0xc0:
3095        switch (fpf) {
3096        case FPF_LONG:
3097            fn = gen_helper_gvec_vclgd64;
3098            break;
3099        case FPF_SHORT:
3100            if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
3101                fn = gen_helper_gvec_vclgd32;
3102            }
3103            break;
3104        default:
3105            break;
3106        }
3107        break;
3108    case 0xc7:
3109        switch (fpf) {
3110        case FPF_SHORT:
3111            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3112                fn = gen_helper_gvec_vfi32;
3113            }
3114            break;
3115        case FPF_LONG:
3116            fn = gen_helper_gvec_vfi64;
3117            break;
3118        case FPF_EXT:
3119            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3120                fn = gen_helper_gvec_vfi128;
3121            }
3122            break;
3123        default:
3124            break;
3125        }
3126        break;
3127    case 0xc5:
3128        switch (fpf) {
3129        case FPF_LONG:
3130            fn = gen_helper_gvec_vflr64;
3131            break;
3132        case FPF_EXT:
3133            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3134                fn = gen_helper_gvec_vflr128;
3135            }
3136            break;
3137        default:
3138            break;
3139        }
3140        break;
3141    default:
3142        g_assert_not_reached();
3143    }
3144
3145    if (!fn || extract32(m4, 0, 2) || erm > 7 || erm == 2) {
3146        gen_program_exception(s, PGM_SPECIFICATION);
3147        return DISAS_NORETURN;
3148    }
3149
3150    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env,
3151                   deposit32(m4, 4, 4, erm), fn);
3152    return DISAS_NEXT;
3153}
3154
3155static DisasJumpType op_vfll(DisasContext *s, DisasOps *o)
3156{
3157    const uint8_t fpf = get_field(s, m3);
3158    const uint8_t m4 = get_field(s, m4);
3159    gen_helper_gvec_2_ptr *fn = NULL;
3160
3161    switch (fpf) {
3162    case FPF_SHORT:
3163        fn = gen_helper_gvec_vfll32;
3164        break;
3165    case FPF_LONG:
3166        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3167            fn = gen_helper_gvec_vfll64;
3168        }
3169        break;
3170    default:
3171        break;
3172    }
3173
3174    if (!fn || extract32(m4, 0, 3)) {
3175        gen_program_exception(s, PGM_SPECIFICATION);
3176        return DISAS_NORETURN;
3177    }
3178
3179    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, m4, fn);
3180    return DISAS_NEXT;
3181}
3182
3183static DisasJumpType op_vfmax(DisasContext *s, DisasOps *o)
3184{
3185    const uint8_t fpf = get_field(s, m4);
3186    const uint8_t m6 = get_field(s, m6);
3187    const uint8_t m5 = get_field(s, m5);
3188    gen_helper_gvec_3_ptr *fn;
3189
3190    if (m6 == 5 || m6 == 6 || m6 == 7 || m6 > 13) {
3191        gen_program_exception(s, PGM_SPECIFICATION);
3192        return DISAS_NORETURN;
3193    }
3194
3195    switch (fpf) {
3196    case FPF_SHORT:
3197        if (s->fields.op2 == 0xef) {
3198            fn = gen_helper_gvec_vfmax32;
3199        } else {
3200            fn = gen_helper_gvec_vfmin32;
3201        }
3202        break;
3203    case FPF_LONG:
3204        if (s->fields.op2 == 0xef) {
3205            fn = gen_helper_gvec_vfmax64;
3206        } else {
3207            fn = gen_helper_gvec_vfmin64;
3208        }
3209        break;
3210    case FPF_EXT:
3211        if (s->fields.op2 == 0xef) {
3212            fn = gen_helper_gvec_vfmax128;
3213        } else {
3214            fn = gen_helper_gvec_vfmin128;
3215        }
3216        break;
3217    default:
3218        gen_program_exception(s, PGM_SPECIFICATION);
3219        return DISAS_NORETURN;
3220    }
3221
3222    gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2), get_field(s, v3),
3223                   cpu_env, deposit32(m5, 4, 4, m6), fn);
3224    return DISAS_NEXT;
3225}
3226
3227static DisasJumpType op_vfma(DisasContext *s, DisasOps *o)
3228{
3229    const uint8_t m5 = get_field(s, m5);
3230    const uint8_t fpf = get_field(s, m6);
3231    gen_helper_gvec_4_ptr *fn = NULL;
3232
3233    switch (s->fields.op2) {
3234    case 0x8f:
3235        switch (fpf) {
3236        case FPF_SHORT:
3237            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3238                fn = gen_helper_gvec_vfma32;
3239            }
3240            break;
3241        case FPF_LONG:
3242            fn = gen_helper_gvec_vfma64;
3243            break;
3244        case FPF_EXT:
3245            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3246                fn = gen_helper_gvec_vfma128;
3247            }
3248            break;
3249        default:
3250            break;
3251        }
3252        break;
3253    case 0x8e:
3254        switch (fpf) {
3255        case FPF_SHORT:
3256            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3257                fn = gen_helper_gvec_vfms32;
3258            }
3259            break;
3260        case FPF_LONG:
3261            fn = gen_helper_gvec_vfms64;
3262            break;
3263        case FPF_EXT:
3264            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3265                fn = gen_helper_gvec_vfms128;
3266            }
3267            break;
3268        default:
3269            break;
3270        }
3271        break;
3272    case 0x9f:
3273        switch (fpf) {
3274        case FPF_SHORT:
3275            fn = gen_helper_gvec_vfnma32;
3276            break;
3277        case FPF_LONG:
3278            fn = gen_helper_gvec_vfnma64;
3279            break;
3280        case FPF_EXT:
3281            fn = gen_helper_gvec_vfnma128;
3282            break;
3283        default:
3284            break;
3285        }
3286        break;
3287    case 0x9e:
3288        switch (fpf) {
3289        case FPF_SHORT:
3290            fn = gen_helper_gvec_vfnms32;
3291            break;
3292        case FPF_LONG:
3293            fn = gen_helper_gvec_vfnms64;
3294            break;
3295        case FPF_EXT:
3296            fn = gen_helper_gvec_vfnms128;
3297            break;
3298        default:
3299            break;
3300        }
3301        break;
3302    default:
3303        g_assert_not_reached();
3304    }
3305
3306    if (!fn || extract32(m5, 0, 3)) {
3307        gen_program_exception(s, PGM_SPECIFICATION);
3308        return DISAS_NORETURN;
3309    }
3310
3311    gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2),
3312                   get_field(s, v3), get_field(s, v4), cpu_env, m5, fn);
3313    return DISAS_NEXT;
3314}
3315
/*
 * VECTOR FP PERFORM SIGN OPERATION: manipulate only the sign bit of each
 * element. m5 selects the operation: 0 = complement, 1 = force negative,
 * 2 = force positive. The "se" bit of m4 requests a single-element
 * (scalar) operation.
 */
static DisasJumpType op_vfpso(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s, v1);
    const uint8_t v2 = get_field(s, v2);
    const uint8_t fpf = get_field(s, m3);
    const uint8_t m4 = get_field(s, m4);
    const uint8_t m5 = get_field(s, m5);
    const bool se = extract32(m4, 3, 1);
    TCGv_i64 tmp;

    /*
     * Non-long formats need the vector-enhancements facility; the low
     * three m4 bits are reserved, and m5 values above 2 are invalid.
     */
    if ((fpf != FPF_LONG && !s390_has_feat(S390_FEAT_VECTOR_ENH)) ||
        extract32(m4, 0, 3) || m5 > 2) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /*
     * Full-vector short/long operations can be expanded with plain
     * bitwise gvec ops on the per-element sign bit.
     */
    switch (fpf) {
    case FPF_SHORT:
        if (!se) {
            switch (m5) {
            case 0:
                /* sign bit is inverted (complement) */
                gen_gvec_fn_2i(xori, ES_32, v1, v2, 1ull << 31);
                break;
            case 1:
                /* sign bit is set to one (negative) */
                gen_gvec_fn_2i(ori, ES_32, v1, v2, 1ull << 31);
                break;
            case 2:
                /* sign bit is set to zero (positive) */
                gen_gvec_fn_2i(andi, ES_32, v1, v2, (1ull << 31) - 1);
                break;
            }
            return DISAS_NEXT;
        }
        break;
    case FPF_LONG:
        if (!se) {
            switch (m5) {
            case 0:
                /* sign bit is inverted (complement) */
                gen_gvec_fn_2i(xori, ES_64, v1, v2, 1ull << 63);
                break;
            case 1:
                /* sign bit is set to one (negative) */
                gen_gvec_fn_2i(ori, ES_64, v1, v2, 1ull << 63);
                break;
            case 2:
                /* sign bit is set to zero (positive) */
                gen_gvec_fn_2i(andi, ES_64, v1, v2, (1ull << 63) - 1);
                break;
            }
            return DISAS_NEXT;
        }
        break;
    case FPF_EXT:
        /* Only a single element. */
        break;
    default:
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /*
     * Single-element path (se set, or 128-bit format): the sign bit lives
     * in the first doubleword, so with a single element we are only
     * interested in bit 0.
     */
    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, v2, 0, ES_64);
    switch (m5) {
    case 0:
        /* sign bit is inverted (complement) */
        tcg_gen_xori_i64(tmp, tmp, 1ull << 63);
        break;
    case 1:
        /* sign bit is set to one (negative) */
        tcg_gen_ori_i64(tmp, tmp, 1ull << 63);
        break;
    case 2:
        /* sign bit is set to zero (positive) */
        tcg_gen_andi_i64(tmp, tmp, (1ull << 63) - 1);
        break;
    }
    write_vec_element_i64(tmp, v1, 0, ES_64);

    /* For 128-bit elements, copy the untouched low doubleword as well. */
    if (fpf == FPF_EXT) {
        read_vec_element_i64(tmp, v2, 1, ES_64);
        write_vec_element_i64(tmp, v1, 1, ES_64);
    }

    tcg_temp_free_i64(tmp);

    return DISAS_NEXT;
}
3407
3408static DisasJumpType op_vfsq(DisasContext *s, DisasOps *o)
3409{
3410    const uint8_t fpf = get_field(s, m3);
3411    const uint8_t m4 = get_field(s, m4);
3412    gen_helper_gvec_2_ptr *fn = NULL;
3413
3414    switch (fpf) {
3415    case FPF_SHORT:
3416        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3417            fn = gen_helper_gvec_vfsq32;
3418        }
3419        break;
3420    case FPF_LONG:
3421        fn = gen_helper_gvec_vfsq64;
3422        break;
3423    case FPF_EXT:
3424        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3425            fn = gen_helper_gvec_vfsq128;
3426        }
3427        break;
3428    default:
3429        break;
3430    }
3431
3432    if (!fn || extract32(m4, 0, 3)) {
3433        gen_program_exception(s, PGM_SPECIFICATION);
3434        return DISAS_NORETURN;
3435    }
3436
3437    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, m4, fn);
3438    return DISAS_NEXT;
3439}
3440
3441static DisasJumpType op_vftci(DisasContext *s, DisasOps *o)
3442{
3443    const uint16_t i3 = get_field(s, i3);
3444    const uint8_t fpf = get_field(s, m4);
3445    const uint8_t m5 = get_field(s, m5);
3446    gen_helper_gvec_2_ptr *fn = NULL;
3447
3448    switch (fpf) {
3449    case FPF_SHORT:
3450        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3451            fn = gen_helper_gvec_vftci32;
3452        }
3453        break;
3454    case FPF_LONG:
3455        fn = gen_helper_gvec_vftci64;
3456        break;
3457    case FPF_EXT:
3458        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3459            fn = gen_helper_gvec_vftci128;
3460        }
3461        break;
3462    default:
3463        break;
3464    }
3465
3466    if (!fn || extract32(m5, 0, 3)) {
3467        gen_program_exception(s, PGM_SPECIFICATION);
3468        return DISAS_NORETURN;
3469    }
3470
3471    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env,
3472                   deposit32(m5, 4, 12, i3), fn);
3473    set_cc_static(s);
3474    return DISAS_NEXT;
3475}
3476