xref: /openbmc/qemu/target/s390x/tcg/vec_int_helper.c (revision 00f463b38aa7cfca0bc65e3af7f2c49e1b9da690)
1 /*
2  * QEMU TCG support -- s390x vector integer instruction support
3  *
4  * Copyright (C) 2019 Red Hat Inc
5  *
6  * Authors:
7  *   David Hildenbrand <david@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 #include "qemu/osdep.h"
13 #include "cpu.h"
14 #include "vec.h"
15 #include "exec/helper-proto.h"
16 #include "tcg/tcg-gvec-desc.h"
17 #include "crypto/clmul.h"
18 
19 static bool s390_vec_is_zero(const S390Vector *v)
20 {
21     return !v->doubleword[0] && !v->doubleword[1];
22 }
23 
24 static void s390_vec_xor(S390Vector *res, const S390Vector *a,
25                          const S390Vector *b)
26 {
27     res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0];
28     res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1];
29 }
30 
31 static void s390_vec_and(S390Vector *res, const S390Vector *a,
32                          const S390Vector *b)
33 {
34     res->doubleword[0] = a->doubleword[0] & b->doubleword[0];
35     res->doubleword[1] = a->doubleword[1] & b->doubleword[1];
36 }
37 
38 static bool s390_vec_equal(const S390Vector *a, const S390Vector *b)
39 {
40     return a->doubleword[0] == b->doubleword[0] &&
41            a->doubleword[1] == b->doubleword[1];
42 }
43 
44 static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count)
45 {
46     uint64_t tmp;
47 
48     g_assert(count < 128);
49     if (count == 0) {
50         d->doubleword[0] = a->doubleword[0];
51         d->doubleword[1] = a->doubleword[1];
52     } else if (count == 64) {
53         d->doubleword[0] = a->doubleword[1];
54         d->doubleword[1] = 0;
55     } else if (count < 64) {
56         tmp = extract64(a->doubleword[1], 64 - count, count);
57         d->doubleword[1] = a->doubleword[1] << count;
58         d->doubleword[0] = (a->doubleword[0] << count) | tmp;
59     } else {
60         d->doubleword[0] = a->doubleword[1] << (count - 64);
61         d->doubleword[1] = 0;
62     }
63 }
64 
65 static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count)
66 {
67     uint64_t tmp;
68 
69     if (count == 0) {
70         d->doubleword[0] = a->doubleword[0];
71         d->doubleword[1] = a->doubleword[1];
72     } else if (count == 64) {
73         tmp = (int64_t)a->doubleword[0] >> 63;
74         d->doubleword[1] = a->doubleword[0];
75         d->doubleword[0] = tmp;
76     } else if (count < 64) {
77         tmp = a->doubleword[1] >> count;
78         d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
79         d->doubleword[0] = (int64_t)a->doubleword[0] >> count;
80     } else {
81         tmp = (int64_t)a->doubleword[0] >> 63;
82         d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64);
83         d->doubleword[0] = tmp;
84     }
85 }
86 
87 static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count)
88 {
89     uint64_t tmp;
90 
91     g_assert(count < 128);
92     if (count == 0) {
93         d->doubleword[0] = a->doubleword[0];
94         d->doubleword[1] = a->doubleword[1];
95     } else if (count == 64) {
96         d->doubleword[1] = a->doubleword[0];
97         d->doubleword[0] = 0;
98     } else if (count < 64) {
99         tmp = a->doubleword[1] >> count;
100         d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
101         d->doubleword[0] = a->doubleword[0] >> count;
102     } else {
103         d->doubleword[1] = a->doubleword[0] >> (count - 64);
104         d->doubleword[0] = 0;
105     }
106 }
107 #define DEF_VAVG(BITS)                                                         \
108 void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3,         \
109                              uint32_t desc)                                    \
110 {                                                                              \
111     int i;                                                                     \
112                                                                                \
113     for (i = 0; i < (128 / BITS); i++) {                                       \
114         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
115         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
116                                                                                \
117         s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
118     }                                                                          \
119 }
120 DEF_VAVG(8)
121 DEF_VAVG(16)
122 
123 #define DEF_VAVGL(BITS)                                                        \
124 void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3,        \
125                               uint32_t desc)                                   \
126 {                                                                              \
127     int i;                                                                     \
128                                                                                \
129     for (i = 0; i < (128 / BITS); i++) {                                       \
130         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
131         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
132                                                                                \
133         s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
134     }                                                                          \
135 }
136 DEF_VAVGL(8)
137 DEF_VAVGL(16)
138 
139 #define DEF_VCLZ(BITS)                                                         \
140 void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc)          \
141 {                                                                              \
142     int i;                                                                     \
143                                                                                \
144     for (i = 0; i < (128 / BITS); i++) {                                       \
145         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
146                                                                                \
147         s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS);             \
148     }                                                                          \
149 }
150 DEF_VCLZ(8)
151 DEF_VCLZ(16)
152 
153 #define DEF_VCTZ(BITS)                                                         \
154 void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc)          \
155 {                                                                              \
156     int i;                                                                     \
157                                                                                \
158     for (i = 0; i < (128 / BITS); i++) {                                       \
159         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
160                                                                                \
161         s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS);              \
162     }                                                                          \
163 }
164 DEF_VCTZ(8)
165 DEF_VCTZ(16)
166 
167 /* like binary multiplication, but XOR instead of addition */
168 
169 static S390Vector galois_multiply64(uint64_t a, uint64_t b)
170 {
171     S390Vector res = {};
172     S390Vector va = {
173         .doubleword[1] = a,
174     };
175     S390Vector vb = {
176         .doubleword[1] = b,
177     };
178 
179     while (!s390_vec_is_zero(&vb)) {
180         if (vb.doubleword[1] & 0x1) {
181             s390_vec_xor(&res, &res, &va);
182         }
183         s390_vec_shl(&va, &va, 1);
184         s390_vec_shr(&vb, &vb, 1);
185     }
186     return res;
187 }
188 
/*
 * Galois field multiply-sum-and-accumulate on byte elements of one
 * doubleword: XOR the even-element products, the odd-element products and
 * the accumulator @a.
 *
 * No carry crosses the two doublewords of a vector, so the order in which
 * callers process them does not matter, and registers never overlap
 * partially.
 */
static inline uint64_t do_gfma8(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t even = clmul_8x4_even(n, m);
    const uint64_t odd = clmul_8x4_odd(n, m);

    return even ^ odd ^ a;
}
197 
198 void HELPER(gvec_vgfm8)(void *v1, const void *v2, const void *v3, uint32_t d)
199 {
200     uint64_t *q1 = v1;
201     const uint64_t *q2 = v2, *q3 = v3;
202 
203     q1[0] = do_gfma8(q2[0], q3[0], 0);
204     q1[1] = do_gfma8(q2[1], q3[1], 0);
205 }
206 
207 void HELPER(gvec_vgfma8)(void *v1, const void *v2, const void *v3,
208                          const void *v4, uint32_t desc)
209 {
210     uint64_t *q1 = v1;
211     const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
212 
213     q1[0] = do_gfma8(q2[0], q3[0], q4[0]);
214     q1[1] = do_gfma8(q2[1], q3[1], q4[1]);
215 }
216 
/*
 * Galois field multiply-sum-and-accumulate on halfword elements of one
 * doubleword: XOR the even-element products, the odd-element products and
 * the accumulator @a.
 */
static inline uint64_t do_gfma16(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t even = clmul_16x2_even(n, m);
    const uint64_t odd = clmul_16x2_odd(n, m);

    return even ^ odd ^ a;
}
221 
222 void HELPER(gvec_vgfm16)(void *v1, const void *v2, const void *v3, uint32_t d)
223 {
224     uint64_t *q1 = v1;
225     const uint64_t *q2 = v2, *q3 = v3;
226 
227     q1[0] = do_gfma16(q2[0], q3[0], 0);
228     q1[1] = do_gfma16(q2[1], q3[1], 0);
229 }
230 
231 void HELPER(gvec_vgfma16)(void *v1, const void *v2, const void *v3,
232                          const void *v4, uint32_t d)
233 {
234     uint64_t *q1 = v1;
235     const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
236 
237     q1[0] = do_gfma16(q2[0], q3[0], q4[0]);
238     q1[1] = do_gfma16(q2[1], q3[1], q4[1]);
239 }
240 
/*
 * Galois field multiply-sum-and-accumulate on word elements of one
 * doubleword: XOR the product of the two arguments as passed, the product
 * of their upper 32 bits, and the accumulator @a.
 * NOTE(review): presumably clmul_32() only consumes the low 32 bits of each
 * argument -- confirm against crypto/clmul.h.
 */
static inline uint64_t do_gfma32(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t lo = clmul_32(n, m);
    const uint64_t hi = clmul_32(n >> 32, m >> 32);

    return lo ^ hi ^ a;
}
245 
246 void HELPER(gvec_vgfm32)(void *v1, const void *v2, const void *v3, uint32_t d)
247 {
248     uint64_t *q1 = v1;
249     const uint64_t *q2 = v2, *q3 = v3;
250 
251     q1[0] = do_gfma32(q2[0], q3[0], 0);
252     q1[1] = do_gfma32(q2[1], q3[1], 0);
253 }
254 
255 void HELPER(gvec_vgfma32)(void *v1, const void *v2, const void *v3,
256                          const void *v4, uint32_t d)
257 {
258     uint64_t *q1 = v1;
259     const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
260 
261     q1[0] = do_gfma32(q2[0], q3[0], q4[0]);
262     q1[1] = do_gfma32(q2[1], q3[1], q4[1]);
263 }
264 
265 void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
266                          uint32_t desc)
267 {
268     S390Vector tmp1, tmp2;
269     uint64_t a, b;
270 
271     a = s390_vec_read_element64(v2, 0);
272     b = s390_vec_read_element64(v3, 0);
273     tmp1 = galois_multiply64(a, b);
274     a = s390_vec_read_element64(v2, 1);
275     b = s390_vec_read_element64(v3, 1);
276     tmp2 = galois_multiply64(a, b);
277     s390_vec_xor(v1, &tmp1, &tmp2);
278 }
279 
280 void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
281                           const void *v4, uint32_t desc)
282 {
283     S390Vector tmp1, tmp2;
284     uint64_t a, b;
285 
286     a = s390_vec_read_element64(v2, 0);
287     b = s390_vec_read_element64(v3, 0);
288     tmp1 = galois_multiply64(a, b);
289     a = s390_vec_read_element64(v2, 1);
290     b = s390_vec_read_element64(v3, 1);
291     tmp2 = galois_multiply64(a, b);
292     s390_vec_xor(&tmp1, &tmp1, &tmp2);
293     s390_vec_xor(v1, &tmp1, v4);
294 }
295 
296 #define DEF_VMAL(BITS)                                                         \
297 void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3,         \
298                              const void *v4, uint32_t desc)                    \
299 {                                                                              \
300     int i;                                                                     \
301                                                                                \
302     for (i = 0; i < (128 / BITS); i++) {                                       \
303         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
304         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
305         const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
306                                                                                \
307         s390_vec_write_element##BITS(v1, i, a * b + c);                        \
308     }                                                                          \
309 }
310 DEF_VMAL(8)
311 DEF_VMAL(16)
312 
313 #define DEF_VMAH(BITS)                                                         \
314 void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3,         \
315                              const void *v4, uint32_t desc)                    \
316 {                                                                              \
317     int i;                                                                     \
318                                                                                \
319     for (i = 0; i < (128 / BITS); i++) {                                       \
320         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
321         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
322         const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i);   \
323                                                                                \
324         s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
325     }                                                                          \
326 }
327 DEF_VMAH(8)
328 DEF_VMAH(16)
329 
330 #define DEF_VMALH(BITS)                                                        \
331 void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3,        \
332                               const void *v4, uint32_t desc)                   \
333 {                                                                              \
334     int i;                                                                     \
335                                                                                \
336     for (i = 0; i < (128 / BITS); i++) {                                       \
337         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
338         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
339         const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
340                                                                                \
341         s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
342     }                                                                          \
343 }
344 DEF_VMALH(8)
345 DEF_VMALH(16)
346 
347 #define DEF_VMAE(BITS, TBITS)                                                  \
348 void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3,         \
349                              const void *v4, uint32_t desc)                    \
350 {                                                                              \
351     int i, j;                                                                  \
352                                                                                \
353     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
354         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
355         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
356         int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);                \
357                                                                                \
358         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
359     }                                                                          \
360 }
361 DEF_VMAE(8, 16)
362 DEF_VMAE(16, 32)
363 DEF_VMAE(32, 64)
364 
365 #define DEF_VMALE(BITS, TBITS)                                                 \
366 void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3,        \
367                               const void *v4, uint32_t desc)                   \
368 {                                                                              \
369     int i, j;                                                                  \
370                                                                                \
371     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
372         uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
373         uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
374         uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);               \
375                                                                                \
376         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
377     }                                                                          \
378 }
379 DEF_VMALE(8, 16)
380 DEF_VMALE(16, 32)
381 DEF_VMALE(32, 64)
382 
383 #define DEF_VMAO(BITS, TBITS)                                                  \
384 void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3,         \
385                              const void *v4, uint32_t desc)                    \
386 {                                                                              \
387     int i, j;                                                                  \
388                                                                                \
389     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
390         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
391         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
392         int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);                \
393                                                                                \
394         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
395     }                                                                          \
396 }
397 DEF_VMAO(8, 16)
398 DEF_VMAO(16, 32)
399 DEF_VMAO(32, 64)
400 
401 #define DEF_VMALO(BITS, TBITS)                                                 \
402 void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3,        \
403                               const void *v4, uint32_t desc)                   \
404 {                                                                              \
405     int i, j;                                                                  \
406                                                                                \
407     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
408         uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
409         uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
410         uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);               \
411                                                                                \
412         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
413     }                                                                          \
414 }
415 DEF_VMALO(8, 16)
416 DEF_VMALO(16, 32)
417 DEF_VMALO(32, 64)
418 
419 #define DEF_VMH(BITS)                                                          \
420 void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3,          \
421                             uint32_t desc)                                     \
422 {                                                                              \
423     int i;                                                                     \
424                                                                                \
425     for (i = 0; i < (128 / BITS); i++) {                                       \
426         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
427         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
428                                                                                \
429         s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
430     }                                                                          \
431 }
432 DEF_VMH(8)
433 DEF_VMH(16)
434 
435 #define DEF_VMLH(BITS)                                                         \
436 void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3,         \
437                              uint32_t desc)                                    \
438 {                                                                              \
439     int i;                                                                     \
440                                                                                \
441     for (i = 0; i < (128 / BITS); i++) {                                       \
442         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
443         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
444                                                                                \
445         s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
446     }                                                                          \
447 }
448 DEF_VMLH(8)
449 DEF_VMLH(16)
450 
451 #define DEF_VME(BITS, TBITS)                                                   \
452 void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3,          \
453                             uint32_t desc)                                     \
454 {                                                                              \
455     int i, j;                                                                  \
456                                                                                \
457     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
458         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
459         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
460                                                                                \
461         s390_vec_write_element##TBITS(v1, i, a * b);                           \
462     }                                                                          \
463 }
464 DEF_VME(8, 16)
465 DEF_VME(16, 32)
466 DEF_VME(32, 64)
467 
468 #define DEF_VMLE(BITS, TBITS)                                                  \
469 void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3,         \
470                              uint32_t desc)                                    \
471 {                                                                              \
472     int i, j;                                                                  \
473                                                                                \
474     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
475         const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
476         const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
477                                                                                \
478         s390_vec_write_element##TBITS(v1, i, a * b);                           \
479     }                                                                          \
480 }
481 DEF_VMLE(8, 16)
482 DEF_VMLE(16, 32)
483 DEF_VMLE(32, 64)
484 
485 #define DEF_VMO(BITS, TBITS)                                                   \
486 void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3,          \
487                             uint32_t desc)                                     \
488 {                                                                              \
489     int i, j;                                                                  \
490                                                                                \
491     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
492         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
493         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
494                                                                                \
495         s390_vec_write_element##TBITS(v1, i, a * b);                           \
496     }                                                                          \
497 }
498 DEF_VMO(8, 16)
499 DEF_VMO(16, 32)
500 DEF_VMO(32, 64)
501 
502 #define DEF_VMLO(BITS, TBITS)                                                  \
503 void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3,         \
504                              uint32_t desc)                                    \
505 {                                                                              \
506     int i, j;                                                                  \
507                                                                                \
508     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
509         const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
510         const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
511                                                                                \
512         s390_vec_write_element##TBITS(v1, i, a * b);                           \
513     }                                                                          \
514 }
515 DEF_VMLO(8, 16)
516 DEF_VMLO(16, 32)
517 DEF_VMLO(32, 64)
518 
519 #define DEF_VPOPCT(BITS)                                                       \
520 void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc)        \
521 {                                                                              \
522     int i;                                                                     \
523                                                                                \
524     for (i = 0; i < (128 / BITS); i++) {                                       \
525         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
526                                                                                \
527         s390_vec_write_element##BITS(v1, i, ctpop32(a));                       \
528     }                                                                          \
529 }
530 DEF_VPOPCT(8)
531 DEF_VPOPCT(16)
532 
533 #define DEF_VERIM(BITS)                                                        \
534 void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3,        \
535                               uint32_t desc)                                   \
536 {                                                                              \
537     const uint8_t count = simd_data(desc);                                     \
538     int i;                                                                     \
539                                                                                \
540     for (i = 0; i < (128 / BITS); i++) {                                       \
541         const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i);           \
542         const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i);           \
543         const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i);        \
544         const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask);   \
545                                                                                \
546         s390_vec_write_element##BITS(v1, i, d);                                \
547     }                                                                          \
548 }
549 DEF_VERIM(8)
550 DEF_VERIM(16)
551 
552 void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count,
553                       uint32_t desc)
554 {
555     s390_vec_shl(v1, v2, count);
556 }
557 
558 void HELPER(gvec_vsl_ve2)(void *v1, const void *v2, const void *v3,
559                           uint32_t desc)
560 {
561     S390Vector tmp;
562     uint32_t sh, e0, e1 = 0;
563     int i;
564 
565     for (i = 15; i >= 0; --i, e1 = e0) {
566         e0 = s390_vec_read_element8(v2, i);
567         sh = s390_vec_read_element8(v3, i) & 7;
568 
569         s390_vec_write_element8(&tmp, i, rol32(e0 | (e1 << 24), sh));
570     }
571 
572     *(S390Vector *)v1 = tmp;
573 }
574 
575 void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count,
576                        uint32_t desc)
577 {
578     s390_vec_sar(v1, v2, count);
579 }
580 
581 void HELPER(gvec_vsra_ve2)(void *v1, const void *v2, const void *v3,
582                            uint32_t desc)
583 {
584     S390Vector tmp;
585     uint32_t sh, e0, e1 = 0;
586     int i = 0;
587 
588     /* Byte 0 is special only. */
589     e0 = (int32_t)(int8_t)s390_vec_read_element8(v2, i);
590     sh = s390_vec_read_element8(v3, i) & 7;
591     s390_vec_write_element8(&tmp, i, e0 >> sh);
592 
593     e1 = e0;
594     for (i = 1; i < 16; ++i, e1 = e0) {
595         e0 = s390_vec_read_element8(v2, i);
596         sh = s390_vec_read_element8(v3, i) & 7;
597         s390_vec_write_element8(&tmp, i, (e0 | e1 << 8) >> sh);
598     }
599 
600     *(S390Vector *)v1 = tmp;
601 }
602 
603 void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count,
604                        uint32_t desc)
605 {
606     s390_vec_shr(v1, v2, count);
607 }
608 
609 void HELPER(gvec_vsrl_ve2)(void *v1, const void *v2, const void *v3,
610                            uint32_t desc)
611 {
612     S390Vector tmp;
613     uint32_t sh, e0, e1 = 0;
614 
615     for (int i = 0; i < 16; ++i, e1 = e0) {
616         e0 = s390_vec_read_element8(v2, i);
617         sh = s390_vec_read_element8(v3, i) & 7;
618 
619         s390_vec_write_element8(&tmp, i, (e0 | (e1 << 8)) >> sh);
620     }
621 
622     *(S390Vector *)v1 = tmp;
623 }
624 
625 #define DEF_VSCBI(BITS)                                                        \
626 void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3,        \
627                               uint32_t desc)                                   \
628 {                                                                              \
629     int i;                                                                     \
630                                                                                \
631     for (i = 0; i < (128 / BITS); i++) {                                       \
632         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
633         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
634                                                                                \
635         s390_vec_write_element##BITS(v1, i, a >= b);                           \
636     }                                                                          \
637 }
638 DEF_VSCBI(8)
639 DEF_VSCBI(16)
640 
641 void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env,
642                       uint32_t desc)
643 {
644     S390Vector tmp;
645 
646     s390_vec_and(&tmp, v1, v2);
647     if (s390_vec_is_zero(&tmp)) {
648         /* Selected bits all zeros; or all mask bits zero */
649         env->cc_op = 0;
650     } else if (s390_vec_equal(&tmp, v2)) {
651         /* Selected bits all ones */
652         env->cc_op = 3;
653     } else {
654         /* Selected bits a mix of zeros and ones */
655         env->cc_op = 1;
656     }
657 }
658