xref: /openbmc/qemu/target/s390x/tcg/vec_int_helper.c (revision 8d7f2e76)
1 /*
2  * QEMU TCG support -- s390x vector integer instruction support
3  *
4  * Copyright (C) 2019 Red Hat Inc
5  *
6  * Authors:
7  *   David Hildenbrand <david@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 #include "qemu/osdep.h"
13 #include "cpu.h"
14 #include "vec.h"
15 #include "exec/helper-proto.h"
16 #include "tcg/tcg-gvec-desc.h"
17 #include "crypto/clmul.h"
18 
19 static bool s390_vec_is_zero(const S390Vector *v)
20 {
21     return !v->doubleword[0] && !v->doubleword[1];
22 }
23 
24 static void s390_vec_and(S390Vector *res, const S390Vector *a,
25                          const S390Vector *b)
26 {
27     res->doubleword[0] = a->doubleword[0] & b->doubleword[0];
28     res->doubleword[1] = a->doubleword[1] & b->doubleword[1];
29 }
30 
31 static bool s390_vec_equal(const S390Vector *a, const S390Vector *b)
32 {
33     return a->doubleword[0] == b->doubleword[0] &&
34            a->doubleword[1] == b->doubleword[1];
35 }
36 
37 static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count)
38 {
39     uint64_t tmp;
40 
41     g_assert(count < 128);
42     if (count == 0) {
43         d->doubleword[0] = a->doubleword[0];
44         d->doubleword[1] = a->doubleword[1];
45     } else if (count == 64) {
46         d->doubleword[0] = a->doubleword[1];
47         d->doubleword[1] = 0;
48     } else if (count < 64) {
49         tmp = extract64(a->doubleword[1], 64 - count, count);
50         d->doubleword[1] = a->doubleword[1] << count;
51         d->doubleword[0] = (a->doubleword[0] << count) | tmp;
52     } else {
53         d->doubleword[0] = a->doubleword[1] << (count - 64);
54         d->doubleword[1] = 0;
55     }
56 }
57 
58 static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count)
59 {
60     uint64_t tmp;
61 
62     if (count == 0) {
63         d->doubleword[0] = a->doubleword[0];
64         d->doubleword[1] = a->doubleword[1];
65     } else if (count == 64) {
66         tmp = (int64_t)a->doubleword[0] >> 63;
67         d->doubleword[1] = a->doubleword[0];
68         d->doubleword[0] = tmp;
69     } else if (count < 64) {
70         tmp = a->doubleword[1] >> count;
71         d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
72         d->doubleword[0] = (int64_t)a->doubleword[0] >> count;
73     } else {
74         tmp = (int64_t)a->doubleword[0] >> 63;
75         d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64);
76         d->doubleword[0] = tmp;
77     }
78 }
79 
80 static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count)
81 {
82     uint64_t tmp;
83 
84     g_assert(count < 128);
85     if (count == 0) {
86         d->doubleword[0] = a->doubleword[0];
87         d->doubleword[1] = a->doubleword[1];
88     } else if (count == 64) {
89         d->doubleword[1] = a->doubleword[0];
90         d->doubleword[0] = 0;
91     } else if (count < 64) {
92         tmp = a->doubleword[1] >> count;
93         d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
94         d->doubleword[0] = a->doubleword[0] >> count;
95     } else {
96         d->doubleword[1] = a->doubleword[0] >> (count - 64);
97         d->doubleword[0] = 0;
98     }
99 }
100 #define DEF_VAVG(BITS)                                                         \
101 void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3,         \
102                              uint32_t desc)                                    \
103 {                                                                              \
104     int i;                                                                     \
105                                                                                \
106     for (i = 0; i < (128 / BITS); i++) {                                       \
107         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
108         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
109                                                                                \
110         s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
111     }                                                                          \
112 }
113 DEF_VAVG(8)
114 DEF_VAVG(16)
115 
116 #define DEF_VAVGL(BITS)                                                        \
117 void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3,        \
118                               uint32_t desc)                                   \
119 {                                                                              \
120     int i;                                                                     \
121                                                                                \
122     for (i = 0; i < (128 / BITS); i++) {                                       \
123         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
124         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
125                                                                                \
126         s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
127     }                                                                          \
128 }
129 DEF_VAVGL(8)
130 DEF_VAVGL(16)
131 
132 #define DEF_VCLZ(BITS)                                                         \
133 void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc)          \
134 {                                                                              \
135     int i;                                                                     \
136                                                                                \
137     for (i = 0; i < (128 / BITS); i++) {                                       \
138         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
139                                                                                \
140         s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS);             \
141     }                                                                          \
142 }
143 DEF_VCLZ(8)
144 DEF_VCLZ(16)
145 
146 #define DEF_VCTZ(BITS)                                                         \
147 void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc)          \
148 {                                                                              \
149     int i;                                                                     \
150                                                                                \
151     for (i = 0; i < (128 / BITS); i++) {                                       \
152         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
153                                                                                \
154         s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS);              \
155     }                                                                          \
156 }
157 DEF_VCTZ(8)
158 DEF_VCTZ(16)
159 
/* Carry-less multiply: binary multiplication with XOR in place of addition. */
161 
162 /*
163  * There is no carry across the two doublewords, so their order does
164  * not matter.  Nor is there partial overlap between registers.
165  */
/*
 * XOR together the clmul_8x4_even() and clmul_8x4_odd() results for (@n, @m)
 * and the accumulator @a.
 */
static inline uint64_t do_gfma8(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t even = clmul_8x4_even(n, m);
    const uint64_t odd = clmul_8x4_odd(n, m);

    return even ^ odd ^ a;
}
170 
171 void HELPER(gvec_vgfm8)(void *v1, const void *v2, const void *v3, uint32_t d)
172 {
173     uint64_t *q1 = v1;
174     const uint64_t *q2 = v2, *q3 = v3;
175 
176     q1[0] = do_gfma8(q2[0], q3[0], 0);
177     q1[1] = do_gfma8(q2[1], q3[1], 0);
178 }
179 
180 void HELPER(gvec_vgfma8)(void *v1, const void *v2, const void *v3,
181                          const void *v4, uint32_t desc)
182 {
183     uint64_t *q1 = v1;
184     const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
185 
186     q1[0] = do_gfma8(q2[0], q3[0], q4[0]);
187     q1[1] = do_gfma8(q2[1], q3[1], q4[1]);
188 }
189 
/*
 * XOR together the clmul_16x2_even() and clmul_16x2_odd() results for
 * (@n, @m) and the accumulator @a.
 */
static inline uint64_t do_gfma16(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t even = clmul_16x2_even(n, m);
    const uint64_t odd = clmul_16x2_odd(n, m);

    return even ^ odd ^ a;
}
194 
195 void HELPER(gvec_vgfm16)(void *v1, const void *v2, const void *v3, uint32_t d)
196 {
197     uint64_t *q1 = v1;
198     const uint64_t *q2 = v2, *q3 = v3;
199 
200     q1[0] = do_gfma16(q2[0], q3[0], 0);
201     q1[1] = do_gfma16(q2[1], q3[1], 0);
202 }
203 
204 void HELPER(gvec_vgfma16)(void *v1, const void *v2, const void *v3,
205                          const void *v4, uint32_t d)
206 {
207     uint64_t *q1 = v1;
208     const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
209 
210     q1[0] = do_gfma16(q2[0], q3[0], q4[0]);
211     q1[1] = do_gfma16(q2[1], q3[1], q4[1]);
212 }
213 
/*
 * XOR together clmul_32() of the low 32-bit halves of @n and @m, clmul_32()
 * of their high 32-bit halves, and the accumulator @a.
 */
static inline uint64_t do_gfma32(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t low = clmul_32(n, m);
    const uint64_t high = clmul_32(n >> 32, m >> 32);

    return low ^ high ^ a;
}
218 
219 void HELPER(gvec_vgfm32)(void *v1, const void *v2, const void *v3, uint32_t d)
220 {
221     uint64_t *q1 = v1;
222     const uint64_t *q2 = v2, *q3 = v3;
223 
224     q1[0] = do_gfma32(q2[0], q3[0], 0);
225     q1[1] = do_gfma32(q2[1], q3[1], 0);
226 }
227 
228 void HELPER(gvec_vgfma32)(void *v1, const void *v2, const void *v3,
229                          const void *v4, uint32_t d)
230 {
231     uint64_t *q1 = v1;
232     const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
233 
234     q1[0] = do_gfma32(q2[0], q3[0], q4[0]);
235     q1[1] = do_gfma32(q2[1], q3[1], q4[1]);
236 }
237 
238 void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
239                          uint32_t desc)
240 {
241     uint64_t *q1 = v1;
242     const uint64_t *q2 = v2, *q3 = v3;
243     Int128 r;
244 
245     r = int128_xor(clmul_64(q2[0], q3[0]), clmul_64(q2[1], q3[1]));
246     q1[0] = int128_gethi(r);
247     q1[1] = int128_getlo(r);
248 }
249 
250 void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
251                           const void *v4, uint32_t desc)
252 {
253     uint64_t *q1 = v1;
254     const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
255     Int128 r;
256 
257     r = int128_xor(clmul_64(q2[0], q3[0]), clmul_64(q2[1], q3[1]));
258     q1[0] = q4[0] ^ int128_gethi(r);
259     q1[1] = q4[1] ^ int128_getlo(r);
260 }
261 
262 #define DEF_VMAL(BITS)                                                         \
263 void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3,         \
264                              const void *v4, uint32_t desc)                    \
265 {                                                                              \
266     int i;                                                                     \
267                                                                                \
268     for (i = 0; i < (128 / BITS); i++) {                                       \
269         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
270         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
271         const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
272                                                                                \
273         s390_vec_write_element##BITS(v1, i, a * b + c);                        \
274     }                                                                          \
275 }
276 DEF_VMAL(8)
277 DEF_VMAL(16)
278 
279 #define DEF_VMAH(BITS)                                                         \
280 void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3,         \
281                              const void *v4, uint32_t desc)                    \
282 {                                                                              \
283     int i;                                                                     \
284                                                                                \
285     for (i = 0; i < (128 / BITS); i++) {                                       \
286         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
287         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
288         const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i);   \
289                                                                                \
290         s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
291     }                                                                          \
292 }
293 DEF_VMAH(8)
294 DEF_VMAH(16)
295 
296 #define DEF_VMALH(BITS)                                                        \
297 void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3,        \
298                               const void *v4, uint32_t desc)                   \
299 {                                                                              \
300     int i;                                                                     \
301                                                                                \
302     for (i = 0; i < (128 / BITS); i++) {                                       \
303         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
304         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
305         const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
306                                                                                \
307         s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
308     }                                                                          \
309 }
310 DEF_VMALH(8)
311 DEF_VMALH(16)
312 
313 #define DEF_VMAE(BITS, TBITS)                                                  \
314 void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3,         \
315                              const void *v4, uint32_t desc)                    \
316 {                                                                              \
317     int i, j;                                                                  \
318                                                                                \
319     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
320         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
321         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
322         int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);                \
323                                                                                \
324         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
325     }                                                                          \
326 }
327 DEF_VMAE(8, 16)
328 DEF_VMAE(16, 32)
329 DEF_VMAE(32, 64)
330 
331 #define DEF_VMALE(BITS, TBITS)                                                 \
332 void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3,        \
333                               const void *v4, uint32_t desc)                   \
334 {                                                                              \
335     int i, j;                                                                  \
336                                                                                \
337     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
338         uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
339         uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
340         uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);               \
341                                                                                \
342         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
343     }                                                                          \
344 }
345 DEF_VMALE(8, 16)
346 DEF_VMALE(16, 32)
347 DEF_VMALE(32, 64)
348 
349 #define DEF_VMAO(BITS, TBITS)                                                  \
350 void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3,         \
351                              const void *v4, uint32_t desc)                    \
352 {                                                                              \
353     int i, j;                                                                  \
354                                                                                \
355     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
356         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
357         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
358         int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);                \
359                                                                                \
360         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
361     }                                                                          \
362 }
363 DEF_VMAO(8, 16)
364 DEF_VMAO(16, 32)
365 DEF_VMAO(32, 64)
366 
367 #define DEF_VMALO(BITS, TBITS)                                                 \
368 void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3,        \
369                               const void *v4, uint32_t desc)                   \
370 {                                                                              \
371     int i, j;                                                                  \
372                                                                                \
373     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
374         uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
375         uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
376         uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);               \
377                                                                                \
378         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
379     }                                                                          \
380 }
381 DEF_VMALO(8, 16)
382 DEF_VMALO(16, 32)
383 DEF_VMALO(32, 64)
384 
385 #define DEF_VMH(BITS)                                                          \
386 void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3,          \
387                             uint32_t desc)                                     \
388 {                                                                              \
389     int i;                                                                     \
390                                                                                \
391     for (i = 0; i < (128 / BITS); i++) {                                       \
392         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
393         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
394                                                                                \
395         s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
396     }                                                                          \
397 }
398 DEF_VMH(8)
399 DEF_VMH(16)
400 
401 #define DEF_VMLH(BITS)                                                         \
402 void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3,         \
403                              uint32_t desc)                                    \
404 {                                                                              \
405     int i;                                                                     \
406                                                                                \
407     for (i = 0; i < (128 / BITS); i++) {                                       \
408         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
409         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
410                                                                                \
411         s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
412     }                                                                          \
413 }
414 DEF_VMLH(8)
415 DEF_VMLH(16)
416 
417 #define DEF_VME(BITS, TBITS)                                                   \
418 void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3,          \
419                             uint32_t desc)                                     \
420 {                                                                              \
421     int i, j;                                                                  \
422                                                                                \
423     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
424         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
425         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
426                                                                                \
427         s390_vec_write_element##TBITS(v1, i, a * b);                           \
428     }                                                                          \
429 }
430 DEF_VME(8, 16)
431 DEF_VME(16, 32)
432 DEF_VME(32, 64)
433 
434 #define DEF_VMLE(BITS, TBITS)                                                  \
435 void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3,         \
436                              uint32_t desc)                                    \
437 {                                                                              \
438     int i, j;                                                                  \
439                                                                                \
440     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
441         const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
442         const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
443                                                                                \
444         s390_vec_write_element##TBITS(v1, i, a * b);                           \
445     }                                                                          \
446 }
447 DEF_VMLE(8, 16)
448 DEF_VMLE(16, 32)
449 DEF_VMLE(32, 64)
450 
451 #define DEF_VMO(BITS, TBITS)                                                   \
452 void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3,          \
453                             uint32_t desc)                                     \
454 {                                                                              \
455     int i, j;                                                                  \
456                                                                                \
457     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
458         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
459         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
460                                                                                \
461         s390_vec_write_element##TBITS(v1, i, a * b);                           \
462     }                                                                          \
463 }
464 DEF_VMO(8, 16)
465 DEF_VMO(16, 32)
466 DEF_VMO(32, 64)
467 
468 #define DEF_VMLO(BITS, TBITS)                                                  \
469 void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3,         \
470                              uint32_t desc)                                    \
471 {                                                                              \
472     int i, j;                                                                  \
473                                                                                \
474     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
475         const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
476         const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
477                                                                                \
478         s390_vec_write_element##TBITS(v1, i, a * b);                           \
479     }                                                                          \
480 }
481 DEF_VMLO(8, 16)
482 DEF_VMLO(16, 32)
483 DEF_VMLO(32, 64)
484 
485 #define DEF_VPOPCT(BITS)                                                       \
486 void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc)        \
487 {                                                                              \
488     int i;                                                                     \
489                                                                                \
490     for (i = 0; i < (128 / BITS); i++) {                                       \
491         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
492                                                                                \
493         s390_vec_write_element##BITS(v1, i, ctpop32(a));                       \
494     }                                                                          \
495 }
496 DEF_VPOPCT(8)
497 DEF_VPOPCT(16)
498 
499 #define DEF_VERIM(BITS)                                                        \
500 void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3,        \
501                               uint32_t desc)                                   \
502 {                                                                              \
503     const uint8_t count = simd_data(desc);                                     \
504     int i;                                                                     \
505                                                                                \
506     for (i = 0; i < (128 / BITS); i++) {                                       \
507         const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i);           \
508         const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i);           \
509         const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i);        \
510         const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask);   \
511                                                                                \
512         s390_vec_write_element##BITS(v1, i, d);                                \
513     }                                                                          \
514 }
515 DEF_VERIM(8)
516 DEF_VERIM(16)
517 
518 void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count,
519                       uint32_t desc)
520 {
521     s390_vec_shl(v1, v2, count);
522 }
523 
524 void HELPER(gvec_vsl_ve2)(void *v1, const void *v2, const void *v3,
525                           uint32_t desc)
526 {
527     S390Vector tmp;
528     uint32_t sh, e0, e1 = 0;
529     int i;
530 
531     for (i = 15; i >= 0; --i, e1 = e0) {
532         e0 = s390_vec_read_element8(v2, i);
533         sh = s390_vec_read_element8(v3, i) & 7;
534 
535         s390_vec_write_element8(&tmp, i, rol32(e0 | (e1 << 24), sh));
536     }
537 
538     *(S390Vector *)v1 = tmp;
539 }
540 
541 void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count,
542                        uint32_t desc)
543 {
544     s390_vec_sar(v1, v2, count);
545 }
546 
547 void HELPER(gvec_vsra_ve2)(void *v1, const void *v2, const void *v3,
548                            uint32_t desc)
549 {
550     S390Vector tmp;
551     uint32_t sh, e0, e1 = 0;
552     int i = 0;
553 
554     /* Byte 0 is special only. */
555     e0 = (int32_t)(int8_t)s390_vec_read_element8(v2, i);
556     sh = s390_vec_read_element8(v3, i) & 7;
557     s390_vec_write_element8(&tmp, i, e0 >> sh);
558 
559     e1 = e0;
560     for (i = 1; i < 16; ++i, e1 = e0) {
561         e0 = s390_vec_read_element8(v2, i);
562         sh = s390_vec_read_element8(v3, i) & 7;
563         s390_vec_write_element8(&tmp, i, (e0 | e1 << 8) >> sh);
564     }
565 
566     *(S390Vector *)v1 = tmp;
567 }
568 
569 void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count,
570                        uint32_t desc)
571 {
572     s390_vec_shr(v1, v2, count);
573 }
574 
575 void HELPER(gvec_vsrl_ve2)(void *v1, const void *v2, const void *v3,
576                            uint32_t desc)
577 {
578     S390Vector tmp;
579     uint32_t sh, e0, e1 = 0;
580 
581     for (int i = 0; i < 16; ++i, e1 = e0) {
582         e0 = s390_vec_read_element8(v2, i);
583         sh = s390_vec_read_element8(v3, i) & 7;
584 
585         s390_vec_write_element8(&tmp, i, (e0 | (e1 << 8)) >> sh);
586     }
587 
588     *(S390Vector *)v1 = tmp;
589 }
590 
591 #define DEF_VSCBI(BITS)                                                        \
592 void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3,        \
593                               uint32_t desc)                                   \
594 {                                                                              \
595     int i;                                                                     \
596                                                                                \
597     for (i = 0; i < (128 / BITS); i++) {                                       \
598         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
599         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
600                                                                                \
601         s390_vec_write_element##BITS(v1, i, a >= b);                           \
602     }                                                                          \
603 }
604 DEF_VSCBI(8)
605 DEF_VSCBI(16)
606 
607 void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env,
608                       uint32_t desc)
609 {
610     S390Vector tmp;
611 
612     s390_vec_and(&tmp, v1, v2);
613     if (s390_vec_is_zero(&tmp)) {
614         /* Selected bits all zeros; or all mask bits zero */
615         env->cc_op = 0;
616     } else if (s390_vec_equal(&tmp, v2)) {
617         /* Selected bits all ones */
618         env->cc_op = 3;
619     } else {
620         /* Selected bits a mix of zeros and ones */
621         env->cc_op = 1;
622     }
623 }
624