1 /*
2 * QEMU TCG support -- s390x vector integer instruction support
3 *
4 * Copyright (C) 2019 Red Hat Inc
5 *
6 * Authors:
7 * David Hildenbrand <david@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12 #include "qemu/osdep.h"
13 #include "cpu.h"
14 #include "vec.h"
15 #include "exec/helper-proto.h"
16 #include "tcg/tcg-gvec-desc.h"
17 #include "crypto/clmul.h"
18
s390_vec_is_zero(const S390Vector * v)19 static bool s390_vec_is_zero(const S390Vector *v)
20 {
21 return !v->doubleword[0] && !v->doubleword[1];
22 }
23
/* Store the bitwise AND of @a and @b into @res, one doubleword at a time. */
static void s390_vec_and(S390Vector *res, const S390Vector *a,
                         const S390Vector *b)
{
    int dw;

    for (dw = 0; dw < 2; dw++) {
        res->doubleword[dw] = a->doubleword[dw] & b->doubleword[dw];
    }
}
30
s390_vec_equal(const S390Vector * a,const S390Vector * b)31 static bool s390_vec_equal(const S390Vector *a, const S390Vector *b)
32 {
33 return a->doubleword[0] == b->doubleword[0] &&
34 a->doubleword[1] == b->doubleword[1];
35 }
36
/*
 * Shift the 128-bit value in @a left by @count bits and store it in @d.
 * doubleword[0] is the upper half, doubleword[1] the lower half; vacated
 * low-order bits are filled with zeros. @count must be below 128.
 */
static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count)
{
    /* Snapshot the source first so that @d may be the same vector as @a. */
    const uint64_t hi = a->doubleword[0];
    const uint64_t lo = a->doubleword[1];

    g_assert(count < 128);
    if (count >= 64) {
        /* Only bits of the lower half survive, moved into the upper half. */
        d->doubleword[0] = lo << (count - 64);
        d->doubleword[1] = 0;
    } else if (count > 0) {
        /* The top @count bits of the lower half carry into the upper half. */
        d->doubleword[0] = (hi << count) | extract64(lo, 64 - count, count);
        d->doubleword[1] = lo << count;
    } else {
        d->doubleword[0] = hi;
        d->doubleword[1] = lo;
    }
}
57
/*
 * Shift the 128-bit value in @a right by @count bits and store it in @d,
 * propagating the sign: doubleword[0] is the upper half and its top bit is
 * replicated into the vacated high-order positions.
 *
 * @count must be below 128, exactly as for s390_vec_shl() and
 * s390_vec_shr(). A larger count would end up shifting a 64-bit quantity by
 * 64 or more bits, which is undefined behaviour in C, so it is rejected up
 * front.
 */
static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count)
{
    uint64_t tmp;

    g_assert(count < 128);
    if (count == 0) {
        d->doubleword[0] = a->doubleword[0];
        d->doubleword[1] = a->doubleword[1];
    } else if (count == 64) {
        /* The upper half becomes all sign bits, the lower the old upper. */
        tmp = (int64_t)a->doubleword[0] >> 63;
        d->doubleword[1] = a->doubleword[0];
        d->doubleword[0] = tmp;
    } else if (count < 64) {
        /* The low @count bits of the upper half drop into the lower half. */
        tmp = a->doubleword[1] >> count;
        d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
        d->doubleword[0] = (int64_t)a->doubleword[0] >> count;
    } else {
        /* 64 < count < 128: only sign-extended upper-half bits remain. */
        tmp = (int64_t)a->doubleword[0] >> 63;
        d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64);
        d->doubleword[0] = tmp;
    }
}
79
/*
 * Shift the 128-bit value in @a right by @count bits and store it in @d.
 * doubleword[0] is the upper half, doubleword[1] the lower half; vacated
 * high-order bits are filled with zeros. @count must be below 128.
 */
static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count)
{
    /* Snapshot the source first so that @d may be the same vector as @a. */
    const uint64_t hi = a->doubleword[0];
    const uint64_t lo = a->doubleword[1];

    g_assert(count < 128);
    if (count >= 64) {
        /* Only bits of the upper half survive, moved into the lower half. */
        d->doubleword[1] = hi >> (count - 64);
        d->doubleword[0] = 0;
    } else if (count > 0) {
        /* The low @count bits of the upper half drop into the lower half. */
        d->doubleword[1] = deposit64(lo >> count, 64 - count, count, hi);
        d->doubleword[0] = hi >> count;
    } else {
        d->doubleword[0] = hi;
        d->doubleword[1] = lo;
    }
}
100 #define DEF_VAVG(BITS) \
101 void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3, \
102 uint32_t desc) \
103 { \
104 int i; \
105 \
106 for (i = 0; i < (128 / BITS); i++) { \
107 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \
108 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \
109 \
110 s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \
111 } \
112 }
113 DEF_VAVG(8)
114 DEF_VAVG(16)
115
116 #define DEF_VAVGL(BITS) \
117 void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3, \
118 uint32_t desc) \
119 { \
120 int i; \
121 \
122 for (i = 0; i < (128 / BITS); i++) { \
123 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
124 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
125 \
126 s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \
127 } \
128 }
129 DEF_VAVGL(8)
130 DEF_VAVGL(16)
131
132 #define DEF_VCLZ(BITS) \
133 void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc) \
134 { \
135 int i; \
136 \
137 for (i = 0; i < (128 / BITS); i++) { \
138 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
139 \
140 s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS); \
141 } \
142 }
143 DEF_VCLZ(8)
144 DEF_VCLZ(16)
145
146 #define DEF_VCTZ(BITS) \
147 void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc) \
148 { \
149 int i; \
150 \
151 for (i = 0; i < (128 / BITS); i++) { \
152 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
153 \
154 s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS); \
155 } \
156 }
157 DEF_VCTZ(8)
158 DEF_VCTZ(16)
159
160 /* like binary multiplication, but XOR instead of addition */
161
162 /*
163 * There is no carry across the two doublewords, so their order does
164 * not matter. Nor is there partial overlap between registers.
165 */
/*
 * Combine, for one doubleword, the results of clmul_8x4_even() and
 * clmul_8x4_odd() on @n and @m by XOR, then XOR in the accumulator @a.
 */
static inline uint64_t do_gfma8(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t even = clmul_8x4_even(n, m);
    const uint64_t odd = clmul_8x4_odd(n, m);

    return even ^ odd ^ a;
}
170
HELPER(gvec_vgfm8)171 void HELPER(gvec_vgfm8)(void *v1, const void *v2, const void *v3, uint32_t d)
172 {
173 uint64_t *q1 = v1;
174 const uint64_t *q2 = v2, *q3 = v3;
175
176 q1[0] = do_gfma8(q2[0], q3[0], 0);
177 q1[1] = do_gfma8(q2[1], q3[1], 0);
178 }
179
HELPER(gvec_vgfma8)180 void HELPER(gvec_vgfma8)(void *v1, const void *v2, const void *v3,
181 const void *v4, uint32_t desc)
182 {
183 uint64_t *q1 = v1;
184 const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
185
186 q1[0] = do_gfma8(q2[0], q3[0], q4[0]);
187 q1[1] = do_gfma8(q2[1], q3[1], q4[1]);
188 }
189
/*
 * Combine, for one doubleword, the results of clmul_16x2_even() and
 * clmul_16x2_odd() on @n and @m by XOR, then XOR in the accumulator @a.
 */
static inline uint64_t do_gfma16(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t even = clmul_16x2_even(n, m);
    const uint64_t odd = clmul_16x2_odd(n, m);

    return even ^ odd ^ a;
}
194
HELPER(gvec_vgfm16)195 void HELPER(gvec_vgfm16)(void *v1, const void *v2, const void *v3, uint32_t d)
196 {
197 uint64_t *q1 = v1;
198 const uint64_t *q2 = v2, *q3 = v3;
199
200 q1[0] = do_gfma16(q2[0], q3[0], 0);
201 q1[1] = do_gfma16(q2[1], q3[1], 0);
202 }
203
HELPER(gvec_vgfma16)204 void HELPER(gvec_vgfma16)(void *v1, const void *v2, const void *v3,
205 const void *v4, uint32_t d)
206 {
207 uint64_t *q1 = v1;
208 const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
209
210 q1[0] = do_gfma16(q2[0], q3[0], q4[0]);
211 q1[1] = do_gfma16(q2[1], q3[1], q4[1]);
212 }
213
/*
 * Combine, for one doubleword, clmul_32() of the low 32-bit halves of @n and
 * @m with clmul_32() of their high halves by XOR, then XOR in the
 * accumulator @a.
 */
static inline uint64_t do_gfma32(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t low = clmul_32(n, m);
    const uint64_t high = clmul_32(n >> 32, m >> 32);

    return low ^ high ^ a;
}
218
HELPER(gvec_vgfm32)219 void HELPER(gvec_vgfm32)(void *v1, const void *v2, const void *v3, uint32_t d)
220 {
221 uint64_t *q1 = v1;
222 const uint64_t *q2 = v2, *q3 = v3;
223
224 q1[0] = do_gfma32(q2[0], q3[0], 0);
225 q1[1] = do_gfma32(q2[1], q3[1], 0);
226 }
227
HELPER(gvec_vgfma32)228 void HELPER(gvec_vgfma32)(void *v1, const void *v2, const void *v3,
229 const void *v4, uint32_t d)
230 {
231 uint64_t *q1 = v1;
232 const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
233
234 q1[0] = do_gfma32(q2[0], q3[0], q4[0]);
235 q1[1] = do_gfma32(q2[1], q3[1], q4[1]);
236 }
237
HELPER(gvec_vgfm64)238 void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
239 uint32_t desc)
240 {
241 uint64_t *q1 = v1;
242 const uint64_t *q2 = v2, *q3 = v3;
243 Int128 r;
244
245 r = int128_xor(clmul_64(q2[0], q3[0]), clmul_64(q2[1], q3[1]));
246 q1[0] = int128_gethi(r);
247 q1[1] = int128_getlo(r);
248 }
249
HELPER(gvec_vgfma64)250 void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
251 const void *v4, uint32_t desc)
252 {
253 uint64_t *q1 = v1;
254 const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
255 Int128 r;
256
257 r = int128_xor(clmul_64(q2[0], q3[0]), clmul_64(q2[1], q3[1]));
258 q1[0] = q4[0] ^ int128_gethi(r);
259 q1[1] = q4[1] ^ int128_getlo(r);
260 }
261
262 #define DEF_VMAL(BITS) \
263 void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3, \
264 const void *v4, uint32_t desc) \
265 { \
266 int i; \
267 \
268 for (i = 0; i < (128 / BITS); i++) { \
269 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
270 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
271 const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \
272 \
273 s390_vec_write_element##BITS(v1, i, a * b + c); \
274 } \
275 }
276 DEF_VMAL(8)
277 DEF_VMAL(16)
278
279 #define DEF_VMAH(BITS) \
280 void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3, \
281 const void *v4, uint32_t desc) \
282 { \
283 int i; \
284 \
285 for (i = 0; i < (128 / BITS); i++) { \
286 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \
287 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \
288 const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i); \
289 \
290 s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \
291 } \
292 }
293 DEF_VMAH(8)
294 DEF_VMAH(16)
295
296 #define DEF_VMALH(BITS) \
297 void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3, \
298 const void *v4, uint32_t desc) \
299 { \
300 int i; \
301 \
302 for (i = 0; i < (128 / BITS); i++) { \
303 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
304 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
305 const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \
306 \
307 s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \
308 } \
309 }
310 DEF_VMALH(8)
311 DEF_VMALH(16)
312
313 #define DEF_VMAE(BITS, TBITS) \
314 void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \
315 const void *v4, uint32_t desc) \
316 { \
317 int i, j; \
318 \
319 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
320 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
321 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
322 int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
323 \
324 s390_vec_write_element##TBITS(v1, i, a * b + c); \
325 } \
326 }
327 DEF_VMAE(8, 16)
328 DEF_VMAE(16, 32)
329 DEF_VMAE(32, 64)
330
331 #define DEF_VMALE(BITS, TBITS) \
332 void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \
333 const void *v4, uint32_t desc) \
334 { \
335 int i, j; \
336 \
337 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
338 uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
339 uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
340 uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
341 \
342 s390_vec_write_element##TBITS(v1, i, a * b + c); \
343 } \
344 }
345 DEF_VMALE(8, 16)
346 DEF_VMALE(16, 32)
347 DEF_VMALE(32, 64)
348
349 #define DEF_VMAO(BITS, TBITS) \
350 void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \
351 const void *v4, uint32_t desc) \
352 { \
353 int i, j; \
354 \
355 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
356 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
357 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
358 int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
359 \
360 s390_vec_write_element##TBITS(v1, i, a * b + c); \
361 } \
362 }
363 DEF_VMAO(8, 16)
364 DEF_VMAO(16, 32)
365 DEF_VMAO(32, 64)
366
367 #define DEF_VMALO(BITS, TBITS) \
368 void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \
369 const void *v4, uint32_t desc) \
370 { \
371 int i, j; \
372 \
373 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
374 uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
375 uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
376 uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
377 \
378 s390_vec_write_element##TBITS(v1, i, a * b + c); \
379 } \
380 }
381 DEF_VMALO(8, 16)
382 DEF_VMALO(16, 32)
383 DEF_VMALO(32, 64)
384
385 #define DEF_VMH(BITS) \
386 void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3, \
387 uint32_t desc) \
388 { \
389 int i; \
390 \
391 for (i = 0; i < (128 / BITS); i++) { \
392 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \
393 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \
394 \
395 s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \
396 } \
397 }
398 DEF_VMH(8)
399 DEF_VMH(16)
400
401 #define DEF_VMLH(BITS) \
402 void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3, \
403 uint32_t desc) \
404 { \
405 int i; \
406 \
407 for (i = 0; i < (128 / BITS); i++) { \
408 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
409 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
410 \
411 s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \
412 } \
413 }
414 DEF_VMLH(8)
415 DEF_VMLH(16)
416
417 #define DEF_VME(BITS, TBITS) \
418 void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3, \
419 uint32_t desc) \
420 { \
421 int i, j; \
422 \
423 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
424 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
425 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
426 \
427 s390_vec_write_element##TBITS(v1, i, a * b); \
428 } \
429 }
430 DEF_VME(8, 16)
431 DEF_VME(16, 32)
432 DEF_VME(32, 64)
433
434 #define DEF_VMLE(BITS, TBITS) \
435 void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3, \
436 uint32_t desc) \
437 { \
438 int i, j; \
439 \
440 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
441 const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
442 const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
443 \
444 s390_vec_write_element##TBITS(v1, i, a * b); \
445 } \
446 }
447 DEF_VMLE(8, 16)
448 DEF_VMLE(16, 32)
449 DEF_VMLE(32, 64)
450
451 #define DEF_VMO(BITS, TBITS) \
452 void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3, \
453 uint32_t desc) \
454 { \
455 int i, j; \
456 \
457 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
458 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
459 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
460 \
461 s390_vec_write_element##TBITS(v1, i, a * b); \
462 } \
463 }
464 DEF_VMO(8, 16)
465 DEF_VMO(16, 32)
466 DEF_VMO(32, 64)
467
468 #define DEF_VMLO(BITS, TBITS) \
469 void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3, \
470 uint32_t desc) \
471 { \
472 int i, j; \
473 \
474 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
475 const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
476 const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
477 \
478 s390_vec_write_element##TBITS(v1, i, a * b); \
479 } \
480 }
481 DEF_VMLO(8, 16)
482 DEF_VMLO(16, 32)
483 DEF_VMLO(32, 64)
484
485 #define DEF_VPOPCT(BITS) \
486 void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \
487 { \
488 int i; \
489 \
490 for (i = 0; i < (128 / BITS); i++) { \
491 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
492 \
493 s390_vec_write_element##BITS(v1, i, ctpop32(a)); \
494 } \
495 }
496 DEF_VPOPCT(8)
497 DEF_VPOPCT(16)
498
499 #define DEF_VERIM(BITS) \
500 void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \
501 uint32_t desc) \
502 { \
503 const uint8_t count = simd_data(desc); \
504 int i; \
505 \
506 for (i = 0; i < (128 / BITS); i++) { \
507 const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i); \
508 const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i); \
509 const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i); \
510 const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask); \
511 \
512 s390_vec_write_element##BITS(v1, i, d); \
513 } \
514 }
515 DEF_VERIM(8)
516 DEF_VERIM(16)
517
HELPER(gvec_vsl)518 void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count,
519 uint32_t desc)
520 {
521 s390_vec_shl(v1, v2, count);
522 }
523
HELPER(gvec_vsl_ve2)524 void HELPER(gvec_vsl_ve2)(void *v1, const void *v2, const void *v3,
525 uint32_t desc)
526 {
527 S390Vector tmp;
528 uint32_t sh, e0, e1 = 0;
529 int i;
530
531 for (i = 15; i >= 0; --i, e1 = e0) {
532 e0 = s390_vec_read_element8(v2, i);
533 sh = s390_vec_read_element8(v3, i) & 7;
534
535 s390_vec_write_element8(&tmp, i, rol32(e0 | (e1 << 24), sh));
536 }
537
538 *(S390Vector *)v1 = tmp;
539 }
540
HELPER(gvec_vsra)541 void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count,
542 uint32_t desc)
543 {
544 s390_vec_sar(v1, v2, count);
545 }
546
HELPER(gvec_vsra_ve2)547 void HELPER(gvec_vsra_ve2)(void *v1, const void *v2, const void *v3,
548 uint32_t desc)
549 {
550 S390Vector tmp;
551 uint32_t sh, e0, e1 = 0;
552 int i = 0;
553
554 /* Byte 0 is special only. */
555 e0 = (int32_t)(int8_t)s390_vec_read_element8(v2, i);
556 sh = s390_vec_read_element8(v3, i) & 7;
557 s390_vec_write_element8(&tmp, i, e0 >> sh);
558
559 e1 = e0;
560 for (i = 1; i < 16; ++i, e1 = e0) {
561 e0 = s390_vec_read_element8(v2, i);
562 sh = s390_vec_read_element8(v3, i) & 7;
563 s390_vec_write_element8(&tmp, i, (e0 | e1 << 8) >> sh);
564 }
565
566 *(S390Vector *)v1 = tmp;
567 }
568
HELPER(gvec_vsrl)569 void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count,
570 uint32_t desc)
571 {
572 s390_vec_shr(v1, v2, count);
573 }
574
HELPER(gvec_vsrl_ve2)575 void HELPER(gvec_vsrl_ve2)(void *v1, const void *v2, const void *v3,
576 uint32_t desc)
577 {
578 S390Vector tmp;
579 uint32_t sh, e0, e1 = 0;
580
581 for (int i = 0; i < 16; ++i, e1 = e0) {
582 e0 = s390_vec_read_element8(v2, i);
583 sh = s390_vec_read_element8(v3, i) & 7;
584
585 s390_vec_write_element8(&tmp, i, (e0 | (e1 << 8)) >> sh);
586 }
587
588 *(S390Vector *)v1 = tmp;
589 }
590
591 #define DEF_VSCBI(BITS) \
592 void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3, \
593 uint32_t desc) \
594 { \
595 int i; \
596 \
597 for (i = 0; i < (128 / BITS); i++) { \
598 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
599 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
600 \
601 s390_vec_write_element##BITS(v1, i, a >= b); \
602 } \
603 }
604 DEF_VSCBI(8)
605 DEF_VSCBI(16)
606
HELPER(gvec_vtm)607 void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env,
608 uint32_t desc)
609 {
610 S390Vector tmp;
611
612 s390_vec_and(&tmp, v1, v2);
613 if (s390_vec_is_zero(&tmp)) {
614 /* Selected bits all zeros; or all mask bits zero */
615 env->cc_op = 0;
616 } else if (s390_vec_equal(&tmp, v2)) {
617 /* Selected bits all ones */
618 env->cc_op = 3;
619 } else {
620 /* Selected bits a mix of zeros and ones */
621 env->cc_op = 1;
622 }
623 }
624