xref: /openbmc/qemu/target/s390x/tcg/vec_int_helper.c (revision 13d6b1608160de40ec65ae4c32419e56714bbadf)
1c9274b6bSCho, Yu-Chen /*
2c9274b6bSCho, Yu-Chen  * QEMU TCG support -- s390x vector integer instruction support
3c9274b6bSCho, Yu-Chen  *
4c9274b6bSCho, Yu-Chen  * Copyright (C) 2019 Red Hat Inc
5c9274b6bSCho, Yu-Chen  *
6c9274b6bSCho, Yu-Chen  * Authors:
7c9274b6bSCho, Yu-Chen  *   David Hildenbrand <david@redhat.com>
8c9274b6bSCho, Yu-Chen  *
9c9274b6bSCho, Yu-Chen  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10c9274b6bSCho, Yu-Chen  * See the COPYING file in the top-level directory.
11c9274b6bSCho, Yu-Chen  */
12c9274b6bSCho, Yu-Chen #include "qemu/osdep.h"
13c9274b6bSCho, Yu-Chen #include "cpu.h"
14c9274b6bSCho, Yu-Chen #include "vec.h"
15c9274b6bSCho, Yu-Chen #include "exec/helper-proto.h"
16c9274b6bSCho, Yu-Chen #include "tcg/tcg-gvec-desc.h"
172d8bc681SRichard Henderson #include "crypto/clmul.h"
18c9274b6bSCho, Yu-Chen 
s390_vec_is_zero(const S390Vector * v)19c9274b6bSCho, Yu-Chen static bool s390_vec_is_zero(const S390Vector *v)
20c9274b6bSCho, Yu-Chen {
21c9274b6bSCho, Yu-Chen     return !v->doubleword[0] && !v->doubleword[1];
22c9274b6bSCho, Yu-Chen }
23c9274b6bSCho, Yu-Chen 
/*
 * Store the bitwise AND of @a and @b into @res, one doubleword at a time.
 * Each result doubleword depends only on the matching input doublewords.
 */
static void s390_vec_and(S390Vector *res, const S390Vector *a,
                         const S390Vector *b)
{
    int dw;

    for (dw = 0; dw < 2; dw++) {
        res->doubleword[dw] = a->doubleword[dw] & b->doubleword[dw];
    }
}
30c9274b6bSCho, Yu-Chen 
s390_vec_equal(const S390Vector * a,const S390Vector * b)31c9274b6bSCho, Yu-Chen static bool s390_vec_equal(const S390Vector *a, const S390Vector *b)
32c9274b6bSCho, Yu-Chen {
33c9274b6bSCho, Yu-Chen     return a->doubleword[0] == b->doubleword[0] &&
34c9274b6bSCho, Yu-Chen            a->doubleword[1] == b->doubleword[1];
35c9274b6bSCho, Yu-Chen }
36c9274b6bSCho, Yu-Chen 
/*
 * Shift the 128-bit value in @a left by @count bits and store it in @d.
 *
 * doubleword[0] is the more-significant half, doubleword[1] the
 * less-significant one.  @count must be below 128 (asserted).  The inputs
 * are read before @d is written.
 */
static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count)
{
    const uint64_t hi = a->doubleword[0];
    const uint64_t lo = a->doubleword[1];

    g_assert(count < 128);

    if (count >= 64) {
        /* The whole low half moves up; count - 64 is in the range 0..63. */
        d->doubleword[0] = lo << (count - 64);
        d->doubleword[1] = 0;
        return;
    }

    if (count == 0) {
        /* Plain copy; also avoids a zero-length extract below. */
        d->doubleword[0] = hi;
        d->doubleword[1] = lo;
        return;
    }

    /* The top @count bits of the low half carry over into the high half. */
    d->doubleword[0] = (hi << count) | extract64(lo, 64 - count, count);
    d->doubleword[1] = lo << count;
}
57c9274b6bSCho, Yu-Chen 
/*
 * Arithmetic (sign-propagating) right shift of the 128-bit value in @a by
 * @count bits, stored in @d.
 *
 * doubleword[0] is the more-significant half and supplies the sign.
 * @count must be below 128; like the sibling shift helpers this is now
 * asserted, because a larger count would shift a 64-bit value by 64 or
 * more bits, which is undefined.
 */
static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count)
{
    uint64_t tmp;

    g_assert(count < 128);
    if (count == 0) {
        d->doubleword[0] = a->doubleword[0];
        d->doubleword[1] = a->doubleword[1];
    } else if (count == 64) {
        /* High half becomes all sign bits; old high half moves down. */
        tmp = (int64_t)a->doubleword[0] >> 63;
        d->doubleword[1] = a->doubleword[0];
        d->doubleword[0] = tmp;
    } else if (count < 64) {
        /* Low @count bits of the high half fill the top of the low half. */
        tmp = a->doubleword[1] >> count;
        d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
        d->doubleword[0] = (int64_t)a->doubleword[0] >> count;
    } else {
        /* 64 < count < 128: only sign-extended high-half bits survive. */
        tmp = (int64_t)a->doubleword[0] >> 63;
        d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64);
        d->doubleword[0] = tmp;
    }
}
79c9274b6bSCho, Yu-Chen 
/*
 * Logical (zero-filling) right shift of the 128-bit value in @a by @count
 * bits, stored in @d.
 *
 * doubleword[0] is the more-significant half.  @count must be below 128
 * (asserted).  The inputs are read before @d is written.
 */
static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count)
{
    const uint64_t hi = a->doubleword[0];
    const uint64_t lo = a->doubleword[1];

    g_assert(count < 128);

    if (count >= 64) {
        /* The whole high half moves down; count - 64 is in 0..63. */
        d->doubleword[1] = hi >> (count - 64);
        d->doubleword[0] = 0;
        return;
    }

    if (count == 0) {
        /* Plain copy; also avoids a zero-length deposit below. */
        d->doubleword[0] = hi;
        d->doubleword[1] = lo;
        return;
    }

    /* Low @count bits of the high half fill the top of the low half. */
    d->doubleword[1] = deposit64(lo >> count, 64 - count, count, hi);
    d->doubleword[0] = hi >> count;
}
100c9274b6bSCho, Yu-Chen #define DEF_VAVG(BITS)                                                         \
101c9274b6bSCho, Yu-Chen void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3,         \
102c9274b6bSCho, Yu-Chen                              uint32_t desc)                                    \
103c9274b6bSCho, Yu-Chen {                                                                              \
104c9274b6bSCho, Yu-Chen     int i;                                                                     \
105c9274b6bSCho, Yu-Chen                                                                                \
106c9274b6bSCho, Yu-Chen     for (i = 0; i < (128 / BITS); i++) {                                       \
107c9274b6bSCho, Yu-Chen         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
108c9274b6bSCho, Yu-Chen         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
109c9274b6bSCho, Yu-Chen                                                                                \
110c9274b6bSCho, Yu-Chen         s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
111c9274b6bSCho, Yu-Chen     }                                                                          \
112c9274b6bSCho, Yu-Chen }
113c9274b6bSCho, Yu-Chen DEF_VAVG(8)
114c9274b6bSCho, Yu-Chen DEF_VAVG(16)
115c9274b6bSCho, Yu-Chen 
116c9274b6bSCho, Yu-Chen #define DEF_VAVGL(BITS)                                                        \
117c9274b6bSCho, Yu-Chen void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3,        \
118c9274b6bSCho, Yu-Chen                               uint32_t desc)                                   \
119c9274b6bSCho, Yu-Chen {                                                                              \
120c9274b6bSCho, Yu-Chen     int i;                                                                     \
121c9274b6bSCho, Yu-Chen                                                                                \
122c9274b6bSCho, Yu-Chen     for (i = 0; i < (128 / BITS); i++) {                                       \
123c9274b6bSCho, Yu-Chen         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
124c9274b6bSCho, Yu-Chen         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
125c9274b6bSCho, Yu-Chen                                                                                \
126c9274b6bSCho, Yu-Chen         s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
127c9274b6bSCho, Yu-Chen     }                                                                          \
128c9274b6bSCho, Yu-Chen }
129c9274b6bSCho, Yu-Chen DEF_VAVGL(8)
130c9274b6bSCho, Yu-Chen DEF_VAVGL(16)
131c9274b6bSCho, Yu-Chen 
132c9274b6bSCho, Yu-Chen #define DEF_VCLZ(BITS)                                                         \
133c9274b6bSCho, Yu-Chen void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc)          \
134c9274b6bSCho, Yu-Chen {                                                                              \
135c9274b6bSCho, Yu-Chen     int i;                                                                     \
136c9274b6bSCho, Yu-Chen                                                                                \
137c9274b6bSCho, Yu-Chen     for (i = 0; i < (128 / BITS); i++) {                                       \
138c9274b6bSCho, Yu-Chen         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
139c9274b6bSCho, Yu-Chen                                                                                \
140c9274b6bSCho, Yu-Chen         s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS);             \
141c9274b6bSCho, Yu-Chen     }                                                                          \
142c9274b6bSCho, Yu-Chen }
143c9274b6bSCho, Yu-Chen DEF_VCLZ(8)
144c9274b6bSCho, Yu-Chen DEF_VCLZ(16)
145c9274b6bSCho, Yu-Chen 
146c9274b6bSCho, Yu-Chen #define DEF_VCTZ(BITS)                                                         \
147c9274b6bSCho, Yu-Chen void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc)          \
148c9274b6bSCho, Yu-Chen {                                                                              \
149c9274b6bSCho, Yu-Chen     int i;                                                                     \
150c9274b6bSCho, Yu-Chen                                                                                \
151c9274b6bSCho, Yu-Chen     for (i = 0; i < (128 / BITS); i++) {                                       \
152c9274b6bSCho, Yu-Chen         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
153c9274b6bSCho, Yu-Chen                                                                                \
154c9274b6bSCho, Yu-Chen         s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS);              \
155c9274b6bSCho, Yu-Chen     }                                                                          \
156c9274b6bSCho, Yu-Chen }
157c9274b6bSCho, Yu-Chen DEF_VCTZ(8)
158c9274b6bSCho, Yu-Chen DEF_VCTZ(16)
159c9274b6bSCho, Yu-Chen 
/*
 * Galois field multiply(-and-accumulate) helpers: like binary
 * multiplication, but with XOR instead of addition.
 *
 * There is no carry across the two doublewords, so their order does
 * not matter.  Nor is there partial overlap between registers.
 */

/*
 * For one 64-bit lane: XOR together the carry-less products of the even
 * and the odd byte pairs of @n and @m, then XOR in the accumulator @a.
 */
static inline uint64_t do_gfma8(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t even = clmul_8x4_even(n, m);
    const uint64_t odd = clmul_8x4_odd(n, m);

    return even ^ odd ^ a;
}
1702d8bc681SRichard Henderson 
HELPER(gvec_vgfm8)1712d8bc681SRichard Henderson void HELPER(gvec_vgfm8)(void *v1, const void *v2, const void *v3, uint32_t d)
1722d8bc681SRichard Henderson {
1732d8bc681SRichard Henderson     uint64_t *q1 = v1;
1742d8bc681SRichard Henderson     const uint64_t *q2 = v2, *q3 = v3;
1752d8bc681SRichard Henderson 
1762d8bc681SRichard Henderson     q1[0] = do_gfma8(q2[0], q3[0], 0);
1772d8bc681SRichard Henderson     q1[1] = do_gfma8(q2[1], q3[1], 0);
1782d8bc681SRichard Henderson }
1792d8bc681SRichard Henderson 
HELPER(gvec_vgfma8)1802d8bc681SRichard Henderson void HELPER(gvec_vgfma8)(void *v1, const void *v2, const void *v3,
1812d8bc681SRichard Henderson                          const void *v4, uint32_t desc)
1822d8bc681SRichard Henderson {
1832d8bc681SRichard Henderson     uint64_t *q1 = v1;
1842d8bc681SRichard Henderson     const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
1852d8bc681SRichard Henderson 
1862d8bc681SRichard Henderson     q1[0] = do_gfma8(q2[0], q3[0], q4[0]);
1872d8bc681SRichard Henderson     q1[1] = do_gfma8(q2[1], q3[1], q4[1]);
1882d8bc681SRichard Henderson }
1892d8bc681SRichard Henderson 
/*
 * For one 64-bit lane: XOR together the carry-less products of the even
 * and the odd halfword pairs of @n and @m, then XOR in the accumulator @a.
 */
static inline uint64_t do_gfma16(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t even = clmul_16x2_even(n, m);
    const uint64_t odd = clmul_16x2_odd(n, m);

    return even ^ odd ^ a;
}
19425c304e9SRichard Henderson 
HELPER(gvec_vgfm16)19525c304e9SRichard Henderson void HELPER(gvec_vgfm16)(void *v1, const void *v2, const void *v3, uint32_t d)
19625c304e9SRichard Henderson {
19725c304e9SRichard Henderson     uint64_t *q1 = v1;
19825c304e9SRichard Henderson     const uint64_t *q2 = v2, *q3 = v3;
19925c304e9SRichard Henderson 
20025c304e9SRichard Henderson     q1[0] = do_gfma16(q2[0], q3[0], 0);
20125c304e9SRichard Henderson     q1[1] = do_gfma16(q2[1], q3[1], 0);
20225c304e9SRichard Henderson }
20325c304e9SRichard Henderson 
HELPER(gvec_vgfma16)20425c304e9SRichard Henderson void HELPER(gvec_vgfma16)(void *v1, const void *v2, const void *v3,
20525c304e9SRichard Henderson                          const void *v4, uint32_t d)
20625c304e9SRichard Henderson {
20725c304e9SRichard Henderson     uint64_t *q1 = v1;
20825c304e9SRichard Henderson     const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
20925c304e9SRichard Henderson 
21025c304e9SRichard Henderson     q1[0] = do_gfma16(q2[0], q3[0], q4[0]);
21125c304e9SRichard Henderson     q1[1] = do_gfma16(q2[1], q3[1], q4[1]);
21225c304e9SRichard Henderson }
21325c304e9SRichard Henderson 
/*
 * For one 64-bit lane: XOR together the carry-less products of the low
 * 32-bit words and of the high 32-bit words of @n and @m, then XOR in the
 * accumulator @a.
 */
static inline uint64_t do_gfma32(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t low_words = clmul_32(n, m);
    const uint64_t high_words = clmul_32(n >> 32, m >> 32);

    return low_words ^ high_words ^ a;
}
218653aab27SRichard Henderson 
HELPER(gvec_vgfm32)219653aab27SRichard Henderson void HELPER(gvec_vgfm32)(void *v1, const void *v2, const void *v3, uint32_t d)
220653aab27SRichard Henderson {
221653aab27SRichard Henderson     uint64_t *q1 = v1;
222653aab27SRichard Henderson     const uint64_t *q2 = v2, *q3 = v3;
223653aab27SRichard Henderson 
224653aab27SRichard Henderson     q1[0] = do_gfma32(q2[0], q3[0], 0);
225653aab27SRichard Henderson     q1[1] = do_gfma32(q2[1], q3[1], 0);
226653aab27SRichard Henderson }
227653aab27SRichard Henderson 
HELPER(gvec_vgfma32)228653aab27SRichard Henderson void HELPER(gvec_vgfma32)(void *v1, const void *v2, const void *v3,
229653aab27SRichard Henderson                          const void *v4, uint32_t d)
230653aab27SRichard Henderson {
231653aab27SRichard Henderson     uint64_t *q1 = v1;
232653aab27SRichard Henderson     const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
233653aab27SRichard Henderson 
234653aab27SRichard Henderson     q1[0] = do_gfma32(q2[0], q3[0], q4[0]);
235653aab27SRichard Henderson     q1[1] = do_gfma32(q2[1], q3[1], q4[1]);
236653aab27SRichard Henderson }
237c9274b6bSCho, Yu-Chen 
HELPER(gvec_vgfm64)238c9274b6bSCho, Yu-Chen void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
239c9274b6bSCho, Yu-Chen                          uint32_t desc)
240c9274b6bSCho, Yu-Chen {
241*ef73fe7cSRichard Henderson     uint64_t *q1 = v1;
242*ef73fe7cSRichard Henderson     const uint64_t *q2 = v2, *q3 = v3;
243*ef73fe7cSRichard Henderson     Int128 r;
244c9274b6bSCho, Yu-Chen 
245*ef73fe7cSRichard Henderson     r = int128_xor(clmul_64(q2[0], q3[0]), clmul_64(q2[1], q3[1]));
246*ef73fe7cSRichard Henderson     q1[0] = int128_gethi(r);
247*ef73fe7cSRichard Henderson     q1[1] = int128_getlo(r);
248c9274b6bSCho, Yu-Chen }
249c9274b6bSCho, Yu-Chen 
HELPER(gvec_vgfma64)250c9274b6bSCho, Yu-Chen void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
251c9274b6bSCho, Yu-Chen                           const void *v4, uint32_t desc)
252c9274b6bSCho, Yu-Chen {
253*ef73fe7cSRichard Henderson     uint64_t *q1 = v1;
254*ef73fe7cSRichard Henderson     const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
255*ef73fe7cSRichard Henderson     Int128 r;
256c9274b6bSCho, Yu-Chen 
257*ef73fe7cSRichard Henderson     r = int128_xor(clmul_64(q2[0], q3[0]), clmul_64(q2[1], q3[1]));
258*ef73fe7cSRichard Henderson     q1[0] = q4[0] ^ int128_gethi(r);
259*ef73fe7cSRichard Henderson     q1[1] = q4[1] ^ int128_getlo(r);
260c9274b6bSCho, Yu-Chen }
261c9274b6bSCho, Yu-Chen 
262c9274b6bSCho, Yu-Chen #define DEF_VMAL(BITS)                                                         \
263c9274b6bSCho, Yu-Chen void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3,         \
264c9274b6bSCho, Yu-Chen                              const void *v4, uint32_t desc)                    \
265c9274b6bSCho, Yu-Chen {                                                                              \
266c9274b6bSCho, Yu-Chen     int i;                                                                     \
267c9274b6bSCho, Yu-Chen                                                                                \
268c9274b6bSCho, Yu-Chen     for (i = 0; i < (128 / BITS); i++) {                                       \
269c9274b6bSCho, Yu-Chen         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
270c9274b6bSCho, Yu-Chen         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
271c9274b6bSCho, Yu-Chen         const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
272c9274b6bSCho, Yu-Chen                                                                                \
273c9274b6bSCho, Yu-Chen         s390_vec_write_element##BITS(v1, i, a * b + c);                        \
274c9274b6bSCho, Yu-Chen     }                                                                          \
275c9274b6bSCho, Yu-Chen }
276c9274b6bSCho, Yu-Chen DEF_VMAL(8)
277c9274b6bSCho, Yu-Chen DEF_VMAL(16)
278c9274b6bSCho, Yu-Chen 
279c9274b6bSCho, Yu-Chen #define DEF_VMAH(BITS)                                                         \
280c9274b6bSCho, Yu-Chen void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3,         \
281c9274b6bSCho, Yu-Chen                              const void *v4, uint32_t desc)                    \
282c9274b6bSCho, Yu-Chen {                                                                              \
283c9274b6bSCho, Yu-Chen     int i;                                                                     \
284c9274b6bSCho, Yu-Chen                                                                                \
285c9274b6bSCho, Yu-Chen     for (i = 0; i < (128 / BITS); i++) {                                       \
286c9274b6bSCho, Yu-Chen         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
287c9274b6bSCho, Yu-Chen         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
288c9274b6bSCho, Yu-Chen         const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i);   \
289c9274b6bSCho, Yu-Chen                                                                                \
290c9274b6bSCho, Yu-Chen         s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
291c9274b6bSCho, Yu-Chen     }                                                                          \
292c9274b6bSCho, Yu-Chen }
293c9274b6bSCho, Yu-Chen DEF_VMAH(8)
294c9274b6bSCho, Yu-Chen DEF_VMAH(16)
295c9274b6bSCho, Yu-Chen 
296c9274b6bSCho, Yu-Chen #define DEF_VMALH(BITS)                                                        \
297c9274b6bSCho, Yu-Chen void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3,        \
298c9274b6bSCho, Yu-Chen                               const void *v4, uint32_t desc)                   \
299c9274b6bSCho, Yu-Chen {                                                                              \
300c9274b6bSCho, Yu-Chen     int i;                                                                     \
301c9274b6bSCho, Yu-Chen                                                                                \
302c9274b6bSCho, Yu-Chen     for (i = 0; i < (128 / BITS); i++) {                                       \
303c9274b6bSCho, Yu-Chen         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
304c9274b6bSCho, Yu-Chen         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
305c9274b6bSCho, Yu-Chen         const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
306c9274b6bSCho, Yu-Chen                                                                                \
307c9274b6bSCho, Yu-Chen         s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
308c9274b6bSCho, Yu-Chen     }                                                                          \
309c9274b6bSCho, Yu-Chen }
310c9274b6bSCho, Yu-Chen DEF_VMALH(8)
311c9274b6bSCho, Yu-Chen DEF_VMALH(16)
312c9274b6bSCho, Yu-Chen 
313c9274b6bSCho, Yu-Chen #define DEF_VMAE(BITS, TBITS)                                                  \
314c9274b6bSCho, Yu-Chen void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3,         \
315c9274b6bSCho, Yu-Chen                              const void *v4, uint32_t desc)                    \
316c9274b6bSCho, Yu-Chen {                                                                              \
317c9274b6bSCho, Yu-Chen     int i, j;                                                                  \
318c9274b6bSCho, Yu-Chen                                                                                \
319c9274b6bSCho, Yu-Chen     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
320c9274b6bSCho, Yu-Chen         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
321c9274b6bSCho, Yu-Chen         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
322c9274b6bSCho, Yu-Chen         int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);                \
323c9274b6bSCho, Yu-Chen                                                                                \
324c9274b6bSCho, Yu-Chen         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
325c9274b6bSCho, Yu-Chen     }                                                                          \
326c9274b6bSCho, Yu-Chen }
327c9274b6bSCho, Yu-Chen DEF_VMAE(8, 16)
328c9274b6bSCho, Yu-Chen DEF_VMAE(16, 32)
329c9274b6bSCho, Yu-Chen DEF_VMAE(32, 64)
330c9274b6bSCho, Yu-Chen 
331c9274b6bSCho, Yu-Chen #define DEF_VMALE(BITS, TBITS)                                                 \
332c9274b6bSCho, Yu-Chen void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3,        \
333c9274b6bSCho, Yu-Chen                               const void *v4, uint32_t desc)                   \
334c9274b6bSCho, Yu-Chen {                                                                              \
335c9274b6bSCho, Yu-Chen     int i, j;                                                                  \
336c9274b6bSCho, Yu-Chen                                                                                \
337c9274b6bSCho, Yu-Chen     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
338c9274b6bSCho, Yu-Chen         uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
339c9274b6bSCho, Yu-Chen         uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
340c9274b6bSCho, Yu-Chen         uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);               \
341c9274b6bSCho, Yu-Chen                                                                                \
342c9274b6bSCho, Yu-Chen         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
343c9274b6bSCho, Yu-Chen     }                                                                          \
344c9274b6bSCho, Yu-Chen }
345c9274b6bSCho, Yu-Chen DEF_VMALE(8, 16)
346c9274b6bSCho, Yu-Chen DEF_VMALE(16, 32)
347c9274b6bSCho, Yu-Chen DEF_VMALE(32, 64)
348c9274b6bSCho, Yu-Chen 
349c9274b6bSCho, Yu-Chen #define DEF_VMAO(BITS, TBITS)                                                  \
350c9274b6bSCho, Yu-Chen void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3,         \
351c9274b6bSCho, Yu-Chen                              const void *v4, uint32_t desc)                    \
352c9274b6bSCho, Yu-Chen {                                                                              \
353c9274b6bSCho, Yu-Chen     int i, j;                                                                  \
354c9274b6bSCho, Yu-Chen                                                                                \
355c9274b6bSCho, Yu-Chen     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
356c9274b6bSCho, Yu-Chen         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
357c9274b6bSCho, Yu-Chen         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
358c9274b6bSCho, Yu-Chen         int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);                \
359c9274b6bSCho, Yu-Chen                                                                                \
360c9274b6bSCho, Yu-Chen         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
361c9274b6bSCho, Yu-Chen     }                                                                          \
362c9274b6bSCho, Yu-Chen }
363c9274b6bSCho, Yu-Chen DEF_VMAO(8, 16)
364c9274b6bSCho, Yu-Chen DEF_VMAO(16, 32)
365c9274b6bSCho, Yu-Chen DEF_VMAO(32, 64)
366c9274b6bSCho, Yu-Chen 
367c9274b6bSCho, Yu-Chen #define DEF_VMALO(BITS, TBITS)                                                 \
368c9274b6bSCho, Yu-Chen void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3,        \
369c9274b6bSCho, Yu-Chen                               const void *v4, uint32_t desc)                   \
370c9274b6bSCho, Yu-Chen {                                                                              \
371c9274b6bSCho, Yu-Chen     int i, j;                                                                  \
372c9274b6bSCho, Yu-Chen                                                                                \
373c9274b6bSCho, Yu-Chen     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
374c9274b6bSCho, Yu-Chen         uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
375c9274b6bSCho, Yu-Chen         uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
376c9274b6bSCho, Yu-Chen         uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);               \
377c9274b6bSCho, Yu-Chen                                                                                \
378c9274b6bSCho, Yu-Chen         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
379c9274b6bSCho, Yu-Chen     }                                                                          \
380c9274b6bSCho, Yu-Chen }
381c9274b6bSCho, Yu-Chen DEF_VMALO(8, 16)
382c9274b6bSCho, Yu-Chen DEF_VMALO(16, 32)
383c9274b6bSCho, Yu-Chen DEF_VMALO(32, 64)
384c9274b6bSCho, Yu-Chen 
385c9274b6bSCho, Yu-Chen #define DEF_VMH(BITS)                                                          \
386c9274b6bSCho, Yu-Chen void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3,          \
387c9274b6bSCho, Yu-Chen                             uint32_t desc)                                     \
388c9274b6bSCho, Yu-Chen {                                                                              \
389c9274b6bSCho, Yu-Chen     int i;                                                                     \
390c9274b6bSCho, Yu-Chen                                                                                \
391c9274b6bSCho, Yu-Chen     for (i = 0; i < (128 / BITS); i++) {                                       \
392c9274b6bSCho, Yu-Chen         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
393c9274b6bSCho, Yu-Chen         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
394c9274b6bSCho, Yu-Chen                                                                                \
395c9274b6bSCho, Yu-Chen         s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
396c9274b6bSCho, Yu-Chen     }                                                                          \
397c9274b6bSCho, Yu-Chen }
398c9274b6bSCho, Yu-Chen DEF_VMH(8)
399c9274b6bSCho, Yu-Chen DEF_VMH(16)
400c9274b6bSCho, Yu-Chen 
401c9274b6bSCho, Yu-Chen #define DEF_VMLH(BITS)                                                         \
402c9274b6bSCho, Yu-Chen void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3,         \
403c9274b6bSCho, Yu-Chen                              uint32_t desc)                                    \
404c9274b6bSCho, Yu-Chen {                                                                              \
405c9274b6bSCho, Yu-Chen     int i;                                                                     \
406c9274b6bSCho, Yu-Chen                                                                                \
407c9274b6bSCho, Yu-Chen     for (i = 0; i < (128 / BITS); i++) {                                       \
408c9274b6bSCho, Yu-Chen         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
409c9274b6bSCho, Yu-Chen         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
410c9274b6bSCho, Yu-Chen                                                                                \
411c9274b6bSCho, Yu-Chen         s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
412c9274b6bSCho, Yu-Chen     }                                                                          \
413c9274b6bSCho, Yu-Chen }
414c9274b6bSCho, Yu-Chen DEF_VMLH(8)
415c9274b6bSCho, Yu-Chen DEF_VMLH(16)
416c9274b6bSCho, Yu-Chen 
417c9274b6bSCho, Yu-Chen #define DEF_VME(BITS, TBITS)                                                   \
418c9274b6bSCho, Yu-Chen void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3,          \
419c9274b6bSCho, Yu-Chen                             uint32_t desc)                                     \
420c9274b6bSCho, Yu-Chen {                                                                              \
421c9274b6bSCho, Yu-Chen     int i, j;                                                                  \
422c9274b6bSCho, Yu-Chen                                                                                \
423c9274b6bSCho, Yu-Chen     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
424c9274b6bSCho, Yu-Chen         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
425c9274b6bSCho, Yu-Chen         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
426c9274b6bSCho, Yu-Chen                                                                                \
427c9274b6bSCho, Yu-Chen         s390_vec_write_element##TBITS(v1, i, a * b);                           \
428c9274b6bSCho, Yu-Chen     }                                                                          \
429c9274b6bSCho, Yu-Chen }
430c9274b6bSCho, Yu-Chen DEF_VME(8, 16)
431c9274b6bSCho, Yu-Chen DEF_VME(16, 32)
432c9274b6bSCho, Yu-Chen DEF_VME(32, 64)
433c9274b6bSCho, Yu-Chen 
434c9274b6bSCho, Yu-Chen #define DEF_VMLE(BITS, TBITS)                                                  \
435c9274b6bSCho, Yu-Chen void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3,         \
436c9274b6bSCho, Yu-Chen                              uint32_t desc)                                    \
437c9274b6bSCho, Yu-Chen {                                                                              \
438c9274b6bSCho, Yu-Chen     int i, j;                                                                  \
439c9274b6bSCho, Yu-Chen                                                                                \
440c9274b6bSCho, Yu-Chen     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
441c9274b6bSCho, Yu-Chen         const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
442c9274b6bSCho, Yu-Chen         const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
443c9274b6bSCho, Yu-Chen                                                                                \
444c9274b6bSCho, Yu-Chen         s390_vec_write_element##TBITS(v1, i, a * b);                           \
445c9274b6bSCho, Yu-Chen     }                                                                          \
446c9274b6bSCho, Yu-Chen }
447c9274b6bSCho, Yu-Chen DEF_VMLE(8, 16)
448c9274b6bSCho, Yu-Chen DEF_VMLE(16, 32)
449c9274b6bSCho, Yu-Chen DEF_VMLE(32, 64)
450c9274b6bSCho, Yu-Chen 
451c9274b6bSCho, Yu-Chen #define DEF_VMO(BITS, TBITS)                                                   \
452c9274b6bSCho, Yu-Chen void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3,          \
453c9274b6bSCho, Yu-Chen                             uint32_t desc)                                     \
454c9274b6bSCho, Yu-Chen {                                                                              \
455c9274b6bSCho, Yu-Chen     int i, j;                                                                  \
456c9274b6bSCho, Yu-Chen                                                                                \
457c9274b6bSCho, Yu-Chen     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
458c9274b6bSCho, Yu-Chen         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
459c9274b6bSCho, Yu-Chen         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
460c9274b6bSCho, Yu-Chen                                                                                \
461c9274b6bSCho, Yu-Chen         s390_vec_write_element##TBITS(v1, i, a * b);                           \
462c9274b6bSCho, Yu-Chen     }                                                                          \
463c9274b6bSCho, Yu-Chen }
464c9274b6bSCho, Yu-Chen DEF_VMO(8, 16)
465c9274b6bSCho, Yu-Chen DEF_VMO(16, 32)
466c9274b6bSCho, Yu-Chen DEF_VMO(32, 64)
467c9274b6bSCho, Yu-Chen 
468c9274b6bSCho, Yu-Chen #define DEF_VMLO(BITS, TBITS)                                                  \
469c9274b6bSCho, Yu-Chen void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3,         \
470c9274b6bSCho, Yu-Chen                              uint32_t desc)                                    \
471c9274b6bSCho, Yu-Chen {                                                                              \
472c9274b6bSCho, Yu-Chen     int i, j;                                                                  \
473c9274b6bSCho, Yu-Chen                                                                                \
474c9274b6bSCho, Yu-Chen     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
475c9274b6bSCho, Yu-Chen         const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
476c9274b6bSCho, Yu-Chen         const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
477c9274b6bSCho, Yu-Chen                                                                                \
478c9274b6bSCho, Yu-Chen         s390_vec_write_element##TBITS(v1, i, a * b);                           \
479c9274b6bSCho, Yu-Chen     }                                                                          \
480c9274b6bSCho, Yu-Chen }
481c9274b6bSCho, Yu-Chen DEF_VMLO(8, 16)
482c9274b6bSCho, Yu-Chen DEF_VMLO(16, 32)
483c9274b6bSCho, Yu-Chen DEF_VMLO(32, 64)
484c9274b6bSCho, Yu-Chen 
485c9274b6bSCho, Yu-Chen #define DEF_VPOPCT(BITS)                                                       \
486c9274b6bSCho, Yu-Chen void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc)        \
487c9274b6bSCho, Yu-Chen {                                                                              \
488c9274b6bSCho, Yu-Chen     int i;                                                                     \
489c9274b6bSCho, Yu-Chen                                                                                \
490c9274b6bSCho, Yu-Chen     for (i = 0; i < (128 / BITS); i++) {                                       \
491c9274b6bSCho, Yu-Chen         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
492c9274b6bSCho, Yu-Chen                                                                                \
493c9274b6bSCho, Yu-Chen         s390_vec_write_element##BITS(v1, i, ctpop32(a));                       \
494c9274b6bSCho, Yu-Chen     }                                                                          \
495c9274b6bSCho, Yu-Chen }
496c9274b6bSCho, Yu-Chen DEF_VPOPCT(8)
497c9274b6bSCho, Yu-Chen DEF_VPOPCT(16)
498c9274b6bSCho, Yu-Chen 
499c9274b6bSCho, Yu-Chen #define DEF_VERIM(BITS)                                                        \
500c9274b6bSCho, Yu-Chen void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3,        \
501c9274b6bSCho, Yu-Chen                               uint32_t desc)                                   \
502c9274b6bSCho, Yu-Chen {                                                                              \
503c9274b6bSCho, Yu-Chen     const uint8_t count = simd_data(desc);                                     \
504c9274b6bSCho, Yu-Chen     int i;                                                                     \
505c9274b6bSCho, Yu-Chen                                                                                \
506c9274b6bSCho, Yu-Chen     for (i = 0; i < (128 / BITS); i++) {                                       \
507c9274b6bSCho, Yu-Chen         const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i);           \
508c9274b6bSCho, Yu-Chen         const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i);           \
509c9274b6bSCho, Yu-Chen         const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i);        \
510c9274b6bSCho, Yu-Chen         const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask);   \
511c9274b6bSCho, Yu-Chen                                                                                \
512c9274b6bSCho, Yu-Chen         s390_vec_write_element##BITS(v1, i, d);                                \
513c9274b6bSCho, Yu-Chen     }                                                                          \
514c9274b6bSCho, Yu-Chen }
515c9274b6bSCho, Yu-Chen DEF_VERIM(8)
516c9274b6bSCho, Yu-Chen DEF_VERIM(16)
517c9274b6bSCho, Yu-Chen 
HELPER(gvec_vsl)518c9274b6bSCho, Yu-Chen void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count,
519c9274b6bSCho, Yu-Chen                       uint32_t desc)
520c9274b6bSCho, Yu-Chen {
521c9274b6bSCho, Yu-Chen     s390_vec_shl(v1, v2, count);
522c9274b6bSCho, Yu-Chen }
523c9274b6bSCho, Yu-Chen 
HELPER(gvec_vsl_ve2)524b7a50eb7SDavid Miller void HELPER(gvec_vsl_ve2)(void *v1, const void *v2, const void *v3,
525b7a50eb7SDavid Miller                           uint32_t desc)
526b7a50eb7SDavid Miller {
527b7a50eb7SDavid Miller     S390Vector tmp;
528b7a50eb7SDavid Miller     uint32_t sh, e0, e1 = 0;
529b7a50eb7SDavid Miller     int i;
530b7a50eb7SDavid Miller 
531b7a50eb7SDavid Miller     for (i = 15; i >= 0; --i, e1 = e0) {
532b7a50eb7SDavid Miller         e0 = s390_vec_read_element8(v2, i);
533b7a50eb7SDavid Miller         sh = s390_vec_read_element8(v3, i) & 7;
534b7a50eb7SDavid Miller 
535b7a50eb7SDavid Miller         s390_vec_write_element8(&tmp, i, rol32(e0 | (e1 << 24), sh));
536b7a50eb7SDavid Miller     }
537b7a50eb7SDavid Miller 
538b7a50eb7SDavid Miller     *(S390Vector *)v1 = tmp;
539b7a50eb7SDavid Miller }
540b7a50eb7SDavid Miller 
HELPER(gvec_vsra)541c9274b6bSCho, Yu-Chen void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count,
542c9274b6bSCho, Yu-Chen                        uint32_t desc)
543c9274b6bSCho, Yu-Chen {
544c9274b6bSCho, Yu-Chen     s390_vec_sar(v1, v2, count);
545c9274b6bSCho, Yu-Chen }
546c9274b6bSCho, Yu-Chen 
HELPER(gvec_vsra_ve2)547b7a50eb7SDavid Miller void HELPER(gvec_vsra_ve2)(void *v1, const void *v2, const void *v3,
548b7a50eb7SDavid Miller                            uint32_t desc)
549b7a50eb7SDavid Miller {
550b7a50eb7SDavid Miller     S390Vector tmp;
551b7a50eb7SDavid Miller     uint32_t sh, e0, e1 = 0;
552b7a50eb7SDavid Miller     int i = 0;
553b7a50eb7SDavid Miller 
554b7a50eb7SDavid Miller     /* Byte 0 is special only. */
555b7a50eb7SDavid Miller     e0 = (int32_t)(int8_t)s390_vec_read_element8(v2, i);
556b7a50eb7SDavid Miller     sh = s390_vec_read_element8(v3, i) & 7;
557b7a50eb7SDavid Miller     s390_vec_write_element8(&tmp, i, e0 >> sh);
558b7a50eb7SDavid Miller 
559b7a50eb7SDavid Miller     e1 = e0;
560b7a50eb7SDavid Miller     for (i = 1; i < 16; ++i, e1 = e0) {
561b7a50eb7SDavid Miller         e0 = s390_vec_read_element8(v2, i);
562b7a50eb7SDavid Miller         sh = s390_vec_read_element8(v3, i) & 7;
563b7a50eb7SDavid Miller         s390_vec_write_element8(&tmp, i, (e0 | e1 << 8) >> sh);
564b7a50eb7SDavid Miller     }
565b7a50eb7SDavid Miller 
566b7a50eb7SDavid Miller     *(S390Vector *)v1 = tmp;
567b7a50eb7SDavid Miller }
568b7a50eb7SDavid Miller 
HELPER(gvec_vsrl)569c9274b6bSCho, Yu-Chen void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count,
570c9274b6bSCho, Yu-Chen                        uint32_t desc)
571c9274b6bSCho, Yu-Chen {
572c9274b6bSCho, Yu-Chen     s390_vec_shr(v1, v2, count);
573c9274b6bSCho, Yu-Chen }
574c9274b6bSCho, Yu-Chen 
HELPER(gvec_vsrl_ve2)575b7a50eb7SDavid Miller void HELPER(gvec_vsrl_ve2)(void *v1, const void *v2, const void *v3,
576b7a50eb7SDavid Miller                            uint32_t desc)
577b7a50eb7SDavid Miller {
578b7a50eb7SDavid Miller     S390Vector tmp;
579b7a50eb7SDavid Miller     uint32_t sh, e0, e1 = 0;
580b7a50eb7SDavid Miller 
581b7a50eb7SDavid Miller     for (int i = 0; i < 16; ++i, e1 = e0) {
582b7a50eb7SDavid Miller         e0 = s390_vec_read_element8(v2, i);
583b7a50eb7SDavid Miller         sh = s390_vec_read_element8(v3, i) & 7;
584b7a50eb7SDavid Miller 
585b7a50eb7SDavid Miller         s390_vec_write_element8(&tmp, i, (e0 | (e1 << 8)) >> sh);
586b7a50eb7SDavid Miller     }
587b7a50eb7SDavid Miller 
588b7a50eb7SDavid Miller     *(S390Vector *)v1 = tmp;
589b7a50eb7SDavid Miller }
590b7a50eb7SDavid Miller 
591c9274b6bSCho, Yu-Chen #define DEF_VSCBI(BITS)                                                        \
592c9274b6bSCho, Yu-Chen void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3,        \
593c9274b6bSCho, Yu-Chen                               uint32_t desc)                                   \
594c9274b6bSCho, Yu-Chen {                                                                              \
595c9274b6bSCho, Yu-Chen     int i;                                                                     \
596c9274b6bSCho, Yu-Chen                                                                                \
597c9274b6bSCho, Yu-Chen     for (i = 0; i < (128 / BITS); i++) {                                       \
598c9274b6bSCho, Yu-Chen         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
599c9274b6bSCho, Yu-Chen         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
600c9274b6bSCho, Yu-Chen                                                                                \
601c9274b6bSCho, Yu-Chen         s390_vec_write_element##BITS(v1, i, a >= b);                           \
602c9274b6bSCho, Yu-Chen     }                                                                          \
603c9274b6bSCho, Yu-Chen }
604c9274b6bSCho, Yu-Chen DEF_VSCBI(8)
605c9274b6bSCho, Yu-Chen DEF_VSCBI(16)
606c9274b6bSCho, Yu-Chen 
HELPER(gvec_vtm)607c9274b6bSCho, Yu-Chen void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env,
608c9274b6bSCho, Yu-Chen                       uint32_t desc)
609c9274b6bSCho, Yu-Chen {
610c9274b6bSCho, Yu-Chen     S390Vector tmp;
611c9274b6bSCho, Yu-Chen 
612c9274b6bSCho, Yu-Chen     s390_vec_and(&tmp, v1, v2);
613c9274b6bSCho, Yu-Chen     if (s390_vec_is_zero(&tmp)) {
614c9274b6bSCho, Yu-Chen         /* Selected bits all zeros; or all mask bits zero */
615c9274b6bSCho, Yu-Chen         env->cc_op = 0;
616c9274b6bSCho, Yu-Chen     } else if (s390_vec_equal(&tmp, v2)) {
617c9274b6bSCho, Yu-Chen         /* Selected bits all ones */
618c9274b6bSCho, Yu-Chen         env->cc_op = 3;
619c9274b6bSCho, Yu-Chen     } else {
620c9274b6bSCho, Yu-Chen         /* Selected bits a mix of zeros and ones */
621c9274b6bSCho, Yu-Chen         env->cc_op = 1;
622c9274b6bSCho, Yu-Chen     }
623c9274b6bSCho, Yu-Chen }
624