1 /*
2  * QEMU TCG support -- s390x vector string instruction support
3  *
4  * Copyright (C) 2019 Red Hat Inc
5  *
6  * Authors:
7  *   David Hildenbrand <david@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 #include "qemu/osdep.h"
13 #include "qemu-common.h"
14 #include "cpu.h"
15 #include "s390x-internal.h"
16 #include "vec.h"
17 #include "tcg/tcg.h"
18 #include "tcg/tcg-gvec-desc.h"
19 #include "exec/helper-proto.h"
20 
/*
 * Flags every element of @a that is zero.
 *
 * @mask selects, per element, the bits that take part in the carry trick;
 * the callers in this file pass a mask that has all bits of every element
 * set except the element's MSB. In the result, the MSB of each element that
 * is zero is set and all other bits are clear.
 */
static inline uint64_t zero_search(uint64_t a, uint64_t mask)
{
    /* adding the mask carries into an element's MSB iff a lower bit is set */
    const uint64_t carried = (a & mask) + mask;

    /* OR-ing in @a covers a set MSB, OR-ing in @mask clears the LSBs */
    return ~(carried | a | mask);
}
29 
/*
 * Flags every element of @a that is not zero.
 *
 * Counterpart of zero_search(): with a mask that has all bits of every
 * element set except the element's MSB, the result has the MSB of each
 * non-zero element set and all other bits clear.
 */
static inline uint64_t nonzero_search(uint64_t a, uint64_t mask)
{
    /* adding the mask carries into an element's MSB iff a lower bit is set */
    const uint64_t carried = (a & mask) + mask;

    /* OR-ing in @a covers a set MSB; only the MSBs are kept */
    return (carried | a) & ~mask;
}
38 
/*
 * Converts two words of per-element match bits into a byte offset.
 *
 * @c0 describes bytes 0-7 and @c1 bytes 8-15. The number of leading zero
 * bits in front of the first set bit is divided by 8 to obtain the byte
 * offset of the first match.
 *
 * Returns the byte offset for the first match, or 16 for no match (this
 * assumes clz64(0) evaluates to 64 - confirm against its definition).
 */
static inline int match_index(uint64_t c0, uint64_t c1)
{
    int leading_bits;

    if (c0) {
        leading_bits = clz64(c0);
    } else {
        /* nothing in the first doubleword, continue in the second one */
        leading_bits = clz64(c1) + 64;
    }
    return leading_bits >> 3;
}
46 
47 /*
48  * Returns the number of bits composing one element.
49  */
50 static uint8_t get_element_bits(uint8_t es)
51 {
52     return (1 << es) * BITS_PER_BYTE;
53 }
54 
/*
 * Returns the bitmask for a single element, i.e. a value with the lowest
 * get_element_bits(es) bits set.
 */
static uint64_t get_single_element_mask(uint8_t es)
{
    /* number of upper bits of the doubleword not covered by one element */
    const int unused_bits = 64 - get_element_bits(es);

    return -1ull >> unused_bits;
}
62 
/*
 * Returns the bitmask for a single element (excluding the MSB), i.e. a
 * value with the lowest get_element_bits(es) - 1 bits set.
 */
static uint64_t get_single_element_lsbs_mask(uint8_t es)
{
    /* one bit more than for the full element mask drops the element's MSB */
    const int unused_bits = 65 - get_element_bits(es);

    return -1ull >> unused_bits;
}
70 
/*
 * Returns the bitmasks for multiple elements (excluding the MSBs): the
 * single-element LSB mask handed to dup_const() for the given element size.
 */
static uint64_t get_element_lsbs_mask(uint8_t es)
{
    const uint64_t single = get_single_element_lsbs_mask(es);

    return dup_const(es, single);
}
78 
/*
 * Searches the elements of @v2 for elements that are equal to any element
 * of @v3 and stores the outcome in @v1.
 *
 * @in: invert the per-element comparison result
 * @rt: store a per-element bit vector (all bits of an element set for a
 *      hit) instead of a byte index; zero elements do not show up in this
 *      bit vector
 * @zs: additionally search @v2 for its first zero element
 * @es: log2 of the element size in bytes
 *
 * Without @rt, doubleword 0 of @v1 receives the smaller of the byte index
 * of the first hit and the byte index of the first zero element (16 if
 * there is neither) and doubleword 1 is cleared.
 *
 * Returns 3 if there is neither a hit nor a zero element, 1 if there is a
 * hit but no zero element, 2 if the first hit is located before the first
 * zero element, and 0 otherwise.
 */
static int vfae(void *v1, const void *v2, const void *v3, bool in,
                bool rt, bool zs, uint8_t es)
{
    const uint64_t lsbs = get_element_lsbs_mask(es);
    const int elem_bits = get_element_bits(es);
    const uint64_t src0 = s390_vec_read_element64(v2, 0);
    const uint64_t src1 = s390_vec_read_element64(v2, 1);
    const uint64_t set0 = s390_vec_read_element64(v3, 0);
    const uint64_t set1 = s390_vec_read_element64(v3, 1);
    uint64_t hits0 = 0, hits1 = 0;
    uint64_t first_zero = 16;
    uint64_t first_equal;
    int rot;

    /*
     * Rotating both doublewords of v3 element by element lines up every
     * element of v3 with every element position of v2 once. The XOR of two
     * equal elements is zero, so zero_search() flags the hits.
     */
    for (rot = 0; rot < 64; rot += elem_bits) {
        const uint64_t cand0 = rol64(set0, rot);
        const uint64_t cand1 = rol64(set1, rot);

        hits0 |= zero_search(src0 ^ cand0, lsbs) |
                 zero_search(src0 ^ cand1, lsbs);
        hits1 |= zero_search(src1 ^ cand0, lsbs) |
                 zero_search(src1 ^ cand1, lsbs);
    }

    /* an inverted result must still have only the element MSBs set */
    if (in) {
        hits0 = ~(hits0 | lsbs);
        hits1 = ~(hits1 | lsbs);
    }
    first_equal = match_index(hits0, hits1);

    if (zs) {
        first_zero = match_index(zero_search(src0, lsbs),
                                 zero_search(src1, lsbs));
    }

    if (rt) {
        const uint64_t elem_mask = get_single_element_mask(es);

        /* move each MSB to the element's LSB and widen it to the element */
        s390_vec_write_element64(v1, 0,
                                 (hits0 >> (elem_bits - 1)) * elem_mask);
        s390_vec_write_element64(v1, 1,
                                 (hits1 >> (elem_bits - 1)) * elem_mask);
    } else {
        s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
        s390_vec_write_element64(v1, 1, 0);
    }

    if (first_zero == 16) {
        /* no match at all, or matching elements without a zero element */
        return first_equal == 16 ? 3 : 1;
    }
    /* matching elements before the zero element, or the zero element wins */
    return first_equal < first_zero ? 2 : 0;
}
136 
137 #define DEF_VFAE_HELPER(BITS)                                                  \
138 void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3,         \
139                              uint32_t desc)                                    \
140 {                                                                              \
141     const bool in = extract32(simd_data(desc), 3, 1);                          \
142     const bool rt = extract32(simd_data(desc), 2, 1);                          \
143     const bool zs = extract32(simd_data(desc), 1, 1);                          \
144                                                                                \
145     vfae(v1, v2, v3, in, rt, zs, MO_##BITS);                                   \
146 }
147 DEF_VFAE_HELPER(8)
148 DEF_VFAE_HELPER(16)
149 DEF_VFAE_HELPER(32)
150 
151 #define DEF_VFAE_CC_HELPER(BITS)                                               \
152 void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3,      \
153                                 CPUS390XState *env, uint32_t desc)             \
154 {                                                                              \
155     const bool in = extract32(simd_data(desc), 3, 1);                          \
156     const bool rt = extract32(simd_data(desc), 2, 1);                          \
157     const bool zs = extract32(simd_data(desc), 1, 1);                          \
158                                                                                \
159     env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS);                      \
160 }
161 DEF_VFAE_CC_HELPER(8)
162 DEF_VFAE_CC_HELPER(16)
163 DEF_VFAE_CC_HELPER(32)
164 
/*
 * Searches for the first element position at which @v2 and @v3 hold equal
 * elements and stores the result in @v1.
 *
 * @zs: additionally search @v2 for its first zero element
 * @es: log2 of the element size in bytes
 *
 * Doubleword 0 of @v1 receives the smaller of the byte index of the first
 * equal element and the byte index of the first zero element (16 if there
 * is neither); doubleword 1 is cleared.
 *
 * Returns 3 if there is neither an equal nor a zero element, 1 if there is
 * an equal element but no zero element, 2 if the first equal element is
 * located before the first zero element, and 0 otherwise.
 */
static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
{
    const uint64_t lsbs = get_element_lsbs_mask(es);
    const uint64_t lhs0 = s390_vec_read_element64(v2, 0);
    const uint64_t lhs1 = s390_vec_read_element64(v2, 1);
    const uint64_t rhs0 = s390_vec_read_element64(v3, 0);
    const uint64_t rhs1 = s390_vec_read_element64(v3, 1);
    /* the XOR is zero exactly in the elements where both operands agree */
    const uint64_t equal0 = zero_search(lhs0 ^ rhs0, lsbs);
    const uint64_t equal1 = zero_search(lhs1 ^ rhs1, lsbs);
    const uint64_t first_equal = match_index(equal0, equal1);
    uint64_t first_zero = 16;

    if (zs) {
        first_zero = match_index(zero_search(lhs0, lsbs),
                                 zero_search(lhs1, lsbs));
    }

    s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
    s390_vec_write_element64(v1, 1, 0);

    if (first_zero == 16) {
        /* no match at all, or matching elements without a zero element */
        return first_equal == 16 ? 3 : 1;
    }
    /* matching elements before the zero element, or the zero element wins */
    return first_equal < first_zero ? 2 : 0;
}
197 
198 #define DEF_VFEE_HELPER(BITS)                                                  \
199 void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3,         \
200                              uint32_t desc)                                    \
201 {                                                                              \
202     const bool zs = extract32(simd_data(desc), 1, 1);                          \
203                                                                                \
204     vfee(v1, v2, v3, zs, MO_##BITS);                                           \
205 }
206 DEF_VFEE_HELPER(8)
207 DEF_VFEE_HELPER(16)
208 DEF_VFEE_HELPER(32)
209 
210 #define DEF_VFEE_CC_HELPER(BITS)                                               \
211 void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3,      \
212                                 CPUS390XState *env, uint32_t desc)             \
213 {                                                                              \
214     const bool zs = extract32(simd_data(desc), 1, 1);                          \
215                                                                                \
216     env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS);                              \
217 }
218 DEF_VFEE_CC_HELPER(8)
219 DEF_VFEE_CC_HELPER(16)
220 DEF_VFEE_CC_HELPER(32)
221 
/*
 * Searches for the first element position at which @v2 and @v3 hold
 * different elements and stores the result in @v1.
 *
 * @zs: additionally search @v2 for its first zero element
 * @es: log2 of the element size in bytes
 *
 * Doubleword 0 of @v1 receives the smaller of the byte index of the first
 * unequal element and the byte index of the first zero element (16 if there
 * is neither); doubleword 1 is cleared.
 *
 * Returns 3 if there is neither an unequal nor a zero element and 0 if the
 * first zero element is located before the first unequal element.
 * Otherwise returns 1 if, at the first unequal position, the element of @v2
 * is smaller (unsigned) than the element of @v3, and 2 if it is not.
 */
static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
{
    const uint64_t lsbs = get_element_lsbs_mask(es);
    const uint64_t lhs0 = s390_vec_read_element64(v2, 0);
    const uint64_t lhs1 = s390_vec_read_element64(v2, 1);
    const uint64_t rhs0 = s390_vec_read_element64(v3, 0);
    const uint64_t rhs1 = s390_vec_read_element64(v3, 1);
    /* the XOR is non-zero exactly in the elements where the operands differ */
    const uint64_t differ0 = nonzero_search(lhs0 ^ rhs0, lsbs);
    const uint64_t differ1 = nonzero_search(lhs1 ^ rhs1, lsbs);
    const uint64_t first_inequal = match_index(differ0, differ1);
    uint64_t first_zero = 16;
    bool smaller = false;

    /* compare the differing elements before v1 gets written */
    if (first_inequal < 16) {
        const int elem_bytes = 1 << es;
        const uint8_t enr = first_inequal / elem_bytes;
        const uint32_t lhs = s390_vec_read_element(v2, enr, es);
        const uint32_t rhs = s390_vec_read_element(v3, enr, es);

        smaller = lhs < rhs;
    }

    if (zs) {
        first_zero = match_index(zero_search(lhs0, lsbs),
                                 zero_search(lhs1, lsbs));
    }

    s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero));
    s390_vec_write_element64(v1, 1, 0);

    if (first_zero < first_inequal) {
        return 0;
    }
    if (first_zero == 16 && first_inequal == 16) {
        return 3;
    }
    return smaller ? 1 : 2;
}
262 
263 #define DEF_VFENE_HELPER(BITS)                                                 \
264 void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3,        \
265                               uint32_t desc)                                   \
266 {                                                                              \
267     const bool zs = extract32(simd_data(desc), 1, 1);                          \
268                                                                                \
269     vfene(v1, v2, v3, zs, MO_##BITS);                                          \
270 }
271 DEF_VFENE_HELPER(8)
272 DEF_VFENE_HELPER(16)
273 DEF_VFENE_HELPER(32)
274 
275 #define DEF_VFENE_CC_HELPER(BITS)                                              \
276 void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3,     \
277                                  CPUS390XState *env, uint32_t desc)            \
278 {                                                                              \
279     const bool zs = extract32(simd_data(desc), 1, 1);                          \
280                                                                                \
281     env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS);                             \
282 }
283 DEF_VFENE_CC_HELPER(8)
284 DEF_VFENE_CC_HELPER(16)
285 DEF_VFENE_CC_HELPER(32)
286 
/*
 * Copies @v2 to @v1 and clears everything that follows the first zero
 * element (the zero element itself is zero already).
 *
 * @es: log2 of the element size in bytes
 *
 * Returns 0 if a zero element was found and 3 if there is none.
 */
static int vistr(void *v1, const void *v2, uint8_t es)
{
    const uint64_t lsbs = get_element_lsbs_mask(es);
    uint64_t d0 = s390_vec_read_element64(v2, 0);
    uint64_t d1 = s390_vec_read_element64(v2, 1);
    uint64_t zeros = zero_search(d0, lsbs);

    if (zeros) {
        /* keep only the bits in front of the first zero element */
        d0 &= ~(-1ull >> clz64(zeros));
        d1 = 0;
    } else {
        /* the first doubleword stays untouched, continue in the second */
        zeros = zero_search(d1, lsbs);
        if (zeros) {
            d1 &= ~(-1ull >> clz64(zeros));
        }
    }

    s390_vec_write_element64(v1, 0, d0);
    s390_vec_write_element64(v1, 1, d1);
    /* zeros is only non-zero here if one of the searches was successful */
    return zeros ? 0 : 3;
}
312 
313 #define DEF_VISTR_HELPER(BITS)                                                 \
314 void HELPER(gvec_vistr##BITS)(void *v1, const void *v2, uint32_t desc)         \
315 {                                                                              \
316     vistr(v1, v2, MO_##BITS);                                                  \
317 }
318 DEF_VISTR_HELPER(8)
319 DEF_VISTR_HELPER(16)
320 DEF_VISTR_HELPER(32)
321 
322 #define DEF_VISTR_CC_HELPER(BITS)                                              \
323 void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \
324                                 uint32_t desc)                                 \
325 {                                                                              \
326     env->cc_op = vistr(v1, v2, MO_##BITS);                                     \
327 }
328 DEF_VISTR_CC_HELPER(8)
329 DEF_VISTR_CC_HELPER(16)
330 DEF_VISTR_CC_HELPER(32)
331 
/*
 * Evaluates one comparison of @data against the limit @l.
 *
 * The control byte @c selects which outcomes count as a match: bit 7 for
 * "equal", bit 6 for "lower" (@data < @l) and bit 5 for "higher"
 * (@data > @l). The comparison is unsigned.
 *
 * Returns whether the bit selected by the actual outcome is set in @c.
 */
static bool element_compare(uint32_t data, uint32_t l, uint8_t c)
{
    if (data == l) {
        return extract32(c, 7, 1);
    }
    return extract32(c, data < l ? 6 : 5, 1);
}
345 
346 static int vstrc(void *v1, const void *v2, const void *v3, const void *v4,
347                  bool in, bool rt, bool zs, uint8_t es)
348 {
349     const uint64_t mask = get_element_lsbs_mask(es);
350     uint64_t a0 = s390_vec_read_element64(v2, 0);
351     uint64_t a1 = s390_vec_read_element64(v2, 1);
352     int first_zero = 16, first_match = 16;
353     S390Vector rt_result = {};
354     uint64_t z0, z1;
355     int i, j;
356 
357     if (zs) {
358         z0 = zero_search(a0, mask);
359         z1 = zero_search(a1, mask);
360         first_zero = match_index(z0, z1);
361     }
362 
363     for (i = 0; i < 16 / (1 << es); i++) {
364         const uint32_t data = s390_vec_read_element(v2, i, es);
365         const int cur_byte = i * (1 << es);
366         bool any_match = false;
367 
368         /* if we don't need a bit vector, we can stop early */
369         if (cur_byte == first_zero && !rt) {
370             break;
371         }
372 
373         for (j = 0; j < 16 / (1 << es); j += 2) {
374             const uint32_t l1 = s390_vec_read_element(v3, j, es);
375             const uint32_t l2 = s390_vec_read_element(v3, j + 1, es);
376             /* we are only interested in the highest byte of each element */
377             const uint8_t c1 = s390_vec_read_element8(v4, j * (1 << es));
378             const uint8_t c2 = s390_vec_read_element8(v4, (j + 1) * (1 << es));
379 
380             if (element_compare(data, l1, c1) &&
381                 element_compare(data, l2, c2)) {
382                 any_match = true;
383                 break;
384             }
385         }
386         /* invert the result if requested */
387         any_match = in ^ any_match;
388 
389         if (any_match) {
390             /* indicate bit vector if requested */
391             if (rt) {
392                 const uint64_t val = -1ull;
393 
394                 first_match = MIN(cur_byte, first_match);
395                 s390_vec_write_element(&rt_result, i, es, val);
396             } else {
397                 /* stop on the first match */
398                 first_match = cur_byte;
399                 break;
400             }
401         }
402     }
403 
404     if (rt) {
405         *(S390Vector *)v1 = rt_result;
406     } else {
407         s390_vec_write_element64(v1, 0, MIN(first_match, first_zero));
408         s390_vec_write_element64(v1, 1, 0);
409     }
410 
411     if (first_zero == 16 && first_match == 16) {
412         return 3; /* no match */
413     } else if (first_zero == 16) {
414         return 1; /* matching elements, no match for zero */
415     } else if (first_match < first_zero) {
416         return 2; /* matching elements before match for zero */
417     }
418     return 0; /* match for zero */
419 }
420 
421 #define DEF_VSTRC_HELPER(BITS)                                                 \
422 void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3,        \
423                               const void *v4, uint32_t desc)                   \
424 {                                                                              \
425     const bool in = extract32(simd_data(desc), 3, 1);                          \
426     const bool zs = extract32(simd_data(desc), 1, 1);                          \
427                                                                                \
428     vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS);                               \
429 }
430 DEF_VSTRC_HELPER(8)
431 DEF_VSTRC_HELPER(16)
432 DEF_VSTRC_HELPER(32)
433 
434 #define DEF_VSTRC_RT_HELPER(BITS)                                              \
435 void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3,     \
436                                  const void *v4, uint32_t desc)                \
437 {                                                                              \
438     const bool in = extract32(simd_data(desc), 3, 1);                          \
439     const bool zs = extract32(simd_data(desc), 1, 1);                          \
440                                                                                \
441     vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS);                               \
442 }
443 DEF_VSTRC_RT_HELPER(8)
444 DEF_VSTRC_RT_HELPER(16)
445 DEF_VSTRC_RT_HELPER(32)
446 
447 #define DEF_VSTRC_CC_HELPER(BITS)                                              \
448 void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3,     \
449                                  const void *v4, CPUS390XState *env,           \
450                                  uint32_t desc)                                \
451 {                                                                              \
452     const bool in = extract32(simd_data(desc), 3, 1);                          \
453     const bool zs = extract32(simd_data(desc), 1, 1);                          \
454                                                                                \
455     env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS);                  \
456 }
457 DEF_VSTRC_CC_HELPER(8)
458 DEF_VSTRC_CC_HELPER(16)
459 DEF_VSTRC_CC_HELPER(32)
460 
461 #define DEF_VSTRC_CC_RT_HELPER(BITS)                                           \
462 void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3,  \
463                                     const void *v4, CPUS390XState *env,        \
464                                     uint32_t desc)                             \
465 {                                                                              \
466     const bool in = extract32(simd_data(desc), 3, 1);                          \
467     const bool zs = extract32(simd_data(desc), 1, 1);                          \
468                                                                                \
469     env->cc_op = vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS);                  \
470 }
471 DEF_VSTRC_CC_RT_HELPER(8)
472 DEF_VSTRC_CC_RT_HELPER(16)
473 DEF_VSTRC_CC_RT_HELPER(32)
474