xref: /openbmc/qemu/target/s390x/tcg/vec_fpu_helper.c (revision 9c4888c9959ccb8d2e2dc7e0080d48ad1398c036)
1 /*
2  * QEMU TCG support -- s390x vector floating point instruction support
3  *
4  * Copyright (C) 2019 Red Hat Inc
5  *
6  * Authors:
7  *   David Hildenbrand <david@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 #include "qemu/osdep.h"
13 #include "cpu.h"
14 #include "s390x-internal.h"
15 #include "vec.h"
16 #include "tcg_s390x.h"
17 #include "tcg/tcg-gvec-desc.h"
18 #include "exec/exec-all.h"
19 #include "exec/helper-proto.h"
20 #include "fpu/softfloat.h"
21 
/*
 * Exception codes placed in the low four bits of the VXC that
 * check_ieee_exc() builds; the element index is shifted in above them.
 */
#define VIC_INVALID         0x1
#define VIC_DIVBYZERO       0x2
#define VIC_OVERFLOW        0x3
#define VIC_UNDERFLOW       0x4
#define VIC_INEXACT         0x5
27 
28 /* returns the VEX. If the VEX is 0, there is no trap */
29 static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC,
30                               uint8_t *vec_exc)
31 {
32     uint8_t vece_exc = 0, trap_exc;
33     unsigned qemu_exc;
34 
35     /* Retrieve and clear the softfloat exceptions */
36     qemu_exc = env->fpu_status.float_exception_flags;
37     if (qemu_exc == 0) {
38         return 0;
39     }
40     env->fpu_status.float_exception_flags = 0;
41 
42     vece_exc = s390_softfloat_exc_to_ieee(qemu_exc);
43 
44     /* Add them to the vector-wide s390x exception bits */
45     *vec_exc |= vece_exc;
46 
47     /* Check for traps and construct the VXC */
48     trap_exc = vece_exc & env->fpc >> 24;
49     if (trap_exc) {
50         if (trap_exc & S390_IEEE_MASK_INVALID) {
51             return enr << 4 | VIC_INVALID;
52         } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) {
53             return enr << 4 | VIC_DIVBYZERO;
54         } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) {
55             return enr << 4 | VIC_OVERFLOW;
56         } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) {
57             return enr << 4 | VIC_UNDERFLOW;
58         } else if (!XxC) {
59             g_assert(trap_exc & S390_IEEE_MASK_INEXACT);
60             /* inexact has lowest priority on traps */
61             return enr << 4 | VIC_INEXACT;
62         }
63     }
64     return 0;
65 }
66 
67 static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc,
68                             uintptr_t retaddr)
69 {
70     if (vxc) {
71         /* on traps, the fpc flags are not updated, instruction is suppressed */
72         tcg_s390_vector_exception(env, vxc, retaddr);
73     }
74     if (vec_exc) {
75         /* indicate exceptions for all elements combined */
76         env->fpc |= vec_exc << 16;
77     }
78 }
79 
80 static float32 s390_vec_read_float32(const S390Vector *v, uint8_t enr)
81 {
82     return make_float32(s390_vec_read_element32(v, enr));
83 }
84 
85 static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr)
86 {
87     return make_float64(s390_vec_read_element64(v, enr));
88 }
89 
90 static float128 s390_vec_read_float128(const S390Vector *v)
91 {
92     return make_float128(s390_vec_read_element64(v, 0),
93                          s390_vec_read_element64(v, 1));
94 }
95 
96 static void s390_vec_write_float32(S390Vector *v, uint8_t enr, float32 data)
97 {
98     return s390_vec_write_element32(v, enr, data);
99 }
100 
101 static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data)
102 {
103     return s390_vec_write_element64(v, enr, data);
104 }
105 
106 static void s390_vec_write_float128(S390Vector *v, float128 data)
107 {
108     s390_vec_write_element64(v, 0, data.high);
109     s390_vec_write_element64(v, 1, data.low);
110 }
111 
112 typedef float32 (*vop32_2_fn)(float32 a, float_status *s);
113 static void vop32_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
114                     bool s, bool XxC, uint8_t erm, vop32_2_fn fn,
115                     uintptr_t retaddr)
116 {
117     uint8_t vxc, vec_exc = 0;
118     S390Vector tmp = {};
119     int i, old_mode;
120 
121     old_mode = s390_swap_bfp_rounding_mode(env, erm);
122     for (i = 0; i < 4; i++) {
123         const float32 a = s390_vec_read_float32(v2, i);
124 
125         s390_vec_write_float32(&tmp, i, fn(a, &env->fpu_status));
126         vxc = check_ieee_exc(env, i, XxC, &vec_exc);
127         if (s || vxc) {
128             break;
129         }
130     }
131     s390_restore_bfp_rounding_mode(env, old_mode);
132     handle_ieee_exc(env, vxc, vec_exc, retaddr);
133     *v1 = tmp;
134 }
135 
136 typedef float64 (*vop64_2_fn)(float64 a, float_status *s);
137 static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
138                     bool s, bool XxC, uint8_t erm, vop64_2_fn fn,
139                     uintptr_t retaddr)
140 {
141     uint8_t vxc, vec_exc = 0;
142     S390Vector tmp = {};
143     int i, old_mode;
144 
145     old_mode = s390_swap_bfp_rounding_mode(env, erm);
146     for (i = 0; i < 2; i++) {
147         const float64 a = s390_vec_read_float64(v2, i);
148 
149         s390_vec_write_float64(&tmp, i, fn(a, &env->fpu_status));
150         vxc = check_ieee_exc(env, i, XxC, &vec_exc);
151         if (s || vxc) {
152             break;
153         }
154     }
155     s390_restore_bfp_rounding_mode(env, old_mode);
156     handle_ieee_exc(env, vxc, vec_exc, retaddr);
157     *v1 = tmp;
158 }
159 
160 typedef float128 (*vop128_2_fn)(float128 a, float_status *s);
161 static void vop128_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
162                     bool s, bool XxC, uint8_t erm, vop128_2_fn fn,
163                     uintptr_t retaddr)
164 {
165     const float128 a = s390_vec_read_float128(v2);
166     uint8_t vxc, vec_exc = 0;
167     S390Vector tmp = {};
168     int old_mode;
169 
170     old_mode = s390_swap_bfp_rounding_mode(env, erm);
171     s390_vec_write_float128(&tmp, fn(a, &env->fpu_status));
172     vxc = check_ieee_exc(env, 0, XxC, &vec_exc);
173     s390_restore_bfp_rounding_mode(env, old_mode);
174     handle_ieee_exc(env, vxc, vec_exc, retaddr);
175     *v1 = tmp;
176 }
177 
178 static float64 vcdg64(float64 a, float_status *s)
179 {
180     return int64_to_float64(a, s);
181 }
182 
183 static float64 vcdlg64(float64 a, float_status *s)
184 {
185     return uint64_to_float64(a, s);
186 }
187 
188 static float64 vcgd64(float64 a, float_status *s)
189 {
190     const float64 tmp = float64_to_int64(a, s);
191 
192     return float64_is_any_nan(a) ? INT64_MIN : tmp;
193 }
194 
195 static float64 vclgd64(float64 a, float_status *s)
196 {
197     const float64 tmp = float64_to_uint64(a, s);
198 
199     return float64_is_any_nan(a) ? 0 : tmp;
200 }
201 
/*
 * Generate the helper gvec_<NAME><BITS> for a one-source operation.
 *
 * Three fields are pulled out of simd_data(desc) with extract32(): a 4-bit
 * rounding mode at position 4 (erm), and two 1-bit flags at positions 3 (se)
 * and 2 (XxC). They are passed on to vop<BITS>_2() together with FN and the
 * helper's GETPC().
 */
#define DEF_GVEC_VOP2_FN(NAME, FN, BITS)                                       \
void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env,   \
                               uint32_t desc)                                  \
{                                                                              \
    const uint8_t erm = extract32(simd_data(desc), 4, 4);                      \
    const bool se = extract32(simd_data(desc), 3, 1);                          \
    const bool XxC = extract32(simd_data(desc), 2, 1);                         \
                                                                               \
    vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC());                     \
}

/* 64-bit only helper that uses the local wrapper function <NAME>64. */
#define DEF_GVEC_VOP2_64(NAME)                                                 \
DEF_GVEC_VOP2_FN(NAME, NAME##64, 64)

/* 32/64/128-bit helpers that use the softfloat functions float<BITS>_<OP>. */
#define DEF_GVEC_VOP2(NAME, OP)                                                \
DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32)                                       \
DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64)                                       \
DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128)

DEF_GVEC_VOP2_64(vcdg)
DEF_GVEC_VOP2_64(vcdlg)
DEF_GVEC_VOP2_64(vcgd)
DEF_GVEC_VOP2_64(vclgd)
DEF_GVEC_VOP2(vfi, round_to_int)
DEF_GVEC_VOP2(vfsq, sqrt)
227 
228 typedef float32 (*vop32_3_fn)(float32 a, float32 b, float_status *s);
229 static void vop32_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
230                     CPUS390XState *env, bool s, vop32_3_fn fn,
231                     uintptr_t retaddr)
232 {
233     uint8_t vxc, vec_exc = 0;
234     S390Vector tmp = {};
235     int i;
236 
237     for (i = 0; i < 4; i++) {
238         const float32 a = s390_vec_read_float32(v2, i);
239         const float32 b = s390_vec_read_float32(v3, i);
240 
241         s390_vec_write_float32(&tmp, i, fn(a, b, &env->fpu_status));
242         vxc = check_ieee_exc(env, i, false, &vec_exc);
243         if (s || vxc) {
244             break;
245         }
246     }
247     handle_ieee_exc(env, vxc, vec_exc, retaddr);
248     *v1 = tmp;
249 }
250 
251 typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s);
252 static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
253                     CPUS390XState *env, bool s, vop64_3_fn fn,
254                     uintptr_t retaddr)
255 {
256     uint8_t vxc, vec_exc = 0;
257     S390Vector tmp = {};
258     int i;
259 
260     for (i = 0; i < 2; i++) {
261         const float64 a = s390_vec_read_float64(v2, i);
262         const float64 b = s390_vec_read_float64(v3, i);
263 
264         s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status));
265         vxc = check_ieee_exc(env, i, false, &vec_exc);
266         if (s || vxc) {
267             break;
268         }
269     }
270     handle_ieee_exc(env, vxc, vec_exc, retaddr);
271     *v1 = tmp;
272 }
273 
274 typedef float128 (*vop128_3_fn)(float128 a, float128 b, float_status *s);
275 static void vop128_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
276                      CPUS390XState *env, bool s, vop128_3_fn fn,
277                      uintptr_t retaddr)
278 {
279     const float128 a = s390_vec_read_float128(v2);
280     const float128 b = s390_vec_read_float128(v3);
281     uint8_t vxc, vec_exc = 0;
282     S390Vector tmp = {};
283 
284     s390_vec_write_float128(&tmp, fn(a, b, &env->fpu_status));
285     vxc = check_ieee_exc(env, 0, false, &vec_exc);
286     handle_ieee_exc(env, vxc, vec_exc, retaddr);
287     *v1 = tmp;
288 }
289 
/*
 * Generate the helper gvec_<NAME><BITS> for a two-source operation.
 *
 * A 1-bit flag (se) is extracted from position 3 of simd_data(desc) and
 * passed to vop<BITS>_3() together with the softfloat function
 * float<BITS>_<OP> and the helper's GETPC().
 */
#define DEF_GVEC_VOP3_B(NAME, OP, BITS)                                        \
void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
                              CPUS390XState *env, uint32_t desc)               \
{                                                                              \
    const bool se = extract32(simd_data(desc), 3, 1);                          \
                                                                               \
    vop##BITS##_3(v1, v2, v3, env, se, float##BITS##_##OP, GETPC());           \
}

/* Instantiate the 32, 64 and 128-bit variants of one operation. */
#define DEF_GVEC_VOP3(NAME, OP)                                                \
DEF_GVEC_VOP3_B(NAME, OP, 32)                                                  \
DEF_GVEC_VOP3_B(NAME, OP, 64)                                                  \
DEF_GVEC_VOP3_B(NAME, OP, 128)

DEF_GVEC_VOP3(vfa, add)
DEF_GVEC_VOP3(vfs, sub)
DEF_GVEC_VOP3(vfd, div)
DEF_GVEC_VOP3(vfm, mul)
308 
309 static int wfc32(const S390Vector *v1, const S390Vector *v2,
310                  CPUS390XState *env, bool signal, uintptr_t retaddr)
311 {
312     /* only the zero-indexed elements are compared */
313     const float32 a = s390_vec_read_float32(v1, 0);
314     const float32 b = s390_vec_read_float32(v2, 0);
315     uint8_t vxc, vec_exc = 0;
316     int cmp;
317 
318     if (signal) {
319         cmp = float32_compare(a, b, &env->fpu_status);
320     } else {
321         cmp = float32_compare_quiet(a, b, &env->fpu_status);
322     }
323     vxc = check_ieee_exc(env, 0, false, &vec_exc);
324     handle_ieee_exc(env, vxc, vec_exc, retaddr);
325 
326     return float_comp_to_cc(env, cmp);
327 }
328 
329 static int wfc64(const S390Vector *v1, const S390Vector *v2,
330                  CPUS390XState *env, bool signal, uintptr_t retaddr)
331 {
332     /* only the zero-indexed elements are compared */
333     const float64 a = s390_vec_read_float64(v1, 0);
334     const float64 b = s390_vec_read_float64(v2, 0);
335     uint8_t vxc, vec_exc = 0;
336     int cmp;
337 
338     if (signal) {
339         cmp = float64_compare(a, b, &env->fpu_status);
340     } else {
341         cmp = float64_compare_quiet(a, b, &env->fpu_status);
342     }
343     vxc = check_ieee_exc(env, 0, false, &vec_exc);
344     handle_ieee_exc(env, vxc, vec_exc, retaddr);
345 
346     return float_comp_to_cc(env, cmp);
347 }
348 
349 static int wfc128(const S390Vector *v1, const S390Vector *v2,
350                   CPUS390XState *env, bool signal, uintptr_t retaddr)
351 {
352     /* only the zero-indexed elements are compared */
353     const float128 a = s390_vec_read_float128(v1);
354     const float128 b = s390_vec_read_float128(v2);
355     uint8_t vxc, vec_exc = 0;
356     int cmp;
357 
358     if (signal) {
359         cmp = float128_compare(a, b, &env->fpu_status);
360     } else {
361         cmp = float128_compare_quiet(a, b, &env->fpu_status);
362     }
363     vxc = check_ieee_exc(env, 0, false, &vec_exc);
364     handle_ieee_exc(env, vxc, vec_exc, retaddr);
365 
366     return float_comp_to_cc(env, cmp);
367 }
368 
/*
 * Generate the helper gvec_<NAME><BITS>: run wfc<BITS>() on v1/v2 with the
 * compile-time SIGNAL flag and store its return value in env->cc_op.
 * The desc argument is not evaluated.
 */
#define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS)                                     \
void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2,                 \
                               CPUS390XState *env, uint32_t desc)              \
{                                                                              \
    env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC());                      \
}

/* Instantiate the 32, 64 and 128-bit variants of one compare helper. */
#define DEF_GVEC_WFC(NAME, SIGNAL)                                             \
     DEF_GVEC_WFC_B(NAME, SIGNAL, 32)                                          \
     DEF_GVEC_WFC_B(NAME, SIGNAL, 64)                                          \
     DEF_GVEC_WFC_B(NAME, SIGNAL, 128)

/* wfc uses the quiet compare, wfk the signaling one */
DEF_GVEC_WFC(wfc, false)
DEF_GVEC_WFC(wfk, true)
383 
384 typedef bool (*vfc32_fn)(float32 a, float32 b, float_status *status);
385 static int vfc32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
386                  CPUS390XState *env, bool s, vfc32_fn fn, uintptr_t retaddr)
387 {
388     uint8_t vxc, vec_exc = 0;
389     S390Vector tmp = {};
390     int match = 0;
391     int i;
392 
393     for (i = 0; i < 4; i++) {
394         const float32 a = s390_vec_read_float32(v2, i);
395         const float32 b = s390_vec_read_float32(v3, i);
396 
397         /* swap the order of the parameters, so we can use existing functions */
398         if (fn(b, a, &env->fpu_status)) {
399             match++;
400             s390_vec_write_element32(&tmp, i, -1u);
401         }
402         vxc = check_ieee_exc(env, i, false, &vec_exc);
403         if (s || vxc) {
404             break;
405         }
406     }
407 
408     handle_ieee_exc(env, vxc, vec_exc, retaddr);
409     *v1 = tmp;
410     if (match) {
411         return s || match == 4 ? 0 : 1;
412     }
413     return 3;
414 }
415 
416 typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status);
417 static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
418                  CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr)
419 {
420     uint8_t vxc, vec_exc = 0;
421     S390Vector tmp = {};
422     int match = 0;
423     int i;
424 
425     for (i = 0; i < 2; i++) {
426         const float64 a = s390_vec_read_float64(v2, i);
427         const float64 b = s390_vec_read_float64(v3, i);
428 
429         /* swap the order of the parameters, so we can use existing functions */
430         if (fn(b, a, &env->fpu_status)) {
431             match++;
432             s390_vec_write_element64(&tmp, i, -1ull);
433         }
434         vxc = check_ieee_exc(env, i, false, &vec_exc);
435         if (s || vxc) {
436             break;
437         }
438     }
439 
440     handle_ieee_exc(env, vxc, vec_exc, retaddr);
441     *v1 = tmp;
442     if (match) {
443         return s || match == 2 ? 0 : 1;
444     }
445     return 3;
446 }
447 
448 typedef bool (*vfc128_fn)(float128 a, float128 b, float_status *status);
449 static int vfc128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
450                  CPUS390XState *env, bool s, vfc128_fn fn, uintptr_t retaddr)
451 {
452     const float128 a = s390_vec_read_float128(v2);
453     const float128 b = s390_vec_read_float128(v3);
454     uint8_t vxc, vec_exc = 0;
455     S390Vector tmp = {};
456     bool match = false;
457 
458     /* swap the order of the parameters, so we can use existing functions */
459     if (fn(b, a, &env->fpu_status)) {
460         match = true;
461         s390_vec_write_element64(&tmp, 0, -1ull);
462         s390_vec_write_element64(&tmp, 1, -1ull);
463     }
464     vxc = check_ieee_exc(env, 0, false, &vec_exc);
465     handle_ieee_exc(env, vxc, vec_exc, retaddr);
466     *v1 = tmp;
467     return match ? 0 : 3;
468 }
469 
/*
 * Generate two helpers per compare operation and element size:
 * gvec_<NAME><BITS> discards the return value of vfc<BITS>(), while
 * gvec_<NAME><BITS>_cc stores it in env->cc_op.
 *
 * Two 1-bit flags are extracted from simd_data(desc): se at position 3 and
 * sq at position 2. With sq set, float<BITS>_<OP> is used as predicate,
 * otherwise float<BITS>_<OP>_quiet.
 */
#define DEF_GVEC_VFC_B(NAME, OP, BITS)                                         \
void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
                               CPUS390XState *env, uint32_t desc)              \
{                                                                              \
    const bool se = extract32(simd_data(desc), 3, 1);                          \
    const bool sq = extract32(simd_data(desc), 2, 1);                          \
    vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet;  \
                                                                               \
    vfc##BITS(v1, v2, v3, env, se, fn, GETPC());                               \
}                                                                              \
                                                                               \
void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3,  \
                                    CPUS390XState *env, uint32_t desc)         \
{                                                                              \
    const bool se = extract32(simd_data(desc), 3, 1);                          \
    const bool sq = extract32(simd_data(desc), 2, 1);                          \
    vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet;  \
                                                                               \
    env->cc_op = vfc##BITS(v1, v2, v3, env, se, fn, GETPC());                  \
}

/*
 * Instantiate the 32, 64 and 128-bit variants. Note the trailing line
 * continuation: the blank line that follows is still part of this macro.
 */
#define DEF_GVEC_VFC(NAME, OP)                                                 \
DEF_GVEC_VFC_B(NAME, OP, 32)                                                   \
DEF_GVEC_VFC_B(NAME, OP, 64)                                                   \
DEF_GVEC_VFC_B(NAME, OP, 128)                                                  \

DEF_GVEC_VFC(vfce, eq)
DEF_GVEC_VFC(vfch, lt)
DEF_GVEC_VFC(vfche, le)
499 
500 void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env,
501                          uint32_t desc)
502 {
503     const bool s = extract32(simd_data(desc), 3, 1);
504     uint8_t vxc, vec_exc = 0;
505     S390Vector tmp = {};
506     int i;
507 
508     for (i = 0; i < 2; i++) {
509         /* load from even element */
510         const float32 a = s390_vec_read_element32(v2, i * 2);
511         const uint64_t ret = float32_to_float64(a, &env->fpu_status);
512 
513         s390_vec_write_element64(&tmp, i, ret);
514         /* indicate the source element */
515         vxc = check_ieee_exc(env, i * 2, false, &vec_exc);
516         if (s || vxc) {
517             break;
518         }
519     }
520     handle_ieee_exc(env, vxc, vec_exc, GETPC());
521     *(S390Vector *)v1 = tmp;
522 }
523 
524 void HELPER(gvec_vfll64)(void *v1, const void *v2, CPUS390XState *env,
525                          uint32_t desc)
526 {
527     /* load from even element */
528     const float128 ret = float64_to_float128(s390_vec_read_float64(v2, 0),
529                                              &env->fpu_status);
530     uint8_t vxc, vec_exc = 0;
531 
532     vxc = check_ieee_exc(env, 0, false, &vec_exc);
533     handle_ieee_exc(env, vxc, vec_exc, GETPC());
534     s390_vec_write_float128(v1, ret);
535 }
536 
537 void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env,
538                          uint32_t desc)
539 {
540     const uint8_t erm = extract32(simd_data(desc), 4, 4);
541     const bool s = extract32(simd_data(desc), 3, 1);
542     const bool XxC = extract32(simd_data(desc), 2, 1);
543     uint8_t vxc, vec_exc = 0;
544     S390Vector tmp = {};
545     int i, old_mode;
546 
547     old_mode = s390_swap_bfp_rounding_mode(env, erm);
548     for (i = 0; i < 2; i++) {
549         float64 a = s390_vec_read_element64(v2, i);
550         uint32_t ret = float64_to_float32(a, &env->fpu_status);
551 
552         /* place at even element */
553         s390_vec_write_element32(&tmp, i * 2, ret);
554         /* indicate the source element */
555         vxc = check_ieee_exc(env, i, XxC, &vec_exc);
556         if (s || vxc) {
557             break;
558         }
559     }
560     s390_restore_bfp_rounding_mode(env, old_mode);
561     handle_ieee_exc(env, vxc, vec_exc, GETPC());
562     *(S390Vector *)v1 = tmp;
563 }
564 
565 void HELPER(gvec_vflr128)(void *v1, const void *v2, CPUS390XState *env,
566                           uint32_t desc)
567 {
568     const uint8_t erm = extract32(simd_data(desc), 4, 4);
569     const bool XxC = extract32(simd_data(desc), 2, 1);
570     uint8_t vxc, vec_exc = 0;
571     int old_mode;
572     float64 ret;
573 
574     old_mode = s390_swap_bfp_rounding_mode(env, erm);
575     ret = float128_to_float64(s390_vec_read_float128(v2), &env->fpu_status);
576     vxc = check_ieee_exc(env, 0, XxC, &vec_exc);
577     s390_restore_bfp_rounding_mode(env, old_mode);
578     handle_ieee_exc(env, vxc, vec_exc, GETPC());
579 
580     /* place at even element, odd element is unpredictable */
581     s390_vec_write_float64(v1, 0, ret);
582 }
583 
584 static void vfma32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
585                    const S390Vector *v4, CPUS390XState *env, bool s, int flags,
586                    uintptr_t retaddr)
587 {
588     uint8_t vxc, vec_exc = 0;
589     S390Vector tmp = {};
590     int i;
591 
592     for (i = 0; i < 4; i++) {
593         const float32 a = s390_vec_read_float32(v2, i);
594         const float32 b = s390_vec_read_float32(v3, i);
595         const float32 c = s390_vec_read_float32(v4, i);
596         float32 ret = float32_muladd(a, b, c, flags, &env->fpu_status);
597 
598         s390_vec_write_float32(&tmp, i, ret);
599         vxc = check_ieee_exc(env, i, false, &vec_exc);
600         if (s || vxc) {
601             break;
602         }
603     }
604     handle_ieee_exc(env, vxc, vec_exc, retaddr);
605     *v1 = tmp;
606 }
607 
608 static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
609                    const S390Vector *v4, CPUS390XState *env, bool s, int flags,
610                    uintptr_t retaddr)
611 {
612     uint8_t vxc, vec_exc = 0;
613     S390Vector tmp = {};
614     int i;
615 
616     for (i = 0; i < 2; i++) {
617         const float64 a = s390_vec_read_float64(v2, i);
618         const float64 b = s390_vec_read_float64(v3, i);
619         const float64 c = s390_vec_read_float64(v4, i);
620         const float64 ret = float64_muladd(a, b, c, flags, &env->fpu_status);
621 
622         s390_vec_write_float64(&tmp, i, ret);
623         vxc = check_ieee_exc(env, i, false, &vec_exc);
624         if (s || vxc) {
625             break;
626         }
627     }
628     handle_ieee_exc(env, vxc, vec_exc, retaddr);
629     *v1 = tmp;
630 }
631 
632 static void vfma128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
633                     const S390Vector *v4, CPUS390XState *env, bool s, int flags,
634                     uintptr_t retaddr)
635 {
636     const float128 a = s390_vec_read_float128(v2);
637     const float128 b = s390_vec_read_float128(v3);
638     const float128 c = s390_vec_read_float128(v4);
639     uint8_t vxc, vec_exc = 0;
640     float128 ret;
641 
642     ret = float128_muladd(a, b, c, flags, &env->fpu_status);
643     vxc = check_ieee_exc(env, 0, false, &vec_exc);
644     handle_ieee_exc(env, vxc, vec_exc, retaddr);
645     s390_vec_write_float128(v1, ret);
646 }
647 
/*
 * Generate the helper gvec_<NAME><BITS> for a three-source multiply-add.
 *
 * A 1-bit flag (se) is extracted from position 3 of simd_data(desc) and
 * passed to vfma<BITS>() together with the compile-time softfloat muladd
 * FLAGS and the helper's GETPC().
 */
#define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS)                                     \
void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
                               const void *v4, CPUS390XState *env,             \
                               uint32_t desc)                                  \
{                                                                              \
    const bool se = extract32(simd_data(desc), 3, 1);                          \
                                                                               \
    vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC());                       \
}

/* Instantiate the 32, 64 and 128-bit variants of one multiply-add helper. */
#define DEF_GVEC_VFMA(NAME, FLAGS)                                             \
    DEF_GVEC_VFMA_B(NAME, FLAGS, 32)                                           \
    DEF_GVEC_VFMA_B(NAME, FLAGS, 64)                                           \
    DEF_GVEC_VFMA_B(NAME, FLAGS, 128)

/* the variants differ only in the negation flags handed to *_muladd() */
DEF_GVEC_VFMA(vfma, 0)
DEF_GVEC_VFMA(vfms, float_muladd_negate_c)
DEF_GVEC_VFMA(vfnma, float_muladd_negate_result)
DEF_GVEC_VFMA(vfnms, float_muladd_negate_c | float_muladd_negate_result)
667 
668 void HELPER(gvec_vftci32)(void *v1, const void *v2, CPUS390XState *env,
669                           uint32_t desc)
670 {
671     uint16_t i3 = extract32(simd_data(desc), 4, 12);
672     bool s = extract32(simd_data(desc), 3, 1);
673     int i, match = 0;
674 
675     for (i = 0; i < 4; i++) {
676         float32 a = s390_vec_read_float32(v2, i);
677 
678         if (float32_dcmask(env, a) & i3) {
679             match++;
680             s390_vec_write_element32(v1, i, -1u);
681         } else {
682             s390_vec_write_element32(v1, i, 0);
683         }
684         if (s) {
685             break;
686         }
687     }
688 
689     if (match == 4 || (s && match)) {
690         env->cc_op = 0;
691     } else if (match) {
692         env->cc_op = 1;
693     } else {
694         env->cc_op = 3;
695     }
696 }
697 
698 void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env,
699                           uint32_t desc)
700 {
701     const uint16_t i3 = extract32(simd_data(desc), 4, 12);
702     const bool s = extract32(simd_data(desc), 3, 1);
703     int i, match = 0;
704 
705     for (i = 0; i < 2; i++) {
706         const float64 a = s390_vec_read_float64(v2, i);
707 
708         if (float64_dcmask(env, a) & i3) {
709             match++;
710             s390_vec_write_element64(v1, i, -1ull);
711         } else {
712             s390_vec_write_element64(v1, i, 0);
713         }
714         if (s) {
715             break;
716         }
717     }
718 
719     if (match == 2 || (s && match)) {
720         env->cc_op = 0;
721     } else if (match) {
722         env->cc_op = 1;
723     } else {
724         env->cc_op = 3;
725     }
726 }
727 
728 void HELPER(gvec_vftci128)(void *v1, const void *v2, CPUS390XState *env,
729                            uint32_t desc)
730 {
731     const float128 a = s390_vec_read_float128(v2);
732     uint16_t i3 = extract32(simd_data(desc), 4, 12);
733 
734     if (float128_dcmask(env, a) & i3) {
735         env->cc_op = 0;
736         s390_vec_write_element64(v1, 0, -1ull);
737         s390_vec_write_element64(v1, 1, -1ull);
738     } else {
739         env->cc_op = 3;
740         s390_vec_write_element64(v1, 0, 0);
741         s390_vec_write_element64(v1, 1, 0);
742     }
743 }
744 
/*
 * Variant of the min/max operation. vfmin_res() and vfmax_res() assert that
 * they are only called for the variants after S390_MINMAX_TYPE_IEEE, i.e.
 * JAVA through F; the IEEE variant is not handled by them.
 */
typedef enum S390MinMaxType {
    S390_MINMAX_TYPE_IEEE = 0,
    S390_MINMAX_TYPE_JAVA,
    S390_MINMAX_TYPE_C_MACRO,
    S390_MINMAX_TYPE_CPP,
    S390_MINMAX_TYPE_F,
} S390MinMaxType;
752 
/*
 * Decision returned by vfmin_res()/vfmax_res(). How each value is turned
 * into a result element is up to the caller.
 */
typedef enum S390MinMaxRes {
    /* no NaN or both-zero special case applied */
    S390_MINMAX_RES_MINMAX = 0,
    /* select operand a / operand b */
    S390_MINMAX_RES_A,
    S390_MINMAX_RES_B,
    /* returned when operand a / operand b is a signaling NaN (JAVA variant) */
    S390_MINMAX_RES_SILENCE_A,
    S390_MINMAX_RES_SILENCE_B,
} S390MinMaxRes;
760 
761 static S390MinMaxRes vfmin_res(uint16_t dcmask_a, uint16_t dcmask_b,
762                                S390MinMaxType type, float_status *s)
763 {
764     const bool neg_a = dcmask_a & DCMASK_NEGATIVE;
765     const bool nan_a = dcmask_a & DCMASK_NAN;
766     const bool nan_b = dcmask_b & DCMASK_NAN;
767 
768     g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F);
769 
770     if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) {
771         const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN;
772         const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN;
773 
774         if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) {
775             s->float_exception_flags |= float_flag_invalid;
776         }
777         switch (type) {
778         case S390_MINMAX_TYPE_JAVA:
779             if (sig_a) {
780                 return S390_MINMAX_RES_SILENCE_A;
781             } else if (sig_b) {
782                 return S390_MINMAX_RES_SILENCE_B;
783             }
784             return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
785         case S390_MINMAX_TYPE_F:
786             return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
787         case S390_MINMAX_TYPE_C_MACRO:
788             s->float_exception_flags |= float_flag_invalid;
789             return S390_MINMAX_RES_B;
790         case S390_MINMAX_TYPE_CPP:
791             s->float_exception_flags |= float_flag_invalid;
792             return S390_MINMAX_RES_A;
793         default:
794             g_assert_not_reached();
795         }
796     } else if (unlikely(dcmask_a & dcmask_b & DCMASK_ZERO)) {
797         switch (type) {
798         case S390_MINMAX_TYPE_JAVA:
799             return neg_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
800         case S390_MINMAX_TYPE_C_MACRO:
801             return S390_MINMAX_RES_B;
802         case S390_MINMAX_TYPE_F:
803             return !neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A;
804         case S390_MINMAX_TYPE_CPP:
805             return S390_MINMAX_RES_A;
806         default:
807             g_assert_not_reached();
808         }
809     }
810     return S390_MINMAX_RES_MINMAX;
811 }
812 
813 static S390MinMaxRes vfmax_res(uint16_t dcmask_a, uint16_t dcmask_b,
814                                S390MinMaxType type, float_status *s)
815 {
816     g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F);
817 
818     if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) {
819         const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN;
820         const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN;
821         const bool nan_a = dcmask_a & DCMASK_NAN;
822         const bool nan_b = dcmask_b & DCMASK_NAN;
823 
824         if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) {
825             s->float_exception_flags |= float_flag_invalid;
826         }
827         switch (type) {
828         case S390_MINMAX_TYPE_JAVA:
829             if (sig_a) {
830                 return S390_MINMAX_RES_SILENCE_A;
831             } else if (sig_b) {
832                 return S390_MINMAX_RES_SILENCE_B;
833             }
834             return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
835         case S390_MINMAX_TYPE_F:
836             return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
837         case S390_MINMAX_TYPE_C_MACRO:
838             s->float_exception_flags |= float_flag_invalid;
839             return S390_MINMAX_RES_B;
840         case S390_MINMAX_TYPE_CPP:
841             s->float_exception_flags |= float_flag_invalid;
842             return S390_MINMAX_RES_A;
843         default:
844             g_assert_not_reached();
845         }
846     } else if (unlikely(dcmask_a & dcmask_b & DCMASK_ZERO)) {
847         const bool neg_a = dcmask_a & DCMASK_NEGATIVE;
848 
849         switch (type) {
850         case S390_MINMAX_TYPE_JAVA:
851         case S390_MINMAX_TYPE_F:
852             return neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A;
853         case S390_MINMAX_TYPE_C_MACRO:
854             return S390_MINMAX_RES_B;
855         case S390_MINMAX_TYPE_CPP:
856             return S390_MINMAX_RES_A;
857         default:
858             g_assert_not_reached();
859         }
860     }
861     return S390_MINMAX_RES_MINMAX;
862 }
863 
864 static S390MinMaxRes vfminmax_res(uint16_t dcmask_a, uint16_t dcmask_b,
865                                   S390MinMaxType type, bool is_min,
866                                   float_status *s)
867 {
868     return is_min ? vfmin_res(dcmask_a, dcmask_b, type, s) :
869                     vfmax_res(dcmask_a, dcmask_b, type, s);
870 }
871 
872 static void vfminmax32(S390Vector *v1, const S390Vector *v2,
873                        const S390Vector *v3, CPUS390XState *env,
874                        S390MinMaxType type, bool is_min, bool is_abs, bool se,
875                        uintptr_t retaddr)
876 {
877     float_status *s = &env->fpu_status;
878     uint8_t vxc, vec_exc = 0;
879     S390Vector tmp = {};
880     int i;
881 
882     for (i = 0; i < 4; i++) {
883         float32 a = s390_vec_read_float32(v2, i);
884         float32 b = s390_vec_read_float32(v3, i);
885         float32 result;
886 
887         if (type != S390_MINMAX_TYPE_IEEE) {
888             S390MinMaxRes res;
889 
890             if (is_abs) {
891                 a = float32_abs(a);
892                 b = float32_abs(b);
893             }
894 
895             res = vfminmax_res(float32_dcmask(env, a), float32_dcmask(env, b),
896                                type, is_min, s);
897             switch (res) {
898             case S390_MINMAX_RES_MINMAX:
899                 result = is_min ? float32_min(a, b, s) : float32_max(a, b, s);
900                 break;
901             case S390_MINMAX_RES_A:
902                 result = a;
903                 break;
904             case S390_MINMAX_RES_B:
905                 result = b;
906                 break;
907             case S390_MINMAX_RES_SILENCE_A:
908                 result = float32_silence_nan(a, s);
909                 break;
910             case S390_MINMAX_RES_SILENCE_B:
911                 result = float32_silence_nan(b, s);
912                 break;
913             default:
914                 g_assert_not_reached();
915             }
916         } else if (!is_abs) {
917             result = is_min ? float32_minnum(a, b, &env->fpu_status) :
918                               float32_maxnum(a, b, &env->fpu_status);
919         } else {
920             result = is_min ? float32_minnummag(a, b, &env->fpu_status) :
921                               float32_maxnummag(a, b, &env->fpu_status);
922         }
923 
924         s390_vec_write_float32(&tmp, i, result);
925         vxc = check_ieee_exc(env, i, false, &vec_exc);
926         if (se || vxc) {
927             break;
928         }
929     }
930     handle_ieee_exc(env, vxc, vec_exc, retaddr);
931     *v1 = tmp;
932 }
933 
934 static void vfminmax64(S390Vector *v1, const S390Vector *v2,
935                        const S390Vector *v3, CPUS390XState *env,
936                        S390MinMaxType type, bool is_min, bool is_abs, bool se,
937                        uintptr_t retaddr)
938 {
939     float_status *s = &env->fpu_status;
940     uint8_t vxc, vec_exc = 0;
941     S390Vector tmp = {};
942     int i;
943 
944     for (i = 0; i < 2; i++) {
945         float64 a = s390_vec_read_float64(v2, i);
946         float64 b = s390_vec_read_float64(v3, i);
947         float64 result;
948 
949         if (type != S390_MINMAX_TYPE_IEEE) {
950             S390MinMaxRes res;
951 
952             if (is_abs) {
953                 a = float64_abs(a);
954                 b = float64_abs(b);
955             }
956 
957             res = vfminmax_res(float64_dcmask(env, a), float64_dcmask(env, b),
958                                type, is_min, s);
959             switch (res) {
960             case S390_MINMAX_RES_MINMAX:
961                 result = is_min ? float64_min(a, b, s) : float64_max(a, b, s);
962                 break;
963             case S390_MINMAX_RES_A:
964                 result = a;
965                 break;
966             case S390_MINMAX_RES_B:
967                 result = b;
968                 break;
969             case S390_MINMAX_RES_SILENCE_A:
970                 result = float64_silence_nan(a, s);
971                 break;
972             case S390_MINMAX_RES_SILENCE_B:
973                 result = float64_silence_nan(b, s);
974                 break;
975             default:
976                 g_assert_not_reached();
977             }
978         } else if (!is_abs) {
979             result = is_min ? float64_minnum(a, b, &env->fpu_status) :
980                               float64_maxnum(a, b, &env->fpu_status);
981         } else {
982             result = is_min ? float64_minnummag(a, b, &env->fpu_status) :
983                               float64_maxnummag(a, b, &env->fpu_status);
984         }
985 
986         s390_vec_write_float64(&tmp, i, result);
987         vxc = check_ieee_exc(env, i, false, &vec_exc);
988         if (se || vxc) {
989             break;
990         }
991     }
992     handle_ieee_exc(env, vxc, vec_exc, retaddr);
993     *v1 = tmp;
994 }
995 
996 static void vfminmax128(S390Vector *v1, const S390Vector *v2,
997                         const S390Vector *v3, CPUS390XState *env,
998                         S390MinMaxType type, bool is_min, bool is_abs, bool se,
999                         uintptr_t retaddr)
1000 {
1001     float128 a = s390_vec_read_float128(v2);
1002     float128 b = s390_vec_read_float128(v3);
1003     float_status *s = &env->fpu_status;
1004     uint8_t vxc, vec_exc = 0;
1005     float128 result;
1006 
1007     if (type != S390_MINMAX_TYPE_IEEE) {
1008         S390MinMaxRes res;
1009 
1010         if (is_abs) {
1011             a = float128_abs(a);
1012             b = float128_abs(b);
1013         }
1014 
1015         res = vfminmax_res(float128_dcmask(env, a), float128_dcmask(env, b),
1016                            type, is_min, s);
1017         switch (res) {
1018         case S390_MINMAX_RES_MINMAX:
1019             result = is_min ? float128_min(a, b, s) : float128_max(a, b, s);
1020             break;
1021         case S390_MINMAX_RES_A:
1022             result = a;
1023             break;
1024         case S390_MINMAX_RES_B:
1025             result = b;
1026             break;
1027         case S390_MINMAX_RES_SILENCE_A:
1028             result = float128_silence_nan(a, s);
1029             break;
1030         case S390_MINMAX_RES_SILENCE_B:
1031             result = float128_silence_nan(b, s);
1032             break;
1033         default:
1034             g_assert_not_reached();
1035         }
1036     } else if (!is_abs) {
1037         result = is_min ? float128_minnum(a, b, &env->fpu_status) :
1038                           float128_maxnum(a, b, &env->fpu_status);
1039     } else {
1040         result = is_min ? float128_minnummag(a, b, &env->fpu_status) :
1041                           float128_maxnummag(a, b, &env->fpu_status);
1042     }
1043 
1044     vxc = check_ieee_exc(env, 0, false, &vec_exc);
1045     handle_ieee_exc(env, vxc, vec_exc, retaddr);
1046     s390_vec_write_float128(v1, result);
1047 }
1048 
1049 #define DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, BITS)                                \
1050 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
1051                                CPUS390XState *env, uint32_t desc)              \
1052 {                                                                              \
1053     const bool se = extract32(simd_data(desc), 3, 1);                          \
1054     uint8_t type = extract32(simd_data(desc), 4, 4);                           \
1055     bool is_abs = false;                                                       \
1056                                                                                \
1057     if (type >= 8) {                                                           \
1058         is_abs = true;                                                         \
1059         type -= 8;                                                             \
1060     }                                                                          \
1061                                                                                \
1062     vfminmax##BITS(v1, v2, v3, env, type, IS_MIN, is_abs, se, GETPC());        \
1063 }
1064 
1065 #define DEF_GVEC_VFMINMAX(NAME, IS_MIN)                                        \
1066     DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 32)                                      \
1067     DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 64)                                      \
1068     DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 128)
1069 
1070 DEF_GVEC_VFMINMAX(vfmax, false)
1071 DEF_GVEC_VFMINMAX(vfmin, true)
1072