xref: /openbmc/qemu/target/s390x/tcg/vec_fpu_helper.c (revision 8092b51849499be97c42c0f1a832ade969e38724)
1 /*
2  * QEMU TCG support -- s390x vector floating point instruction support
3  *
4  * Copyright (C) 2019 Red Hat Inc
5  *
6  * Authors:
7  *   David Hildenbrand <david@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 #include "qemu/osdep.h"
13 #include "qemu-common.h"
14 #include "cpu.h"
15 #include "s390x-internal.h"
16 #include "vec.h"
17 #include "tcg_s390x.h"
18 #include "tcg/tcg-gvec-desc.h"
19 #include "exec/exec-all.h"
20 #include "exec/helper-proto.h"
21 #include "fpu/softfloat.h"
22 
/*
 * Exception codes that check_ieee_exc() combines with the element number
 * (enr << 4) to build the value it returns.
 */
enum {
    VIC_INVALID   = 0x1,
    VIC_DIVBYZERO = 0x2,
    VIC_OVERFLOW  = 0x3,
    VIC_UNDERFLOW = 0x4,
    VIC_INEXACT   = 0x5,
};
28 
29 /* returns the VEX. If the VEX is 0, there is no trap */
30 static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC,
31                               uint8_t *vec_exc)
32 {
33     uint8_t vece_exc = 0, trap_exc;
34     unsigned qemu_exc;
35 
36     /* Retrieve and clear the softfloat exceptions */
37     qemu_exc = env->fpu_status.float_exception_flags;
38     if (qemu_exc == 0) {
39         return 0;
40     }
41     env->fpu_status.float_exception_flags = 0;
42 
43     vece_exc = s390_softfloat_exc_to_ieee(qemu_exc);
44 
45     /* Add them to the vector-wide s390x exception bits */
46     *vec_exc |= vece_exc;
47 
48     /* Check for traps and construct the VXC */
49     trap_exc = vece_exc & env->fpc >> 24;
50     if (trap_exc) {
51         if (trap_exc & S390_IEEE_MASK_INVALID) {
52             return enr << 4 | VIC_INVALID;
53         } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) {
54             return enr << 4 | VIC_DIVBYZERO;
55         } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) {
56             return enr << 4 | VIC_OVERFLOW;
57         } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) {
58             return enr << 4 | VIC_UNDERFLOW;
59         } else if (!XxC) {
60             g_assert(trap_exc & S390_IEEE_MASK_INEXACT);
61             /* inexact has lowest priority on traps */
62             return enr << 4 | VIC_INEXACT;
63         }
64     }
65     return 0;
66 }
67 
68 static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc,
69                             uintptr_t retaddr)
70 {
71     if (vxc) {
72         /* on traps, the fpc flags are not updated, instruction is suppressed */
73         tcg_s390_vector_exception(env, vxc, retaddr);
74     }
75     if (vec_exc) {
76         /* indicate exceptions for all elements combined */
77         env->fpc |= vec_exc << 16;
78     }
79 }
80 
81 static float32 s390_vec_read_float32(const S390Vector *v, uint8_t enr)
82 {
83     return make_float32(s390_vec_read_element32(v, enr));
84 }
85 
86 static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr)
87 {
88     return make_float64(s390_vec_read_element64(v, enr));
89 }
90 
91 static float128 s390_vec_read_float128(const S390Vector *v)
92 {
93     return make_float128(s390_vec_read_element64(v, 0),
94                          s390_vec_read_element64(v, 1));
95 }
96 
97 static void s390_vec_write_float32(S390Vector *v, uint8_t enr, float32 data)
98 {
99     return s390_vec_write_element32(v, enr, data);
100 }
101 
102 static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data)
103 {
104     return s390_vec_write_element64(v, enr, data);
105 }
106 
107 static void s390_vec_write_float128(S390Vector *v, float128 data)
108 {
109     s390_vec_write_element64(v, 0, data.high);
110     s390_vec_write_element64(v, 1, data.low);
111 }
112 
113 typedef float32 (*vop32_2_fn)(float32 a, float_status *s);
114 static void vop32_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
115                     bool s, bool XxC, uint8_t erm, vop32_2_fn fn,
116                     uintptr_t retaddr)
117 {
118     uint8_t vxc, vec_exc = 0;
119     S390Vector tmp = {};
120     int i, old_mode;
121 
122     old_mode = s390_swap_bfp_rounding_mode(env, erm);
123     for (i = 0; i < 4; i++) {
124         const float32 a = s390_vec_read_float32(v2, i);
125 
126         s390_vec_write_float32(&tmp, i, fn(a, &env->fpu_status));
127         vxc = check_ieee_exc(env, i, XxC, &vec_exc);
128         if (s || vxc) {
129             break;
130         }
131     }
132     s390_restore_bfp_rounding_mode(env, old_mode);
133     handle_ieee_exc(env, vxc, vec_exc, retaddr);
134     *v1 = tmp;
135 }
136 
137 typedef float64 (*vop64_2_fn)(float64 a, float_status *s);
138 static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
139                     bool s, bool XxC, uint8_t erm, vop64_2_fn fn,
140                     uintptr_t retaddr)
141 {
142     uint8_t vxc, vec_exc = 0;
143     S390Vector tmp = {};
144     int i, old_mode;
145 
146     old_mode = s390_swap_bfp_rounding_mode(env, erm);
147     for (i = 0; i < 2; i++) {
148         const float64 a = s390_vec_read_float64(v2, i);
149 
150         s390_vec_write_float64(&tmp, i, fn(a, &env->fpu_status));
151         vxc = check_ieee_exc(env, i, XxC, &vec_exc);
152         if (s || vxc) {
153             break;
154         }
155     }
156     s390_restore_bfp_rounding_mode(env, old_mode);
157     handle_ieee_exc(env, vxc, vec_exc, retaddr);
158     *v1 = tmp;
159 }
160 
161 typedef float128 (*vop128_2_fn)(float128 a, float_status *s);
162 static void vop128_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
163                     bool s, bool XxC, uint8_t erm, vop128_2_fn fn,
164                     uintptr_t retaddr)
165 {
166     const float128 a = s390_vec_read_float128(v2);
167     uint8_t vxc, vec_exc = 0;
168     S390Vector tmp = {};
169     int old_mode;
170 
171     old_mode = s390_swap_bfp_rounding_mode(env, erm);
172     s390_vec_write_float128(&tmp, fn(a, &env->fpu_status));
173     vxc = check_ieee_exc(env, 0, XxC, &vec_exc);
174     s390_restore_bfp_rounding_mode(env, old_mode);
175     handle_ieee_exc(env, vxc, vec_exc, retaddr);
176     *v1 = tmp;
177 }
178 
179 static float64 vcdg64(float64 a, float_status *s)
180 {
181     return int64_to_float64(a, s);
182 }
183 
184 static float64 vcdlg64(float64 a, float_status *s)
185 {
186     return uint64_to_float64(a, s);
187 }
188 
189 static float64 vcgd64(float64 a, float_status *s)
190 {
191     const float64 tmp = float64_to_int64(a, s);
192 
193     return float64_is_any_nan(a) ? INT64_MIN : tmp;
194 }
195 
196 static float64 vclgd64(float64 a, float_status *s)
197 {
198     const float64 tmp = float64_to_uint64(a, s);
199 
200     return float64_is_any_nan(a) ? 0 : tmp;
201 }
202 
203 #define DEF_GVEC_VOP2_FN(NAME, FN, BITS)                                       \
204 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env,   \
205                                uint32_t desc)                                  \
206 {                                                                              \
207     const uint8_t erm = extract32(simd_data(desc), 4, 4);                      \
208     const bool se = extract32(simd_data(desc), 3, 1);                          \
209     const bool XxC = extract32(simd_data(desc), 2, 1);                         \
210                                                                                \
211     vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC());                     \
212 }
213 
214 #define DEF_GVEC_VOP2_64(NAME)                                                 \
215 DEF_GVEC_VOP2_FN(NAME, NAME##64, 64)
216 
217 #define DEF_GVEC_VOP2(NAME, OP)                                                \
218 DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32)                                       \
219 DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64)                                       \
220 DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128)
221 
222 DEF_GVEC_VOP2_64(vcdg)
223 DEF_GVEC_VOP2_64(vcdlg)
224 DEF_GVEC_VOP2_64(vcgd)
225 DEF_GVEC_VOP2_64(vclgd)
226 DEF_GVEC_VOP2(vfi, round_to_int)
227 DEF_GVEC_VOP2(vfsq, sqrt)
228 
229 typedef float32 (*vop32_3_fn)(float32 a, float32 b, float_status *s);
230 static void vop32_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
231                     CPUS390XState *env, bool s, vop32_3_fn fn,
232                     uintptr_t retaddr)
233 {
234     uint8_t vxc, vec_exc = 0;
235     S390Vector tmp = {};
236     int i;
237 
238     for (i = 0; i < 4; i++) {
239         const float32 a = s390_vec_read_float32(v2, i);
240         const float32 b = s390_vec_read_float32(v3, i);
241 
242         s390_vec_write_float32(&tmp, i, fn(a, b, &env->fpu_status));
243         vxc = check_ieee_exc(env, i, false, &vec_exc);
244         if (s || vxc) {
245             break;
246         }
247     }
248     handle_ieee_exc(env, vxc, vec_exc, retaddr);
249     *v1 = tmp;
250 }
251 
252 typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s);
253 static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
254                     CPUS390XState *env, bool s, vop64_3_fn fn,
255                     uintptr_t retaddr)
256 {
257     uint8_t vxc, vec_exc = 0;
258     S390Vector tmp = {};
259     int i;
260 
261     for (i = 0; i < 2; i++) {
262         const float64 a = s390_vec_read_float64(v2, i);
263         const float64 b = s390_vec_read_float64(v3, i);
264 
265         s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status));
266         vxc = check_ieee_exc(env, i, false, &vec_exc);
267         if (s || vxc) {
268             break;
269         }
270     }
271     handle_ieee_exc(env, vxc, vec_exc, retaddr);
272     *v1 = tmp;
273 }
274 
275 typedef float128 (*vop128_3_fn)(float128 a, float128 b, float_status *s);
276 static void vop128_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
277                      CPUS390XState *env, bool s, vop128_3_fn fn,
278                      uintptr_t retaddr)
279 {
280     const float128 a = s390_vec_read_float128(v2);
281     const float128 b = s390_vec_read_float128(v3);
282     uint8_t vxc, vec_exc = 0;
283     S390Vector tmp = {};
284 
285     s390_vec_write_float128(&tmp, fn(a, b, &env->fpu_status));
286     vxc = check_ieee_exc(env, 0, false, &vec_exc);
287     handle_ieee_exc(env, vxc, vec_exc, retaddr);
288     *v1 = tmp;
289 }
290 
291 #define DEF_GVEC_VOP3_B(NAME, OP, BITS)                                        \
292 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
293                               CPUS390XState *env, uint32_t desc)               \
294 {                                                                              \
295     const bool se = extract32(simd_data(desc), 3, 1);                          \
296                                                                                \
297     vop##BITS##_3(v1, v2, v3, env, se, float##BITS##_##OP, GETPC());           \
298 }
299 
300 #define DEF_GVEC_VOP3(NAME, OP)                                                \
301 DEF_GVEC_VOP3_B(NAME, OP, 32)                                                  \
302 DEF_GVEC_VOP3_B(NAME, OP, 64)                                                  \
303 DEF_GVEC_VOP3_B(NAME, OP, 128)
304 
305 DEF_GVEC_VOP3(vfa, add)
306 DEF_GVEC_VOP3(vfs, sub)
307 DEF_GVEC_VOP3(vfd, div)
308 DEF_GVEC_VOP3(vfm, mul)
309 
310 static int wfc32(const S390Vector *v1, const S390Vector *v2,
311                  CPUS390XState *env, bool signal, uintptr_t retaddr)
312 {
313     /* only the zero-indexed elements are compared */
314     const float32 a = s390_vec_read_float32(v1, 0);
315     const float32 b = s390_vec_read_float32(v2, 0);
316     uint8_t vxc, vec_exc = 0;
317     int cmp;
318 
319     if (signal) {
320         cmp = float32_compare(a, b, &env->fpu_status);
321     } else {
322         cmp = float32_compare_quiet(a, b, &env->fpu_status);
323     }
324     vxc = check_ieee_exc(env, 0, false, &vec_exc);
325     handle_ieee_exc(env, vxc, vec_exc, retaddr);
326 
327     return float_comp_to_cc(env, cmp);
328 }
329 
330 static int wfc64(const S390Vector *v1, const S390Vector *v2,
331                  CPUS390XState *env, bool signal, uintptr_t retaddr)
332 {
333     /* only the zero-indexed elements are compared */
334     const float64 a = s390_vec_read_float64(v1, 0);
335     const float64 b = s390_vec_read_float64(v2, 0);
336     uint8_t vxc, vec_exc = 0;
337     int cmp;
338 
339     if (signal) {
340         cmp = float64_compare(a, b, &env->fpu_status);
341     } else {
342         cmp = float64_compare_quiet(a, b, &env->fpu_status);
343     }
344     vxc = check_ieee_exc(env, 0, false, &vec_exc);
345     handle_ieee_exc(env, vxc, vec_exc, retaddr);
346 
347     return float_comp_to_cc(env, cmp);
348 }
349 
350 static int wfc128(const S390Vector *v1, const S390Vector *v2,
351                   CPUS390XState *env, bool signal, uintptr_t retaddr)
352 {
353     /* only the zero-indexed elements are compared */
354     const float128 a = s390_vec_read_float128(v1);
355     const float128 b = s390_vec_read_float128(v2);
356     uint8_t vxc, vec_exc = 0;
357     int cmp;
358 
359     if (signal) {
360         cmp = float128_compare(a, b, &env->fpu_status);
361     } else {
362         cmp = float128_compare_quiet(a, b, &env->fpu_status);
363     }
364     vxc = check_ieee_exc(env, 0, false, &vec_exc);
365     handle_ieee_exc(env, vxc, vec_exc, retaddr);
366 
367     return float_comp_to_cc(env, cmp);
368 }
369 
370 #define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS)                                     \
371 void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2,                 \
372                                CPUS390XState *env, uint32_t desc)              \
373 {                                                                              \
374     env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC());                      \
375 }
376 
377 #define DEF_GVEC_WFC(NAME, SIGNAL)                                             \
378      DEF_GVEC_WFC_B(NAME, SIGNAL, 32)                                          \
379      DEF_GVEC_WFC_B(NAME, SIGNAL, 64)                                          \
380      DEF_GVEC_WFC_B(NAME, SIGNAL, 128)
381 
382 DEF_GVEC_WFC(wfc, false)
383 DEF_GVEC_WFC(wfk, true)
384 
385 typedef bool (*vfc32_fn)(float32 a, float32 b, float_status *status);
386 static int vfc32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
387                  CPUS390XState *env, bool s, vfc32_fn fn, uintptr_t retaddr)
388 {
389     uint8_t vxc, vec_exc = 0;
390     S390Vector tmp = {};
391     int match = 0;
392     int i;
393 
394     for (i = 0; i < 4; i++) {
395         const float32 a = s390_vec_read_float32(v2, i);
396         const float32 b = s390_vec_read_float32(v3, i);
397 
398         /* swap the order of the parameters, so we can use existing functions */
399         if (fn(b, a, &env->fpu_status)) {
400             match++;
401             s390_vec_write_element32(&tmp, i, -1u);
402         }
403         vxc = check_ieee_exc(env, i, false, &vec_exc);
404         if (s || vxc) {
405             break;
406         }
407     }
408 
409     handle_ieee_exc(env, vxc, vec_exc, retaddr);
410     *v1 = tmp;
411     if (match) {
412         return s || match == 4 ? 0 : 1;
413     }
414     return 3;
415 }
416 
417 typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status);
418 static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
419                  CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr)
420 {
421     uint8_t vxc, vec_exc = 0;
422     S390Vector tmp = {};
423     int match = 0;
424     int i;
425 
426     for (i = 0; i < 2; i++) {
427         const float64 a = s390_vec_read_float64(v2, i);
428         const float64 b = s390_vec_read_float64(v3, i);
429 
430         /* swap the order of the parameters, so we can use existing functions */
431         if (fn(b, a, &env->fpu_status)) {
432             match++;
433             s390_vec_write_element64(&tmp, i, -1ull);
434         }
435         vxc = check_ieee_exc(env, i, false, &vec_exc);
436         if (s || vxc) {
437             break;
438         }
439     }
440 
441     handle_ieee_exc(env, vxc, vec_exc, retaddr);
442     *v1 = tmp;
443     if (match) {
444         return s || match == 2 ? 0 : 1;
445     }
446     return 3;
447 }
448 
449 typedef bool (*vfc128_fn)(float128 a, float128 b, float_status *status);
450 static int vfc128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
451                  CPUS390XState *env, bool s, vfc128_fn fn, uintptr_t retaddr)
452 {
453     const float128 a = s390_vec_read_float128(v2);
454     const float128 b = s390_vec_read_float128(v3);
455     uint8_t vxc, vec_exc = 0;
456     S390Vector tmp = {};
457     bool match = false;
458 
459     /* swap the order of the parameters, so we can use existing functions */
460     if (fn(b, a, &env->fpu_status)) {
461         match = true;
462         s390_vec_write_element64(&tmp, 0, -1ull);
463         s390_vec_write_element64(&tmp, 1, -1ull);
464     }
465     vxc = check_ieee_exc(env, 0, false, &vec_exc);
466     handle_ieee_exc(env, vxc, vec_exc, retaddr);
467     *v1 = tmp;
468     return match ? 0 : 3;
469 }
470 
471 #define DEF_GVEC_VFC_B(NAME, OP, BITS)                                         \
472 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
473                                CPUS390XState *env, uint32_t desc)              \
474 {                                                                              \
475     const bool se = extract32(simd_data(desc), 3, 1);                          \
476     const bool sq = extract32(simd_data(desc), 2, 1);                          \
477     vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet;  \
478                                                                                \
479     vfc##BITS(v1, v2, v3, env, se, fn, GETPC());                               \
480 }                                                                              \
481                                                                                \
482 void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3,  \
483                                     CPUS390XState *env, uint32_t desc)         \
484 {                                                                              \
485     const bool se = extract32(simd_data(desc), 3, 1);                          \
486     const bool sq = extract32(simd_data(desc), 2, 1);                          \
487     vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet;  \
488                                                                                \
489     env->cc_op = vfc##BITS(v1, v2, v3, env, se, fn, GETPC());                  \
490 }
491 
492 #define DEF_GVEC_VFC(NAME, OP)                                                 \
493 DEF_GVEC_VFC_B(NAME, OP, 32)                                                   \
494 DEF_GVEC_VFC_B(NAME, OP, 64)                                                   \
495 DEF_GVEC_VFC_B(NAME, OP, 128)                                                  \
496 
497 DEF_GVEC_VFC(vfce, eq)
498 DEF_GVEC_VFC(vfch, lt)
499 DEF_GVEC_VFC(vfche, le)
500 
501 void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env,
502                          uint32_t desc)
503 {
504     const bool s = extract32(simd_data(desc), 3, 1);
505     uint8_t vxc, vec_exc = 0;
506     S390Vector tmp = {};
507     int i;
508 
509     for (i = 0; i < 2; i++) {
510         /* load from even element */
511         const float32 a = s390_vec_read_element32(v2, i * 2);
512         const uint64_t ret = float32_to_float64(a, &env->fpu_status);
513 
514         s390_vec_write_element64(&tmp, i, ret);
515         /* indicate the source element */
516         vxc = check_ieee_exc(env, i * 2, false, &vec_exc);
517         if (s || vxc) {
518             break;
519         }
520     }
521     handle_ieee_exc(env, vxc, vec_exc, GETPC());
522     *(S390Vector *)v1 = tmp;
523 }
524 
525 void HELPER(gvec_vfll64)(void *v1, const void *v2, CPUS390XState *env,
526                          uint32_t desc)
527 {
528     /* load from even element */
529     const float128 ret = float64_to_float128(s390_vec_read_float64(v2, 0),
530                                              &env->fpu_status);
531     uint8_t vxc, vec_exc = 0;
532 
533     vxc = check_ieee_exc(env, 0, false, &vec_exc);
534     handle_ieee_exc(env, vxc, vec_exc, GETPC());
535     s390_vec_write_float128(v1, ret);
536 }
537 
538 void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env,
539                          uint32_t desc)
540 {
541     const uint8_t erm = extract32(simd_data(desc), 4, 4);
542     const bool s = extract32(simd_data(desc), 3, 1);
543     const bool XxC = extract32(simd_data(desc), 2, 1);
544     uint8_t vxc, vec_exc = 0;
545     S390Vector tmp = {};
546     int i, old_mode;
547 
548     old_mode = s390_swap_bfp_rounding_mode(env, erm);
549     for (i = 0; i < 2; i++) {
550         float64 a = s390_vec_read_element64(v2, i);
551         uint32_t ret = float64_to_float32(a, &env->fpu_status);
552 
553         /* place at even element */
554         s390_vec_write_element32(&tmp, i * 2, ret);
555         /* indicate the source element */
556         vxc = check_ieee_exc(env, i, XxC, &vec_exc);
557         if (s || vxc) {
558             break;
559         }
560     }
561     s390_restore_bfp_rounding_mode(env, old_mode);
562     handle_ieee_exc(env, vxc, vec_exc, GETPC());
563     *(S390Vector *)v1 = tmp;
564 }
565 
566 void HELPER(gvec_vflr128)(void *v1, const void *v2, CPUS390XState *env,
567                           uint32_t desc)
568 {
569     const uint8_t erm = extract32(simd_data(desc), 4, 4);
570     const bool XxC = extract32(simd_data(desc), 2, 1);
571     uint8_t vxc, vec_exc = 0;
572     int old_mode;
573     float64 ret;
574 
575     old_mode = s390_swap_bfp_rounding_mode(env, erm);
576     ret = float128_to_float64(s390_vec_read_float128(v2), &env->fpu_status);
577     vxc = check_ieee_exc(env, 0, XxC, &vec_exc);
578     s390_restore_bfp_rounding_mode(env, old_mode);
579     handle_ieee_exc(env, vxc, vec_exc, GETPC());
580 
581     /* place at even element, odd element is unpredictable */
582     s390_vec_write_float64(v1, 0, ret);
583 }
584 
585 static void vfma32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
586                    const S390Vector *v4, CPUS390XState *env, bool s, int flags,
587                    uintptr_t retaddr)
588 {
589     uint8_t vxc, vec_exc = 0;
590     S390Vector tmp = {};
591     int i;
592 
593     for (i = 0; i < 4; i++) {
594         const float32 a = s390_vec_read_float32(v2, i);
595         const float32 b = s390_vec_read_float32(v3, i);
596         const float32 c = s390_vec_read_float32(v4, i);
597         float32 ret = float32_muladd(a, b, c, flags, &env->fpu_status);
598 
599         s390_vec_write_float32(&tmp, i, ret);
600         vxc = check_ieee_exc(env, i, false, &vec_exc);
601         if (s || vxc) {
602             break;
603         }
604     }
605     handle_ieee_exc(env, vxc, vec_exc, retaddr);
606     *v1 = tmp;
607 }
608 
609 static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
610                    const S390Vector *v4, CPUS390XState *env, bool s, int flags,
611                    uintptr_t retaddr)
612 {
613     uint8_t vxc, vec_exc = 0;
614     S390Vector tmp = {};
615     int i;
616 
617     for (i = 0; i < 2; i++) {
618         const float64 a = s390_vec_read_float64(v2, i);
619         const float64 b = s390_vec_read_float64(v3, i);
620         const float64 c = s390_vec_read_float64(v4, i);
621         const float64 ret = float64_muladd(a, b, c, flags, &env->fpu_status);
622 
623         s390_vec_write_float64(&tmp, i, ret);
624         vxc = check_ieee_exc(env, i, false, &vec_exc);
625         if (s || vxc) {
626             break;
627         }
628     }
629     handle_ieee_exc(env, vxc, vec_exc, retaddr);
630     *v1 = tmp;
631 }
632 
633 static void vfma128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
634                     const S390Vector *v4, CPUS390XState *env, bool s, int flags,
635                     uintptr_t retaddr)
636 {
637     const float128 a = s390_vec_read_float128(v2);
638     const float128 b = s390_vec_read_float128(v3);
639     const float128 c = s390_vec_read_float128(v4);
640     uint8_t vxc, vec_exc = 0;
641     float128 ret;
642 
643     ret = float128_muladd(a, b, c, flags, &env->fpu_status);
644     vxc = check_ieee_exc(env, 0, false, &vec_exc);
645     handle_ieee_exc(env, vxc, vec_exc, retaddr);
646     s390_vec_write_float128(v1, ret);
647 }
648 
649 #define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS)                                     \
650 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
651                                const void *v4, CPUS390XState *env,             \
652                                uint32_t desc)                                  \
653 {                                                                              \
654     const bool se = extract32(simd_data(desc), 3, 1);                          \
655                                                                                \
656     vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC());                       \
657 }
658 
659 #define DEF_GVEC_VFMA(NAME, FLAGS)                                             \
660     DEF_GVEC_VFMA_B(NAME, FLAGS, 32)                                           \
661     DEF_GVEC_VFMA_B(NAME, FLAGS, 64)                                           \
662     DEF_GVEC_VFMA_B(NAME, FLAGS, 128)
663 
664 DEF_GVEC_VFMA(vfma, 0)
665 DEF_GVEC_VFMA(vfms, float_muladd_negate_c)
666 DEF_GVEC_VFMA(vfnma, float_muladd_negate_result)
667 DEF_GVEC_VFMA(vfnms, float_muladd_negate_c | float_muladd_negate_result)
668 
669 void HELPER(gvec_vftci32)(void *v1, const void *v2, CPUS390XState *env,
670                           uint32_t desc)
671 {
672     uint16_t i3 = extract32(simd_data(desc), 4, 12);
673     bool s = extract32(simd_data(desc), 3, 1);
674     int i, match = 0;
675 
676     for (i = 0; i < 4; i++) {
677         float32 a = s390_vec_read_float32(v2, i);
678 
679         if (float32_dcmask(env, a) & i3) {
680             match++;
681             s390_vec_write_element32(v1, i, -1u);
682         } else {
683             s390_vec_write_element32(v1, i, 0);
684         }
685         if (s) {
686             break;
687         }
688     }
689 
690     if (match == 4 || (s && match)) {
691         env->cc_op = 0;
692     } else if (match) {
693         env->cc_op = 1;
694     } else {
695         env->cc_op = 3;
696     }
697 }
698 
699 void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env,
700                           uint32_t desc)
701 {
702     const uint16_t i3 = extract32(simd_data(desc), 4, 12);
703     const bool s = extract32(simd_data(desc), 3, 1);
704     int i, match = 0;
705 
706     for (i = 0; i < 2; i++) {
707         const float64 a = s390_vec_read_float64(v2, i);
708 
709         if (float64_dcmask(env, a) & i3) {
710             match++;
711             s390_vec_write_element64(v1, i, -1ull);
712         } else {
713             s390_vec_write_element64(v1, i, 0);
714         }
715         if (s) {
716             break;
717         }
718     }
719 
720     if (match == 2 || (s && match)) {
721         env->cc_op = 0;
722     } else if (match) {
723         env->cc_op = 1;
724     } else {
725         env->cc_op = 3;
726     }
727 }
728 
729 void HELPER(gvec_vftci128)(void *v1, const void *v2, CPUS390XState *env,
730                            uint32_t desc)
731 {
732     const float128 a = s390_vec_read_float128(v2);
733     uint16_t i3 = extract32(simd_data(desc), 4, 12);
734 
735     if (float128_dcmask(env, a) & i3) {
736         env->cc_op = 0;
737         s390_vec_write_element64(v1, 0, -1ull);
738         s390_vec_write_element64(v1, 1, -1ull);
739     } else {
740         env->cc_op = 3;
741         s390_vec_write_element64(v1, 0, 0);
742         s390_vec_write_element64(v1, 1, 0);
743     }
744 }
745 
/*
 * Flavour of a vector FP minimum/maximum operation. vfmin_res() and
 * vfmax_res() accept every value except S390_MINMAX_TYPE_IEEE.
 * NOTE(review): the names presumably refer to the min/max semantics of
 * Java, a C-style macro, C++ and fmin()/fmax() - confirm against the
 * architecture documentation.
 */
typedef enum S390MinMaxType {
    S390_MINMAX_TYPE_IEEE    = 0,
    S390_MINMAX_TYPE_JAVA    = 1,
    S390_MINMAX_TYPE_C_MACRO = 2,
    S390_MINMAX_TYPE_CPP     = 3,
    S390_MINMAX_TYPE_F       = 4,
} S390MinMaxType;
753 
/*
 * Verdict of vfmin_res()/vfmax_res() on how a min/max result is produced.
 * MINMAX is returned when no NaN or both-zero special case applies, A and B
 * name one of the two operands, and SILENCE_A/SILENCE_B are returned for a
 * signaling NaN operand.
 * NOTE(review): the consumer is outside this view; SILENCE_* presumably
 * means "return the operand as a quiet NaN" - confirm.
 */
typedef enum S390MinMaxRes {
    S390_MINMAX_RES_MINMAX    = 0,
    S390_MINMAX_RES_A         = 1,
    S390_MINMAX_RES_B         = 2,
    S390_MINMAX_RES_SILENCE_A = 3,
    S390_MINMAX_RES_SILENCE_B = 4,
} S390MinMaxRes;
761 
762 static S390MinMaxRes vfmin_res(uint16_t dcmask_a, uint16_t dcmask_b,
763                                S390MinMaxType type, float_status *s)
764 {
765     const bool neg_a = dcmask_a & DCMASK_NEGATIVE;
766     const bool nan_a = dcmask_a & DCMASK_NAN;
767     const bool nan_b = dcmask_b & DCMASK_NAN;
768 
769     g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F);
770 
771     if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) {
772         const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN;
773         const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN;
774 
775         if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) {
776             s->float_exception_flags |= float_flag_invalid;
777         }
778         switch (type) {
779         case S390_MINMAX_TYPE_JAVA:
780             if (sig_a) {
781                 return S390_MINMAX_RES_SILENCE_A;
782             } else if (sig_b) {
783                 return S390_MINMAX_RES_SILENCE_B;
784             }
785             return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
786         case S390_MINMAX_TYPE_F:
787             return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
788         case S390_MINMAX_TYPE_C_MACRO:
789             s->float_exception_flags |= float_flag_invalid;
790             return S390_MINMAX_RES_B;
791         case S390_MINMAX_TYPE_CPP:
792             s->float_exception_flags |= float_flag_invalid;
793             return S390_MINMAX_RES_A;
794         default:
795             g_assert_not_reached();
796         }
797     } else if (unlikely(dcmask_a & dcmask_b & DCMASK_ZERO)) {
798         switch (type) {
799         case S390_MINMAX_TYPE_JAVA:
800             return neg_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
801         case S390_MINMAX_TYPE_C_MACRO:
802             return S390_MINMAX_RES_B;
803         case S390_MINMAX_TYPE_F:
804             return !neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A;
805         case S390_MINMAX_TYPE_CPP:
806             return S390_MINMAX_RES_A;
807         default:
808             g_assert_not_reached();
809         }
810     }
811     return S390_MINMAX_RES_MINMAX;
812 }
813 
814 static S390MinMaxRes vfmax_res(uint16_t dcmask_a, uint16_t dcmask_b,
815                                S390MinMaxType type, float_status *s)
816 {
817     g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F);
818 
819     if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) {
820         const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN;
821         const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN;
822         const bool nan_a = dcmask_a & DCMASK_NAN;
823         const bool nan_b = dcmask_b & DCMASK_NAN;
824 
825         if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) {
826             s->float_exception_flags |= float_flag_invalid;
827         }
828         switch (type) {
829         case S390_MINMAX_TYPE_JAVA:
830             if (sig_a) {
831                 return S390_MINMAX_RES_SILENCE_A;
832             } else if (sig_b) {
833                 return S390_MINMAX_RES_SILENCE_B;
834             }
835             return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
836         case S390_MINMAX_TYPE_F:
837             return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
838         case S390_MINMAX_TYPE_C_MACRO:
839             s->float_exception_flags |= float_flag_invalid;
840             return S390_MINMAX_RES_B;
841         case S390_MINMAX_TYPE_CPP:
842             s->float_exception_flags |= float_flag_invalid;
843             return S390_MINMAX_RES_A;
844         default:
845             g_assert_not_reached();
846         }
847     } else if (unlikely(dcmask_a & dcmask_b & DCMASK_ZERO)) {
848         const bool neg_a = dcmask_a & DCMASK_NEGATIVE;
849 
850         switch (type) {
851         case S390_MINMAX_TYPE_JAVA:
852         case S390_MINMAX_TYPE_F:
853             return neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A;
854         case S390_MINMAX_TYPE_C_MACRO:
855             return S390_MINMAX_RES_B;
856         case S390_MINMAX_TYPE_CPP:
857             return S390_MINMAX_RES_A;
858         default:
859             g_assert_not_reached();
860         }
861     }
862     return S390_MINMAX_RES_MINMAX;
863 }
864 
865 static S390MinMaxRes vfminmax_res(uint16_t dcmask_a, uint16_t dcmask_b,
866                                   S390MinMaxType type, bool is_min,
867                                   float_status *s)
868 {
869     return is_min ? vfmin_res(dcmask_a, dcmask_b, type, s) :
870                     vfmax_res(dcmask_a, dcmask_b, type, s);
871 }
872 
873 static void vfminmax32(S390Vector *v1, const S390Vector *v2,
874                        const S390Vector *v3, CPUS390XState *env,
875                        S390MinMaxType type, bool is_min, bool is_abs, bool se,
876                        uintptr_t retaddr)
877 {
878     float_status *s = &env->fpu_status;
879     uint8_t vxc, vec_exc = 0;
880     S390Vector tmp = {};
881     int i;
882 
883     for (i = 0; i < 4; i++) {
884         float32 a = s390_vec_read_float32(v2, i);
885         float32 b = s390_vec_read_float32(v3, i);
886         float32 result;
887 
888         if (type != S390_MINMAX_TYPE_IEEE) {
889             S390MinMaxRes res;
890 
891             if (is_abs) {
892                 a = float32_abs(a);
893                 b = float32_abs(b);
894             }
895 
896             res = vfminmax_res(float32_dcmask(env, a), float32_dcmask(env, b),
897                                type, is_min, s);
898             switch (res) {
899             case S390_MINMAX_RES_MINMAX:
900                 result = is_min ? float32_min(a, b, s) : float32_max(a, b, s);
901                 break;
902             case S390_MINMAX_RES_A:
903                 result = a;
904                 break;
905             case S390_MINMAX_RES_B:
906                 result = b;
907                 break;
908             case S390_MINMAX_RES_SILENCE_A:
909                 result = float32_silence_nan(a, s);
910                 break;
911             case S390_MINMAX_RES_SILENCE_B:
912                 result = float32_silence_nan(b, s);
913                 break;
914             default:
915                 g_assert_not_reached();
916             }
917         } else if (!is_abs) {
918             result = is_min ? float32_minnum(a, b, &env->fpu_status) :
919                               float32_maxnum(a, b, &env->fpu_status);
920         } else {
921             result = is_min ? float32_minnummag(a, b, &env->fpu_status) :
922                               float32_maxnummag(a, b, &env->fpu_status);
923         }
924 
925         s390_vec_write_float32(&tmp, i, result);
926         vxc = check_ieee_exc(env, i, false, &vec_exc);
927         if (se || vxc) {
928             break;
929         }
930     }
931     handle_ieee_exc(env, vxc, vec_exc, retaddr);
932     *v1 = tmp;
933 }
934 
935 static void vfminmax64(S390Vector *v1, const S390Vector *v2,
936                        const S390Vector *v3, CPUS390XState *env,
937                        S390MinMaxType type, bool is_min, bool is_abs, bool se,
938                        uintptr_t retaddr)
939 {
940     float_status *s = &env->fpu_status;
941     uint8_t vxc, vec_exc = 0;
942     S390Vector tmp = {};
943     int i;
944 
945     for (i = 0; i < 2; i++) {
946         float64 a = s390_vec_read_float64(v2, i);
947         float64 b = s390_vec_read_float64(v3, i);
948         float64 result;
949 
950         if (type != S390_MINMAX_TYPE_IEEE) {
951             S390MinMaxRes res;
952 
953             if (is_abs) {
954                 a = float64_abs(a);
955                 b = float64_abs(b);
956             }
957 
958             res = vfminmax_res(float64_dcmask(env, a), float64_dcmask(env, b),
959                                type, is_min, s);
960             switch (res) {
961             case S390_MINMAX_RES_MINMAX:
962                 result = is_min ? float64_min(a, b, s) : float64_max(a, b, s);
963                 break;
964             case S390_MINMAX_RES_A:
965                 result = a;
966                 break;
967             case S390_MINMAX_RES_B:
968                 result = b;
969                 break;
970             case S390_MINMAX_RES_SILENCE_A:
971                 result = float64_silence_nan(a, s);
972                 break;
973             case S390_MINMAX_RES_SILENCE_B:
974                 result = float64_silence_nan(b, s);
975                 break;
976             default:
977                 g_assert_not_reached();
978             }
979         } else if (!is_abs) {
980             result = is_min ? float64_minnum(a, b, &env->fpu_status) :
981                               float64_maxnum(a, b, &env->fpu_status);
982         } else {
983             result = is_min ? float64_minnummag(a, b, &env->fpu_status) :
984                               float64_maxnummag(a, b, &env->fpu_status);
985         }
986 
987         s390_vec_write_float64(&tmp, i, result);
988         vxc = check_ieee_exc(env, i, false, &vec_exc);
989         if (se || vxc) {
990             break;
991         }
992     }
993     handle_ieee_exc(env, vxc, vec_exc, retaddr);
994     *v1 = tmp;
995 }
996 
997 static void vfminmax128(S390Vector *v1, const S390Vector *v2,
998                         const S390Vector *v3, CPUS390XState *env,
999                         S390MinMaxType type, bool is_min, bool is_abs, bool se,
1000                         uintptr_t retaddr)
1001 {
1002     float128 a = s390_vec_read_float128(v2);
1003     float128 b = s390_vec_read_float128(v3);
1004     float_status *s = &env->fpu_status;
1005     uint8_t vxc, vec_exc = 0;
1006     float128 result;
1007 
1008     if (type != S390_MINMAX_TYPE_IEEE) {
1009         S390MinMaxRes res;
1010 
1011         if (is_abs) {
1012             a = float128_abs(a);
1013             b = float128_abs(b);
1014         }
1015 
1016         res = vfminmax_res(float128_dcmask(env, a), float128_dcmask(env, b),
1017                            type, is_min, s);
1018         switch (res) {
1019         case S390_MINMAX_RES_MINMAX:
1020             result = is_min ? float128_min(a, b, s) : float128_max(a, b, s);
1021             break;
1022         case S390_MINMAX_RES_A:
1023             result = a;
1024             break;
1025         case S390_MINMAX_RES_B:
1026             result = b;
1027             break;
1028         case S390_MINMAX_RES_SILENCE_A:
1029             result = float128_silence_nan(a, s);
1030             break;
1031         case S390_MINMAX_RES_SILENCE_B:
1032             result = float128_silence_nan(b, s);
1033             break;
1034         default:
1035             g_assert_not_reached();
1036         }
1037     } else if (!is_abs) {
1038         result = is_min ? float128_minnum(a, b, &env->fpu_status) :
1039                           float128_maxnum(a, b, &env->fpu_status);
1040     } else {
1041         result = is_min ? float128_minnummag(a, b, &env->fpu_status) :
1042                           float128_maxnummag(a, b, &env->fpu_status);
1043     }
1044 
1045     vxc = check_ieee_exc(env, 0, false, &vec_exc);
1046     handle_ieee_exc(env, vxc, vec_exc, retaddr);
1047     s390_vec_write_float128(v1, result);
1048 }
1049 
/*
 * Define the gvec helper gvec_<NAME><BITS>, forwarding to vfminmax<BITS>().
 *
 * The helper decodes simd_data(desc): bit 3 is the "se" flag and bits 4-7
 * hold a selector. Selector values of 8 and above request is_abs handling
 * with type (selector - 8); smaller values are used as the type directly.
 */
#define DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, BITS)                                \
void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
                               CPUS390XState *env, uint32_t desc)              \
{                                                                              \
    const uint32_t data = simd_data(desc);                                     \
    const bool se = extract32(data, 3, 1);                                     \
    const uint8_t sel = extract32(data, 4, 4);                                 \
    const bool is_abs = sel >= 8;                                              \
    const uint8_t type = is_abs ? sel - 8 : sel;                               \
                                                                               \
    vfminmax##BITS(v1, v2, v3, env, type, IS_MIN, is_abs, se, GETPC());        \
}
1065 
1066 #define DEF_GVEC_VFMINMAX(NAME, IS_MIN)                                        \
1067     DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 32)                                      \
1068     DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 64)                                      \
1069     DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 128)
1070 
1071 DEF_GVEC_VFMINMAX(vfmax, false)
1072 DEF_GVEC_VFMINMAX(vfmin, true)
1073