1 /*
2 * QEMU TCG support -- s390x vector floating point instruction support
3 *
4 * Copyright (C) 2019 Red Hat Inc
5 *
6 * Authors:
7 * David Hildenbrand <david@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12 #include "qemu/osdep.h"
13 #include "cpu.h"
14 #include "s390x-internal.h"
15 #include "vec.h"
16 #include "tcg_s390x.h"
17 #include "tcg/tcg-gvec-desc.h"
18 #include "exec/exec-all.h"
19 #include "exec/helper-proto.h"
20 #include "fpu/softfloat.h"
21
/*
 * Exception codes that check_ieee_exc() places in the low four bits of the
 * value it returns; the element number is stored in the bits above them.
 * The numeric order matches the priority in which check_ieee_exc() tests
 * the trapping conditions (invalid first, inexact last).
 */
#define VIC_INVALID 0x1
#define VIC_DIVBYZERO 0x2
#define VIC_OVERFLOW 0x3
#define VIC_UNDERFLOW 0x4
#define VIC_INEXACT 0x5
27
28 /* returns the VEX. If the VEX is 0, there is no trap */
check_ieee_exc(CPUS390XState * env,uint8_t enr,bool XxC,uint8_t * vec_exc)29 static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC,
30 uint8_t *vec_exc)
31 {
32 uint8_t vece_exc = 0, trap_exc;
33 unsigned qemu_exc;
34
35 /* Retrieve and clear the softfloat exceptions */
36 qemu_exc = env->fpu_status.float_exception_flags;
37 if (qemu_exc == 0) {
38 return 0;
39 }
40 env->fpu_status.float_exception_flags = 0;
41
42 vece_exc = s390_softfloat_exc_to_ieee(qemu_exc);
43
44 /* Add them to the vector-wide s390x exception bits */
45 *vec_exc |= vece_exc;
46
47 /* Check for traps and construct the VXC */
48 trap_exc = vece_exc & env->fpc >> 24;
49 if (trap_exc) {
50 if (trap_exc & S390_IEEE_MASK_INVALID) {
51 return enr << 4 | VIC_INVALID;
52 } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) {
53 return enr << 4 | VIC_DIVBYZERO;
54 } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) {
55 return enr << 4 | VIC_OVERFLOW;
56 } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) {
57 return enr << 4 | VIC_UNDERFLOW;
58 } else if (!XxC) {
59 g_assert(trap_exc & S390_IEEE_MASK_INEXACT);
60 /* inexact has lowest priority on traps */
61 return enr << 4 | VIC_INEXACT;
62 }
63 }
64 return 0;
65 }
66
/*
 * Act on the result of one or more check_ieee_exc() calls.
 *
 * @vxc:     non-zero code to raise via tcg_s390_vector_exception()
 * @vec_exc: accumulated exception bits to merge into the fpc flags
 * @retaddr: host return address forwarded to the exception helper
 */
static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc,
                            uintptr_t retaddr)
{
    /* on traps, the fpc flags are not updated, instruction is suppressed */
    if (vxc != 0) {
        tcg_s390_vector_exception(env, vxc, retaddr);
    }

    /* indicate exceptions for all elements combined (no-op when zero) */
    env->fpc |= vec_exc << 16;
}
79
s390_vec_read_float32(const S390Vector * v,uint8_t enr)80 static float32 s390_vec_read_float32(const S390Vector *v, uint8_t enr)
81 {
82 return make_float32(s390_vec_read_element32(v, enr));
83 }
84
s390_vec_read_float64(const S390Vector * v,uint8_t enr)85 static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr)
86 {
87 return make_float64(s390_vec_read_element64(v, enr));
88 }
89
s390_vec_read_float128(const S390Vector * v)90 static float128 s390_vec_read_float128(const S390Vector *v)
91 {
92 return make_float128(s390_vec_read_element64(v, 0),
93 s390_vec_read_element64(v, 1));
94 }
95
/* Store float32 @data into 32-bit element @enr of @v. */
static void s390_vec_write_float32(S390Vector *v, uint8_t enr, float32 data)
{
    /*
     * Plain call: a void function must not "return" an expression
     * (C11 6.8.6.4), even if that expression itself has type void.
     */
    s390_vec_write_element32(v, enr, data);
}
100
/* Store float64 @data into 64-bit element @enr of @v. */
static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data)
{
    /*
     * Plain call: a void function must not "return" an expression
     * (C11 6.8.6.4), even if that expression itself has type void.
     */
    s390_vec_write_element64(v, enr, data);
}
105
/*
 * Store float128 @data into @v: the high half goes to 64-bit element 0,
 * the low half to element 1.
 */
static void s390_vec_write_float128(S390Vector *v, float128 data)
{
    const uint64_t hi = data.high;
    const uint64_t lo = data.low;

    s390_vec_write_element64(v, 0, hi);
    s390_vec_write_element64(v, 1, lo);
}
111
112 typedef float32 (*vop32_2_fn)(float32 a, float_status *s);
vop32_2(S390Vector * v1,const S390Vector * v2,CPUS390XState * env,bool s,bool XxC,uint8_t erm,vop32_2_fn fn,uintptr_t retaddr)113 static void vop32_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
114 bool s, bool XxC, uint8_t erm, vop32_2_fn fn,
115 uintptr_t retaddr)
116 {
117 uint8_t vxc, vec_exc = 0;
118 S390Vector tmp = {};
119 int i, old_mode;
120
121 old_mode = s390_swap_bfp_rounding_mode(env, erm);
122 for (i = 0; i < 4; i++) {
123 const float32 a = s390_vec_read_float32(v2, i);
124
125 s390_vec_write_float32(&tmp, i, fn(a, &env->fpu_status));
126 vxc = check_ieee_exc(env, i, XxC, &vec_exc);
127 if (s || vxc) {
128 break;
129 }
130 }
131 s390_restore_bfp_rounding_mode(env, old_mode);
132 handle_ieee_exc(env, vxc, vec_exc, retaddr);
133 *v1 = tmp;
134 }
135
136 typedef float64 (*vop64_2_fn)(float64 a, float_status *s);
vop64_2(S390Vector * v1,const S390Vector * v2,CPUS390XState * env,bool s,bool XxC,uint8_t erm,vop64_2_fn fn,uintptr_t retaddr)137 static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
138 bool s, bool XxC, uint8_t erm, vop64_2_fn fn,
139 uintptr_t retaddr)
140 {
141 uint8_t vxc, vec_exc = 0;
142 S390Vector tmp = {};
143 int i, old_mode;
144
145 old_mode = s390_swap_bfp_rounding_mode(env, erm);
146 for (i = 0; i < 2; i++) {
147 const float64 a = s390_vec_read_float64(v2, i);
148
149 s390_vec_write_float64(&tmp, i, fn(a, &env->fpu_status));
150 vxc = check_ieee_exc(env, i, XxC, &vec_exc);
151 if (s || vxc) {
152 break;
153 }
154 }
155 s390_restore_bfp_rounding_mode(env, old_mode);
156 handle_ieee_exc(env, vxc, vec_exc, retaddr);
157 *v1 = tmp;
158 }
159
160 typedef float128 (*vop128_2_fn)(float128 a, float_status *s);
vop128_2(S390Vector * v1,const S390Vector * v2,CPUS390XState * env,bool s,bool XxC,uint8_t erm,vop128_2_fn fn,uintptr_t retaddr)161 static void vop128_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
162 bool s, bool XxC, uint8_t erm, vop128_2_fn fn,
163 uintptr_t retaddr)
164 {
165 const float128 a = s390_vec_read_float128(v2);
166 uint8_t vxc, vec_exc = 0;
167 S390Vector tmp = {};
168 int old_mode;
169
170 old_mode = s390_swap_bfp_rounding_mode(env, erm);
171 s390_vec_write_float128(&tmp, fn(a, &env->fpu_status));
172 vxc = check_ieee_exc(env, 0, XxC, &vec_exc);
173 s390_restore_bfp_rounding_mode(env, old_mode);
174 handle_ieee_exc(env, vxc, vec_exc, retaddr);
175 *v1 = tmp;
176 }
177
vcdg32(float32 a,float_status * s)178 static float32 vcdg32(float32 a, float_status *s)
179 {
180 return int32_to_float32(a, s);
181 }
182
vcdlg32(float32 a,float_status * s)183 static float32 vcdlg32(float32 a, float_status *s)
184 {
185 return uint32_to_float32(a, s);
186 }
187
vcgd32(float32 a,float_status * s)188 static float32 vcgd32(float32 a, float_status *s)
189 {
190 const float32 tmp = float32_to_int32(a, s);
191
192 return float32_is_any_nan(a) ? INT32_MIN : tmp;
193 }
194
vclgd32(float32 a,float_status * s)195 static float32 vclgd32(float32 a, float_status *s)
196 {
197 const float32 tmp = float32_to_uint32(a, s);
198
199 return float32_is_any_nan(a) ? 0 : tmp;
200 }
201
vcdg64(float64 a,float_status * s)202 static float64 vcdg64(float64 a, float_status *s)
203 {
204 return int64_to_float64(a, s);
205 }
206
vcdlg64(float64 a,float_status * s)207 static float64 vcdlg64(float64 a, float_status *s)
208 {
209 return uint64_to_float64(a, s);
210 }
211
vcgd64(float64 a,float_status * s)212 static float64 vcgd64(float64 a, float_status *s)
213 {
214 const float64 tmp = float64_to_int64(a, s);
215
216 return float64_is_any_nan(a) ? INT64_MIN : tmp;
217 }
218
vclgd64(float64 a,float_status * s)219 static float64 vclgd64(float64 a, float_status *s)
220 {
221 const float64 tmp = float64_to_uint64(a, s);
222
223 return float64_is_any_nan(a) ? 0 : tmp;
224 }
225
226 #define DEF_GVEC_VOP2_FN(NAME, FN, BITS) \
227 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env, \
228 uint32_t desc) \
229 { \
230 const uint8_t erm = extract32(simd_data(desc), 4, 4); \
231 const bool se = extract32(simd_data(desc), 3, 1); \
232 const bool XxC = extract32(simd_data(desc), 2, 1); \
233 \
234 vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC()); \
235 }
236
237 #define DEF_GVEC_VOP2_32(NAME) \
238 DEF_GVEC_VOP2_FN(NAME, NAME##32, 32)
239
240 #define DEF_GVEC_VOP2_64(NAME) \
241 DEF_GVEC_VOP2_FN(NAME, NAME##64, 64)
242
243 #define DEF_GVEC_VOP2(NAME, OP) \
244 DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32) \
245 DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64) \
246 DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128)
247
248 DEF_GVEC_VOP2_32(vcdg)
249 DEF_GVEC_VOP2_32(vcdlg)
250 DEF_GVEC_VOP2_32(vcgd)
251 DEF_GVEC_VOP2_32(vclgd)
252 DEF_GVEC_VOP2_64(vcdg)
253 DEF_GVEC_VOP2_64(vcdlg)
254 DEF_GVEC_VOP2_64(vcgd)
255 DEF_GVEC_VOP2_64(vclgd)
256 DEF_GVEC_VOP2(vfi, round_to_int)
257 DEF_GVEC_VOP2(vfsq, sqrt)
258
259 typedef float32 (*vop32_3_fn)(float32 a, float32 b, float_status *s);
vop32_3(S390Vector * v1,const S390Vector * v2,const S390Vector * v3,CPUS390XState * env,bool s,vop32_3_fn fn,uintptr_t retaddr)260 static void vop32_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
261 CPUS390XState *env, bool s, vop32_3_fn fn,
262 uintptr_t retaddr)
263 {
264 uint8_t vxc, vec_exc = 0;
265 S390Vector tmp = {};
266 int i;
267
268 for (i = 0; i < 4; i++) {
269 const float32 a = s390_vec_read_float32(v2, i);
270 const float32 b = s390_vec_read_float32(v3, i);
271
272 s390_vec_write_float32(&tmp, i, fn(a, b, &env->fpu_status));
273 vxc = check_ieee_exc(env, i, false, &vec_exc);
274 if (s || vxc) {
275 break;
276 }
277 }
278 handle_ieee_exc(env, vxc, vec_exc, retaddr);
279 *v1 = tmp;
280 }
281
282 typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s);
vop64_3(S390Vector * v1,const S390Vector * v2,const S390Vector * v3,CPUS390XState * env,bool s,vop64_3_fn fn,uintptr_t retaddr)283 static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
284 CPUS390XState *env, bool s, vop64_3_fn fn,
285 uintptr_t retaddr)
286 {
287 uint8_t vxc, vec_exc = 0;
288 S390Vector tmp = {};
289 int i;
290
291 for (i = 0; i < 2; i++) {
292 const float64 a = s390_vec_read_float64(v2, i);
293 const float64 b = s390_vec_read_float64(v3, i);
294
295 s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status));
296 vxc = check_ieee_exc(env, i, false, &vec_exc);
297 if (s || vxc) {
298 break;
299 }
300 }
301 handle_ieee_exc(env, vxc, vec_exc, retaddr);
302 *v1 = tmp;
303 }
304
305 typedef float128 (*vop128_3_fn)(float128 a, float128 b, float_status *s);
vop128_3(S390Vector * v1,const S390Vector * v2,const S390Vector * v3,CPUS390XState * env,bool s,vop128_3_fn fn,uintptr_t retaddr)306 static void vop128_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
307 CPUS390XState *env, bool s, vop128_3_fn fn,
308 uintptr_t retaddr)
309 {
310 const float128 a = s390_vec_read_float128(v2);
311 const float128 b = s390_vec_read_float128(v3);
312 uint8_t vxc, vec_exc = 0;
313 S390Vector tmp = {};
314
315 s390_vec_write_float128(&tmp, fn(a, b, &env->fpu_status));
316 vxc = check_ieee_exc(env, 0, false, &vec_exc);
317 handle_ieee_exc(env, vxc, vec_exc, retaddr);
318 *v1 = tmp;
319 }
320
321 #define DEF_GVEC_VOP3_B(NAME, OP, BITS) \
322 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
323 CPUS390XState *env, uint32_t desc) \
324 { \
325 const bool se = extract32(simd_data(desc), 3, 1); \
326 \
327 vop##BITS##_3(v1, v2, v3, env, se, float##BITS##_##OP, GETPC()); \
328 }
329
330 #define DEF_GVEC_VOP3(NAME, OP) \
331 DEF_GVEC_VOP3_B(NAME, OP, 32) \
332 DEF_GVEC_VOP3_B(NAME, OP, 64) \
333 DEF_GVEC_VOP3_B(NAME, OP, 128)
334
DEF_GVEC_VOP3(vfa,add)335 DEF_GVEC_VOP3(vfa, add)
336 DEF_GVEC_VOP3(vfs, sub)
337 DEF_GVEC_VOP3(vfd, div)
338 DEF_GVEC_VOP3(vfm, mul)
339
340 static int wfc32(const S390Vector *v1, const S390Vector *v2,
341 CPUS390XState *env, bool signal, uintptr_t retaddr)
342 {
343 /* only the zero-indexed elements are compared */
344 const float32 a = s390_vec_read_float32(v1, 0);
345 const float32 b = s390_vec_read_float32(v2, 0);
346 uint8_t vxc, vec_exc = 0;
347 int cmp;
348
349 if (signal) {
350 cmp = float32_compare(a, b, &env->fpu_status);
351 } else {
352 cmp = float32_compare_quiet(a, b, &env->fpu_status);
353 }
354 vxc = check_ieee_exc(env, 0, false, &vec_exc);
355 handle_ieee_exc(env, vxc, vec_exc, retaddr);
356
357 return float_comp_to_cc(env, cmp);
358 }
359
/*
 * Compare 64-bit element 0 of @v1 with element 0 of @v2.
 *
 * @signal selects float64_compare() over float64_compare_quiet().
 * Returns the comparison result mapped through float_comp_to_cc().
 */
static int wfc64(const S390Vector *v1, const S390Vector *v2,
                 CPUS390XState *env, bool signal, uintptr_t retaddr)
{
    /* only the zero-indexed elements are compared */
    const float64 lhs = s390_vec_read_float64(v1, 0);
    const float64 rhs = s390_vec_read_float64(v2, 0);
    uint8_t vec_exc = 0;
    uint8_t vxc;
    int cmp;

    cmp = signal ? float64_compare(lhs, rhs, &env->fpu_status)
                 : float64_compare_quiet(lhs, rhs, &env->fpu_status);

    vxc = check_ieee_exc(env, 0, false, &vec_exc);
    handle_ieee_exc(env, vxc, vec_exc, retaddr);

    return float_comp_to_cc(env, cmp);
}
379
/*
 * Compare the float128 held in @v1 with the one held in @v2.
 *
 * @signal selects float128_compare() over float128_compare_quiet().
 * Returns the comparison result mapped through float_comp_to_cc().
 */
static int wfc128(const S390Vector *v1, const S390Vector *v2,
                  CPUS390XState *env, bool signal, uintptr_t retaddr)
{
    /* the whole vector is a single 128-bit element */
    const float128 lhs = s390_vec_read_float128(v1);
    const float128 rhs = s390_vec_read_float128(v2);
    uint8_t vec_exc = 0;
    uint8_t vxc;
    int cmp;

    cmp = signal ? float128_compare(lhs, rhs, &env->fpu_status)
                 : float128_compare_quiet(lhs, rhs, &env->fpu_status);

    vxc = check_ieee_exc(env, 0, false, &vec_exc);
    handle_ieee_exc(env, vxc, vec_exc, retaddr);

    return float_comp_to_cc(env, cmp);
}
399
400 #define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS) \
401 void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2, \
402 CPUS390XState *env, uint32_t desc) \
403 { \
404 env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC()); \
405 }
406
407 #define DEF_GVEC_WFC(NAME, SIGNAL) \
408 DEF_GVEC_WFC_B(NAME, SIGNAL, 32) \
409 DEF_GVEC_WFC_B(NAME, SIGNAL, 64) \
410 DEF_GVEC_WFC_B(NAME, SIGNAL, 128)
411
412 DEF_GVEC_WFC(wfc, false)
413 DEF_GVEC_WFC(wfk, true)
414
415 typedef bool (*vfc32_fn)(float32 a, float32 b, float_status *status);
vfc32(S390Vector * v1,const S390Vector * v2,const S390Vector * v3,CPUS390XState * env,bool s,vfc32_fn fn,uintptr_t retaddr)416 static int vfc32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
417 CPUS390XState *env, bool s, vfc32_fn fn, uintptr_t retaddr)
418 {
419 uint8_t vxc, vec_exc = 0;
420 S390Vector tmp = {};
421 int match = 0;
422 int i;
423
424 for (i = 0; i < 4; i++) {
425 const float32 a = s390_vec_read_float32(v2, i);
426 const float32 b = s390_vec_read_float32(v3, i);
427
428 /* swap the order of the parameters, so we can use existing functions */
429 if (fn(b, a, &env->fpu_status)) {
430 match++;
431 s390_vec_write_element32(&tmp, i, -1u);
432 }
433 vxc = check_ieee_exc(env, i, false, &vec_exc);
434 if (s || vxc) {
435 break;
436 }
437 }
438
439 handle_ieee_exc(env, vxc, vec_exc, retaddr);
440 *v1 = tmp;
441 if (match) {
442 return s || match == 4 ? 0 : 1;
443 }
444 return 3;
445 }
446
447 typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status);
vfc64(S390Vector * v1,const S390Vector * v2,const S390Vector * v3,CPUS390XState * env,bool s,vfc64_fn fn,uintptr_t retaddr)448 static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
449 CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr)
450 {
451 uint8_t vxc, vec_exc = 0;
452 S390Vector tmp = {};
453 int match = 0;
454 int i;
455
456 for (i = 0; i < 2; i++) {
457 const float64 a = s390_vec_read_float64(v2, i);
458 const float64 b = s390_vec_read_float64(v3, i);
459
460 /* swap the order of the parameters, so we can use existing functions */
461 if (fn(b, a, &env->fpu_status)) {
462 match++;
463 s390_vec_write_element64(&tmp, i, -1ull);
464 }
465 vxc = check_ieee_exc(env, i, false, &vec_exc);
466 if (s || vxc) {
467 break;
468 }
469 }
470
471 handle_ieee_exc(env, vxc, vec_exc, retaddr);
472 *v1 = tmp;
473 if (match) {
474 return s || match == 2 ? 0 : 1;
475 }
476 return 3;
477 }
478
479 typedef bool (*vfc128_fn)(float128 a, float128 b, float_status *status);
vfc128(S390Vector * v1,const S390Vector * v2,const S390Vector * v3,CPUS390XState * env,bool s,vfc128_fn fn,uintptr_t retaddr)480 static int vfc128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
481 CPUS390XState *env, bool s, vfc128_fn fn, uintptr_t retaddr)
482 {
483 const float128 a = s390_vec_read_float128(v2);
484 const float128 b = s390_vec_read_float128(v3);
485 uint8_t vxc, vec_exc = 0;
486 S390Vector tmp = {};
487 bool match = false;
488
489 /* swap the order of the parameters, so we can use existing functions */
490 if (fn(b, a, &env->fpu_status)) {
491 match = true;
492 s390_vec_write_element64(&tmp, 0, -1ull);
493 s390_vec_write_element64(&tmp, 1, -1ull);
494 }
495 vxc = check_ieee_exc(env, 0, false, &vec_exc);
496 handle_ieee_exc(env, vxc, vec_exc, retaddr);
497 *v1 = tmp;
498 return match ? 0 : 3;
499 }
500
501 #define DEF_GVEC_VFC_B(NAME, OP, BITS) \
502 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
503 CPUS390XState *env, uint32_t desc) \
504 { \
505 const bool se = extract32(simd_data(desc), 3, 1); \
506 const bool sq = extract32(simd_data(desc), 2, 1); \
507 vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet; \
508 \
509 vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \
510 } \
511 \
512 void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3, \
513 CPUS390XState *env, uint32_t desc) \
514 { \
515 const bool se = extract32(simd_data(desc), 3, 1); \
516 const bool sq = extract32(simd_data(desc), 2, 1); \
517 vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet; \
518 \
519 env->cc_op = vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \
520 }
521
522 #define DEF_GVEC_VFC(NAME, OP) \
523 DEF_GVEC_VFC_B(NAME, OP, 32) \
524 DEF_GVEC_VFC_B(NAME, OP, 64) \
525 DEF_GVEC_VFC_B(NAME, OP, 128) \
526
DEF_GVEC_VFC(vfce,eq)527 DEF_GVEC_VFC(vfce, eq)
528 DEF_GVEC_VFC(vfch, lt)
529 DEF_GVEC_VFC(vfche, le)
530
531 void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env,
532 uint32_t desc)
533 {
534 const bool s = extract32(simd_data(desc), 3, 1);
535 uint8_t vxc, vec_exc = 0;
536 S390Vector tmp = {};
537 int i;
538
539 for (i = 0; i < 2; i++) {
540 /* load from even element */
541 const float32 a = s390_vec_read_element32(v2, i * 2);
542 const uint64_t ret = float32_to_float64(a, &env->fpu_status);
543
544 s390_vec_write_element64(&tmp, i, ret);
545 /* indicate the source element */
546 vxc = check_ieee_exc(env, i * 2, false, &vec_exc);
547 if (s || vxc) {
548 break;
549 }
550 }
551 handle_ieee_exc(env, vxc, vec_exc, GETPC());
552 *(S390Vector *)v1 = tmp;
553 }
554
HELPER(gvec_vfll64)555 void HELPER(gvec_vfll64)(void *v1, const void *v2, CPUS390XState *env,
556 uint32_t desc)
557 {
558 /* load from even element */
559 const float128 ret = float64_to_float128(s390_vec_read_float64(v2, 0),
560 &env->fpu_status);
561 uint8_t vxc, vec_exc = 0;
562
563 vxc = check_ieee_exc(env, 0, false, &vec_exc);
564 handle_ieee_exc(env, vxc, vec_exc, GETPC());
565 s390_vec_write_float128(v1, ret);
566 }
567
HELPER(gvec_vflr64)568 void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env,
569 uint32_t desc)
570 {
571 const uint8_t erm = extract32(simd_data(desc), 4, 4);
572 const bool s = extract32(simd_data(desc), 3, 1);
573 const bool XxC = extract32(simd_data(desc), 2, 1);
574 uint8_t vxc, vec_exc = 0;
575 S390Vector tmp = {};
576 int i, old_mode;
577
578 old_mode = s390_swap_bfp_rounding_mode(env, erm);
579 for (i = 0; i < 2; i++) {
580 float64 a = s390_vec_read_element64(v2, i);
581 uint32_t ret = float64_to_float32(a, &env->fpu_status);
582
583 /* place at even element */
584 s390_vec_write_element32(&tmp, i * 2, ret);
585 /* indicate the source element */
586 vxc = check_ieee_exc(env, i, XxC, &vec_exc);
587 if (s || vxc) {
588 break;
589 }
590 }
591 s390_restore_bfp_rounding_mode(env, old_mode);
592 handle_ieee_exc(env, vxc, vec_exc, GETPC());
593 *(S390Vector *)v1 = tmp;
594 }
595
HELPER(gvec_vflr128)596 void HELPER(gvec_vflr128)(void *v1, const void *v2, CPUS390XState *env,
597 uint32_t desc)
598 {
599 const uint8_t erm = extract32(simd_data(desc), 4, 4);
600 const bool XxC = extract32(simd_data(desc), 2, 1);
601 uint8_t vxc, vec_exc = 0;
602 int old_mode;
603 float64 ret;
604
605 old_mode = s390_swap_bfp_rounding_mode(env, erm);
606 ret = float128_to_float64(s390_vec_read_float128(v2), &env->fpu_status);
607 vxc = check_ieee_exc(env, 0, XxC, &vec_exc);
608 s390_restore_bfp_rounding_mode(env, old_mode);
609 handle_ieee_exc(env, vxc, vec_exc, GETPC());
610
611 /* place at even element, odd element is unpredictable */
612 s390_vec_write_float64(v1, 0, ret);
613 }
614
/*
 * Element-wise fused multiply-add on 32-bit elements: each result element
 * is float32_muladd(v3[i], v2[i], v4[i], @flags) and is stored in @v1.
 *
 * With @s set only element 0 is processed; processing also stops at the
 * first element that yields a VXC. Unprocessed elements are zero in the
 * result. @v1 is only written after handle_ieee_exc() has returned.
 */
static void vfma32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
                   const S390Vector *v4, CPUS390XState *env, bool s, int flags,
                   uintptr_t retaddr)
{
    S390Vector result = {};
    uint8_t vec_exc = 0;
    uint8_t vxc;
    int enr = 0;

    do {
        const float32 mul_a = s390_vec_read_float32(v3, enr);
        const float32 mul_b = s390_vec_read_float32(v2, enr);
        const float32 addend = s390_vec_read_float32(v4, enr);
        const float32 out = float32_muladd(mul_a, mul_b, addend, flags,
                                           &env->fpu_status);

        s390_vec_write_float32(&result, enr, out);
        vxc = check_ieee_exc(env, enr, false, &vec_exc);
    } while (!s && !vxc && ++enr < 4);

    handle_ieee_exc(env, vxc, vec_exc, retaddr);
    *v1 = result;
}
638
/*
 * Element-wise fused multiply-add on 64-bit elements: each result element
 * is float64_muladd(v3[i], v2[i], v4[i], @flags) and is stored in @v1.
 *
 * With @s set only element 0 is processed; processing also stops at the
 * first element that yields a VXC. Unprocessed elements are zero in the
 * result. @v1 is only written after handle_ieee_exc() has returned.
 */
static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
                   const S390Vector *v4, CPUS390XState *env, bool s, int flags,
                   uintptr_t retaddr)
{
    S390Vector result = {};
    uint8_t vec_exc = 0;
    uint8_t vxc;
    int enr = 0;

    do {
        const float64 mul_a = s390_vec_read_float64(v3, enr);
        const float64 mul_b = s390_vec_read_float64(v2, enr);
        const float64 addend = s390_vec_read_float64(v4, enr);
        const float64 out = float64_muladd(mul_a, mul_b, addend, flags,
                                           &env->fpu_status);

        s390_vec_write_float64(&result, enr, out);
        vxc = check_ieee_exc(env, enr, false, &vec_exc);
    } while (!s && !vxc && ++enr < 2);

    handle_ieee_exc(env, vxc, vec_exc, retaddr);
    *v1 = result;
}
662
/*
 * Fused multiply-add on float128: @v1 = float128_muladd(v3, v2, v4, @flags).
 * @s is accepted for signature parity with the 32/64-bit variants but is
 * not used. @v1 is only written after handle_ieee_exc() has returned.
 */
static void vfma128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
                    const S390Vector *v4, CPUS390XState *env, bool s, int flags,
                    uintptr_t retaddr)
{
    const float128 mul_a = s390_vec_read_float128(v3);
    const float128 mul_b = s390_vec_read_float128(v2);
    const float128 addend = s390_vec_read_float128(v4);
    const float128 out = float128_muladd(mul_a, mul_b, addend, flags,
                                         &env->fpu_status);
    uint8_t vec_exc = 0;
    uint8_t vxc;

    vxc = check_ieee_exc(env, 0, false, &vec_exc);
    handle_ieee_exc(env, vxc, vec_exc, retaddr);

    s390_vec_write_float128(v1, out);
}
678
679 #define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS) \
680 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
681 const void *v4, CPUS390XState *env, \
682 uint32_t desc) \
683 { \
684 const bool se = extract32(simd_data(desc), 3, 1); \
685 \
686 vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC()); \
687 }
688
689 #define DEF_GVEC_VFMA(NAME, FLAGS) \
690 DEF_GVEC_VFMA_B(NAME, FLAGS, 32) \
691 DEF_GVEC_VFMA_B(NAME, FLAGS, 64) \
692 DEF_GVEC_VFMA_B(NAME, FLAGS, 128)
693
694 DEF_GVEC_VFMA(vfma, 0)
DEF_GVEC_VFMA(vfms,float_muladd_negate_c)695 DEF_GVEC_VFMA(vfms, float_muladd_negate_c)
696 DEF_GVEC_VFMA(vfnma, float_muladd_negate_result)
697 DEF_GVEC_VFMA(vfnms, float_muladd_negate_c | float_muladd_negate_result)
698
699 void HELPER(gvec_vftci32)(void *v1, const void *v2, CPUS390XState *env,
700 uint32_t desc)
701 {
702 uint16_t i3 = extract32(simd_data(desc), 4, 12);
703 bool s = extract32(simd_data(desc), 3, 1);
704 int i, match = 0;
705
706 for (i = 0; i < 4; i++) {
707 float32 a = s390_vec_read_float32(v2, i);
708
709 if (float32_dcmask(env, a) & i3) {
710 match++;
711 s390_vec_write_element32(v1, i, -1u);
712 } else {
713 s390_vec_write_element32(v1, i, 0);
714 }
715 if (s) {
716 break;
717 }
718 }
719
720 if (match == 4 || (s && match)) {
721 env->cc_op = 0;
722 } else if (match) {
723 env->cc_op = 1;
724 } else {
725 env->cc_op = 3;
726 }
727 }
728
HELPER(gvec_vftci64)729 void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env,
730 uint32_t desc)
731 {
732 const uint16_t i3 = extract32(simd_data(desc), 4, 12);
733 const bool s = extract32(simd_data(desc), 3, 1);
734 int i, match = 0;
735
736 for (i = 0; i < 2; i++) {
737 const float64 a = s390_vec_read_float64(v2, i);
738
739 if (float64_dcmask(env, a) & i3) {
740 match++;
741 s390_vec_write_element64(v1, i, -1ull);
742 } else {
743 s390_vec_write_element64(v1, i, 0);
744 }
745 if (s) {
746 break;
747 }
748 }
749
750 if (match == 2 || (s && match)) {
751 env->cc_op = 0;
752 } else if (match) {
753 env->cc_op = 1;
754 } else {
755 env->cc_op = 3;
756 }
757 }
758
HELPER(gvec_vftci128)759 void HELPER(gvec_vftci128)(void *v1, const void *v2, CPUS390XState *env,
760 uint32_t desc)
761 {
762 const float128 a = s390_vec_read_float128(v2);
763 uint16_t i3 = extract32(simd_data(desc), 4, 12);
764
765 if (float128_dcmask(env, a) & i3) {
766 env->cc_op = 0;
767 s390_vec_write_element64(v1, 0, -1ull);
768 s390_vec_write_element64(v1, 1, -1ull);
769 } else {
770 env->cc_op = 3;
771 s390_vec_write_element64(v1, 0, 0);
772 s390_vec_write_element64(v1, 1, 0);
773 }
774 }
775
/*
 * Flavour of min/max semantics requested by the caller.
 *
 * S390_MINMAX_TYPE_IEEE is served directly by the softfloat minnum/maxnum
 * (or minnummag/maxnummag) functions. All other values are resolved through
 * vfmin_res()/vfmax_res(), which decide how NaN and two-zero operand pairs
 * are handled; those functions assert that the type lies in the range
 * (S390_MINMAX_TYPE_IEEE, S390_MINMAX_TYPE_F].
 * NOTE(review): the names presumably refer to the Java, C-macro, C++ and
 * Fortran min/max conventions - confirm against the architecture manual.
 */
typedef enum S390MinMaxType {
S390_MINMAX_TYPE_IEEE = 0,
S390_MINMAX_TYPE_JAVA,
S390_MINMAX_TYPE_C_MACRO,
S390_MINMAX_TYPE_CPP,
S390_MINMAX_TYPE_F,
} S390MinMaxType;
783
/*
 * Decision returned by vfmin_res()/vfmax_res() on how to produce the result
 * of a min/max operation on operands a and b.
 */
typedef enum S390MinMaxRes {
/* no special case: use the softfloat min/max function */
S390_MINMAX_RES_MINMAX = 0,
/* the result is operand a, unchanged */
S390_MINMAX_RES_A,
/* the result is operand b, unchanged */
S390_MINMAX_RES_B,
/* the result is operand a passed through floatN_silence_nan() */
S390_MINMAX_RES_SILENCE_A,
/* the result is operand b passed through floatN_silence_nan() */
S390_MINMAX_RES_SILENCE_B,
} S390MinMaxRes;
791
vfmin_res(uint16_t dcmask_a,uint16_t dcmask_b,S390MinMaxType type,float_status * s)792 static S390MinMaxRes vfmin_res(uint16_t dcmask_a, uint16_t dcmask_b,
793 S390MinMaxType type, float_status *s)
794 {
795 const bool neg_a = dcmask_a & DCMASK_NEGATIVE;
796 const bool nan_a = dcmask_a & DCMASK_NAN;
797 const bool nan_b = dcmask_b & DCMASK_NAN;
798
799 g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F);
800
801 if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) {
802 const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN;
803 const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN;
804
805 if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) {
806 s->float_exception_flags |= float_flag_invalid;
807 }
808 switch (type) {
809 case S390_MINMAX_TYPE_JAVA:
810 if (sig_a) {
811 return S390_MINMAX_RES_SILENCE_A;
812 } else if (sig_b) {
813 return S390_MINMAX_RES_SILENCE_B;
814 }
815 return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
816 case S390_MINMAX_TYPE_F:
817 return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
818 case S390_MINMAX_TYPE_C_MACRO:
819 s->float_exception_flags |= float_flag_invalid;
820 return S390_MINMAX_RES_B;
821 case S390_MINMAX_TYPE_CPP:
822 s->float_exception_flags |= float_flag_invalid;
823 return S390_MINMAX_RES_A;
824 default:
825 g_assert_not_reached();
826 }
827 } else if (unlikely((dcmask_a & DCMASK_ZERO) && (dcmask_b & DCMASK_ZERO))) {
828 switch (type) {
829 case S390_MINMAX_TYPE_JAVA:
830 return neg_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
831 case S390_MINMAX_TYPE_C_MACRO:
832 return S390_MINMAX_RES_B;
833 case S390_MINMAX_TYPE_F:
834 return !neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A;
835 case S390_MINMAX_TYPE_CPP:
836 return S390_MINMAX_RES_A;
837 default:
838 g_assert_not_reached();
839 }
840 }
841 return S390_MINMAX_RES_MINMAX;
842 }
843
vfmax_res(uint16_t dcmask_a,uint16_t dcmask_b,S390MinMaxType type,float_status * s)844 static S390MinMaxRes vfmax_res(uint16_t dcmask_a, uint16_t dcmask_b,
845 S390MinMaxType type, float_status *s)
846 {
847 g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F);
848
849 if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) {
850 const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN;
851 const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN;
852 const bool nan_a = dcmask_a & DCMASK_NAN;
853 const bool nan_b = dcmask_b & DCMASK_NAN;
854
855 if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) {
856 s->float_exception_flags |= float_flag_invalid;
857 }
858 switch (type) {
859 case S390_MINMAX_TYPE_JAVA:
860 if (sig_a) {
861 return S390_MINMAX_RES_SILENCE_A;
862 } else if (sig_b) {
863 return S390_MINMAX_RES_SILENCE_B;
864 }
865 return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
866 case S390_MINMAX_TYPE_F:
867 return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
868 case S390_MINMAX_TYPE_C_MACRO:
869 s->float_exception_flags |= float_flag_invalid;
870 return S390_MINMAX_RES_B;
871 case S390_MINMAX_TYPE_CPP:
872 s->float_exception_flags |= float_flag_invalid;
873 return S390_MINMAX_RES_A;
874 default:
875 g_assert_not_reached();
876 }
877 } else if (unlikely((dcmask_a & DCMASK_ZERO) && (dcmask_b & DCMASK_ZERO))) {
878 const bool neg_a = dcmask_a & DCMASK_NEGATIVE;
879
880 switch (type) {
881 case S390_MINMAX_TYPE_JAVA:
882 case S390_MINMAX_TYPE_F:
883 return neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A;
884 case S390_MINMAX_TYPE_C_MACRO:
885 return S390_MINMAX_RES_B;
886 case S390_MINMAX_TYPE_CPP:
887 return S390_MINMAX_RES_A;
888 default:
889 g_assert_not_reached();
890 }
891 }
892 return S390_MINMAX_RES_MINMAX;
893 }
894
vfminmax_res(uint16_t dcmask_a,uint16_t dcmask_b,S390MinMaxType type,bool is_min,float_status * s)895 static S390MinMaxRes vfminmax_res(uint16_t dcmask_a, uint16_t dcmask_b,
896 S390MinMaxType type, bool is_min,
897 float_status *s)
898 {
899 return is_min ? vfmin_res(dcmask_a, dcmask_b, type, s) :
900 vfmax_res(dcmask_a, dcmask_b, type, s);
901 }
902
/*
 * Element-wise minimum/maximum of the 32-bit elements of @v2 and @v3,
 * stored in @v1.
 *
 * @type:   min/max flavour; IEEE uses softfloat minnum/maxnum directly
 * @is_min: compute the minimum instead of the maximum
 * @is_abs: compare magnitudes (the non-IEEE flavours take the absolute
 *          value of both operands first, IEEE uses the *nummag functions)
 * @se:     stop after element 0
 *
 * Processing also stops at the first element that yields a VXC.
 * Unprocessed elements are zero in the result. @v1 is only written after
 * handle_ieee_exc() has returned.
 */
static void vfminmax32(S390Vector *v1, const S390Vector *v2,
                       const S390Vector *v3, CPUS390XState *env,
                       S390MinMaxType type, bool is_min, bool is_abs, bool se,
                       uintptr_t retaddr)
{
    float_status *s = &env->fpu_status;
    S390Vector out = {};
    uint8_t vec_exc = 0;
    uint8_t vxc;
    int enr = 0;

    do {
        float32 a = s390_vec_read_float32(v2, enr);
        float32 b = s390_vec_read_float32(v3, enr);
        float32 result;

        if (type == S390_MINMAX_TYPE_IEEE) {
            if (is_abs) {
                result = is_min ? float32_minnummag(a, b, s)
                                : float32_maxnummag(a, b, s);
            } else {
                result = is_min ? float32_minnum(a, b, s)
                                : float32_maxnum(a, b, s);
            }
        } else {
            if (is_abs) {
                a = float32_abs(a);
                b = float32_abs(b);
            }

            /* let the class masks decide about NaN and zero special cases */
            switch (vfminmax_res(float32_dcmask(env, a),
                                 float32_dcmask(env, b), type, is_min, s)) {
            case S390_MINMAX_RES_MINMAX:
                result = is_min ? float32_min(a, b, s) : float32_max(a, b, s);
                break;
            case S390_MINMAX_RES_A:
                result = a;
                break;
            case S390_MINMAX_RES_B:
                result = b;
                break;
            case S390_MINMAX_RES_SILENCE_A:
                result = float32_silence_nan(a, s);
                break;
            case S390_MINMAX_RES_SILENCE_B:
                result = float32_silence_nan(b, s);
                break;
            default:
                g_assert_not_reached();
            }
        }

        s390_vec_write_float32(&out, enr, result);
        vxc = check_ieee_exc(env, enr, false, &vec_exc);
    } while (!se && !vxc && ++enr < 4);

    handle_ieee_exc(env, vxc, vec_exc, retaddr);
    *v1 = out;
}
964
/*
 * Element-wise minimum/maximum of the 64-bit elements of @v2 and @v3,
 * stored in @v1.
 *
 * @type:   min/max flavour; IEEE uses softfloat minnum/maxnum directly
 * @is_min: compute the minimum instead of the maximum
 * @is_abs: compare magnitudes (the non-IEEE flavours take the absolute
 *          value of both operands first, IEEE uses the *nummag functions)
 * @se:     stop after element 0
 *
 * Processing also stops at the first element that yields a VXC.
 * Unprocessed elements are zero in the result. @v1 is only written after
 * handle_ieee_exc() has returned.
 */
static void vfminmax64(S390Vector *v1, const S390Vector *v2,
                       const S390Vector *v3, CPUS390XState *env,
                       S390MinMaxType type, bool is_min, bool is_abs, bool se,
                       uintptr_t retaddr)
{
    float_status *s = &env->fpu_status;
    S390Vector out = {};
    uint8_t vec_exc = 0;
    uint8_t vxc;
    int enr = 0;

    do {
        float64 a = s390_vec_read_float64(v2, enr);
        float64 b = s390_vec_read_float64(v3, enr);
        float64 result;

        if (type == S390_MINMAX_TYPE_IEEE) {
            if (is_abs) {
                result = is_min ? float64_minnummag(a, b, s)
                                : float64_maxnummag(a, b, s);
            } else {
                result = is_min ? float64_minnum(a, b, s)
                                : float64_maxnum(a, b, s);
            }
        } else {
            if (is_abs) {
                a = float64_abs(a);
                b = float64_abs(b);
            }

            /* let the class masks decide about NaN and zero special cases */
            switch (vfminmax_res(float64_dcmask(env, a),
                                 float64_dcmask(env, b), type, is_min, s)) {
            case S390_MINMAX_RES_MINMAX:
                result = is_min ? float64_min(a, b, s) : float64_max(a, b, s);
                break;
            case S390_MINMAX_RES_A:
                result = a;
                break;
            case S390_MINMAX_RES_B:
                result = b;
                break;
            case S390_MINMAX_RES_SILENCE_A:
                result = float64_silence_nan(a, s);
                break;
            case S390_MINMAX_RES_SILENCE_B:
                result = float64_silence_nan(b, s);
                break;
            default:
                g_assert_not_reached();
            }
        }

        s390_vec_write_float64(&out, enr, result);
        vxc = check_ieee_exc(env, enr, false, &vec_exc);
    } while (!se && !vxc && ++enr < 2);

    handle_ieee_exc(env, vxc, vec_exc, retaddr);
    *v1 = out;
}
1026
/*
 * Minimum/maximum of the single float128 value held in v2 and v3.
 *
 * @v1:      destination vector; written only after handle_ieee_exc() returns
 * @v2, @v3: source vectors
 * @type:    S390_MINMAX_TYPE_IEEE selects the softfloat minnum/maxnum
 *           (or, with @is_abs, minnummag/maxnummag) primitives; for every
 *           other type the action is chosen by vfminmax_res() from the data
 *           classes of both operands
 * @is_min:  true for minimum, false for maximum
 * @is_abs:  operate on magnitudes; for non-IEEE types both operands are
 *           replaced by their absolute value before classification
 * @se:      not used by this variant (there is only one element); kept so
 *           that all vfminmax<BITS> functions share one signature
 * @retaddr: forwarded to handle_ieee_exc()
 */
static void vfminmax128(S390Vector *v1, const S390Vector *v2,
                        const S390Vector *v3, CPUS390XState *env,
                        S390MinMaxType type, bool is_min, bool is_abs, bool se,
                        uintptr_t retaddr)
{
    float_status *fst = &env->fpu_status;
    float128 x = s390_vec_read_float128(v2);
    float128 y = s390_vec_read_float128(v3);
    float128 r;
    uint8_t vec_exc = 0;
    uint8_t vxc;

    if (type == S390_MINMAX_TYPE_IEEE) {
        if (is_abs) {
            r = is_min ? float128_minnummag(x, y, fst)
                       : float128_maxnummag(x, y, fst);
        } else {
            r = is_min ? float128_minnum(x, y, fst)
                       : float128_maxnum(x, y, fst);
        }
    } else {
        S390MinMaxRes action;

        if (is_abs) {
            x = float128_abs(x);
            y = float128_abs(y);
        }

        action = vfminmax_res(float128_dcmask(env, x),
                              float128_dcmask(env, y), type, is_min, fst);
        switch (action) {
        case S390_MINMAX_RES_MINMAX:
            if (is_min) {
                r = float128_min(x, y, fst);
            } else {
                r = float128_max(x, y, fst);
            }
            break;
        case S390_MINMAX_RES_A:
            r = x;
            break;
        case S390_MINMAX_RES_B:
            r = y;
            break;
        case S390_MINMAX_RES_SILENCE_A:
            r = float128_silence_nan(x, fst);
            break;
        case S390_MINMAX_RES_SILENCE_B:
            r = float128_silence_nan(y, fst);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Exceptions are evaluated before the destination is touched. */
    vxc = check_ieee_exc(env, 0, false, &vec_exc);
    handle_ieee_exc(env, vxc, vec_exc, retaddr);
    s390_vec_write_float128(v1, r);
}
1079
/*
 * Define the helper gvec_<NAME><BITS>, which decodes the descriptor data and
 * forwards to vfminmax<BITS>():
 *   - se is the 1-bit field at offset 3 of simd_data(desc);
 *   - type is the 4-bit field at offset 4 of simd_data(desc).  Values >= 8
 *     request the absolute-value variant: 8 is subtracted from the type and
 *     is_abs is passed as true.
 * GETPC() is evaluated in the helper itself and handed down as retaddr.
 */
#define DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, BITS)                                \
void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
                               CPUS390XState *env, uint32_t desc)              \
{                                                                              \
    uint8_t type = extract32(simd_data(desc), 4, 4);                           \
    const bool se = extract32(simd_data(desc), 3, 1);                          \
    const bool is_abs = type >= 8;                                             \
                                                                               \
    if (is_abs) {                                                              \
        type -= 8;                                                             \
    }                                                                          \
                                                                               \
    vfminmax##BITS(v1, v2, v3, env, type, IS_MIN, is_abs, se, GETPC());        \
}
1095
1096 #define DEF_GVEC_VFMINMAX(NAME, IS_MIN) \
1097 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 32) \
1098 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 64) \
1099 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 128)
1100
1101 DEF_GVEC_VFMINMAX(vfmax, false)
1102 DEF_GVEC_VFMINMAX(vfmin, true)
1103