1 /* 2 * QEMU TCG support -- s390x vector floating point instruction support 3 * 4 * Copyright (C) 2019 Red Hat Inc 5 * 6 * Authors: 7 * David Hildenbrand <david@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 #include "qemu/osdep.h" 13 #include "cpu.h" 14 #include "s390x-internal.h" 15 #include "vec.h" 16 #include "tcg_s390x.h" 17 #include "tcg/tcg-gvec-desc.h" 18 #include "exec/exec-all.h" 19 #include "exec/helper-proto.h" 20 #include "fpu/softfloat.h" 21 22 #define VIC_INVALID 0x1 23 #define VIC_DIVBYZERO 0x2 24 #define VIC_OVERFLOW 0x3 25 #define VIC_UNDERFLOW 0x4 26 #define VIC_INEXACT 0x5 27 28 /* returns the VEX. If the VEX is 0, there is no trap */ 29 static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC, 30 uint8_t *vec_exc) 31 { 32 uint8_t vece_exc = 0, trap_exc; 33 unsigned qemu_exc; 34 35 /* Retrieve and clear the softfloat exceptions */ 36 qemu_exc = env->fpu_status.float_exception_flags; 37 if (qemu_exc == 0) { 38 return 0; 39 } 40 env->fpu_status.float_exception_flags = 0; 41 42 vece_exc = s390_softfloat_exc_to_ieee(qemu_exc); 43 44 /* Add them to the vector-wide s390x exception bits */ 45 *vec_exc |= vece_exc; 46 47 /* Check for traps and construct the VXC */ 48 trap_exc = vece_exc & env->fpc >> 24; 49 if (trap_exc) { 50 if (trap_exc & S390_IEEE_MASK_INVALID) { 51 return enr << 4 | VIC_INVALID; 52 } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) { 53 return enr << 4 | VIC_DIVBYZERO; 54 } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) { 55 return enr << 4 | VIC_OVERFLOW; 56 } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) { 57 return enr << 4 | VIC_UNDERFLOW; 58 } else if (!XxC) { 59 g_assert(trap_exc & S390_IEEE_MASK_INEXACT); 60 /* inexact has lowest priority on traps */ 61 return enr << 4 | VIC_INEXACT; 62 } 63 } 64 return 0; 65 } 66 67 static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc, 68 uintptr_t retaddr) 69 { 70 if (vxc) { 71 /* on traps, the fpc flags are not updated, instruction is suppressed */ 72 tcg_s390_vector_exception(env, vxc, retaddr); 73 } 74 if (vec_exc) { 75 /* indicate exceptions for all elements combined */ 76 env->fpc |= vec_exc << 16; 77 } 78 } 79 80 static float32 s390_vec_read_float32(const S390Vector *v, uint8_t enr) 81 { 82 return make_float32(s390_vec_read_element32(v, enr)); 83 } 84 85 static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr) 86 { 87 return make_float64(s390_vec_read_element64(v, enr)); 88 } 89 90 static float128 s390_vec_read_float128(const S390Vector *v) 91 { 92 return make_float128(s390_vec_read_element64(v, 0), 93 s390_vec_read_element64(v, 1)); 94 } 95 96 static void s390_vec_write_float32(S390Vector *v, uint8_t enr, float32 data) 97 { 98 return s390_vec_write_element32(v, enr, data); 99 } 100 101 static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data) 102 { 103 return s390_vec_write_element64(v, enr, data); 104 } 105 106 static void s390_vec_write_float128(S390Vector *v, float128 data) 107 { 108 s390_vec_write_element64(v, 0, data.high); 109 s390_vec_write_element64(v, 1, data.low); 110 } 111 112 typedef float32 (*vop32_2_fn)(float32 a, float_status *s); 113 static void vop32_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 114 bool s, bool XxC, uint8_t erm, vop32_2_fn fn, 115 uintptr_t retaddr) 116 { 117 uint8_t vxc, vec_exc = 0; 118 S390Vector tmp = {}; 119 int i, old_mode; 120 121 old_mode = s390_swap_bfp_rounding_mode(env, erm); 122 for (i = 0; i < 4; i++) { 123 const float32 a = s390_vec_read_float32(v2, i); 124 125 s390_vec_write_float32(&tmp, i, fn(a, &env->fpu_status)); 126 vxc = check_ieee_exc(env, i, XxC, &vec_exc); 127 if (s || vxc) { 128 break; 129 } 130 } 131 s390_restore_bfp_rounding_mode(env, old_mode); 132 handle_ieee_exc(env, vxc, vec_exc, retaddr); 133 *v1 = tmp; 134 } 135 136 typedef float64 (*vop64_2_fn)(float64 a, float_status *s); 137 static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 138 bool s, bool XxC, uint8_t erm, vop64_2_fn fn, 139 uintptr_t retaddr) 140 { 141 uint8_t vxc, vec_exc = 0; 142 S390Vector tmp = {}; 143 int i, old_mode; 144 145 old_mode = s390_swap_bfp_rounding_mode(env, erm); 146 for (i = 0; i < 2; i++) { 147 const float64 a = s390_vec_read_float64(v2, i); 148 149 s390_vec_write_float64(&tmp, i, fn(a, &env->fpu_status)); 150 vxc = check_ieee_exc(env, i, XxC, &vec_exc); 151 if (s || vxc) { 152 break; 153 } 154 } 155 s390_restore_bfp_rounding_mode(env, old_mode); 156 handle_ieee_exc(env, vxc, vec_exc, retaddr); 157 *v1 = tmp; 158 } 159 160 typedef float128 (*vop128_2_fn)(float128 a, float_status *s); 161 static void vop128_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 162 bool s, bool XxC, uint8_t erm, vop128_2_fn fn, 163 uintptr_t retaddr) 164 { 165 const float128 a = s390_vec_read_float128(v2); 166 uint8_t vxc, vec_exc = 0; 167 S390Vector tmp = {}; 168 int old_mode; 169 170 old_mode = s390_swap_bfp_rounding_mode(env, erm); 171 s390_vec_write_float128(&tmp, fn(a, &env->fpu_status)); 172 vxc = check_ieee_exc(env, 0, XxC, &vec_exc); 173 s390_restore_bfp_rounding_mode(env, old_mode); 174 handle_ieee_exc(env, vxc, vec_exc, retaddr); 175 *v1 = tmp; 176 } 177 178 static float64 vcdg64(float64 a, float_status *s) 179 { 180 return int64_to_float64(a, s); 181 } 182 183 static float64 vcdlg64(float64 a, float_status *s) 184 { 185 return uint64_to_float64(a, s); 186 } 187 188 static float64 vcgd64(float64 a, float_status *s) 189 { 190 const float64 tmp = float64_to_int64(a, s); 191 192 return float64_is_any_nan(a) ? INT64_MIN : tmp; 193 } 194 195 static float64 vclgd64(float64 a, float_status *s) 196 { 197 const float64 tmp = float64_to_uint64(a, s); 198 199 return float64_is_any_nan(a) ? 0 : tmp; 200 } 201 202 #define DEF_GVEC_VOP2_FN(NAME, FN, BITS) \ 203 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env, \ 204 uint32_t desc) \ 205 { \ 206 const uint8_t erm = extract32(simd_data(desc), 4, 4); \ 207 const bool se = extract32(simd_data(desc), 3, 1); \ 208 const bool XxC = extract32(simd_data(desc), 2, 1); \ 209 \ 210 vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC()); \ 211 } 212 213 #define DEF_GVEC_VOP2_64(NAME) \ 214 DEF_GVEC_VOP2_FN(NAME, NAME##64, 64) 215 216 #define DEF_GVEC_VOP2(NAME, OP) \ 217 DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32) \ 218 DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64) \ 219 DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128) 220 221 DEF_GVEC_VOP2_64(vcdg) 222 DEF_GVEC_VOP2_64(vcdlg) 223 DEF_GVEC_VOP2_64(vcgd) 224 DEF_GVEC_VOP2_64(vclgd) 225 DEF_GVEC_VOP2(vfi, round_to_int) 226 DEF_GVEC_VOP2(vfsq, sqrt) 227 228 typedef float32 (*vop32_3_fn)(float32 a, float32 b, float_status *s); 229 static void vop32_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 230 CPUS390XState *env, bool s, vop32_3_fn fn, 231 uintptr_t retaddr) 232 { 233 uint8_t vxc, vec_exc = 0; 234 S390Vector tmp = {}; 235 int i; 236 237 for (i = 0; i < 4; i++) { 238 const float32 a = s390_vec_read_float32(v2, i); 239 const float32 b = s390_vec_read_float32(v3, i); 240 241 s390_vec_write_float32(&tmp, i, fn(a, b, &env->fpu_status)); 242 vxc = check_ieee_exc(env, i, false, &vec_exc); 243 if (s || vxc) { 244 break; 245 } 246 } 247 handle_ieee_exc(env, vxc, vec_exc, retaddr); 248 *v1 = tmp; 249 } 250 251 typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s); 252 static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 253 CPUS390XState *env, bool s, vop64_3_fn fn, 254 uintptr_t retaddr) 255 { 256 uint8_t vxc, vec_exc = 0; 257 S390Vector tmp = {}; 258 int i; 259 260 for (i = 0; i < 2; i++) { 261 const float64 a = s390_vec_read_float64(v2, i); 262 const float64 b = s390_vec_read_float64(v3, i); 263 264 s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status)); 265 vxc = check_ieee_exc(env, i, false, &vec_exc); 266 if (s || vxc) { 267 break; 268 } 269 } 270 handle_ieee_exc(env, vxc, vec_exc, retaddr); 271 *v1 = tmp; 272 } 273 274 typedef float128 (*vop128_3_fn)(float128 a, float128 b, float_status *s); 275 static void vop128_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 276 CPUS390XState *env, bool s, vop128_3_fn fn, 277 uintptr_t retaddr) 278 { 279 const float128 a = s390_vec_read_float128(v2); 280 const float128 b = s390_vec_read_float128(v3); 281 uint8_t vxc, vec_exc = 0; 282 S390Vector tmp = {}; 283 284 s390_vec_write_float128(&tmp, fn(a, b, &env->fpu_status)); 285 vxc = check_ieee_exc(env, 0, false, &vec_exc); 286 handle_ieee_exc(env, vxc, vec_exc, retaddr); 287 *v1 = tmp; 288 } 289 290 #define DEF_GVEC_VOP3_B(NAME, OP, BITS) \ 291 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ 292 CPUS390XState *env, uint32_t desc) \ 293 { \ 294 const bool se = extract32(simd_data(desc), 3, 1); \ 295 \ 296 vop##BITS##_3(v1, v2, v3, env, se, float##BITS##_##OP, GETPC()); \ 297 } 298 299 #define DEF_GVEC_VOP3(NAME, OP) \ 300 DEF_GVEC_VOP3_B(NAME, OP, 32) \ 301 DEF_GVEC_VOP3_B(NAME, OP, 64) \ 302 DEF_GVEC_VOP3_B(NAME, OP, 128) 303 304 DEF_GVEC_VOP3(vfa, add) 305 DEF_GVEC_VOP3(vfs, sub) 306 DEF_GVEC_VOP3(vfd, div) 307 DEF_GVEC_VOP3(vfm, mul) 308 309 static int wfc32(const S390Vector *v1, const S390Vector *v2, 310 CPUS390XState *env, bool signal, uintptr_t retaddr) 311 { 312 /* only the zero-indexed elements are compared */ 313 const float32 a = s390_vec_read_float32(v1, 0); 314 const float32 b = s390_vec_read_float32(v2, 0); 315 uint8_t vxc, vec_exc = 0; 316 int cmp; 317 318 if (signal) { 319 cmp = float32_compare(a, b, &env->fpu_status); 320 } else { 321 cmp = float32_compare_quiet(a, b, &env->fpu_status); 322 } 323 vxc = check_ieee_exc(env, 0, false, &vec_exc); 324 handle_ieee_exc(env, vxc, vec_exc, retaddr); 325 326 return float_comp_to_cc(env, cmp); 327 } 328 329 static int wfc64(const S390Vector *v1, const S390Vector *v2, 330 CPUS390XState *env, bool signal, uintptr_t retaddr) 331 { 332 /* only the zero-indexed elements are compared */ 333 const float64 a = s390_vec_read_float64(v1, 0); 334 const float64 b = s390_vec_read_float64(v2, 0); 335 uint8_t vxc, vec_exc = 0; 336 int cmp; 337 338 if (signal) { 339 cmp = float64_compare(a, b, &env->fpu_status); 340 } else { 341 cmp = float64_compare_quiet(a, b, &env->fpu_status); 342 } 343 vxc = check_ieee_exc(env, 0, false, &vec_exc); 344 handle_ieee_exc(env, vxc, vec_exc, retaddr); 345 346 return float_comp_to_cc(env, cmp); 347 } 348 349 static int wfc128(const S390Vector *v1, const S390Vector *v2, 350 CPUS390XState *env, bool signal, uintptr_t retaddr) 351 { 352 /* only the zero-indexed elements are compared */ 353 const float128 a = s390_vec_read_float128(v1); 354 const float128 b = s390_vec_read_float128(v2); 355 uint8_t vxc, vec_exc = 0; 356 int cmp; 357 358 if (signal) { 359 cmp = float128_compare(a, b, &env->fpu_status); 360 } else { 361 cmp = float128_compare_quiet(a, b, &env->fpu_status); 362 } 363 vxc = check_ieee_exc(env, 0, false, &vec_exc); 364 handle_ieee_exc(env, vxc, vec_exc, retaddr); 365 366 return float_comp_to_cc(env, cmp); 367 } 368 369 #define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS) \ 370 void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2, \ 371 CPUS390XState *env, uint32_t desc) \ 372 { \ 373 env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC()); \ 374 } 375 376 #define DEF_GVEC_WFC(NAME, SIGNAL) \ 377 DEF_GVEC_WFC_B(NAME, SIGNAL, 32) \ 378 DEF_GVEC_WFC_B(NAME, SIGNAL, 64) \ 379 DEF_GVEC_WFC_B(NAME, SIGNAL, 128) 380 381 DEF_GVEC_WFC(wfc, false) 382 DEF_GVEC_WFC(wfk, true) 383 384 typedef bool (*vfc32_fn)(float32 a, float32 b, float_status *status); 385 static int vfc32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 386 CPUS390XState *env, bool s, vfc32_fn fn, uintptr_t retaddr) 387 { 388 uint8_t vxc, vec_exc = 0; 389 S390Vector tmp = {}; 390 int match = 0; 391 int i; 392 393 for (i = 0; i < 4; i++) { 394 const float32 a = s390_vec_read_float32(v2, i); 395 const float32 b = s390_vec_read_float32(v3, i); 396 397 /* swap the order of the parameters, so we can use existing functions */ 398 if (fn(b, a, &env->fpu_status)) { 399 match++; 400 s390_vec_write_element32(&tmp, i, -1u); 401 } 402 vxc = check_ieee_exc(env, i, false, &vec_exc); 403 if (s || vxc) { 404 break; 405 } 406 } 407 408 handle_ieee_exc(env, vxc, vec_exc, retaddr); 409 *v1 = tmp; 410 if (match) { 411 return s || match == 4 ? 0 : 1; 412 } 413 return 3; 414 } 415 416 typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status); 417 static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 418 CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr) 419 { 420 uint8_t vxc, vec_exc = 0; 421 S390Vector tmp = {}; 422 int match = 0; 423 int i; 424 425 for (i = 0; i < 2; i++) { 426 const float64 a = s390_vec_read_float64(v2, i); 427 const float64 b = s390_vec_read_float64(v3, i); 428 429 /* swap the order of the parameters, so we can use existing functions */ 430 if (fn(b, a, &env->fpu_status)) { 431 match++; 432 s390_vec_write_element64(&tmp, i, -1ull); 433 } 434 vxc = check_ieee_exc(env, i, false, &vec_exc); 435 if (s || vxc) { 436 break; 437 } 438 } 439 440 handle_ieee_exc(env, vxc, vec_exc, retaddr); 441 *v1 = tmp; 442 if (match) { 443 return s || match == 2 ? 0 : 1; 444 } 445 return 3; 446 } 447 448 typedef bool (*vfc128_fn)(float128 a, float128 b, float_status *status); 449 static int vfc128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 450 CPUS390XState *env, bool s, vfc128_fn fn, uintptr_t retaddr) 451 { 452 const float128 a = s390_vec_read_float128(v2); 453 const float128 b = s390_vec_read_float128(v3); 454 uint8_t vxc, vec_exc = 0; 455 S390Vector tmp = {}; 456 bool match = false; 457 458 /* swap the order of the parameters, so we can use existing functions */ 459 if (fn(b, a, &env->fpu_status)) { 460 match = true; 461 s390_vec_write_element64(&tmp, 0, -1ull); 462 s390_vec_write_element64(&tmp, 1, -1ull); 463 } 464 vxc = check_ieee_exc(env, 0, false, &vec_exc); 465 handle_ieee_exc(env, vxc, vec_exc, retaddr); 466 *v1 = tmp; 467 return match ? 0 : 3; 468 } 469 470 #define DEF_GVEC_VFC_B(NAME, OP, BITS) \ 471 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ 472 CPUS390XState *env, uint32_t desc) \ 473 { \ 474 const bool se = extract32(simd_data(desc), 3, 1); \ 475 const bool sq = extract32(simd_data(desc), 2, 1); \ 476 vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet; \ 477 \ 478 vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \ 479 } \ 480 \ 481 void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3, \ 482 CPUS390XState *env, uint32_t desc) \ 483 { \ 484 const bool se = extract32(simd_data(desc), 3, 1); \ 485 const bool sq = extract32(simd_data(desc), 2, 1); \ 486 vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet; \ 487 \ 488 env->cc_op = vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \ 489 } 490 491 #define DEF_GVEC_VFC(NAME, OP) \ 492 DEF_GVEC_VFC_B(NAME, OP, 32) \ 493 DEF_GVEC_VFC_B(NAME, OP, 64) \ 494 DEF_GVEC_VFC_B(NAME, OP, 128) \ 495 496 DEF_GVEC_VFC(vfce, eq) 497 DEF_GVEC_VFC(vfch, lt) 498 DEF_GVEC_VFC(vfche, le) 499 500 void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env, 501 uint32_t desc) 502 { 503 const bool s = extract32(simd_data(desc), 3, 1); 504 uint8_t vxc, vec_exc = 0; 505 S390Vector tmp = {}; 506 int i; 507 508 for (i = 0; i < 2; i++) { 509 /* load from even element */ 510 const float32 a = s390_vec_read_element32(v2, i * 2); 511 const uint64_t ret = float32_to_float64(a, &env->fpu_status); 512 513 s390_vec_write_element64(&tmp, i, ret); 514 /* indicate the source element */ 515 vxc = check_ieee_exc(env, i * 2, false, &vec_exc); 516 if (s || vxc) { 517 break; 518 } 519 } 520 handle_ieee_exc(env, vxc, vec_exc, GETPC()); 521 *(S390Vector *)v1 = tmp; 522 } 523 524 void HELPER(gvec_vfll64)(void *v1, const void *v2, CPUS390XState *env, 525 uint32_t desc) 526 { 527 /* load from even element */ 528 const float128 ret = float64_to_float128(s390_vec_read_float64(v2, 0), 529 &env->fpu_status); 530 uint8_t vxc, vec_exc = 0; 531 532 vxc = check_ieee_exc(env, 0, false, &vec_exc); 533 handle_ieee_exc(env, vxc, vec_exc, GETPC()); 534 s390_vec_write_float128(v1, ret); 535 } 536 537 void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env, 538 uint32_t desc) 539 { 540 const uint8_t erm = extract32(simd_data(desc), 4, 4); 541 const bool s = extract32(simd_data(desc), 3, 1); 542 const bool XxC = extract32(simd_data(desc), 2, 1); 543 uint8_t vxc, vec_exc = 0; 544 S390Vector tmp = {}; 545 int i, old_mode; 546 547 old_mode = s390_swap_bfp_rounding_mode(env, erm); 548 for (i = 0; i < 2; i++) { 549 float64 a = s390_vec_read_element64(v2, i); 550 uint32_t ret = float64_to_float32(a, &env->fpu_status); 551 552 /* place at even element */ 553 s390_vec_write_element32(&tmp, i * 2, ret); 554 /* indicate the source element */ 555 vxc = check_ieee_exc(env, i, XxC, &vec_exc); 556 if (s || vxc) { 557 break; 558 } 559 } 560 s390_restore_bfp_rounding_mode(env, old_mode); 561 handle_ieee_exc(env, vxc, vec_exc, GETPC()); 562 *(S390Vector *)v1 = tmp; 563 } 564 565 void HELPER(gvec_vflr128)(void *v1, const void *v2, CPUS390XState *env, 566 uint32_t desc) 567 { 568 const uint8_t erm = extract32(simd_data(desc), 4, 4); 569 const bool XxC = extract32(simd_data(desc), 2, 1); 570 uint8_t vxc, vec_exc = 0; 571 int old_mode; 572 float64 ret; 573 574 old_mode = s390_swap_bfp_rounding_mode(env, erm); 575 ret = float128_to_float64(s390_vec_read_float128(v2), &env->fpu_status); 576 vxc = check_ieee_exc(env, 0, XxC, &vec_exc); 577 s390_restore_bfp_rounding_mode(env, old_mode); 578 handle_ieee_exc(env, vxc, vec_exc, GETPC()); 579 580 /* place at even element, odd element is unpredictable */ 581 s390_vec_write_float64(v1, 0, ret); 582 } 583 584 static void vfma32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 585 const S390Vector *v4, CPUS390XState *env, bool s, int flags, 586 uintptr_t retaddr) 587 { 588 uint8_t vxc, vec_exc = 0; 589 S390Vector tmp = {}; 590 int i; 591 592 for (i = 0; i < 4; i++) { 593 const float32 a = s390_vec_read_float32(v2, i); 594 const float32 b = s390_vec_read_float32(v3, i); 595 const float32 c = s390_vec_read_float32(v4, i); 596 float32 ret = float32_muladd(a, b, c, flags, &env->fpu_status); 597 598 s390_vec_write_float32(&tmp, i, ret); 599 vxc = check_ieee_exc(env, i, false, &vec_exc); 600 if (s || vxc) { 601 break; 602 } 603 } 604 handle_ieee_exc(env, vxc, vec_exc, retaddr); 605 *v1 = tmp; 606 } 607 608 static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 609 const S390Vector *v4, CPUS390XState *env, bool s, int flags, 610 uintptr_t retaddr) 611 { 612 uint8_t vxc, vec_exc = 0; 613 S390Vector tmp = {}; 614 int i; 615 616 for (i = 0; i < 2; i++) { 617 const float64 a = s390_vec_read_float64(v2, i); 618 const float64 b = s390_vec_read_float64(v3, i); 619 const float64 c = s390_vec_read_float64(v4, i); 620 const float64 ret = float64_muladd(a, b, c, flags, &env->fpu_status); 621 622 s390_vec_write_float64(&tmp, i, ret); 623 vxc = check_ieee_exc(env, i, false, &vec_exc); 624 if (s || vxc) { 625 break; 626 } 627 } 628 handle_ieee_exc(env, vxc, vec_exc, retaddr); 629 *v1 = tmp; 630 } 631 632 static void vfma128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 633 const S390Vector *v4, CPUS390XState *env, bool s, int flags, 634 uintptr_t retaddr) 635 { 636 const float128 a = s390_vec_read_float128(v2); 637 const float128 b = s390_vec_read_float128(v3); 638 const float128 c = s390_vec_read_float128(v4); 639 uint8_t vxc, vec_exc = 0; 640 float128 ret; 641 642 ret = float128_muladd(a, b, c, flags, &env->fpu_status); 643 vxc = check_ieee_exc(env, 0, false, &vec_exc); 644 handle_ieee_exc(env, vxc, vec_exc, retaddr); 645 s390_vec_write_float128(v1, ret); 646 } 647 648 #define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS) \ 649 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ 650 const void *v4, CPUS390XState *env, \ 651 uint32_t desc) \ 652 { \ 653 const bool se = extract32(simd_data(desc), 3, 1); \ 654 \ 655 vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC()); \ 656 } 657 658 #define DEF_GVEC_VFMA(NAME, FLAGS) \ 659 DEF_GVEC_VFMA_B(NAME, FLAGS, 32) \ 660 DEF_GVEC_VFMA_B(NAME, FLAGS, 64) \ 661 DEF_GVEC_VFMA_B(NAME, FLAGS, 128) 662 663 DEF_GVEC_VFMA(vfma, 0) 664 DEF_GVEC_VFMA(vfms, float_muladd_negate_c) 665 DEF_GVEC_VFMA(vfnma, float_muladd_negate_result) 666 DEF_GVEC_VFMA(vfnms, float_muladd_negate_c | float_muladd_negate_result) 667 668 void HELPER(gvec_vftci32)(void *v1, const void *v2, CPUS390XState *env, 669 uint32_t desc) 670 { 671 uint16_t i3 = extract32(simd_data(desc), 4, 12); 672 bool s = extract32(simd_data(desc), 3, 1); 673 int i, match = 0; 674 675 for (i = 0; i < 4; i++) { 676 float32 a = s390_vec_read_float32(v2, i); 677 678 if (float32_dcmask(env, a) & i3) { 679 match++; 680 s390_vec_write_element32(v1, i, -1u); 681 } else { 682 s390_vec_write_element32(v1, i, 0); 683 } 684 if (s) { 685 break; 686 } 687 } 688 689 if (match == 4 || (s && match)) { 690 env->cc_op = 0; 691 } else if (match) { 692 env->cc_op = 1; 693 } else { 694 env->cc_op = 3; 695 } 696 } 697 698 void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env, 699 uint32_t desc) 700 { 701 const uint16_t i3 = extract32(simd_data(desc), 4, 12); 702 const bool s = extract32(simd_data(desc), 3, 1); 703 int i, match = 0; 704 705 for (i = 0; i < 2; i++) { 706 const float64 a = s390_vec_read_float64(v2, i); 707 708 if (float64_dcmask(env, a) & i3) { 709 match++; 710 s390_vec_write_element64(v1, i, -1ull); 711 } else { 712 s390_vec_write_element64(v1, i, 0); 713 } 714 if (s) { 715 break; 716 } 717 } 718 719 if (match == 2 || (s && match)) { 720 env->cc_op = 0; 721 } else if (match) { 722 env->cc_op = 1; 723 } else { 724 env->cc_op = 3; 725 } 726 } 727 728 void HELPER(gvec_vftci128)(void *v1, const void *v2, CPUS390XState *env, 729 uint32_t desc) 730 { 731 const float128 a = s390_vec_read_float128(v2); 732 uint16_t i3 = extract32(simd_data(desc), 4, 12); 733 734 if (float128_dcmask(env, a) & i3) { 735 env->cc_op = 0; 736 s390_vec_write_element64(v1, 0, -1ull); 737 s390_vec_write_element64(v1, 1, -1ull); 738 } else { 739 env->cc_op = 3; 740 s390_vec_write_element64(v1, 0, 0); 741 s390_vec_write_element64(v1, 1, 0); 742 } 743 } 744 745 typedef enum S390MinMaxType { 746 S390_MINMAX_TYPE_IEEE = 0, 747 S390_MINMAX_TYPE_JAVA, 748 S390_MINMAX_TYPE_C_MACRO, 749 S390_MINMAX_TYPE_CPP, 750 S390_MINMAX_TYPE_F, 751 } S390MinMaxType; 752 753 typedef enum S390MinMaxRes { 754 S390_MINMAX_RES_MINMAX = 0, 755 S390_MINMAX_RES_A, 756 S390_MINMAX_RES_B, 757 S390_MINMAX_RES_SILENCE_A, 758 S390_MINMAX_RES_SILENCE_B, 759 } S390MinMaxRes; 760 761 static S390MinMaxRes vfmin_res(uint16_t dcmask_a, uint16_t dcmask_b, 762 S390MinMaxType type, float_status *s) 763 { 764 const bool neg_a = dcmask_a & DCMASK_NEGATIVE; 765 const bool nan_a = dcmask_a & DCMASK_NAN; 766 const bool nan_b = dcmask_b & DCMASK_NAN; 767 768 g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F); 769 770 if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) { 771 const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN; 772 const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN; 773 774 if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) { 775 s->float_exception_flags |= float_flag_invalid; 776 } 777 switch (type) { 778 case S390_MINMAX_TYPE_JAVA: 779 if (sig_a) { 780 return S390_MINMAX_RES_SILENCE_A; 781 } else if (sig_b) { 782 return S390_MINMAX_RES_SILENCE_B; 783 } 784 return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; 785 case S390_MINMAX_TYPE_F: 786 return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; 787 case S390_MINMAX_TYPE_C_MACRO: 788 s->float_exception_flags |= float_flag_invalid; 789 return S390_MINMAX_RES_B; 790 case S390_MINMAX_TYPE_CPP: 791 s->float_exception_flags |= float_flag_invalid; 792 return S390_MINMAX_RES_A; 793 default: 794 g_assert_not_reached(); 795 } 796 } else if (unlikely(dcmask_a & dcmask_b & DCMASK_ZERO)) { 797 switch (type) { 798 case S390_MINMAX_TYPE_JAVA: 799 return neg_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; 800 case S390_MINMAX_TYPE_C_MACRO: 801 return S390_MINMAX_RES_B; 802 case S390_MINMAX_TYPE_F: 803 return !neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A; 804 case S390_MINMAX_TYPE_CPP: 805 return S390_MINMAX_RES_A; 806 default: 807 g_assert_not_reached(); 808 } 809 } 810 return S390_MINMAX_RES_MINMAX; 811 } 812 813 static S390MinMaxRes vfmax_res(uint16_t dcmask_a, uint16_t dcmask_b, 814 S390MinMaxType type, float_status *s) 815 { 816 g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F); 817 818 if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) { 819 const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN; 820 const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN; 821 const bool nan_a = dcmask_a & DCMASK_NAN; 822 const bool nan_b = dcmask_b & DCMASK_NAN; 823 824 if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) { 825 s->float_exception_flags |= float_flag_invalid; 826 } 827 switch (type) { 828 case S390_MINMAX_TYPE_JAVA: 829 if (sig_a) { 830 return S390_MINMAX_RES_SILENCE_A; 831 } else if (sig_b) { 832 return S390_MINMAX_RES_SILENCE_B; 833 } 834 return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; 835 case S390_MINMAX_TYPE_F: 836 return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; 837 case S390_MINMAX_TYPE_C_MACRO: 838 s->float_exception_flags |= float_flag_invalid; 839 return S390_MINMAX_RES_B; 840 case S390_MINMAX_TYPE_CPP: 841 s->float_exception_flags |= float_flag_invalid; 842 return S390_MINMAX_RES_A; 843 default: 844 g_assert_not_reached(); 845 } 846 } else if (unlikely(dcmask_a & dcmask_b & DCMASK_ZERO)) { 847 const bool neg_a = dcmask_a & DCMASK_NEGATIVE; 848 849 switch (type) { 850 case S390_MINMAX_TYPE_JAVA: 851 case S390_MINMAX_TYPE_F: 852 return neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A; 853 case S390_MINMAX_TYPE_C_MACRO: 854 return S390_MINMAX_RES_B; 855 case S390_MINMAX_TYPE_CPP: 856 return S390_MINMAX_RES_A; 857 default: 858 g_assert_not_reached(); 859 } 860 } 861 return S390_MINMAX_RES_MINMAX; 862 } 863 864 static S390MinMaxRes vfminmax_res(uint16_t dcmask_a, uint16_t dcmask_b, 865 S390MinMaxType type, bool is_min, 866 float_status *s) 867 { 868 return is_min ? vfmin_res(dcmask_a, dcmask_b, type, s) : 869 vfmax_res(dcmask_a, dcmask_b, type, s); 870 } 871 872 static void vfminmax32(S390Vector *v1, const S390Vector *v2, 873 const S390Vector *v3, CPUS390XState *env, 874 S390MinMaxType type, bool is_min, bool is_abs, bool se, 875 uintptr_t retaddr) 876 { 877 float_status *s = &env->fpu_status; 878 uint8_t vxc, vec_exc = 0; 879 S390Vector tmp = {}; 880 int i; 881 882 for (i = 0; i < 4; i++) { 883 float32 a = s390_vec_read_float32(v2, i); 884 float32 b = s390_vec_read_float32(v3, i); 885 float32 result; 886 887 if (type != S390_MINMAX_TYPE_IEEE) { 888 S390MinMaxRes res; 889 890 if (is_abs) { 891 a = float32_abs(a); 892 b = float32_abs(b); 893 } 894 895 res = vfminmax_res(float32_dcmask(env, a), float32_dcmask(env, b), 896 type, is_min, s); 897 switch (res) { 898 case S390_MINMAX_RES_MINMAX: 899 result = is_min ? float32_min(a, b, s) : float32_max(a, b, s); 900 break; 901 case S390_MINMAX_RES_A: 902 result = a; 903 break; 904 case S390_MINMAX_RES_B: 905 result = b; 906 break; 907 case S390_MINMAX_RES_SILENCE_A: 908 result = float32_silence_nan(a, s); 909 break; 910 case S390_MINMAX_RES_SILENCE_B: 911 result = float32_silence_nan(b, s); 912 break; 913 default: 914 g_assert_not_reached(); 915 } 916 } else if (!is_abs) { 917 result = is_min ? float32_minnum(a, b, &env->fpu_status) : 918 float32_maxnum(a, b, &env->fpu_status); 919 } else { 920 result = is_min ? float32_minnummag(a, b, &env->fpu_status) : 921 float32_maxnummag(a, b, &env->fpu_status); 922 } 923 924 s390_vec_write_float32(&tmp, i, result); 925 vxc = check_ieee_exc(env, i, false, &vec_exc); 926 if (se || vxc) { 927 break; 928 } 929 } 930 handle_ieee_exc(env, vxc, vec_exc, retaddr); 931 *v1 = tmp; 932 } 933 934 static void vfminmax64(S390Vector *v1, const S390Vector *v2, 935 const S390Vector *v3, CPUS390XState *env, 936 S390MinMaxType type, bool is_min, bool is_abs, bool se, 937 uintptr_t retaddr) 938 { 939 float_status *s = &env->fpu_status; 940 uint8_t vxc, vec_exc = 0; 941 S390Vector tmp = {}; 942 int i; 943 944 for (i = 0; i < 2; i++) { 945 float64 a = s390_vec_read_float64(v2, i); 946 float64 b = s390_vec_read_float64(v3, i); 947 float64 result; 948 949 if (type != S390_MINMAX_TYPE_IEEE) { 950 S390MinMaxRes res; 951 952 if (is_abs) { 953 a = float64_abs(a); 954 b = float64_abs(b); 955 } 956 957 res = vfminmax_res(float64_dcmask(env, a), float64_dcmask(env, b), 958 type, is_min, s); 959 switch (res) { 960 case S390_MINMAX_RES_MINMAX: 961 result = is_min ? float64_min(a, b, s) : float64_max(a, b, s); 962 break; 963 case S390_MINMAX_RES_A: 964 result = a; 965 break; 966 case S390_MINMAX_RES_B: 967 result = b; 968 break; 969 case S390_MINMAX_RES_SILENCE_A: 970 result = float64_silence_nan(a, s); 971 break; 972 case S390_MINMAX_RES_SILENCE_B: 973 result = float64_silence_nan(b, s); 974 break; 975 default: 976 g_assert_not_reached(); 977 } 978 } else if (!is_abs) { 979 result = is_min ? float64_minnum(a, b, &env->fpu_status) : 980 float64_maxnum(a, b, &env->fpu_status); 981 } else { 982 result = is_min ? float64_minnummag(a, b, &env->fpu_status) : 983 float64_maxnummag(a, b, &env->fpu_status); 984 } 985 986 s390_vec_write_float64(&tmp, i, result); 987 vxc = check_ieee_exc(env, i, false, &vec_exc); 988 if (se || vxc) { 989 break; 990 } 991 } 992 handle_ieee_exc(env, vxc, vec_exc, retaddr); 993 *v1 = tmp; 994 } 995 996 static void vfminmax128(S390Vector *v1, const S390Vector *v2, 997 const S390Vector *v3, CPUS390XState *env, 998 S390MinMaxType type, bool is_min, bool is_abs, bool se, 999 uintptr_t retaddr) 1000 { 1001 float128 a = s390_vec_read_float128(v2); 1002 float128 b = s390_vec_read_float128(v3); 1003 float_status *s = &env->fpu_status; 1004 uint8_t vxc, vec_exc = 0; 1005 float128 result; 1006 1007 if (type != S390_MINMAX_TYPE_IEEE) { 1008 S390MinMaxRes res; 1009 1010 if (is_abs) { 1011 a = float128_abs(a); 1012 b = float128_abs(b); 1013 } 1014 1015 res = vfminmax_res(float128_dcmask(env, a), float128_dcmask(env, b), 1016 type, is_min, s); 1017 switch (res) { 1018 case S390_MINMAX_RES_MINMAX: 1019 result = is_min ? float128_min(a, b, s) : float128_max(a, b, s); 1020 break; 1021 case S390_MINMAX_RES_A: 1022 result = a; 1023 break; 1024 case S390_MINMAX_RES_B: 1025 result = b; 1026 break; 1027 case S390_MINMAX_RES_SILENCE_A: 1028 result = float128_silence_nan(a, s); 1029 break; 1030 case S390_MINMAX_RES_SILENCE_B: 1031 result = float128_silence_nan(b, s); 1032 break; 1033 default: 1034 g_assert_not_reached(); 1035 } 1036 } else if (!is_abs) { 1037 result = is_min ? float128_minnum(a, b, &env->fpu_status) : 1038 float128_maxnum(a, b, &env->fpu_status); 1039 } else { 1040 result = is_min ? float128_minnummag(a, b, &env->fpu_status) : 1041 float128_maxnummag(a, b, &env->fpu_status); 1042 } 1043 1044 vxc = check_ieee_exc(env, 0, false, &vec_exc); 1045 handle_ieee_exc(env, vxc, vec_exc, retaddr); 1046 s390_vec_write_float128(v1, result); 1047 } 1048 1049 #define DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, BITS) \ 1050 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ 1051 CPUS390XState *env, uint32_t desc) \ 1052 { \ 1053 const bool se = extract32(simd_data(desc), 3, 1); \ 1054 uint8_t type = extract32(simd_data(desc), 4, 4); \ 1055 bool is_abs = false; \ 1056 \ 1057 if (type >= 8) { \ 1058 is_abs = true; \ 1059 type -= 8; \ 1060 } \ 1061 \ 1062 vfminmax##BITS(v1, v2, v3, env, type, IS_MIN, is_abs, se, GETPC()); \ 1063 } 1064 1065 #define DEF_GVEC_VFMINMAX(NAME, IS_MIN) \ 1066 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 32) \ 1067 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 64) \ 1068 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 128) 1069 1070 DEF_GVEC_VFMINMAX(vfmax, false) 1071 DEF_GVEC_VFMINMAX(vfmin, true) 1072