1 /* 2 * QEMU TCG support -- s390x vector floating point instruction support 3 * 4 * Copyright (C) 2019 Red Hat Inc 5 * 6 * Authors: 7 * David Hildenbrand <david@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 #include "qemu/osdep.h" 13 #include "qemu-common.h" 14 #include "cpu.h" 15 #include "s390x-internal.h" 16 #include "vec.h" 17 #include "tcg_s390x.h" 18 #include "tcg/tcg-gvec-desc.h" 19 #include "exec/exec-all.h" 20 #include "exec/helper-proto.h" 21 #include "fpu/softfloat.h" 22 23 #define VIC_INVALID 0x1 24 #define VIC_DIVBYZERO 0x2 25 #define VIC_OVERFLOW 0x3 26 #define VIC_UNDERFLOW 0x4 27 #define VIC_INEXACT 0x5 28 29 /* returns the VEX. If the VEX is 0, there is no trap */ 30 static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC, 31 uint8_t *vec_exc) 32 { 33 uint8_t vece_exc = 0, trap_exc; 34 unsigned qemu_exc; 35 36 /* Retrieve and clear the softfloat exceptions */ 37 qemu_exc = env->fpu_status.float_exception_flags; 38 if (qemu_exc == 0) { 39 return 0; 40 } 41 env->fpu_status.float_exception_flags = 0; 42 43 vece_exc = s390_softfloat_exc_to_ieee(qemu_exc); 44 45 /* Add them to the vector-wide s390x exception bits */ 46 *vec_exc |= vece_exc; 47 48 /* Check for traps and construct the VXC */ 49 trap_exc = vece_exc & env->fpc >> 24; 50 if (trap_exc) { 51 if (trap_exc & S390_IEEE_MASK_INVALID) { 52 return enr << 4 | VIC_INVALID; 53 } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) { 54 return enr << 4 | VIC_DIVBYZERO; 55 } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) { 56 return enr << 4 | VIC_OVERFLOW; 57 } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) { 58 return enr << 4 | VIC_UNDERFLOW; 59 } else if (!XxC) { 60 g_assert(trap_exc & S390_IEEE_MASK_INEXACT); 61 /* inexact has lowest priority on traps */ 62 return enr << 4 | VIC_INEXACT; 63 } 64 } 65 return 0; 66 } 67 68 static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc, 69 uintptr_t retaddr) 70 { 71 if (vxc) { 72 /* on traps, the fpc flags are not updated, instruction is suppressed */ 73 tcg_s390_vector_exception(env, vxc, retaddr); 74 } 75 if (vec_exc) { 76 /* indicate exceptions for all elements combined */ 77 env->fpc |= vec_exc << 16; 78 } 79 } 80 81 static float32 s390_vec_read_float32(const S390Vector *v, uint8_t enr) 82 { 83 return make_float32(s390_vec_read_element32(v, enr)); 84 } 85 86 static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr) 87 { 88 return make_float64(s390_vec_read_element64(v, enr)); 89 } 90 91 static float128 s390_vec_read_float128(const S390Vector *v) 92 { 93 return make_float128(s390_vec_read_element64(v, 0), 94 s390_vec_read_element64(v, 1)); 95 } 96 97 static void s390_vec_write_float32(S390Vector *v, uint8_t enr, float32 data) 98 { 99 return s390_vec_write_element32(v, enr, data); 100 } 101 102 static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data) 103 { 104 return s390_vec_write_element64(v, enr, data); 105 } 106 107 static void s390_vec_write_float128(S390Vector *v, float128 data) 108 { 109 s390_vec_write_element64(v, 0, data.high); 110 s390_vec_write_element64(v, 1, data.low); 111 } 112 113 typedef float32 (*vop32_2_fn)(float32 a, float_status *s); 114 static void vop32_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 115 bool s, bool XxC, uint8_t erm, vop32_2_fn fn, 116 uintptr_t retaddr) 117 { 118 uint8_t vxc, vec_exc = 0; 119 S390Vector tmp = {}; 120 int i, old_mode; 121 122 old_mode = s390_swap_bfp_rounding_mode(env, erm); 123 for (i = 0; i < 4; i++) { 124 const float32 a = s390_vec_read_float32(v2, i); 125 126 s390_vec_write_float32(&tmp, i, fn(a, &env->fpu_status)); 127 vxc = check_ieee_exc(env, i, XxC, &vec_exc); 128 if (s || vxc) { 129 break; 130 } 131 } 132 s390_restore_bfp_rounding_mode(env, old_mode); 133 handle_ieee_exc(env, vxc, vec_exc, retaddr); 134 *v1 = tmp; 135 } 136 137 typedef float64 (*vop64_2_fn)(float64 a, float_status *s); 138 static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 139 bool s, bool XxC, uint8_t erm, vop64_2_fn fn, 140 uintptr_t retaddr) 141 { 142 uint8_t vxc, vec_exc = 0; 143 S390Vector tmp = {}; 144 int i, old_mode; 145 146 old_mode = s390_swap_bfp_rounding_mode(env, erm); 147 for (i = 0; i < 2; i++) { 148 const float64 a = s390_vec_read_float64(v2, i); 149 150 s390_vec_write_float64(&tmp, i, fn(a, &env->fpu_status)); 151 vxc = check_ieee_exc(env, i, XxC, &vec_exc); 152 if (s || vxc) { 153 break; 154 } 155 } 156 s390_restore_bfp_rounding_mode(env, old_mode); 157 handle_ieee_exc(env, vxc, vec_exc, retaddr); 158 *v1 = tmp; 159 } 160 161 typedef float128 (*vop128_2_fn)(float128 a, float_status *s); 162 static void vop128_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 163 bool s, bool XxC, uint8_t erm, vop128_2_fn fn, 164 uintptr_t retaddr) 165 { 166 const float128 a = s390_vec_read_float128(v2); 167 uint8_t vxc, vec_exc = 0; 168 S390Vector tmp = {}; 169 int old_mode; 170 171 old_mode = s390_swap_bfp_rounding_mode(env, erm); 172 s390_vec_write_float128(&tmp, fn(a, &env->fpu_status)); 173 vxc = check_ieee_exc(env, 0, XxC, &vec_exc); 174 s390_restore_bfp_rounding_mode(env, old_mode); 175 handle_ieee_exc(env, vxc, vec_exc, retaddr); 176 *v1 = tmp; 177 } 178 179 static float64 vcdg64(float64 a, float_status *s) 180 { 181 return int64_to_float64(a, s); 182 } 183 184 static float64 vcdlg64(float64 a, float_status *s) 185 { 186 return uint64_to_float64(a, s); 187 } 188 189 static float64 vcgd64(float64 a, float_status *s) 190 { 191 const float64 tmp = float64_to_int64(a, s); 192 193 return float64_is_any_nan(a) ? INT64_MIN : tmp; 194 } 195 196 static float64 vclgd64(float64 a, float_status *s) 197 { 198 const float64 tmp = float64_to_uint64(a, s); 199 200 return float64_is_any_nan(a) ? 0 : tmp; 201 } 202 203 #define DEF_GVEC_VOP2_FN(NAME, FN, BITS) \ 204 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env, \ 205 uint32_t desc) \ 206 { \ 207 const uint8_t erm = extract32(simd_data(desc), 4, 4); \ 208 const bool se = extract32(simd_data(desc), 3, 1); \ 209 const bool XxC = extract32(simd_data(desc), 2, 1); \ 210 \ 211 vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC()); \ 212 } 213 214 #define DEF_GVEC_VOP2_64(NAME) \ 215 DEF_GVEC_VOP2_FN(NAME, NAME##64, 64) 216 217 #define DEF_GVEC_VOP2(NAME, OP) \ 218 DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32) \ 219 DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64) \ 220 DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128) 221 222 DEF_GVEC_VOP2_64(vcdg) 223 DEF_GVEC_VOP2_64(vcdlg) 224 DEF_GVEC_VOP2_64(vcgd) 225 DEF_GVEC_VOP2_64(vclgd) 226 DEF_GVEC_VOP2(vfi, round_to_int) 227 DEF_GVEC_VOP2(vfsq, sqrt) 228 229 typedef float32 (*vop32_3_fn)(float32 a, float32 b, float_status *s); 230 static void vop32_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 231 CPUS390XState *env, bool s, vop32_3_fn fn, 232 uintptr_t retaddr) 233 { 234 uint8_t vxc, vec_exc = 0; 235 S390Vector tmp = {}; 236 int i; 237 238 for (i = 0; i < 4; i++) { 239 const float32 a = s390_vec_read_float32(v2, i); 240 const float32 b = s390_vec_read_float32(v3, i); 241 242 s390_vec_write_float32(&tmp, i, fn(a, b, &env->fpu_status)); 243 vxc = check_ieee_exc(env, i, false, &vec_exc); 244 if (s || vxc) { 245 break; 246 } 247 } 248 handle_ieee_exc(env, vxc, vec_exc, retaddr); 249 *v1 = tmp; 250 } 251 252 typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s); 253 static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 254 CPUS390XState *env, bool s, vop64_3_fn fn, 255 uintptr_t retaddr) 256 { 257 uint8_t vxc, vec_exc = 0; 258 S390Vector tmp = {}; 259 int i; 260 261 for (i = 0; i < 2; i++) { 262 const float64 a = s390_vec_read_float64(v2, i); 263 const float64 b = s390_vec_read_float64(v3, i); 264 265 s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status)); 266 vxc = check_ieee_exc(env, i, false, &vec_exc); 267 if (s || vxc) { 268 break; 269 } 270 } 271 handle_ieee_exc(env, vxc, vec_exc, retaddr); 272 *v1 = tmp; 273 } 274 275 typedef float128 (*vop128_3_fn)(float128 a, float128 b, float_status *s); 276 static void vop128_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 277 CPUS390XState *env, bool s, vop128_3_fn fn, 278 uintptr_t retaddr) 279 { 280 const float128 a = s390_vec_read_float128(v2); 281 const float128 b = s390_vec_read_float128(v3); 282 uint8_t vxc, vec_exc = 0; 283 S390Vector tmp = {}; 284 285 s390_vec_write_float128(&tmp, fn(a, b, &env->fpu_status)); 286 vxc = check_ieee_exc(env, 0, false, &vec_exc); 287 handle_ieee_exc(env, vxc, vec_exc, retaddr); 288 *v1 = tmp; 289 } 290 291 #define DEF_GVEC_VOP3_B(NAME, OP, BITS) \ 292 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ 293 CPUS390XState *env, uint32_t desc) \ 294 { \ 295 const bool se = extract32(simd_data(desc), 3, 1); \ 296 \ 297 vop##BITS##_3(v1, v2, v3, env, se, float##BITS##_##OP, GETPC()); \ 298 } 299 300 #define DEF_GVEC_VOP3(NAME, OP) \ 301 DEF_GVEC_VOP3_B(NAME, OP, 32) \ 302 DEF_GVEC_VOP3_B(NAME, OP, 64) \ 303 DEF_GVEC_VOP3_B(NAME, OP, 128) 304 305 DEF_GVEC_VOP3(vfa, add) 306 DEF_GVEC_VOP3(vfs, sub) 307 DEF_GVEC_VOP3(vfd, div) 308 DEF_GVEC_VOP3(vfm, mul) 309 310 static int wfc32(const S390Vector *v1, const S390Vector *v2, 311 CPUS390XState *env, bool signal, uintptr_t retaddr) 312 { 313 /* only the zero-indexed elements are compared */ 314 const float32 a = s390_vec_read_float32(v1, 0); 315 const float32 b = s390_vec_read_float32(v2, 0); 316 uint8_t vxc, vec_exc = 0; 317 int cmp; 318 319 if (signal) { 320 cmp = float32_compare(a, b, &env->fpu_status); 321 } else { 322 cmp = float32_compare_quiet(a, b, &env->fpu_status); 323 } 324 vxc = check_ieee_exc(env, 0, false, &vec_exc); 325 handle_ieee_exc(env, vxc, vec_exc, retaddr); 326 327 return float_comp_to_cc(env, cmp); 328 } 329 330 static int wfc64(const S390Vector *v1, const S390Vector *v2, 331 CPUS390XState *env, bool signal, uintptr_t retaddr) 332 { 333 /* only the zero-indexed elements are compared */ 334 const float64 a = s390_vec_read_float64(v1, 0); 335 const float64 b = s390_vec_read_float64(v2, 0); 336 uint8_t vxc, vec_exc = 0; 337 int cmp; 338 339 if (signal) { 340 cmp = float64_compare(a, b, &env->fpu_status); 341 } else { 342 cmp = float64_compare_quiet(a, b, &env->fpu_status); 343 } 344 vxc = check_ieee_exc(env, 0, false, &vec_exc); 345 handle_ieee_exc(env, vxc, vec_exc, retaddr); 346 347 return float_comp_to_cc(env, cmp); 348 } 349 350 static int wfc128(const S390Vector *v1, const S390Vector *v2, 351 CPUS390XState *env, bool signal, uintptr_t retaddr) 352 { 353 /* only the zero-indexed elements are compared */ 354 const float128 a = s390_vec_read_float128(v1); 355 const float128 b = s390_vec_read_float128(v2); 356 uint8_t vxc, vec_exc = 0; 357 int cmp; 358 359 if (signal) { 360 cmp = float128_compare(a, b, &env->fpu_status); 361 } else { 362 cmp = float128_compare_quiet(a, b, &env->fpu_status); 363 } 364 vxc = check_ieee_exc(env, 0, false, &vec_exc); 365 handle_ieee_exc(env, vxc, vec_exc, retaddr); 366 367 return float_comp_to_cc(env, cmp); 368 } 369 370 #define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS) \ 371 void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2, \ 372 CPUS390XState *env, uint32_t desc) \ 373 { \ 374 env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC()); \ 375 } 376 377 #define DEF_GVEC_WFC(NAME, SIGNAL) \ 378 DEF_GVEC_WFC_B(NAME, SIGNAL, 32) \ 379 DEF_GVEC_WFC_B(NAME, SIGNAL, 64) \ 380 DEF_GVEC_WFC_B(NAME, SIGNAL, 128) 381 382 DEF_GVEC_WFC(wfc, false) 383 DEF_GVEC_WFC(wfk, true) 384 385 typedef bool (*vfc32_fn)(float32 a, float32 b, float_status *status); 386 static int vfc32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 387 CPUS390XState *env, bool s, vfc32_fn fn, uintptr_t retaddr) 388 { 389 uint8_t vxc, vec_exc = 0; 390 S390Vector tmp = {}; 391 int match = 0; 392 int i; 393 394 for (i = 0; i < 4; i++) { 395 const float32 a = s390_vec_read_float32(v2, i); 396 const float32 b = s390_vec_read_float32(v3, i); 397 398 /* swap the order of the parameters, so we can use existing functions */ 399 if (fn(b, a, &env->fpu_status)) { 400 match++; 401 s390_vec_write_element32(&tmp, i, -1u); 402 } 403 vxc = check_ieee_exc(env, i, false, &vec_exc); 404 if (s || vxc) { 405 break; 406 } 407 } 408 409 handle_ieee_exc(env, vxc, vec_exc, retaddr); 410 *v1 = tmp; 411 if (match) { 412 return s || match == 4 ? 0 : 1; 413 } 414 return 3; 415 } 416 417 typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status); 418 static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 419 CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr) 420 { 421 uint8_t vxc, vec_exc = 0; 422 S390Vector tmp = {}; 423 int match = 0; 424 int i; 425 426 for (i = 0; i < 2; i++) { 427 const float64 a = s390_vec_read_float64(v2, i); 428 const float64 b = s390_vec_read_float64(v3, i); 429 430 /* swap the order of the parameters, so we can use existing functions */ 431 if (fn(b, a, &env->fpu_status)) { 432 match++; 433 s390_vec_write_element64(&tmp, i, -1ull); 434 } 435 vxc = check_ieee_exc(env, i, false, &vec_exc); 436 if (s || vxc) { 437 break; 438 } 439 } 440 441 handle_ieee_exc(env, vxc, vec_exc, retaddr); 442 *v1 = tmp; 443 if (match) { 444 return s || match == 2 ? 0 : 1; 445 } 446 return 3; 447 } 448 449 typedef bool (*vfc128_fn)(float128 a, float128 b, float_status *status); 450 static int vfc128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 451 CPUS390XState *env, bool s, vfc128_fn fn, uintptr_t retaddr) 452 { 453 const float128 a = s390_vec_read_float128(v2); 454 const float128 b = s390_vec_read_float128(v3); 455 uint8_t vxc, vec_exc = 0; 456 S390Vector tmp = {}; 457 bool match = false; 458 459 /* swap the order of the parameters, so we can use existing functions */ 460 if (fn(b, a, &env->fpu_status)) { 461 match = true; 462 s390_vec_write_element64(&tmp, 0, -1ull); 463 s390_vec_write_element64(&tmp, 1, -1ull); 464 } 465 vxc = check_ieee_exc(env, 0, false, &vec_exc); 466 handle_ieee_exc(env, vxc, vec_exc, retaddr); 467 *v1 = tmp; 468 return match ? 0 : 3; 469 } 470 471 #define DEF_GVEC_VFC_B(NAME, OP, BITS) \ 472 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ 473 CPUS390XState *env, uint32_t desc) \ 474 { \ 475 const bool se = extract32(simd_data(desc), 3, 1); \ 476 const bool sq = extract32(simd_data(desc), 2, 1); \ 477 vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet; \ 478 \ 479 vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \ 480 } \ 481 \ 482 void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3, \ 483 CPUS390XState *env, uint32_t desc) \ 484 { \ 485 const bool se = extract32(simd_data(desc), 3, 1); \ 486 const bool sq = extract32(simd_data(desc), 2, 1); \ 487 vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet; \ 488 \ 489 env->cc_op = vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \ 490 } 491 492 #define DEF_GVEC_VFC(NAME, OP) \ 493 DEF_GVEC_VFC_B(NAME, OP, 32) \ 494 DEF_GVEC_VFC_B(NAME, OP, 64) \ 495 DEF_GVEC_VFC_B(NAME, OP, 128) \ 496 497 DEF_GVEC_VFC(vfce, eq) 498 DEF_GVEC_VFC(vfch, lt) 499 DEF_GVEC_VFC(vfche, le) 500 501 void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env, 502 uint32_t desc) 503 { 504 const bool s = extract32(simd_data(desc), 3, 1); 505 uint8_t vxc, vec_exc = 0; 506 S390Vector tmp = {}; 507 int i; 508 509 for (i = 0; i < 2; i++) { 510 /* load from even element */ 511 const float32 a = s390_vec_read_element32(v2, i * 2); 512 const uint64_t ret = float32_to_float64(a, &env->fpu_status); 513 514 s390_vec_write_element64(&tmp, i, ret); 515 /* indicate the source element */ 516 vxc = check_ieee_exc(env, i * 2, false, &vec_exc); 517 if (s || vxc) { 518 break; 519 } 520 } 521 handle_ieee_exc(env, vxc, vec_exc, GETPC()); 522 *(S390Vector *)v1 = tmp; 523 } 524 525 void HELPER(gvec_vfll64)(void *v1, const void *v2, CPUS390XState *env, 526 uint32_t desc) 527 { 528 /* load from even element */ 529 const float128 ret = float64_to_float128(s390_vec_read_float64(v2, 0), 530 &env->fpu_status); 531 uint8_t vxc, vec_exc = 0; 532 533 vxc = check_ieee_exc(env, 0, false, &vec_exc); 534 handle_ieee_exc(env, vxc, vec_exc, GETPC()); 535 s390_vec_write_float128(v1, ret); 536 } 537 538 void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env, 539 uint32_t desc) 540 { 541 const uint8_t erm = extract32(simd_data(desc), 4, 4); 542 const bool s = extract32(simd_data(desc), 3, 1); 543 const bool XxC = extract32(simd_data(desc), 2, 1); 544 uint8_t vxc, vec_exc = 0; 545 S390Vector tmp = {}; 546 int i, old_mode; 547 548 old_mode = s390_swap_bfp_rounding_mode(env, erm); 549 for (i = 0; i < 2; i++) { 550 float64 a = s390_vec_read_element64(v2, i); 551 uint32_t ret = float64_to_float32(a, &env->fpu_status); 552 553 /* place at even element */ 554 s390_vec_write_element32(&tmp, i * 2, ret); 555 /* indicate the source element */ 556 vxc = check_ieee_exc(env, i, XxC, &vec_exc); 557 if (s || vxc) { 558 break; 559 } 560 } 561 s390_restore_bfp_rounding_mode(env, old_mode); 562 handle_ieee_exc(env, vxc, vec_exc, GETPC()); 563 *(S390Vector *)v1 = tmp; 564 } 565 566 void HELPER(gvec_vflr128)(void *v1, const void *v2, CPUS390XState *env, 567 uint32_t desc) 568 { 569 const uint8_t erm = extract32(simd_data(desc), 4, 4); 570 const bool XxC = extract32(simd_data(desc), 2, 1); 571 uint8_t vxc, vec_exc = 0; 572 int old_mode; 573 float64 ret; 574 575 old_mode = s390_swap_bfp_rounding_mode(env, erm); 576 ret = float128_to_float64(s390_vec_read_float128(v2), &env->fpu_status); 577 vxc = check_ieee_exc(env, 0, XxC, &vec_exc); 578 s390_restore_bfp_rounding_mode(env, old_mode); 579 handle_ieee_exc(env, vxc, vec_exc, GETPC()); 580 581 /* place at even element, odd element is unpredictable */ 582 s390_vec_write_float64(v1, 0, ret); 583 } 584 585 static void vfma32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 586 const S390Vector *v4, CPUS390XState *env, bool s, int flags, 587 uintptr_t retaddr) 588 { 589 uint8_t vxc, vec_exc = 0; 590 S390Vector tmp = {}; 591 int i; 592 593 for (i = 0; i < 4; i++) { 594 const float32 a = s390_vec_read_float32(v2, i); 595 const float32 b = s390_vec_read_float32(v3, i); 596 const float32 c = s390_vec_read_float32(v4, i); 597 float32 ret = float32_muladd(a, b, c, flags, &env->fpu_status); 598 599 s390_vec_write_float32(&tmp, i, ret); 600 vxc = check_ieee_exc(env, i, false, &vec_exc); 601 if (s || vxc) { 602 break; 603 } 604 } 605 handle_ieee_exc(env, vxc, vec_exc, retaddr); 606 *v1 = tmp; 607 } 608 609 static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 610 const S390Vector *v4, CPUS390XState *env, bool s, int flags, 611 uintptr_t retaddr) 612 { 613 uint8_t vxc, vec_exc = 0; 614 S390Vector tmp = {}; 615 int i; 616 617 for (i = 0; i < 2; i++) { 618 const float64 a = s390_vec_read_float64(v2, i); 619 const float64 b = s390_vec_read_float64(v3, i); 620 const float64 c = s390_vec_read_float64(v4, i); 621 const float64 ret = float64_muladd(a, b, c, flags, &env->fpu_status); 622 623 s390_vec_write_float64(&tmp, i, ret); 624 vxc = check_ieee_exc(env, i, false, &vec_exc); 625 if (s || vxc) { 626 break; 627 } 628 } 629 handle_ieee_exc(env, vxc, vec_exc, retaddr); 630 *v1 = tmp; 631 } 632 633 static void vfma128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 634 const S390Vector *v4, CPUS390XState *env, bool s, int flags, 635 uintptr_t retaddr) 636 { 637 const float128 a = s390_vec_read_float128(v2); 638 const float128 b = s390_vec_read_float128(v3); 639 const float128 c = s390_vec_read_float128(v4); 640 uint8_t vxc, vec_exc = 0; 641 float128 ret; 642 643 ret = float128_muladd(a, b, c, flags, &env->fpu_status); 644 vxc = check_ieee_exc(env, 0, false, &vec_exc); 645 handle_ieee_exc(env, vxc, vec_exc, retaddr); 646 s390_vec_write_float128(v1, ret); 647 } 648 649 #define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS) \ 650 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ 651 const void *v4, CPUS390XState *env, \ 652 uint32_t desc) \ 653 { \ 654 const bool se = extract32(simd_data(desc), 3, 1); \ 655 \ 656 vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC()); \ 657 } 658 659 #define DEF_GVEC_VFMA(NAME, FLAGS) \ 660 DEF_GVEC_VFMA_B(NAME, FLAGS, 32) \ 661 DEF_GVEC_VFMA_B(NAME, FLAGS, 64) \ 662 DEF_GVEC_VFMA_B(NAME, FLAGS, 128) 663 664 DEF_GVEC_VFMA(vfma, 0) 665 DEF_GVEC_VFMA(vfms, float_muladd_negate_c) 666 DEF_GVEC_VFMA(vfnma, float_muladd_negate_result) 667 DEF_GVEC_VFMA(vfnms, float_muladd_negate_c | float_muladd_negate_result) 668 669 void HELPER(gvec_vftci32)(void *v1, const void *v2, CPUS390XState *env, 670 uint32_t desc) 671 { 672 uint16_t i3 = extract32(simd_data(desc), 4, 12); 673 bool s = extract32(simd_data(desc), 3, 1); 674 int i, match = 0; 675 676 for (i = 0; i < 4; i++) { 677 float32 a = s390_vec_read_float32(v2, i); 678 679 if (float32_dcmask(env, a) & i3) { 680 match++; 681 s390_vec_write_element32(v1, i, -1u); 682 } else { 683 s390_vec_write_element32(v1, i, 0); 684 } 685 if (s) { 686 break; 687 } 688 } 689 690 if (match == 4 || (s && match)) { 691 env->cc_op = 0; 692 } else if (match) { 693 env->cc_op = 1; 694 } else { 695 env->cc_op = 3; 696 } 697 } 698 699 void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env, 700 uint32_t desc) 701 { 702 const uint16_t i3 = extract32(simd_data(desc), 4, 12); 703 const bool s = extract32(simd_data(desc), 3, 1); 704 int i, match = 0; 705 706 for (i = 0; i < 2; i++) { 707 const float64 a = s390_vec_read_float64(v2, i); 708 709 if (float64_dcmask(env, a) & i3) { 710 match++; 711 s390_vec_write_element64(v1, i, -1ull); 712 } else { 713 s390_vec_write_element64(v1, i, 0); 714 } 715 if (s) { 716 break; 717 } 718 } 719 720 if (match == 2 || (s && match)) { 721 env->cc_op = 0; 722 } else if (match) { 723 env->cc_op = 1; 724 } else { 725 env->cc_op = 3; 726 } 727 } 728 729 void HELPER(gvec_vftci128)(void *v1, const void *v2, CPUS390XState *env, 730 uint32_t desc) 731 { 732 const float128 a = s390_vec_read_float128(v2); 733 uint16_t i3 = extract32(simd_data(desc), 4, 12); 734 735 if (float128_dcmask(env, a) & i3) { 736 env->cc_op = 0; 737 s390_vec_write_element64(v1, 0, -1ull); 738 s390_vec_write_element64(v1, 1, -1ull); 739 } else { 740 env->cc_op = 3; 741 s390_vec_write_element64(v1, 0, 0); 742 s390_vec_write_element64(v1, 1, 0); 743 } 744 } 745 746 typedef enum S390MinMaxType { 747 S390_MINMAX_TYPE_IEEE = 0, 748 S390_MINMAX_TYPE_JAVA, 749 S390_MINMAX_TYPE_C_MACRO, 750 S390_MINMAX_TYPE_CPP, 751 S390_MINMAX_TYPE_F, 752 } S390MinMaxType; 753 754 typedef enum S390MinMaxRes { 755 S390_MINMAX_RES_MINMAX = 0, 756 S390_MINMAX_RES_A, 757 S390_MINMAX_RES_B, 758 S390_MINMAX_RES_SILENCE_A, 759 S390_MINMAX_RES_SILENCE_B, 760 } S390MinMaxRes; 761 762 static S390MinMaxRes vfmin_res(uint16_t dcmask_a, uint16_t dcmask_b, 763 S390MinMaxType type, float_status *s) 764 { 765 const bool neg_a = dcmask_a & DCMASK_NEGATIVE; 766 const bool nan_a = dcmask_a & DCMASK_NAN; 767 const bool nan_b = dcmask_b & DCMASK_NAN; 768 769 g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F); 770 771 if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) { 772 const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN; 773 const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN; 774 775 if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) { 776 s->float_exception_flags |= float_flag_invalid; 777 } 778 switch (type) { 779 case S390_MINMAX_TYPE_JAVA: 780 if (sig_a) { 781 return S390_MINMAX_RES_SILENCE_A; 782 } else if (sig_b) { 783 return S390_MINMAX_RES_SILENCE_B; 784 } 785 return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; 786 case S390_MINMAX_TYPE_F: 787 return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; 788 case S390_MINMAX_TYPE_C_MACRO: 789 s->float_exception_flags |= float_flag_invalid; 790 return S390_MINMAX_RES_B; 791 case S390_MINMAX_TYPE_CPP: 792 s->float_exception_flags |= float_flag_invalid; 793 return S390_MINMAX_RES_A; 794 default: 795 g_assert_not_reached(); 796 } 797 } else if (unlikely(dcmask_a & dcmask_b & DCMASK_ZERO)) { 798 switch (type) { 799 case S390_MINMAX_TYPE_JAVA: 800 return neg_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; 801 case S390_MINMAX_TYPE_C_MACRO: 802 return S390_MINMAX_RES_B; 803 case S390_MINMAX_TYPE_F: 804 return !neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A; 805 case S390_MINMAX_TYPE_CPP: 806 return S390_MINMAX_RES_A; 807 default: 808 g_assert_not_reached(); 809 } 810 } 811 return S390_MINMAX_RES_MINMAX; 812 } 813 814 static S390MinMaxRes vfmax_res(uint16_t dcmask_a, uint16_t dcmask_b, 815 S390MinMaxType type, float_status *s) 816 { 817 g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F); 818 819 if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) { 820 const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN; 821 const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN; 822 const bool nan_a = dcmask_a & DCMASK_NAN; 823 const bool nan_b = dcmask_b & DCMASK_NAN; 824 825 if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) { 826 s->float_exception_flags |= float_flag_invalid; 827 } 828 switch (type) { 829 case S390_MINMAX_TYPE_JAVA: 830 if (sig_a) { 831 return S390_MINMAX_RES_SILENCE_A; 832 } else if (sig_b) { 833 return S390_MINMAX_RES_SILENCE_B; 834 } 835 return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; 836 case S390_MINMAX_TYPE_F: 837 return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; 838 case S390_MINMAX_TYPE_C_MACRO: 839 s->float_exception_flags |= float_flag_invalid; 840 return S390_MINMAX_RES_B; 841 case S390_MINMAX_TYPE_CPP: 842 s->float_exception_flags |= float_flag_invalid; 843 return S390_MINMAX_RES_A; 844 default: 845 g_assert_not_reached(); 846 } 847 } else if (unlikely(dcmask_a & dcmask_b & DCMASK_ZERO)) { 848 const bool neg_a = dcmask_a & DCMASK_NEGATIVE; 849 850 switch (type) { 851 case S390_MINMAX_TYPE_JAVA: 852 case S390_MINMAX_TYPE_F: 853 return neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A; 854 case S390_MINMAX_TYPE_C_MACRO: 855 return S390_MINMAX_RES_B; 856 case S390_MINMAX_TYPE_CPP: 857 return S390_MINMAX_RES_A; 858 default: 859 g_assert_not_reached(); 860 } 861 } 862 return S390_MINMAX_RES_MINMAX; 863 } 864 865 static S390MinMaxRes vfminmax_res(uint16_t dcmask_a, uint16_t dcmask_b, 866 S390MinMaxType type, bool is_min, 867 float_status *s) 868 { 869 return is_min ? vfmin_res(dcmask_a, dcmask_b, type, s) : 870 vfmax_res(dcmask_a, dcmask_b, type, s); 871 } 872 873 static void vfminmax32(S390Vector *v1, const S390Vector *v2, 874 const S390Vector *v3, CPUS390XState *env, 875 S390MinMaxType type, bool is_min, bool is_abs, bool se, 876 uintptr_t retaddr) 877 { 878 float_status *s = &env->fpu_status; 879 uint8_t vxc, vec_exc = 0; 880 S390Vector tmp = {}; 881 int i; 882 883 for (i = 0; i < 4; i++) { 884 float32 a = s390_vec_read_float32(v2, i); 885 float32 b = s390_vec_read_float32(v3, i); 886 float32 result; 887 888 if (type != S390_MINMAX_TYPE_IEEE) { 889 S390MinMaxRes res; 890 891 if (is_abs) { 892 a = float32_abs(a); 893 b = float32_abs(b); 894 } 895 896 res = vfminmax_res(float32_dcmask(env, a), float32_dcmask(env, b), 897 type, is_min, s); 898 switch (res) { 899 case S390_MINMAX_RES_MINMAX: 900 result = is_min ? float32_min(a, b, s) : float32_max(a, b, s); 901 break; 902 case S390_MINMAX_RES_A: 903 result = a; 904 break; 905 case S390_MINMAX_RES_B: 906 result = b; 907 break; 908 case S390_MINMAX_RES_SILENCE_A: 909 result = float32_silence_nan(a, s); 910 break; 911 case S390_MINMAX_RES_SILENCE_B: 912 result = float32_silence_nan(b, s); 913 break; 914 default: 915 g_assert_not_reached(); 916 } 917 } else if (!is_abs) { 918 result = is_min ? float32_minnum(a, b, &env->fpu_status) : 919 float32_maxnum(a, b, &env->fpu_status); 920 } else { 921 result = is_min ? float32_minnummag(a, b, &env->fpu_status) : 922 float32_maxnummag(a, b, &env->fpu_status); 923 } 924 925 s390_vec_write_float32(&tmp, i, result); 926 vxc = check_ieee_exc(env, i, false, &vec_exc); 927 if (se || vxc) { 928 break; 929 } 930 } 931 handle_ieee_exc(env, vxc, vec_exc, retaddr); 932 *v1 = tmp; 933 } 934 935 static void vfminmax64(S390Vector *v1, const S390Vector *v2, 936 const S390Vector *v3, CPUS390XState *env, 937 S390MinMaxType type, bool is_min, bool is_abs, bool se, 938 uintptr_t retaddr) 939 { 940 float_status *s = &env->fpu_status; 941 uint8_t vxc, vec_exc = 0; 942 S390Vector tmp = {}; 943 int i; 944 945 for (i = 0; i < 2; i++) { 946 float64 a = s390_vec_read_float64(v2, i); 947 float64 b = s390_vec_read_float64(v3, i); 948 float64 result; 949 950 if (type != S390_MINMAX_TYPE_IEEE) { 951 S390MinMaxRes res; 952 953 if (is_abs) { 954 a = float64_abs(a); 955 b = float64_abs(b); 956 } 957 958 res = vfminmax_res(float64_dcmask(env, a), float64_dcmask(env, b), 959 type, is_min, s); 960 switch (res) { 961 case S390_MINMAX_RES_MINMAX: 962 result = is_min ? float64_min(a, b, s) : float64_max(a, b, s); 963 break; 964 case S390_MINMAX_RES_A: 965 result = a; 966 break; 967 case S390_MINMAX_RES_B: 968 result = b; 969 break; 970 case S390_MINMAX_RES_SILENCE_A: 971 result = float64_silence_nan(a, s); 972 break; 973 case S390_MINMAX_RES_SILENCE_B: 974 result = float64_silence_nan(b, s); 975 break; 976 default: 977 g_assert_not_reached(); 978 } 979 } else if (!is_abs) { 980 result = is_min ? float64_minnum(a, b, &env->fpu_status) : 981 float64_maxnum(a, b, &env->fpu_status); 982 } else { 983 result = is_min ? float64_minnummag(a, b, &env->fpu_status) : 984 float64_maxnummag(a, b, &env->fpu_status); 985 } 986 987 s390_vec_write_float64(&tmp, i, result); 988 vxc = check_ieee_exc(env, i, false, &vec_exc); 989 if (se || vxc) { 990 break; 991 } 992 } 993 handle_ieee_exc(env, vxc, vec_exc, retaddr); 994 *v1 = tmp; 995 } 996 997 static void vfminmax128(S390Vector *v1, const S390Vector *v2, 998 const S390Vector *v3, CPUS390XState *env, 999 S390MinMaxType type, bool is_min, bool is_abs, bool se, 1000 uintptr_t retaddr) 1001 { 1002 float128 a = s390_vec_read_float128(v2); 1003 float128 b = s390_vec_read_float128(v3); 1004 float_status *s = &env->fpu_status; 1005 uint8_t vxc, vec_exc = 0; 1006 float128 result; 1007 1008 if (type != S390_MINMAX_TYPE_IEEE) { 1009 S390MinMaxRes res; 1010 1011 if (is_abs) { 1012 a = float128_abs(a); 1013 b = float128_abs(b); 1014 } 1015 1016 res = vfminmax_res(float128_dcmask(env, a), float128_dcmask(env, b), 1017 type, is_min, s); 1018 switch (res) { 1019 case S390_MINMAX_RES_MINMAX: 1020 result = is_min ? float128_min(a, b, s) : float128_max(a, b, s); 1021 break; 1022 case S390_MINMAX_RES_A: 1023 result = a; 1024 break; 1025 case S390_MINMAX_RES_B: 1026 result = b; 1027 break; 1028 case S390_MINMAX_RES_SILENCE_A: 1029 result = float128_silence_nan(a, s); 1030 break; 1031 case S390_MINMAX_RES_SILENCE_B: 1032 result = float128_silence_nan(b, s); 1033 break; 1034 default: 1035 g_assert_not_reached(); 1036 } 1037 } else if (!is_abs) { 1038 result = is_min ? float128_minnum(a, b, &env->fpu_status) : 1039 float128_maxnum(a, b, &env->fpu_status); 1040 } else { 1041 result = is_min ? float128_minnummag(a, b, &env->fpu_status) : 1042 float128_maxnummag(a, b, &env->fpu_status); 1043 } 1044 1045 vxc = check_ieee_exc(env, 0, false, &vec_exc); 1046 handle_ieee_exc(env, vxc, vec_exc, retaddr); 1047 s390_vec_write_float128(v1, result); 1048 } 1049 1050 #define DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, BITS) \ 1051 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ 1052 CPUS390XState *env, uint32_t desc) \ 1053 { \ 1054 const bool se = extract32(simd_data(desc), 3, 1); \ 1055 uint8_t type = extract32(simd_data(desc), 4, 4); \ 1056 bool is_abs = false; \ 1057 \ 1058 if (type >= 8) { \ 1059 is_abs = true; \ 1060 type -= 8; \ 1061 } \ 1062 \ 1063 vfminmax##BITS(v1, v2, v3, env, type, IS_MIN, is_abs, se, GETPC()); \ 1064 } 1065 1066 #define DEF_GVEC_VFMINMAX(NAME, IS_MIN) \ 1067 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 32) \ 1068 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 64) \ 1069 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 128) 1070 1071 DEF_GVEC_VFMINMAX(vfmax, false) 1072 DEF_GVEC_VFMINMAX(vfmin, true) 1073