1 /* 2 * QEMU TCG support -- s390x vector floating point instruction support 3 * 4 * Copyright (C) 2019 Red Hat Inc 5 * 6 * Authors: 7 * David Hildenbrand <david@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 #include "qemu/osdep.h" 13 #include "cpu.h" 14 #include "s390x-internal.h" 15 #include "vec.h" 16 #include "tcg_s390x.h" 17 #include "tcg/tcg-gvec-desc.h" 18 #include "exec/exec-all.h" 19 #include "exec/helper-proto.h" 20 #include "fpu/softfloat.h" 21 22 #define VIC_INVALID 0x1 23 #define VIC_DIVBYZERO 0x2 24 #define VIC_OVERFLOW 0x3 25 #define VIC_UNDERFLOW 0x4 26 #define VIC_INEXACT 0x5 27 28 /* returns the VEX. If the VEX is 0, there is no trap */ 29 static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC, 30 uint8_t *vec_exc) 31 { 32 uint8_t vece_exc = 0, trap_exc; 33 unsigned qemu_exc; 34 35 /* Retrieve and clear the softfloat exceptions */ 36 qemu_exc = env->fpu_status.float_exception_flags; 37 if (qemu_exc == 0) { 38 return 0; 39 } 40 env->fpu_status.float_exception_flags = 0; 41 42 vece_exc = s390_softfloat_exc_to_ieee(qemu_exc); 43 44 /* Add them to the vector-wide s390x exception bits */ 45 *vec_exc |= vece_exc; 46 47 /* Check for traps and construct the VXC */ 48 trap_exc = vece_exc & env->fpc >> 24; 49 if (trap_exc) { 50 if (trap_exc & S390_IEEE_MASK_INVALID) { 51 return enr << 4 | VIC_INVALID; 52 } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) { 53 return enr << 4 | VIC_DIVBYZERO; 54 } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) { 55 return enr << 4 | VIC_OVERFLOW; 56 } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) { 57 return enr << 4 | VIC_UNDERFLOW; 58 } else if (!XxC) { 59 g_assert(trap_exc & S390_IEEE_MASK_INEXACT); 60 /* inexact has lowest priority on traps */ 61 return enr << 4 | VIC_INEXACT; 62 } 63 } 64 return 0; 65 } 66 67 static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc, 68 uintptr_t retaddr) 69 { 70 if (vxc) { 71 /* on traps, the fpc flags are not updated, instruction is suppressed */ 72 tcg_s390_vector_exception(env, vxc, retaddr); 73 } 74 if (vec_exc) { 75 /* indicate exceptions for all elements combined */ 76 env->fpc |= vec_exc << 16; 77 } 78 } 79 80 static float32 s390_vec_read_float32(const S390Vector *v, uint8_t enr) 81 { 82 return make_float32(s390_vec_read_element32(v, enr)); 83 } 84 85 static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr) 86 { 87 return make_float64(s390_vec_read_element64(v, enr)); 88 } 89 90 static float128 s390_vec_read_float128(const S390Vector *v) 91 { 92 return make_float128(s390_vec_read_element64(v, 0), 93 s390_vec_read_element64(v, 1)); 94 } 95 96 static void s390_vec_write_float32(S390Vector *v, uint8_t enr, float32 data) 97 { 98 return s390_vec_write_element32(v, enr, data); 99 } 100 101 static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data) 102 { 103 return s390_vec_write_element64(v, enr, data); 104 } 105 106 static void s390_vec_write_float128(S390Vector *v, float128 data) 107 { 108 s390_vec_write_element64(v, 0, data.high); 109 s390_vec_write_element64(v, 1, data.low); 110 } 111 112 typedef float32 (*vop32_2_fn)(float32 a, float_status *s); 113 static void vop32_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 114 bool s, bool XxC, uint8_t erm, vop32_2_fn fn, 115 uintptr_t retaddr) 116 { 117 uint8_t vxc, vec_exc = 0; 118 S390Vector tmp = {}; 119 int i, old_mode; 120 121 old_mode = s390_swap_bfp_rounding_mode(env, erm); 122 for (i = 0; i < 4; i++) { 123 const float32 a = s390_vec_read_float32(v2, i); 124 125 s390_vec_write_float32(&tmp, i, fn(a, &env->fpu_status)); 126 vxc = check_ieee_exc(env, i, XxC, &vec_exc); 127 if (s || vxc) { 128 break; 129 } 130 } 131 s390_restore_bfp_rounding_mode(env, old_mode); 132 handle_ieee_exc(env, vxc, vec_exc, retaddr); 133 *v1 = tmp; 134 } 135 136 typedef float64 (*vop64_2_fn)(float64 a, float_status *s); 137 static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 138 bool s, bool XxC, uint8_t erm, vop64_2_fn fn, 139 uintptr_t retaddr) 140 { 141 uint8_t vxc, vec_exc = 0; 142 S390Vector tmp = {}; 143 int i, old_mode; 144 145 old_mode = s390_swap_bfp_rounding_mode(env, erm); 146 for (i = 0; i < 2; i++) { 147 const float64 a = s390_vec_read_float64(v2, i); 148 149 s390_vec_write_float64(&tmp, i, fn(a, &env->fpu_status)); 150 vxc = check_ieee_exc(env, i, XxC, &vec_exc); 151 if (s || vxc) { 152 break; 153 } 154 } 155 s390_restore_bfp_rounding_mode(env, old_mode); 156 handle_ieee_exc(env, vxc, vec_exc, retaddr); 157 *v1 = tmp; 158 } 159 160 typedef float128 (*vop128_2_fn)(float128 a, float_status *s); 161 static void vop128_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 162 bool s, bool XxC, uint8_t erm, vop128_2_fn fn, 163 uintptr_t retaddr) 164 { 165 const float128 a = s390_vec_read_float128(v2); 166 uint8_t vxc, vec_exc = 0; 167 S390Vector tmp = {}; 168 int old_mode; 169 170 old_mode = s390_swap_bfp_rounding_mode(env, erm); 171 s390_vec_write_float128(&tmp, fn(a, &env->fpu_status)); 172 vxc = check_ieee_exc(env, 0, XxC, &vec_exc); 173 s390_restore_bfp_rounding_mode(env, old_mode); 174 handle_ieee_exc(env, vxc, vec_exc, retaddr); 175 *v1 = tmp; 176 } 177 178 static float32 vcdg32(float32 a, float_status *s) 179 { 180 return int32_to_float32(a, s); 181 } 182 183 static float32 vcdlg32(float32 a, float_status *s) 184 { 185 return uint32_to_float32(a, s); 186 } 187 188 static float32 vcgd32(float32 a, float_status *s) 189 { 190 const float32 tmp = float32_to_int32(a, s); 191 192 return float32_is_any_nan(a) ? INT32_MIN : tmp; 193 } 194 195 static float32 vclgd32(float32 a, float_status *s) 196 { 197 const float32 tmp = float32_to_uint32(a, s); 198 199 return float32_is_any_nan(a) ? 0 : tmp; 200 } 201 202 static float64 vcdg64(float64 a, float_status *s) 203 { 204 return int64_to_float64(a, s); 205 } 206 207 static float64 vcdlg64(float64 a, float_status *s) 208 { 209 return uint64_to_float64(a, s); 210 } 211 212 static float64 vcgd64(float64 a, float_status *s) 213 { 214 const float64 tmp = float64_to_int64(a, s); 215 216 return float64_is_any_nan(a) ? INT64_MIN : tmp; 217 } 218 219 static float64 vclgd64(float64 a, float_status *s) 220 { 221 const float64 tmp = float64_to_uint64(a, s); 222 223 return float64_is_any_nan(a) ? 0 : tmp; 224 } 225 226 #define DEF_GVEC_VOP2_FN(NAME, FN, BITS) \ 227 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env, \ 228 uint32_t desc) \ 229 { \ 230 const uint8_t erm = extract32(simd_data(desc), 4, 4); \ 231 const bool se = extract32(simd_data(desc), 3, 1); \ 232 const bool XxC = extract32(simd_data(desc), 2, 1); \ 233 \ 234 vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC()); \ 235 } 236 237 #define DEF_GVEC_VOP2_32(NAME) \ 238 DEF_GVEC_VOP2_FN(NAME, NAME##32, 32) 239 240 #define DEF_GVEC_VOP2_64(NAME) \ 241 DEF_GVEC_VOP2_FN(NAME, NAME##64, 64) 242 243 #define DEF_GVEC_VOP2(NAME, OP) \ 244 DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32) \ 245 DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64) \ 246 DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128) 247 248 DEF_GVEC_VOP2_32(vcdg) 249 DEF_GVEC_VOP2_32(vcdlg) 250 DEF_GVEC_VOP2_32(vcgd) 251 DEF_GVEC_VOP2_32(vclgd) 252 DEF_GVEC_VOP2_64(vcdg) 253 DEF_GVEC_VOP2_64(vcdlg) 254 DEF_GVEC_VOP2_64(vcgd) 255 DEF_GVEC_VOP2_64(vclgd) 256 DEF_GVEC_VOP2(vfi, round_to_int) 257 DEF_GVEC_VOP2(vfsq, sqrt) 258 259 typedef float32 (*vop32_3_fn)(float32 a, float32 b, float_status *s); 260 static void vop32_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 261 CPUS390XState *env, bool s, vop32_3_fn fn, 262 uintptr_t retaddr) 263 { 264 uint8_t vxc, vec_exc = 0; 265 S390Vector tmp = {}; 266 int i; 267 268 for (i = 0; i < 4; i++) { 269 const float32 a = s390_vec_read_float32(v2, i); 270 const float32 b = s390_vec_read_float32(v3, i); 271 272 s390_vec_write_float32(&tmp, i, fn(a, b, &env->fpu_status)); 273 vxc = check_ieee_exc(env, i, false, &vec_exc); 274 if (s || vxc) { 275 break; 276 } 277 } 278 handle_ieee_exc(env, vxc, vec_exc, retaddr); 279 *v1 = tmp; 280 } 281 282 typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s); 283 static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 284 CPUS390XState *env, bool s, vop64_3_fn fn, 285 uintptr_t retaddr) 286 { 287 uint8_t vxc, vec_exc = 0; 288 S390Vector tmp = {}; 289 int i; 290 291 for (i = 0; i < 2; i++) { 292 const float64 a = s390_vec_read_float64(v2, i); 293 const float64 b = s390_vec_read_float64(v3, i); 294 295 s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status)); 296 vxc = check_ieee_exc(env, i, false, &vec_exc); 297 if (s || vxc) { 298 break; 299 } 300 } 301 handle_ieee_exc(env, vxc, vec_exc, retaddr); 302 *v1 = tmp; 303 } 304 305 typedef float128 (*vop128_3_fn)(float128 a, float128 b, float_status *s); 306 static void vop128_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 307 CPUS390XState *env, bool s, vop128_3_fn fn, 308 uintptr_t retaddr) 309 { 310 const float128 a = s390_vec_read_float128(v2); 311 const float128 b = s390_vec_read_float128(v3); 312 uint8_t vxc, vec_exc = 0; 313 S390Vector tmp = {}; 314 315 s390_vec_write_float128(&tmp, fn(a, b, &env->fpu_status)); 316 vxc = check_ieee_exc(env, 0, false, &vec_exc); 317 handle_ieee_exc(env, vxc, vec_exc, retaddr); 318 *v1 = tmp; 319 } 320 321 #define DEF_GVEC_VOP3_B(NAME, OP, BITS) \ 322 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ 323 CPUS390XState *env, uint32_t desc) \ 324 { \ 325 const bool se = extract32(simd_data(desc), 3, 1); \ 326 \ 327 vop##BITS##_3(v1, v2, v3, env, se, float##BITS##_##OP, GETPC()); \ 328 } 329 330 #define DEF_GVEC_VOP3(NAME, OP) \ 331 DEF_GVEC_VOP3_B(NAME, OP, 32) \ 332 DEF_GVEC_VOP3_B(NAME, OP, 64) \ 333 DEF_GVEC_VOP3_B(NAME, OP, 128) 334 335 DEF_GVEC_VOP3(vfa, add) 336 DEF_GVEC_VOP3(vfs, sub) 337 DEF_GVEC_VOP3(vfd, div) 338 DEF_GVEC_VOP3(vfm, mul) 339 340 static int wfc32(const S390Vector *v1, const S390Vector *v2, 341 CPUS390XState *env, bool signal, uintptr_t retaddr) 342 { 343 /* only the zero-indexed elements are compared */ 344 const float32 a = s390_vec_read_float32(v1, 0); 345 const float32 b = s390_vec_read_float32(v2, 0); 346 uint8_t vxc, vec_exc = 0; 347 int cmp; 348 349 if (signal) { 350 cmp = float32_compare(a, b, &env->fpu_status); 351 } else { 352 cmp = float32_compare_quiet(a, b, &env->fpu_status); 353 } 354 vxc = check_ieee_exc(env, 0, false, &vec_exc); 355 handle_ieee_exc(env, vxc, vec_exc, retaddr); 356 357 return float_comp_to_cc(env, cmp); 358 } 359 360 static int wfc64(const S390Vector *v1, const S390Vector *v2, 361 CPUS390XState *env, bool signal, uintptr_t retaddr) 362 { 363 /* only the zero-indexed elements are compared */ 364 const float64 a = s390_vec_read_float64(v1, 0); 365 const float64 b = s390_vec_read_float64(v2, 0); 366 uint8_t vxc, vec_exc = 0; 367 int cmp; 368 369 if (signal) { 370 cmp = float64_compare(a, b, &env->fpu_status); 371 } else { 372 cmp = float64_compare_quiet(a, b, &env->fpu_status); 373 } 374 vxc = check_ieee_exc(env, 0, false, &vec_exc); 375 handle_ieee_exc(env, vxc, vec_exc, retaddr); 376 377 return float_comp_to_cc(env, cmp); 378 } 379 380 static int wfc128(const S390Vector *v1, const S390Vector *v2, 381 CPUS390XState *env, bool signal, uintptr_t retaddr) 382 { 383 /* only the zero-indexed elements are compared */ 384 const float128 a = s390_vec_read_float128(v1); 385 const float128 b = s390_vec_read_float128(v2); 386 uint8_t vxc, vec_exc = 0; 387 int cmp; 388 389 if (signal) { 390 cmp = float128_compare(a, b, &env->fpu_status); 391 } else { 392 cmp = float128_compare_quiet(a, b, &env->fpu_status); 393 } 394 vxc = check_ieee_exc(env, 0, false, &vec_exc); 395 handle_ieee_exc(env, vxc, vec_exc, retaddr); 396 397 return float_comp_to_cc(env, cmp); 398 } 399 400 #define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS) \ 401 void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2, \ 402 CPUS390XState *env, uint32_t desc) \ 403 { \ 404 env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC()); \ 405 } 406 407 #define DEF_GVEC_WFC(NAME, SIGNAL) \ 408 DEF_GVEC_WFC_B(NAME, SIGNAL, 32) \ 409 DEF_GVEC_WFC_B(NAME, SIGNAL, 64) \ 410 DEF_GVEC_WFC_B(NAME, SIGNAL, 128) 411 412 DEF_GVEC_WFC(wfc, false) 413 DEF_GVEC_WFC(wfk, true) 414 415 typedef bool (*vfc32_fn)(float32 a, float32 b, float_status *status); 416 static int vfc32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 417 CPUS390XState *env, bool s, vfc32_fn fn, uintptr_t retaddr) 418 { 419 uint8_t vxc, vec_exc = 0; 420 S390Vector tmp = {}; 421 int match = 0; 422 int i; 423 424 for (i = 0; i < 4; i++) { 425 const float32 a = s390_vec_read_float32(v2, i); 426 const float32 b = s390_vec_read_float32(v3, i); 427 428 /* swap the order of the parameters, so we can use existing functions */ 429 if (fn(b, a, &env->fpu_status)) { 430 match++; 431 s390_vec_write_element32(&tmp, i, -1u); 432 } 433 vxc = check_ieee_exc(env, i, false, &vec_exc); 434 if (s || vxc) { 435 break; 436 } 437 } 438 439 handle_ieee_exc(env, vxc, vec_exc, retaddr); 440 *v1 = tmp; 441 if (match) { 442 return s || match == 4 ? 0 : 1; 443 } 444 return 3; 445 } 446 447 typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status); 448 static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 449 CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr) 450 { 451 uint8_t vxc, vec_exc = 0; 452 S390Vector tmp = {}; 453 int match = 0; 454 int i; 455 456 for (i = 0; i < 2; i++) { 457 const float64 a = s390_vec_read_float64(v2, i); 458 const float64 b = s390_vec_read_float64(v3, i); 459 460 /* swap the order of the parameters, so we can use existing functions */ 461 if (fn(b, a, &env->fpu_status)) { 462 match++; 463 s390_vec_write_element64(&tmp, i, -1ull); 464 } 465 vxc = check_ieee_exc(env, i, false, &vec_exc); 466 if (s || vxc) { 467 break; 468 } 469 } 470 471 handle_ieee_exc(env, vxc, vec_exc, retaddr); 472 *v1 = tmp; 473 if (match) { 474 return s || match == 2 ? 0 : 1; 475 } 476 return 3; 477 } 478 479 typedef bool (*vfc128_fn)(float128 a, float128 b, float_status *status); 480 static int vfc128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 481 CPUS390XState *env, bool s, vfc128_fn fn, uintptr_t retaddr) 482 { 483 const float128 a = s390_vec_read_float128(v2); 484 const float128 b = s390_vec_read_float128(v3); 485 uint8_t vxc, vec_exc = 0; 486 S390Vector tmp = {}; 487 bool match = false; 488 489 /* swap the order of the parameters, so we can use existing functions */ 490 if (fn(b, a, &env->fpu_status)) { 491 match = true; 492 s390_vec_write_element64(&tmp, 0, -1ull); 493 s390_vec_write_element64(&tmp, 1, -1ull); 494 } 495 vxc = check_ieee_exc(env, 0, false, &vec_exc); 496 handle_ieee_exc(env, vxc, vec_exc, retaddr); 497 *v1 = tmp; 498 return match ? 0 : 3; 499 } 500 501 #define DEF_GVEC_VFC_B(NAME, OP, BITS) \ 502 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ 503 CPUS390XState *env, uint32_t desc) \ 504 { \ 505 const bool se = extract32(simd_data(desc), 3, 1); \ 506 const bool sq = extract32(simd_data(desc), 2, 1); \ 507 vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet; \ 508 \ 509 vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \ 510 } \ 511 \ 512 void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3, \ 513 CPUS390XState *env, uint32_t desc) \ 514 { \ 515 const bool se = extract32(simd_data(desc), 3, 1); \ 516 const bool sq = extract32(simd_data(desc), 2, 1); \ 517 vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet; \ 518 \ 519 env->cc_op = vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \ 520 } 521 522 #define DEF_GVEC_VFC(NAME, OP) \ 523 DEF_GVEC_VFC_B(NAME, OP, 32) \ 524 DEF_GVEC_VFC_B(NAME, OP, 64) \ 525 DEF_GVEC_VFC_B(NAME, OP, 128) \ 526 527 DEF_GVEC_VFC(vfce, eq) 528 DEF_GVEC_VFC(vfch, lt) 529 DEF_GVEC_VFC(vfche, le) 530 531 void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env, 532 uint32_t desc) 533 { 534 const bool s = extract32(simd_data(desc), 3, 1); 535 uint8_t vxc, vec_exc = 0; 536 S390Vector tmp = {}; 537 int i; 538 539 for (i = 0; i < 2; i++) { 540 /* load from even element */ 541 const float32 a = s390_vec_read_element32(v2, i * 2); 542 const uint64_t ret = float32_to_float64(a, &env->fpu_status); 543 544 s390_vec_write_element64(&tmp, i, ret); 545 /* indicate the source element */ 546 vxc = check_ieee_exc(env, i * 2, false, &vec_exc); 547 if (s || vxc) { 548 break; 549 } 550 } 551 handle_ieee_exc(env, vxc, vec_exc, GETPC()); 552 *(S390Vector *)v1 = tmp; 553 } 554 555 void HELPER(gvec_vfll64)(void *v1, const void *v2, CPUS390XState *env, 556 uint32_t desc) 557 { 558 /* load from even element */ 559 const float128 ret = float64_to_float128(s390_vec_read_float64(v2, 0), 560 &env->fpu_status); 561 uint8_t vxc, vec_exc = 0; 562 563 vxc = check_ieee_exc(env, 0, false, &vec_exc); 564 handle_ieee_exc(env, vxc, vec_exc, GETPC()); 565 s390_vec_write_float128(v1, ret); 566 } 567 568 void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env, 569 uint32_t desc) 570 { 571 const uint8_t erm = extract32(simd_data(desc), 4, 4); 572 const bool s = extract32(simd_data(desc), 3, 1); 573 const bool XxC = extract32(simd_data(desc), 2, 1); 574 uint8_t vxc, vec_exc = 0; 575 S390Vector tmp = {}; 576 int i, old_mode; 577 578 old_mode = s390_swap_bfp_rounding_mode(env, erm); 579 for (i = 0; i < 2; i++) { 580 float64 a = s390_vec_read_element64(v2, i); 581 uint32_t ret = float64_to_float32(a, &env->fpu_status); 582 583 /* place at even element */ 584 s390_vec_write_element32(&tmp, i * 2, ret); 585 /* indicate the source element */ 586 vxc = check_ieee_exc(env, i, XxC, &vec_exc); 587 if (s || vxc) { 588 break; 589 } 590 } 591 s390_restore_bfp_rounding_mode(env, old_mode); 592 handle_ieee_exc(env, vxc, vec_exc, GETPC()); 593 *(S390Vector *)v1 = tmp; 594 } 595 596 void HELPER(gvec_vflr128)(void *v1, const void *v2, CPUS390XState *env, 597 uint32_t desc) 598 { 599 const uint8_t erm = extract32(simd_data(desc), 4, 4); 600 const bool XxC = extract32(simd_data(desc), 2, 1); 601 uint8_t vxc, vec_exc = 0; 602 int old_mode; 603 float64 ret; 604 605 old_mode = s390_swap_bfp_rounding_mode(env, erm); 606 ret = float128_to_float64(s390_vec_read_float128(v2), &env->fpu_status); 607 vxc = check_ieee_exc(env, 0, XxC, &vec_exc); 608 s390_restore_bfp_rounding_mode(env, old_mode); 609 handle_ieee_exc(env, vxc, vec_exc, GETPC()); 610 611 /* place at even element, odd element is unpredictable */ 612 s390_vec_write_float64(v1, 0, ret); 613 } 614 615 static void vfma32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 616 const S390Vector *v4, CPUS390XState *env, bool s, int flags, 617 uintptr_t retaddr) 618 { 619 uint8_t vxc, vec_exc = 0; 620 S390Vector tmp = {}; 621 int i; 622 623 for (i = 0; i < 4; i++) { 624 const float32 a = s390_vec_read_float32(v2, i); 625 const float32 b = s390_vec_read_float32(v3, i); 626 const float32 c = s390_vec_read_float32(v4, i); 627 float32 ret = float32_muladd(a, b, c, flags, &env->fpu_status); 628 629 s390_vec_write_float32(&tmp, i, ret); 630 vxc = check_ieee_exc(env, i, false, &vec_exc); 631 if (s || vxc) { 632 break; 633 } 634 } 635 handle_ieee_exc(env, vxc, vec_exc, retaddr); 636 *v1 = tmp; 637 } 638 639 static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 640 const S390Vector *v4, CPUS390XState *env, bool s, int flags, 641 uintptr_t retaddr) 642 { 643 uint8_t vxc, vec_exc = 0; 644 S390Vector tmp = {}; 645 int i; 646 647 for (i = 0; i < 2; i++) { 648 const float64 a = s390_vec_read_float64(v2, i); 649 const float64 b = s390_vec_read_float64(v3, i); 650 const float64 c = s390_vec_read_float64(v4, i); 651 const float64 ret = float64_muladd(a, b, c, flags, &env->fpu_status); 652 653 s390_vec_write_float64(&tmp, i, ret); 654 vxc = check_ieee_exc(env, i, false, &vec_exc); 655 if (s || vxc) { 656 break; 657 } 658 } 659 handle_ieee_exc(env, vxc, vec_exc, retaddr); 660 *v1 = tmp; 661 } 662 663 static void vfma128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 664 const S390Vector *v4, CPUS390XState *env, bool s, int flags, 665 uintptr_t retaddr) 666 { 667 const float128 a = s390_vec_read_float128(v2); 668 const float128 b = s390_vec_read_float128(v3); 669 const float128 c = s390_vec_read_float128(v4); 670 uint8_t vxc, vec_exc = 0; 671 float128 ret; 672 673 ret = float128_muladd(a, b, c, flags, &env->fpu_status); 674 vxc = check_ieee_exc(env, 0, false, &vec_exc); 675 handle_ieee_exc(env, vxc, vec_exc, retaddr); 676 s390_vec_write_float128(v1, ret); 677 } 678 679 #define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS) \ 680 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ 681 const void *v4, CPUS390XState *env, \ 682 uint32_t desc) \ 683 { \ 684 const bool se = extract32(simd_data(desc), 3, 1); \ 685 \ 686 vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC()); \ 687 } 688 689 #define DEF_GVEC_VFMA(NAME, FLAGS) \ 690 DEF_GVEC_VFMA_B(NAME, FLAGS, 32) \ 691 DEF_GVEC_VFMA_B(NAME, FLAGS, 64) \ 692 DEF_GVEC_VFMA_B(NAME, FLAGS, 128) 693 694 DEF_GVEC_VFMA(vfma, 0) 695 DEF_GVEC_VFMA(vfms, float_muladd_negate_c) 696 DEF_GVEC_VFMA(vfnma, float_muladd_negate_result) 697 DEF_GVEC_VFMA(vfnms, float_muladd_negate_c | float_muladd_negate_result) 698 699 void HELPER(gvec_vftci32)(void *v1, const void *v2, CPUS390XState *env, 700 uint32_t desc) 701 { 702 uint16_t i3 = extract32(simd_data(desc), 4, 12); 703 bool s = extract32(simd_data(desc), 3, 1); 704 int i, match = 0; 705 706 for (i = 0; i < 4; i++) { 707 float32 a = s390_vec_read_float32(v2, i); 708 709 if (float32_dcmask(env, a) & i3) { 710 match++; 711 s390_vec_write_element32(v1, i, -1u); 712 } else { 713 s390_vec_write_element32(v1, i, 0); 714 } 715 if (s) { 716 break; 717 } 718 } 719 720 if (match == 4 || (s && match)) { 721 env->cc_op = 0; 722 } else if (match) { 723 env->cc_op = 1; 724 } else { 725 env->cc_op = 3; 726 } 727 } 728 729 void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env, 730 uint32_t desc) 731 { 732 const uint16_t i3 = extract32(simd_data(desc), 4, 12); 733 const bool s = extract32(simd_data(desc), 3, 1); 734 int i, match = 0; 735 736 for (i = 0; i < 2; i++) { 737 const float64 a = s390_vec_read_float64(v2, i); 738 739 if (float64_dcmask(env, a) & i3) { 740 match++; 741 s390_vec_write_element64(v1, i, -1ull); 742 } else { 743 s390_vec_write_element64(v1, i, 0); 744 } 745 if (s) { 746 break; 747 } 748 } 749 750 if (match == 2 || (s && match)) { 751 env->cc_op = 0; 752 } else if (match) { 753 env->cc_op = 1; 754 } else { 755 env->cc_op = 3; 756 } 757 } 758 759 void HELPER(gvec_vftci128)(void *v1, const void *v2, CPUS390XState *env, 760 uint32_t desc) 761 { 762 const float128 a = s390_vec_read_float128(v2); 763 uint16_t i3 = extract32(simd_data(desc), 4, 12); 764 765 if (float128_dcmask(env, a) & i3) { 766 env->cc_op = 0; 767 s390_vec_write_element64(v1, 0, -1ull); 768 s390_vec_write_element64(v1, 1, -1ull); 769 } else { 770 env->cc_op = 3; 771 s390_vec_write_element64(v1, 0, 0); 772 s390_vec_write_element64(v1, 1, 0); 773 } 774 } 775 776 typedef enum S390MinMaxType { 777 S390_MINMAX_TYPE_IEEE = 0, 778 S390_MINMAX_TYPE_JAVA, 779 S390_MINMAX_TYPE_C_MACRO, 780 S390_MINMAX_TYPE_CPP, 781 S390_MINMAX_TYPE_F, 782 } S390MinMaxType; 783 784 typedef enum S390MinMaxRes { 785 S390_MINMAX_RES_MINMAX = 0, 786 S390_MINMAX_RES_A, 787 S390_MINMAX_RES_B, 788 S390_MINMAX_RES_SILENCE_A, 789 S390_MINMAX_RES_SILENCE_B, 790 } S390MinMaxRes; 791 792 static S390MinMaxRes vfmin_res(uint16_t dcmask_a, uint16_t dcmask_b, 793 S390MinMaxType type, float_status *s) 794 { 795 const bool neg_a = dcmask_a & DCMASK_NEGATIVE; 796 const bool nan_a = dcmask_a & DCMASK_NAN; 797 const bool nan_b = dcmask_b & DCMASK_NAN; 798 799 g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F); 800 801 if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) { 802 const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN; 803 const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN; 804 805 if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) { 806 s->float_exception_flags |= float_flag_invalid; 807 } 808 switch (type) { 809 case S390_MINMAX_TYPE_JAVA: 810 if (sig_a) { 811 return S390_MINMAX_RES_SILENCE_A; 812 } else if (sig_b) { 813 return S390_MINMAX_RES_SILENCE_B; 814 } 815 return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; 816 case S390_MINMAX_TYPE_F: 817 return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; 818 case S390_MINMAX_TYPE_C_MACRO: 819 s->float_exception_flags |= float_flag_invalid; 820 return S390_MINMAX_RES_B; 821 case S390_MINMAX_TYPE_CPP: 822 s->float_exception_flags |= float_flag_invalid; 823 return S390_MINMAX_RES_A; 824 default: 825 g_assert_not_reached(); 826 } 827 } else if (unlikely(dcmask_a & dcmask_b & DCMASK_ZERO)) { 828 switch (type) { 829 case S390_MINMAX_TYPE_JAVA: 830 return neg_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; 831 case S390_MINMAX_TYPE_C_MACRO: 832 return S390_MINMAX_RES_B; 833 case S390_MINMAX_TYPE_F: 834 return !neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A; 835 case S390_MINMAX_TYPE_CPP: 836 return S390_MINMAX_RES_A; 837 default: 838 g_assert_not_reached(); 839 } 840 } 841 return S390_MINMAX_RES_MINMAX; 842 } 843 844 static S390MinMaxRes vfmax_res(uint16_t dcmask_a, uint16_t dcmask_b, 845 S390MinMaxType type, float_status *s) 846 { 847 g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F); 848 849 if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) { 850 const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN; 851 const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN; 852 const bool nan_a = dcmask_a & DCMASK_NAN; 853 const bool nan_b = dcmask_b & DCMASK_NAN; 854 855 if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) { 856 s->float_exception_flags |= float_flag_invalid; 857 } 858 switch (type) { 859 case S390_MINMAX_TYPE_JAVA: 860 if (sig_a) { 861 return S390_MINMAX_RES_SILENCE_A; 862 } else if (sig_b) { 863 return S390_MINMAX_RES_SILENCE_B; 864 } 865 return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; 866 case S390_MINMAX_TYPE_F: 867 return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; 868 case S390_MINMAX_TYPE_C_MACRO: 869 s->float_exception_flags |= float_flag_invalid; 870 return S390_MINMAX_RES_B; 871 case S390_MINMAX_TYPE_CPP: 872 s->float_exception_flags |= float_flag_invalid; 873 return S390_MINMAX_RES_A; 874 default: 875 g_assert_not_reached(); 876 } 877 } else if (unlikely(dcmask_a & dcmask_b & DCMASK_ZERO)) { 878 const bool neg_a = dcmask_a & DCMASK_NEGATIVE; 879 880 switch (type) { 881 case S390_MINMAX_TYPE_JAVA: 882 case S390_MINMAX_TYPE_F: 883 return neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A; 884 case S390_MINMAX_TYPE_C_MACRO: 885 return S390_MINMAX_RES_B; 886 case S390_MINMAX_TYPE_CPP: 887 return S390_MINMAX_RES_A; 888 default: 889 g_assert_not_reached(); 890 } 891 } 892 return S390_MINMAX_RES_MINMAX; 893 } 894 895 static S390MinMaxRes vfminmax_res(uint16_t dcmask_a, uint16_t dcmask_b, 896 S390MinMaxType type, bool is_min, 897 float_status *s) 898 { 899 return is_min ? vfmin_res(dcmask_a, dcmask_b, type, s) : 900 vfmax_res(dcmask_a, dcmask_b, type, s); 901 } 902 903 static void vfminmax32(S390Vector *v1, const S390Vector *v2, 904 const S390Vector *v3, CPUS390XState *env, 905 S390MinMaxType type, bool is_min, bool is_abs, bool se, 906 uintptr_t retaddr) 907 { 908 float_status *s = &env->fpu_status; 909 uint8_t vxc, vec_exc = 0; 910 S390Vector tmp = {}; 911 int i; 912 913 for (i = 0; i < 4; i++) { 914 float32 a = s390_vec_read_float32(v2, i); 915 float32 b = s390_vec_read_float32(v3, i); 916 float32 result; 917 918 if (type != S390_MINMAX_TYPE_IEEE) { 919 S390MinMaxRes res; 920 921 if (is_abs) { 922 a = float32_abs(a); 923 b = float32_abs(b); 924 } 925 926 res = vfminmax_res(float32_dcmask(env, a), float32_dcmask(env, b), 927 type, is_min, s); 928 switch (res) { 929 case S390_MINMAX_RES_MINMAX: 930 result = is_min ? float32_min(a, b, s) : float32_max(a, b, s); 931 break; 932 case S390_MINMAX_RES_A: 933 result = a; 934 break; 935 case S390_MINMAX_RES_B: 936 result = b; 937 break; 938 case S390_MINMAX_RES_SILENCE_A: 939 result = float32_silence_nan(a, s); 940 break; 941 case S390_MINMAX_RES_SILENCE_B: 942 result = float32_silence_nan(b, s); 943 break; 944 default: 945 g_assert_not_reached(); 946 } 947 } else if (!is_abs) { 948 result = is_min ? float32_minnum(a, b, &env->fpu_status) : 949 float32_maxnum(a, b, &env->fpu_status); 950 } else { 951 result = is_min ? float32_minnummag(a, b, &env->fpu_status) : 952 float32_maxnummag(a, b, &env->fpu_status); 953 } 954 955 s390_vec_write_float32(&tmp, i, result); 956 vxc = check_ieee_exc(env, i, false, &vec_exc); 957 if (se || vxc) { 958 break; 959 } 960 } 961 handle_ieee_exc(env, vxc, vec_exc, retaddr); 962 *v1 = tmp; 963 } 964 965 static void vfminmax64(S390Vector *v1, const S390Vector *v2, 966 const S390Vector *v3, CPUS390XState *env, 967 S390MinMaxType type, bool is_min, bool is_abs, bool se, 968 uintptr_t retaddr) 969 { 970 float_status *s = &env->fpu_status; 971 uint8_t vxc, vec_exc = 0; 972 S390Vector tmp = {}; 973 int i; 974 975 for (i = 0; i < 2; i++) { 976 float64 a = s390_vec_read_float64(v2, i); 977 float64 b = s390_vec_read_float64(v3, i); 978 float64 result; 979 980 if (type != S390_MINMAX_TYPE_IEEE) { 981 S390MinMaxRes res; 982 983 if (is_abs) { 984 a = float64_abs(a); 985 b = float64_abs(b); 986 } 987 988 res = vfminmax_res(float64_dcmask(env, a), float64_dcmask(env, b), 989 type, is_min, s); 990 switch (res) { 991 case S390_MINMAX_RES_MINMAX: 992 result = is_min ? float64_min(a, b, s) : float64_max(a, b, s); 993 break; 994 case S390_MINMAX_RES_A: 995 result = a; 996 break; 997 case S390_MINMAX_RES_B: 998 result = b; 999 break; 1000 case S390_MINMAX_RES_SILENCE_A: 1001 result = float64_silence_nan(a, s); 1002 break; 1003 case S390_MINMAX_RES_SILENCE_B: 1004 result = float64_silence_nan(b, s); 1005 break; 1006 default: 1007 g_assert_not_reached(); 1008 } 1009 } else if (!is_abs) { 1010 result = is_min ? float64_minnum(a, b, &env->fpu_status) : 1011 float64_maxnum(a, b, &env->fpu_status); 1012 } else { 1013 result = is_min ? float64_minnummag(a, b, &env->fpu_status) : 1014 float64_maxnummag(a, b, &env->fpu_status); 1015 } 1016 1017 s390_vec_write_float64(&tmp, i, result); 1018 vxc = check_ieee_exc(env, i, false, &vec_exc); 1019 if (se || vxc) { 1020 break; 1021 } 1022 } 1023 handle_ieee_exc(env, vxc, vec_exc, retaddr); 1024 *v1 = tmp; 1025 } 1026 1027 static void vfminmax128(S390Vector *v1, const S390Vector *v2, 1028 const S390Vector *v3, CPUS390XState *env, 1029 S390MinMaxType type, bool is_min, bool is_abs, bool se, 1030 uintptr_t retaddr) 1031 { 1032 float128 a = s390_vec_read_float128(v2); 1033 float128 b = s390_vec_read_float128(v3); 1034 float_status *s = &env->fpu_status; 1035 uint8_t vxc, vec_exc = 0; 1036 float128 result; 1037 1038 if (type != S390_MINMAX_TYPE_IEEE) { 1039 S390MinMaxRes res; 1040 1041 if (is_abs) { 1042 a = float128_abs(a); 1043 b = float128_abs(b); 1044 } 1045 1046 res = vfminmax_res(float128_dcmask(env, a), float128_dcmask(env, b), 1047 type, is_min, s); 1048 switch (res) { 1049 case S390_MINMAX_RES_MINMAX: 1050 result = is_min ? float128_min(a, b, s) : float128_max(a, b, s); 1051 break; 1052 case S390_MINMAX_RES_A: 1053 result = a; 1054 break; 1055 case S390_MINMAX_RES_B: 1056 result = b; 1057 break; 1058 case S390_MINMAX_RES_SILENCE_A: 1059 result = float128_silence_nan(a, s); 1060 break; 1061 case S390_MINMAX_RES_SILENCE_B: 1062 result = float128_silence_nan(b, s); 1063 break; 1064 default: 1065 g_assert_not_reached(); 1066 } 1067 } else if (!is_abs) { 1068 result = is_min ? float128_minnum(a, b, &env->fpu_status) : 1069 float128_maxnum(a, b, &env->fpu_status); 1070 } else { 1071 result = is_min ? float128_minnummag(a, b, &env->fpu_status) : 1072 float128_maxnummag(a, b, &env->fpu_status); 1073 } 1074 1075 vxc = check_ieee_exc(env, 0, false, &vec_exc); 1076 handle_ieee_exc(env, vxc, vec_exc, retaddr); 1077 s390_vec_write_float128(v1, result); 1078 } 1079 1080 #define DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, BITS) \ 1081 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ 1082 CPUS390XState *env, uint32_t desc) \ 1083 { \ 1084 const bool se = extract32(simd_data(desc), 3, 1); \ 1085 uint8_t type = extract32(simd_data(desc), 4, 4); \ 1086 bool is_abs = false; \ 1087 \ 1088 if (type >= 8) { \ 1089 is_abs = true; \ 1090 type -= 8; \ 1091 } \ 1092 \ 1093 vfminmax##BITS(v1, v2, v3, env, type, IS_MIN, is_abs, se, GETPC()); \ 1094 } 1095 1096 #define DEF_GVEC_VFMINMAX(NAME, IS_MIN) \ 1097 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 32) \ 1098 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 64) \ 1099 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 128) 1100 1101 DEF_GVEC_VFMINMAX(vfmax, false) 1102 DEF_GVEC_VFMINMAX(vfmin, true) 1103