1 /* 2 * QEMU TCG support -- s390x vector integer instruction support 3 * 4 * Copyright (C) 2019 Red Hat Inc 5 * 6 * Authors: 7 * David Hildenbrand <david@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 #include "qemu/osdep.h" 13 #include "qemu-common.h" 14 #include "cpu.h" 15 #include "vec.h" 16 #include "exec/helper-proto.h" 17 #include "tcg/tcg-gvec-desc.h" 18 19 static bool s390_vec_is_zero(const S390Vector *v) 20 { 21 return !v->doubleword[0] && !v->doubleword[1]; 22 } 23 24 static void s390_vec_xor(S390Vector *res, const S390Vector *a, 25 const S390Vector *b) 26 { 27 res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0]; 28 res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1]; 29 } 30 31 static void s390_vec_and(S390Vector *res, const S390Vector *a, 32 const S390Vector *b) 33 { 34 res->doubleword[0] = a->doubleword[0] & b->doubleword[0]; 35 res->doubleword[1] = a->doubleword[1] & b->doubleword[1]; 36 } 37 38 static bool s390_vec_equal(const S390Vector *a, const S390Vector *b) 39 { 40 return a->doubleword[0] == b->doubleword[0] && 41 a->doubleword[1] == b->doubleword[1]; 42 } 43 44 static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count) 45 { 46 uint64_t tmp; 47 48 g_assert(count < 128); 49 if (count == 0) { 50 d->doubleword[0] = a->doubleword[0]; 51 d->doubleword[1] = a->doubleword[1]; 52 } else if (count == 64) { 53 d->doubleword[0] = a->doubleword[1]; 54 d->doubleword[1] = 0; 55 } else if (count < 64) { 56 tmp = extract64(a->doubleword[1], 64 - count, count); 57 d->doubleword[1] = a->doubleword[1] << count; 58 d->doubleword[0] = (a->doubleword[0] << count) | tmp; 59 } else { 60 d->doubleword[0] = a->doubleword[1] << (count - 64); 61 d->doubleword[1] = 0; 62 } 63 } 64 65 static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count) 66 { 67 uint64_t tmp; 68 69 if (count == 0) { 70 d->doubleword[0] = a->doubleword[0]; 71 d->doubleword[1] = a->doubleword[1]; 72 } else if (count == 64) { 73 tmp = (int64_t)a->doubleword[0] >> 63; 74 d->doubleword[1] = a->doubleword[0]; 75 d->doubleword[0] = tmp; 76 } else if (count < 64) { 77 tmp = a->doubleword[1] >> count; 78 d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 79 d->doubleword[0] = (int64_t)a->doubleword[0] >> count; 80 } else { 81 tmp = (int64_t)a->doubleword[0] >> 63; 82 d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64); 83 d->doubleword[0] = tmp; 84 } 85 } 86 87 static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count) 88 { 89 uint64_t tmp; 90 91 g_assert(count < 128); 92 if (count == 0) { 93 d->doubleword[0] = a->doubleword[0]; 94 d->doubleword[1] = a->doubleword[1]; 95 } else if (count == 64) { 96 d->doubleword[1] = a->doubleword[0]; 97 d->doubleword[0] = 0; 98 } else if (count < 64) { 99 tmp = a->doubleword[1] >> count; 100 d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 101 d->doubleword[0] = a->doubleword[0] >> count; 102 } else { 103 d->doubleword[1] = a->doubleword[0] >> (count - 64); 104 d->doubleword[0] = 0; 105 } 106 } 107 #define DEF_VAVG(BITS) \ 108 void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3, \ 109 uint32_t desc) \ 110 { \ 111 int i; \ 112 \ 113 for (i = 0; i < (128 / BITS); i++) { \ 114 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 115 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 116 \ 117 s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 118 } \ 119 } 120 DEF_VAVG(8) 121 DEF_VAVG(16) 122 123 #define DEF_VAVGL(BITS) \ 124 void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3, \ 125 uint32_t desc) \ 126 { \ 127 int i; \ 128 \ 129 for (i = 0; i < (128 / BITS); i++) { \ 130 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 131 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 132 \ 133 s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 134 } \ 135 } 136 DEF_VAVGL(8) 137 DEF_VAVGL(16) 138 139 #define DEF_VCLZ(BITS) \ 140 void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc) \ 141 { \ 142 int i; \ 143 \ 144 for (i = 0; i < (128 / BITS); i++) { \ 145 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 146 \ 147 s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS); \ 148 } \ 149 } 150 DEF_VCLZ(8) 151 DEF_VCLZ(16) 152 153 #define DEF_VCTZ(BITS) \ 154 void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc) \ 155 { \ 156 int i; \ 157 \ 158 for (i = 0; i < (128 / BITS); i++) { \ 159 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 160 \ 161 s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS); \ 162 } \ 163 } 164 DEF_VCTZ(8) 165 DEF_VCTZ(16) 166 167 /* like binary multiplication, but XOR instead of addition */ 168 #define DEF_GALOIS_MULTIPLY(BITS, TBITS) \ 169 static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a, \ 170 uint##TBITS##_t b) \ 171 { \ 172 uint##TBITS##_t res = 0; \ 173 \ 174 while (b) { \ 175 if (b & 0x1) { \ 176 res = res ^ a; \ 177 } \ 178 a = a << 1; \ 179 b = b >> 1; \ 180 } \ 181 return res; \ 182 } 183 DEF_GALOIS_MULTIPLY(8, 16) 184 DEF_GALOIS_MULTIPLY(16, 32) 185 DEF_GALOIS_MULTIPLY(32, 64) 186 187 static S390Vector galois_multiply64(uint64_t a, uint64_t b) 188 { 189 S390Vector res = {}; 190 S390Vector va = { 191 .doubleword[1] = a, 192 }; 193 S390Vector vb = { 194 .doubleword[1] = b, 195 }; 196 197 while (!s390_vec_is_zero(&vb)) { 198 if (vb.doubleword[1] & 0x1) { 199 s390_vec_xor(&res, &res, &va); 200 } 201 s390_vec_shl(&va, &va, 1); 202 s390_vec_shr(&vb, &vb, 1); 203 } 204 return res; 205 } 206 207 #define DEF_VGFM(BITS, TBITS) \ 208 void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3, \ 209 uint32_t desc) \ 210 { \ 211 int i; \ 212 \ 213 for (i = 0; i < (128 / TBITS); i++) { \ 214 uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \ 215 uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \ 216 uint##TBITS##_t d = galois_multiply##BITS(a, b); \ 217 \ 218 a = s390_vec_read_element##BITS(v2, i * 2 + 1); \ 219 b = s390_vec_read_element##BITS(v3, i * 2 + 1); \ 220 d = d ^ galois_multiply32(a, b); \ 221 s390_vec_write_element##TBITS(v1, i, d); \ 222 } \ 223 } 224 DEF_VGFM(8, 16) 225 DEF_VGFM(16, 32) 226 DEF_VGFM(32, 64) 227 228 void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3, 229 uint32_t desc) 230 { 231 S390Vector tmp1, tmp2; 232 uint64_t a, b; 233 234 a = s390_vec_read_element64(v2, 0); 235 b = s390_vec_read_element64(v3, 0); 236 tmp1 = galois_multiply64(a, b); 237 a = s390_vec_read_element64(v2, 1); 238 b = s390_vec_read_element64(v3, 1); 239 tmp2 = galois_multiply64(a, b); 240 s390_vec_xor(v1, &tmp1, &tmp2); 241 } 242 243 #define DEF_VGFMA(BITS, TBITS) \ 244 void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3, \ 245 const void *v4, uint32_t desc) \ 246 { \ 247 int i; \ 248 \ 249 for (i = 0; i < (128 / TBITS); i++) { \ 250 uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \ 251 uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \ 252 uint##TBITS##_t d = galois_multiply##BITS(a, b); \ 253 \ 254 a = s390_vec_read_element##BITS(v2, i * 2 + 1); \ 255 b = s390_vec_read_element##BITS(v3, i * 2 + 1); \ 256 d = d ^ galois_multiply32(a, b); \ 257 d = d ^ s390_vec_read_element##TBITS(v4, i); \ 258 s390_vec_write_element##TBITS(v1, i, d); \ 259 } \ 260 } 261 DEF_VGFMA(8, 16) 262 DEF_VGFMA(16, 32) 263 DEF_VGFMA(32, 64) 264 265 void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3, 266 const void *v4, uint32_t desc) 267 { 268 S390Vector tmp1, tmp2; 269 uint64_t a, b; 270 271 a = s390_vec_read_element64(v2, 0); 272 b = s390_vec_read_element64(v3, 0); 273 tmp1 = galois_multiply64(a, b); 274 a = s390_vec_read_element64(v2, 1); 275 b = s390_vec_read_element64(v3, 1); 276 tmp2 = galois_multiply64(a, b); 277 s390_vec_xor(&tmp1, &tmp1, &tmp2); 278 s390_vec_xor(v1, &tmp1, v4); 279 } 280 281 #define DEF_VMAL(BITS) \ 282 void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3, \ 283 const void *v4, uint32_t desc) \ 284 { \ 285 int i; \ 286 \ 287 for (i = 0; i < (128 / BITS); i++) { \ 288 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 289 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 290 const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 291 \ 292 s390_vec_write_element##BITS(v1, i, a * b + c); \ 293 } \ 294 } 295 DEF_VMAL(8) 296 DEF_VMAL(16) 297 298 #define DEF_VMAH(BITS) \ 299 void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3, \ 300 const void *v4, uint32_t desc) \ 301 { \ 302 int i; \ 303 \ 304 for (i = 0; i < (128 / BITS); i++) { \ 305 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 306 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 307 const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i); \ 308 \ 309 s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 310 } \ 311 } 312 DEF_VMAH(8) 313 DEF_VMAH(16) 314 315 #define DEF_VMALH(BITS) \ 316 void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3, \ 317 const void *v4, uint32_t desc) \ 318 { \ 319 int i; \ 320 \ 321 for (i = 0; i < (128 / BITS); i++) { \ 322 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 323 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 324 const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 325 \ 326 s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 327 } \ 328 } 329 DEF_VMALH(8) 330 DEF_VMALH(16) 331 332 #define DEF_VMAE(BITS, TBITS) \ 333 void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \ 334 const void *v4, uint32_t desc) \ 335 { \ 336 int i, j; \ 337 \ 338 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 339 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 340 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 341 int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 342 \ 343 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 344 } \ 345 } 346 DEF_VMAE(8, 16) 347 DEF_VMAE(16, 32) 348 DEF_VMAE(32, 64) 349 350 #define DEF_VMALE(BITS, TBITS) \ 351 void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \ 352 const void *v4, uint32_t desc) \ 353 { \ 354 int i, j; \ 355 \ 356 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 357 uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 358 uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 359 uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 360 \ 361 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 362 } \ 363 } 364 DEF_VMALE(8, 16) 365 DEF_VMALE(16, 32) 366 DEF_VMALE(32, 64) 367 368 #define DEF_VMAO(BITS, TBITS) \ 369 void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \ 370 const void *v4, uint32_t desc) \ 371 { \ 372 int i, j; \ 373 \ 374 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 375 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 376 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 377 int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 378 \ 379 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 380 } \ 381 } 382 DEF_VMAO(8, 16) 383 DEF_VMAO(16, 32) 384 DEF_VMAO(32, 64) 385 386 #define DEF_VMALO(BITS, TBITS) \ 387 void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \ 388 const void *v4, uint32_t desc) \ 389 { \ 390 int i, j; \ 391 \ 392 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 393 uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 394 uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 395 uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 396 \ 397 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 398 } \ 399 } 400 DEF_VMALO(8, 16) 401 DEF_VMALO(16, 32) 402 DEF_VMALO(32, 64) 403 404 #define DEF_VMH(BITS) \ 405 void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3, \ 406 uint32_t desc) \ 407 { \ 408 int i; \ 409 \ 410 for (i = 0; i < (128 / BITS); i++) { \ 411 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 412 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 413 \ 414 s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 415 } \ 416 } 417 DEF_VMH(8) 418 DEF_VMH(16) 419 420 #define DEF_VMLH(BITS) \ 421 void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3, \ 422 uint32_t desc) \ 423 { \ 424 int i; \ 425 \ 426 for (i = 0; i < (128 / BITS); i++) { \ 427 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 428 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 429 \ 430 s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 431 } \ 432 } 433 DEF_VMLH(8) 434 DEF_VMLH(16) 435 436 #define DEF_VME(BITS, TBITS) \ 437 void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3, \ 438 uint32_t desc) \ 439 { \ 440 int i, j; \ 441 \ 442 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 443 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 444 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 445 \ 446 s390_vec_write_element##TBITS(v1, i, a * b); \ 447 } \ 448 } 449 DEF_VME(8, 16) 450 DEF_VME(16, 32) 451 DEF_VME(32, 64) 452 453 #define DEF_VMLE(BITS, TBITS) \ 454 void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3, \ 455 uint32_t desc) \ 456 { \ 457 int i, j; \ 458 \ 459 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 460 const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 461 const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 462 \ 463 s390_vec_write_element##TBITS(v1, i, a * b); \ 464 } \ 465 } 466 DEF_VMLE(8, 16) 467 DEF_VMLE(16, 32) 468 DEF_VMLE(32, 64) 469 470 #define DEF_VMO(BITS, TBITS) \ 471 void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3, \ 472 uint32_t desc) \ 473 { \ 474 int i, j; \ 475 \ 476 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 477 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 478 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 479 \ 480 s390_vec_write_element##TBITS(v1, i, a * b); \ 481 } \ 482 } 483 DEF_VMO(8, 16) 484 DEF_VMO(16, 32) 485 DEF_VMO(32, 64) 486 487 #define DEF_VMLO(BITS, TBITS) \ 488 void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3, \ 489 uint32_t desc) \ 490 { \ 491 int i, j; \ 492 \ 493 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 494 const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 495 const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 496 \ 497 s390_vec_write_element##TBITS(v1, i, a * b); \ 498 } \ 499 } 500 DEF_VMLO(8, 16) 501 DEF_VMLO(16, 32) 502 DEF_VMLO(32, 64) 503 504 #define DEF_VPOPCT(BITS) \ 505 void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \ 506 { \ 507 int i; \ 508 \ 509 for (i = 0; i < (128 / BITS); i++) { \ 510 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 511 \ 512 s390_vec_write_element##BITS(v1, i, ctpop32(a)); \ 513 } \ 514 } 515 DEF_VPOPCT(8) 516 DEF_VPOPCT(16) 517 518 #define DEF_VERIM(BITS) \ 519 void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \ 520 uint32_t desc) \ 521 { \ 522 const uint8_t count = simd_data(desc); \ 523 int i; \ 524 \ 525 for (i = 0; i < (128 / BITS); i++) { \ 526 const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i); \ 527 const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i); \ 528 const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i); \ 529 const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask); \ 530 \ 531 s390_vec_write_element##BITS(v1, i, d); \ 532 } \ 533 } 534 DEF_VERIM(8) 535 DEF_VERIM(16) 536 537 void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count, 538 uint32_t desc) 539 { 540 s390_vec_shl(v1, v2, count); 541 } 542 543 void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count, 544 uint32_t desc) 545 { 546 s390_vec_sar(v1, v2, count); 547 } 548 549 void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count, 550 uint32_t desc) 551 { 552 s390_vec_shr(v1, v2, count); 553 } 554 555 #define DEF_VSCBI(BITS) \ 556 void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3, \ 557 uint32_t desc) \ 558 { \ 559 int i; \ 560 \ 561 for (i = 0; i < (128 / BITS); i++) { \ 562 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 563 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 564 \ 565 s390_vec_write_element##BITS(v1, i, a >= b); \ 566 } \ 567 } 568 DEF_VSCBI(8) 569 DEF_VSCBI(16) 570 571 void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env, 572 uint32_t desc) 573 { 574 S390Vector tmp; 575 576 s390_vec_and(&tmp, v1, v2); 577 if (s390_vec_is_zero(&tmp)) { 578 /* Selected bits all zeros; or all mask bits zero */ 579 env->cc_op = 0; 580 } else if (s390_vec_equal(&tmp, v2)) { 581 /* Selected bits all ones */ 582 env->cc_op = 3; 583 } else { 584 /* Selected bits a mix of zeros and ones */ 585 env->cc_op = 1; 586 } 587 } 588