1 /* 2 * QEMU TCG support -- s390x vector integer instruction support 3 * 4 * Copyright (C) 2019 Red Hat Inc 5 * 6 * Authors: 7 * David Hildenbrand <david@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 #include "qemu/osdep.h" 13 #include "cpu.h" 14 #include "vec.h" 15 #include "exec/helper-proto.h" 16 #include "tcg/tcg-gvec-desc.h" 17 #include "crypto/clmul.h" 18 19 static bool s390_vec_is_zero(const S390Vector *v) 20 { 21 return !v->doubleword[0] && !v->doubleword[1]; 22 } 23 24 static void s390_vec_xor(S390Vector *res, const S390Vector *a, 25 const S390Vector *b) 26 { 27 res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0]; 28 res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1]; 29 } 30 31 static void s390_vec_and(S390Vector *res, const S390Vector *a, 32 const S390Vector *b) 33 { 34 res->doubleword[0] = a->doubleword[0] & b->doubleword[0]; 35 res->doubleword[1] = a->doubleword[1] & b->doubleword[1]; 36 } 37 38 static bool s390_vec_equal(const S390Vector *a, const S390Vector *b) 39 { 40 return a->doubleword[0] == b->doubleword[0] && 41 a->doubleword[1] == b->doubleword[1]; 42 } 43 44 static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count) 45 { 46 uint64_t tmp; 47 48 g_assert(count < 128); 49 if (count == 0) { 50 d->doubleword[0] = a->doubleword[0]; 51 d->doubleword[1] = a->doubleword[1]; 52 } else if (count == 64) { 53 d->doubleword[0] = a->doubleword[1]; 54 d->doubleword[1] = 0; 55 } else if (count < 64) { 56 tmp = extract64(a->doubleword[1], 64 - count, count); 57 d->doubleword[1] = a->doubleword[1] << count; 58 d->doubleword[0] = (a->doubleword[0] << count) | tmp; 59 } else { 60 d->doubleword[0] = a->doubleword[1] << (count - 64); 61 d->doubleword[1] = 0; 62 } 63 } 64 65 static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count) 66 { 67 uint64_t tmp; 68 69 if (count == 0) { 70 d->doubleword[0] = a->doubleword[0]; 71 d->doubleword[1] = a->doubleword[1]; 72 } else if (count == 64) { 73 tmp = (int64_t)a->doubleword[0] >> 63; 74 d->doubleword[1] = a->doubleword[0]; 75 d->doubleword[0] = tmp; 76 } else if (count < 64) { 77 tmp = a->doubleword[1] >> count; 78 d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 79 d->doubleword[0] = (int64_t)a->doubleword[0] >> count; 80 } else { 81 tmp = (int64_t)a->doubleword[0] >> 63; 82 d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64); 83 d->doubleword[0] = tmp; 84 } 85 } 86 87 static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count) 88 { 89 uint64_t tmp; 90 91 g_assert(count < 128); 92 if (count == 0) { 93 d->doubleword[0] = a->doubleword[0]; 94 d->doubleword[1] = a->doubleword[1]; 95 } else if (count == 64) { 96 d->doubleword[1] = a->doubleword[0]; 97 d->doubleword[0] = 0; 98 } else if (count < 64) { 99 tmp = a->doubleword[1] >> count; 100 d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 101 d->doubleword[0] = a->doubleword[0] >> count; 102 } else { 103 d->doubleword[1] = a->doubleword[0] >> (count - 64); 104 d->doubleword[0] = 0; 105 } 106 } 107 #define DEF_VAVG(BITS) \ 108 void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3, \ 109 uint32_t desc) \ 110 { \ 111 int i; \ 112 \ 113 for (i = 0; i < (128 / BITS); i++) { \ 114 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 115 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 116 \ 117 s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 118 } \ 119 } 120 DEF_VAVG(8) 121 DEF_VAVG(16) 122 123 #define DEF_VAVGL(BITS) \ 124 void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3, \ 125 uint32_t desc) \ 126 { \ 127 int i; \ 128 \ 129 for (i = 0; i < (128 / BITS); i++) { \ 130 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 131 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 132 \ 133 s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 134 } \ 135 } 136 DEF_VAVGL(8) 137 DEF_VAVGL(16) 138 139 #define DEF_VCLZ(BITS) \ 140 void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc) \ 141 { \ 142 int i; \ 143 \ 144 for (i = 0; i < (128 / BITS); i++) { \ 145 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 146 \ 147 s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS); \ 148 } \ 149 } 150 DEF_VCLZ(8) 151 DEF_VCLZ(16) 152 153 #define DEF_VCTZ(BITS) \ 154 void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc) \ 155 { \ 156 int i; \ 157 \ 158 for (i = 0; i < (128 / BITS); i++) { \ 159 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 160 \ 161 s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS); \ 162 } \ 163 } 164 DEF_VCTZ(8) 165 DEF_VCTZ(16) 166 167 /* like binary multiplication, but XOR instead of addition */ 168 #define DEF_GALOIS_MULTIPLY(BITS, TBITS) \ 169 static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a, \ 170 uint##TBITS##_t b) \ 171 { \ 172 uint##TBITS##_t res = 0; \ 173 \ 174 while (b) { \ 175 if (b & 0x1) { \ 176 res = res ^ a; \ 177 } \ 178 a = a << 1; \ 179 b = b >> 1; \ 180 } \ 181 return res; \ 182 } 183 DEF_GALOIS_MULTIPLY(32, 64) 184 185 static S390Vector galois_multiply64(uint64_t a, uint64_t b) 186 { 187 S390Vector res = {}; 188 S390Vector va = { 189 .doubleword[1] = a, 190 }; 191 S390Vector vb = { 192 .doubleword[1] = b, 193 }; 194 195 while (!s390_vec_is_zero(&vb)) { 196 if (vb.doubleword[1] & 0x1) { 197 s390_vec_xor(&res, &res, &va); 198 } 199 s390_vec_shl(&va, &va, 1); 200 s390_vec_shr(&vb, &vb, 1); 201 } 202 return res; 203 } 204 205 /* 206 * There is no carry across the two doublewords, so their order does 207 * not matter. Nor is there partial overlap between registers. 208 */ 209 static inline uint64_t do_gfma8(uint64_t n, uint64_t m, uint64_t a) 210 { 211 return clmul_8x4_even(n, m) ^ clmul_8x4_odd(n, m) ^ a; 212 } 213 214 void HELPER(gvec_vgfm8)(void *v1, const void *v2, const void *v3, uint32_t d) 215 { 216 uint64_t *q1 = v1; 217 const uint64_t *q2 = v2, *q3 = v3; 218 219 q1[0] = do_gfma8(q2[0], q3[0], 0); 220 q1[1] = do_gfma8(q2[1], q3[1], 0); 221 } 222 223 void HELPER(gvec_vgfma8)(void *v1, const void *v2, const void *v3, 224 const void *v4, uint32_t desc) 225 { 226 uint64_t *q1 = v1; 227 const uint64_t *q2 = v2, *q3 = v3, *q4 = v4; 228 229 q1[0] = do_gfma8(q2[0], q3[0], q4[0]); 230 q1[1] = do_gfma8(q2[1], q3[1], q4[1]); 231 } 232 233 static inline uint64_t do_gfma16(uint64_t n, uint64_t m, uint64_t a) 234 { 235 return clmul_16x2_even(n, m) ^ clmul_16x2_odd(n, m) ^ a; 236 } 237 238 void HELPER(gvec_vgfm16)(void *v1, const void *v2, const void *v3, uint32_t d) 239 { 240 uint64_t *q1 = v1; 241 const uint64_t *q2 = v2, *q3 = v3; 242 243 q1[0] = do_gfma16(q2[0], q3[0], 0); 244 q1[1] = do_gfma16(q2[1], q3[1], 0); 245 } 246 247 void HELPER(gvec_vgfma16)(void *v1, const void *v2, const void *v3, 248 const void *v4, uint32_t d) 249 { 250 uint64_t *q1 = v1; 251 const uint64_t *q2 = v2, *q3 = v3, *q4 = v4; 252 253 q1[0] = do_gfma16(q2[0], q3[0], q4[0]); 254 q1[1] = do_gfma16(q2[1], q3[1], q4[1]); 255 } 256 257 #define DEF_VGFM(BITS, TBITS) \ 258 void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3, \ 259 uint32_t desc) \ 260 { \ 261 int i; \ 262 \ 263 for (i = 0; i < (128 / TBITS); i++) { \ 264 uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \ 265 uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \ 266 uint##TBITS##_t d = galois_multiply##BITS(a, b); \ 267 \ 268 a = s390_vec_read_element##BITS(v2, i * 2 + 1); \ 269 b = s390_vec_read_element##BITS(v3, i * 2 + 1); \ 270 d = d ^ galois_multiply32(a, b); \ 271 s390_vec_write_element##TBITS(v1, i, d); \ 272 } \ 273 } 274 DEF_VGFM(32, 64) 275 276 void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3, 277 uint32_t desc) 278 { 279 S390Vector tmp1, tmp2; 280 uint64_t a, b; 281 282 a = s390_vec_read_element64(v2, 0); 283 b = s390_vec_read_element64(v3, 0); 284 tmp1 = galois_multiply64(a, b); 285 a = s390_vec_read_element64(v2, 1); 286 b = s390_vec_read_element64(v3, 1); 287 tmp2 = galois_multiply64(a, b); 288 s390_vec_xor(v1, &tmp1, &tmp2); 289 } 290 291 #define DEF_VGFMA(BITS, TBITS) \ 292 void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3, \ 293 const void *v4, uint32_t desc) \ 294 { \ 295 int i; \ 296 \ 297 for (i = 0; i < (128 / TBITS); i++) { \ 298 uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \ 299 uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \ 300 uint##TBITS##_t d = galois_multiply##BITS(a, b); \ 301 \ 302 a = s390_vec_read_element##BITS(v2, i * 2 + 1); \ 303 b = s390_vec_read_element##BITS(v3, i * 2 + 1); \ 304 d = d ^ galois_multiply32(a, b); \ 305 d = d ^ s390_vec_read_element##TBITS(v4, i); \ 306 s390_vec_write_element##TBITS(v1, i, d); \ 307 } \ 308 } 309 DEF_VGFMA(32, 64) 310 311 void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3, 312 const void *v4, uint32_t desc) 313 { 314 S390Vector tmp1, tmp2; 315 uint64_t a, b; 316 317 a = s390_vec_read_element64(v2, 0); 318 b = s390_vec_read_element64(v3, 0); 319 tmp1 = galois_multiply64(a, b); 320 a = s390_vec_read_element64(v2, 1); 321 b = s390_vec_read_element64(v3, 1); 322 tmp2 = galois_multiply64(a, b); 323 s390_vec_xor(&tmp1, &tmp1, &tmp2); 324 s390_vec_xor(v1, &tmp1, v4); 325 } 326 327 #define DEF_VMAL(BITS) \ 328 void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3, \ 329 const void *v4, uint32_t desc) \ 330 { \ 331 int i; \ 332 \ 333 for (i = 0; i < (128 / BITS); i++) { \ 334 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 335 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 336 const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 337 \ 338 s390_vec_write_element##BITS(v1, i, a * b + c); \ 339 } \ 340 } 341 DEF_VMAL(8) 342 DEF_VMAL(16) 343 344 #define DEF_VMAH(BITS) \ 345 void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3, \ 346 const void *v4, uint32_t desc) \ 347 { \ 348 int i; \ 349 \ 350 for (i = 0; i < (128 / BITS); i++) { \ 351 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 352 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 353 const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i); \ 354 \ 355 s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 356 } \ 357 } 358 DEF_VMAH(8) 359 DEF_VMAH(16) 360 361 #define DEF_VMALH(BITS) \ 362 void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3, \ 363 const void *v4, uint32_t desc) \ 364 { \ 365 int i; \ 366 \ 367 for (i = 0; i < (128 / BITS); i++) { \ 368 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 369 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 370 const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 371 \ 372 s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 373 } \ 374 } 375 DEF_VMALH(8) 376 DEF_VMALH(16) 377 378 #define DEF_VMAE(BITS, TBITS) \ 379 void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \ 380 const void *v4, uint32_t desc) \ 381 { \ 382 int i, j; \ 383 \ 384 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 385 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 386 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 387 int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 388 \ 389 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 390 } \ 391 } 392 DEF_VMAE(8, 16) 393 DEF_VMAE(16, 32) 394 DEF_VMAE(32, 64) 395 396 #define DEF_VMALE(BITS, TBITS) \ 397 void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \ 398 const void *v4, uint32_t desc) \ 399 { \ 400 int i, j; \ 401 \ 402 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 403 uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 404 uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 405 uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 406 \ 407 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 408 } \ 409 } 410 DEF_VMALE(8, 16) 411 DEF_VMALE(16, 32) 412 DEF_VMALE(32, 64) 413 414 #define DEF_VMAO(BITS, TBITS) \ 415 void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \ 416 const void *v4, uint32_t desc) \ 417 { \ 418 int i, j; \ 419 \ 420 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 421 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 422 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 423 int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 424 \ 425 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 426 } \ 427 } 428 DEF_VMAO(8, 16) 429 DEF_VMAO(16, 32) 430 DEF_VMAO(32, 64) 431 432 #define DEF_VMALO(BITS, TBITS) \ 433 void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \ 434 const void *v4, uint32_t desc) \ 435 { \ 436 int i, j; \ 437 \ 438 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 439 uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 440 uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 441 uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 442 \ 443 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 444 } \ 445 } 446 DEF_VMALO(8, 16) 447 DEF_VMALO(16, 32) 448 DEF_VMALO(32, 64) 449 450 #define DEF_VMH(BITS) \ 451 void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3, \ 452 uint32_t desc) \ 453 { \ 454 int i; \ 455 \ 456 for (i = 0; i < (128 / BITS); i++) { \ 457 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 458 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 459 \ 460 s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 461 } \ 462 } 463 DEF_VMH(8) 464 DEF_VMH(16) 465 466 #define DEF_VMLH(BITS) \ 467 void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3, \ 468 uint32_t desc) \ 469 { \ 470 int i; \ 471 \ 472 for (i = 0; i < (128 / BITS); i++) { \ 473 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 474 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 475 \ 476 s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 477 } \ 478 } 479 DEF_VMLH(8) 480 DEF_VMLH(16) 481 482 #define DEF_VME(BITS, TBITS) \ 483 void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3, \ 484 uint32_t desc) \ 485 { \ 486 int i, j; \ 487 \ 488 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 489 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 490 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 491 \ 492 s390_vec_write_element##TBITS(v1, i, a * b); \ 493 } \ 494 } 495 DEF_VME(8, 16) 496 DEF_VME(16, 32) 497 DEF_VME(32, 64) 498 499 #define DEF_VMLE(BITS, TBITS) \ 500 void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3, \ 501 uint32_t desc) \ 502 { \ 503 int i, j; \ 504 \ 505 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 506 const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 507 const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 508 \ 509 s390_vec_write_element##TBITS(v1, i, a * b); \ 510 } \ 511 } 512 DEF_VMLE(8, 16) 513 DEF_VMLE(16, 32) 514 DEF_VMLE(32, 64) 515 516 #define DEF_VMO(BITS, TBITS) \ 517 void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3, \ 518 uint32_t desc) \ 519 { \ 520 int i, j; \ 521 \ 522 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 523 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 524 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 525 \ 526 s390_vec_write_element##TBITS(v1, i, a * b); \ 527 } \ 528 } 529 DEF_VMO(8, 16) 530 DEF_VMO(16, 32) 531 DEF_VMO(32, 64) 532 533 #define DEF_VMLO(BITS, TBITS) \ 534 void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3, \ 535 uint32_t desc) \ 536 { \ 537 int i, j; \ 538 \ 539 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 540 const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 541 const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 542 \ 543 s390_vec_write_element##TBITS(v1, i, a * b); \ 544 } \ 545 } 546 DEF_VMLO(8, 16) 547 DEF_VMLO(16, 32) 548 DEF_VMLO(32, 64) 549 550 #define DEF_VPOPCT(BITS) \ 551 void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \ 552 { \ 553 int i; \ 554 \ 555 for (i = 0; i < (128 / BITS); i++) { \ 556 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 557 \ 558 s390_vec_write_element##BITS(v1, i, ctpop32(a)); \ 559 } \ 560 } 561 DEF_VPOPCT(8) 562 DEF_VPOPCT(16) 563 564 #define DEF_VERIM(BITS) \ 565 void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \ 566 uint32_t desc) \ 567 { \ 568 const uint8_t count = simd_data(desc); \ 569 int i; \ 570 \ 571 for (i = 0; i < (128 / BITS); i++) { \ 572 const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i); \ 573 const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i); \ 574 const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i); \ 575 const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask); \ 576 \ 577 s390_vec_write_element##BITS(v1, i, d); \ 578 } \ 579 } 580 DEF_VERIM(8) 581 DEF_VERIM(16) 582 583 void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count, 584 uint32_t desc) 585 { 586 s390_vec_shl(v1, v2, count); 587 } 588 589 void HELPER(gvec_vsl_ve2)(void *v1, const void *v2, const void *v3, 590 uint32_t desc) 591 { 592 S390Vector tmp; 593 uint32_t sh, e0, e1 = 0; 594 int i; 595 596 for (i = 15; i >= 0; --i, e1 = e0) { 597 e0 = s390_vec_read_element8(v2, i); 598 sh = s390_vec_read_element8(v3, i) & 7; 599 600 s390_vec_write_element8(&tmp, i, rol32(e0 | (e1 << 24), sh)); 601 } 602 603 *(S390Vector *)v1 = tmp; 604 } 605 606 void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count, 607 uint32_t desc) 608 { 609 s390_vec_sar(v1, v2, count); 610 } 611 612 void HELPER(gvec_vsra_ve2)(void *v1, const void *v2, const void *v3, 613 uint32_t desc) 614 { 615 S390Vector tmp; 616 uint32_t sh, e0, e1 = 0; 617 int i = 0; 618 619 /* Byte 0 is special only. */ 620 e0 = (int32_t)(int8_t)s390_vec_read_element8(v2, i); 621 sh = s390_vec_read_element8(v3, i) & 7; 622 s390_vec_write_element8(&tmp, i, e0 >> sh); 623 624 e1 = e0; 625 for (i = 1; i < 16; ++i, e1 = e0) { 626 e0 = s390_vec_read_element8(v2, i); 627 sh = s390_vec_read_element8(v3, i) & 7; 628 s390_vec_write_element8(&tmp, i, (e0 | e1 << 8) >> sh); 629 } 630 631 *(S390Vector *)v1 = tmp; 632 } 633 634 void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count, 635 uint32_t desc) 636 { 637 s390_vec_shr(v1, v2, count); 638 } 639 640 void HELPER(gvec_vsrl_ve2)(void *v1, const void *v2, const void *v3, 641 uint32_t desc) 642 { 643 S390Vector tmp; 644 uint32_t sh, e0, e1 = 0; 645 646 for (int i = 0; i < 16; ++i, e1 = e0) { 647 e0 = s390_vec_read_element8(v2, i); 648 sh = s390_vec_read_element8(v3, i) & 7; 649 650 s390_vec_write_element8(&tmp, i, (e0 | (e1 << 8)) >> sh); 651 } 652 653 *(S390Vector *)v1 = tmp; 654 } 655 656 #define DEF_VSCBI(BITS) \ 657 void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3, \ 658 uint32_t desc) \ 659 { \ 660 int i; \ 661 \ 662 for (i = 0; i < (128 / BITS); i++) { \ 663 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 664 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 665 \ 666 s390_vec_write_element##BITS(v1, i, a >= b); \ 667 } \ 668 } 669 DEF_VSCBI(8) 670 DEF_VSCBI(16) 671 672 void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env, 673 uint32_t desc) 674 { 675 S390Vector tmp; 676 677 s390_vec_and(&tmp, v1, v2); 678 if (s390_vec_is_zero(&tmp)) { 679 /* Selected bits all zeros; or all mask bits zero */ 680 env->cc_op = 0; 681 } else if (s390_vec_equal(&tmp, v2)) { 682 /* Selected bits all ones */ 683 env->cc_op = 3; 684 } else { 685 /* Selected bits a mix of zeros and ones */ 686 env->cc_op = 1; 687 } 688 } 689