1 /* 2 * QEMU TCG support -- s390x vector integer instruction support 3 * 4 * Copyright (C) 2019 Red Hat Inc 5 * 6 * Authors: 7 * David Hildenbrand <david@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 #include "qemu/osdep.h" 13 #include "cpu.h" 14 #include "vec.h" 15 #include "exec/helper-proto.h" 16 #include "tcg/tcg-gvec-desc.h" 17 #include "crypto/clmul.h" 18 19 static bool s390_vec_is_zero(const S390Vector *v) 20 { 21 return !v->doubleword[0] && !v->doubleword[1]; 22 } 23 24 static void s390_vec_xor(S390Vector *res, const S390Vector *a, 25 const S390Vector *b) 26 { 27 res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0]; 28 res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1]; 29 } 30 31 static void s390_vec_and(S390Vector *res, const S390Vector *a, 32 const S390Vector *b) 33 { 34 res->doubleword[0] = a->doubleword[0] & b->doubleword[0]; 35 res->doubleword[1] = a->doubleword[1] & b->doubleword[1]; 36 } 37 38 static bool s390_vec_equal(const S390Vector *a, const S390Vector *b) 39 { 40 return a->doubleword[0] == b->doubleword[0] && 41 a->doubleword[1] == b->doubleword[1]; 42 } 43 44 static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count) 45 { 46 uint64_t tmp; 47 48 g_assert(count < 128); 49 if (count == 0) { 50 d->doubleword[0] = a->doubleword[0]; 51 d->doubleword[1] = a->doubleword[1]; 52 } else if (count == 64) { 53 d->doubleword[0] = a->doubleword[1]; 54 d->doubleword[1] = 0; 55 } else if (count < 64) { 56 tmp = extract64(a->doubleword[1], 64 - count, count); 57 d->doubleword[1] = a->doubleword[1] << count; 58 d->doubleword[0] = (a->doubleword[0] << count) | tmp; 59 } else { 60 d->doubleword[0] = a->doubleword[1] << (count - 64); 61 d->doubleword[1] = 0; 62 } 63 } 64 65 static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count) 66 { 67 uint64_t tmp; 68 69 if (count == 0) { 70 d->doubleword[0] = a->doubleword[0]; 71 d->doubleword[1] = a->doubleword[1]; 72 } else if (count == 64) { 73 tmp = (int64_t)a->doubleword[0] >> 63; 74 d->doubleword[1] = a->doubleword[0]; 75 d->doubleword[0] = tmp; 76 } else if (count < 64) { 77 tmp = a->doubleword[1] >> count; 78 d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 79 d->doubleword[0] = (int64_t)a->doubleword[0] >> count; 80 } else { 81 tmp = (int64_t)a->doubleword[0] >> 63; 82 d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64); 83 d->doubleword[0] = tmp; 84 } 85 } 86 87 static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count) 88 { 89 uint64_t tmp; 90 91 g_assert(count < 128); 92 if (count == 0) { 93 d->doubleword[0] = a->doubleword[0]; 94 d->doubleword[1] = a->doubleword[1]; 95 } else if (count == 64) { 96 d->doubleword[1] = a->doubleword[0]; 97 d->doubleword[0] = 0; 98 } else if (count < 64) { 99 tmp = a->doubleword[1] >> count; 100 d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 101 d->doubleword[0] = a->doubleword[0] >> count; 102 } else { 103 d->doubleword[1] = a->doubleword[0] >> (count - 64); 104 d->doubleword[0] = 0; 105 } 106 } 107 #define DEF_VAVG(BITS) \ 108 void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3, \ 109 uint32_t desc) \ 110 { \ 111 int i; \ 112 \ 113 for (i = 0; i < (128 / BITS); i++) { \ 114 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 115 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 116 \ 117 s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 118 } \ 119 } 120 DEF_VAVG(8) 121 DEF_VAVG(16) 122 123 #define DEF_VAVGL(BITS) \ 124 void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3, \ 125 uint32_t desc) \ 126 { \ 127 int i; \ 128 \ 129 for (i = 0; i < (128 / BITS); i++) { \ 130 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 131 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 132 \ 133 s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 134 } \ 135 } 136 DEF_VAVGL(8) 137 DEF_VAVGL(16) 138 139 #define DEF_VCLZ(BITS) \ 140 void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc) \ 141 { \ 142 int i; \ 143 \ 144 for (i = 0; i < (128 / BITS); i++) { \ 145 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 146 \ 147 s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS); \ 148 } \ 149 } 150 DEF_VCLZ(8) 151 DEF_VCLZ(16) 152 153 #define DEF_VCTZ(BITS) \ 154 void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc) \ 155 { \ 156 int i; \ 157 \ 158 for (i = 0; i < (128 / BITS); i++) { \ 159 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 160 \ 161 s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS); \ 162 } \ 163 } 164 DEF_VCTZ(8) 165 DEF_VCTZ(16) 166 167 /* like binary multiplication, but XOR instead of addition */ 168 169 static S390Vector galois_multiply64(uint64_t a, uint64_t b) 170 { 171 S390Vector res = {}; 172 S390Vector va = { 173 .doubleword[1] = a, 174 }; 175 S390Vector vb = { 176 .doubleword[1] = b, 177 }; 178 179 while (!s390_vec_is_zero(&vb)) { 180 if (vb.doubleword[1] & 0x1) { 181 s390_vec_xor(&res, &res, &va); 182 } 183 s390_vec_shl(&va, &va, 1); 184 s390_vec_shr(&vb, &vb, 1); 185 } 186 return res; 187 } 188 189 /* 190 * There is no carry across the two doublewords, so their order does 191 * not matter. Nor is there partial overlap between registers. 192 */ 193 static inline uint64_t do_gfma8(uint64_t n, uint64_t m, uint64_t a) 194 { 195 return clmul_8x4_even(n, m) ^ clmul_8x4_odd(n, m) ^ a; 196 } 197 198 void HELPER(gvec_vgfm8)(void *v1, const void *v2, const void *v3, uint32_t d) 199 { 200 uint64_t *q1 = v1; 201 const uint64_t *q2 = v2, *q3 = v3; 202 203 q1[0] = do_gfma8(q2[0], q3[0], 0); 204 q1[1] = do_gfma8(q2[1], q3[1], 0); 205 } 206 207 void HELPER(gvec_vgfma8)(void *v1, const void *v2, const void *v3, 208 const void *v4, uint32_t desc) 209 { 210 uint64_t *q1 = v1; 211 const uint64_t *q2 = v2, *q3 = v3, *q4 = v4; 212 213 q1[0] = do_gfma8(q2[0], q3[0], q4[0]); 214 q1[1] = do_gfma8(q2[1], q3[1], q4[1]); 215 } 216 217 static inline uint64_t do_gfma16(uint64_t n, uint64_t m, uint64_t a) 218 { 219 return clmul_16x2_even(n, m) ^ clmul_16x2_odd(n, m) ^ a; 220 } 221 222 void HELPER(gvec_vgfm16)(void *v1, const void *v2, const void *v3, uint32_t d) 223 { 224 uint64_t *q1 = v1; 225 const uint64_t *q2 = v2, *q3 = v3; 226 227 q1[0] = do_gfma16(q2[0], q3[0], 0); 228 q1[1] = do_gfma16(q2[1], q3[1], 0); 229 } 230 231 void HELPER(gvec_vgfma16)(void *v1, const void *v2, const void *v3, 232 const void *v4, uint32_t d) 233 { 234 uint64_t *q1 = v1; 235 const uint64_t *q2 = v2, *q3 = v3, *q4 = v4; 236 237 q1[0] = do_gfma16(q2[0], q3[0], q4[0]); 238 q1[1] = do_gfma16(q2[1], q3[1], q4[1]); 239 } 240 241 static inline uint64_t do_gfma32(uint64_t n, uint64_t m, uint64_t a) 242 { 243 return clmul_32(n, m) ^ clmul_32(n >> 32, m >> 32) ^ a; 244 } 245 246 void HELPER(gvec_vgfm32)(void *v1, const void *v2, const void *v3, uint32_t d) 247 { 248 uint64_t *q1 = v1; 249 const uint64_t *q2 = v2, *q3 = v3; 250 251 q1[0] = do_gfma32(q2[0], q3[0], 0); 252 q1[1] = do_gfma32(q2[1], q3[1], 0); 253 } 254 255 void HELPER(gvec_vgfma32)(void *v1, const void *v2, const void *v3, 256 const void *v4, uint32_t d) 257 { 258 uint64_t *q1 = v1; 259 const uint64_t *q2 = v2, *q3 = v3, *q4 = v4; 260 261 q1[0] = do_gfma32(q2[0], q3[0], q4[0]); 262 q1[1] = do_gfma32(q2[1], q3[1], q4[1]); 263 } 264 265 void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3, 266 uint32_t desc) 267 { 268 S390Vector tmp1, tmp2; 269 uint64_t a, b; 270 271 a = s390_vec_read_element64(v2, 0); 272 b = s390_vec_read_element64(v3, 0); 273 tmp1 = galois_multiply64(a, b); 274 a = s390_vec_read_element64(v2, 1); 275 b = s390_vec_read_element64(v3, 1); 276 tmp2 = galois_multiply64(a, b); 277 s390_vec_xor(v1, &tmp1, &tmp2); 278 } 279 280 void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3, 281 const void *v4, uint32_t desc) 282 { 283 S390Vector tmp1, tmp2; 284 uint64_t a, b; 285 286 a = s390_vec_read_element64(v2, 0); 287 b = s390_vec_read_element64(v3, 0); 288 tmp1 = galois_multiply64(a, b); 289 a = s390_vec_read_element64(v2, 1); 290 b = s390_vec_read_element64(v3, 1); 291 tmp2 = galois_multiply64(a, b); 292 s390_vec_xor(&tmp1, &tmp1, &tmp2); 293 s390_vec_xor(v1, &tmp1, v4); 294 } 295 296 #define DEF_VMAL(BITS) \ 297 void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3, \ 298 const void *v4, uint32_t desc) \ 299 { \ 300 int i; \ 301 \ 302 for (i = 0; i < (128 / BITS); i++) { \ 303 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 304 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 305 const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 306 \ 307 s390_vec_write_element##BITS(v1, i, a * b + c); \ 308 } \ 309 } 310 DEF_VMAL(8) 311 DEF_VMAL(16) 312 313 #define DEF_VMAH(BITS) \ 314 void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3, \ 315 const void *v4, uint32_t desc) \ 316 { \ 317 int i; \ 318 \ 319 for (i = 0; i < (128 / BITS); i++) { \ 320 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 321 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 322 const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i); \ 323 \ 324 s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 325 } \ 326 } 327 DEF_VMAH(8) 328 DEF_VMAH(16) 329 330 #define DEF_VMALH(BITS) \ 331 void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3, \ 332 const void *v4, uint32_t desc) \ 333 { \ 334 int i; \ 335 \ 336 for (i = 0; i < (128 / BITS); i++) { \ 337 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 338 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 339 const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 340 \ 341 s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 342 } \ 343 } 344 DEF_VMALH(8) 345 DEF_VMALH(16) 346 347 #define DEF_VMAE(BITS, TBITS) \ 348 void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \ 349 const void *v4, uint32_t desc) \ 350 { \ 351 int i, j; \ 352 \ 353 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 354 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 355 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 356 int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 357 \ 358 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 359 } \ 360 } 361 DEF_VMAE(8, 16) 362 DEF_VMAE(16, 32) 363 DEF_VMAE(32, 64) 364 365 #define DEF_VMALE(BITS, TBITS) \ 366 void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \ 367 const void *v4, uint32_t desc) \ 368 { \ 369 int i, j; \ 370 \ 371 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 372 uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 373 uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 374 uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 375 \ 376 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 377 } \ 378 } 379 DEF_VMALE(8, 16) 380 DEF_VMALE(16, 32) 381 DEF_VMALE(32, 64) 382 383 #define DEF_VMAO(BITS, TBITS) \ 384 void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \ 385 const void *v4, uint32_t desc) \ 386 { \ 387 int i, j; \ 388 \ 389 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 390 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 391 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 392 int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 393 \ 394 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 395 } \ 396 } 397 DEF_VMAO(8, 16) 398 DEF_VMAO(16, 32) 399 DEF_VMAO(32, 64) 400 401 #define DEF_VMALO(BITS, TBITS) \ 402 void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \ 403 const void *v4, uint32_t desc) \ 404 { \ 405 int i, j; \ 406 \ 407 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 408 uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 409 uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 410 uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 411 \ 412 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 413 } \ 414 } 415 DEF_VMALO(8, 16) 416 DEF_VMALO(16, 32) 417 DEF_VMALO(32, 64) 418 419 #define DEF_VMH(BITS) \ 420 void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3, \ 421 uint32_t desc) \ 422 { \ 423 int i; \ 424 \ 425 for (i = 0; i < (128 / BITS); i++) { \ 426 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 427 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 428 \ 429 s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 430 } \ 431 } 432 DEF_VMH(8) 433 DEF_VMH(16) 434 435 #define DEF_VMLH(BITS) \ 436 void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3, \ 437 uint32_t desc) \ 438 { \ 439 int i; \ 440 \ 441 for (i = 0; i < (128 / BITS); i++) { \ 442 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 443 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 444 \ 445 s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 446 } \ 447 } 448 DEF_VMLH(8) 449 DEF_VMLH(16) 450 451 #define DEF_VME(BITS, TBITS) \ 452 void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3, \ 453 uint32_t desc) \ 454 { \ 455 int i, j; \ 456 \ 457 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 458 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 459 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 460 \ 461 s390_vec_write_element##TBITS(v1, i, a * b); \ 462 } \ 463 } 464 DEF_VME(8, 16) 465 DEF_VME(16, 32) 466 DEF_VME(32, 64) 467 468 #define DEF_VMLE(BITS, TBITS) \ 469 void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3, \ 470 uint32_t desc) \ 471 { \ 472 int i, j; \ 473 \ 474 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 475 const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 476 const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 477 \ 478 s390_vec_write_element##TBITS(v1, i, a * b); \ 479 } \ 480 } 481 DEF_VMLE(8, 16) 482 DEF_VMLE(16, 32) 483 DEF_VMLE(32, 64) 484 485 #define DEF_VMO(BITS, TBITS) \ 486 void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3, \ 487 uint32_t desc) \ 488 { \ 489 int i, j; \ 490 \ 491 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 492 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 493 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 494 \ 495 s390_vec_write_element##TBITS(v1, i, a * b); \ 496 } \ 497 } 498 DEF_VMO(8, 16) 499 DEF_VMO(16, 32) 500 DEF_VMO(32, 64) 501 502 #define DEF_VMLO(BITS, TBITS) \ 503 void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3, \ 504 uint32_t desc) \ 505 { \ 506 int i, j; \ 507 \ 508 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 509 const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 510 const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 511 \ 512 s390_vec_write_element##TBITS(v1, i, a * b); \ 513 } \ 514 } 515 DEF_VMLO(8, 16) 516 DEF_VMLO(16, 32) 517 DEF_VMLO(32, 64) 518 519 #define DEF_VPOPCT(BITS) \ 520 void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \ 521 { \ 522 int i; \ 523 \ 524 for (i = 0; i < (128 / BITS); i++) { \ 525 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 526 \ 527 s390_vec_write_element##BITS(v1, i, ctpop32(a)); \ 528 } \ 529 } 530 DEF_VPOPCT(8) 531 DEF_VPOPCT(16) 532 533 #define DEF_VERIM(BITS) \ 534 void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \ 535 uint32_t desc) \ 536 { \ 537 const uint8_t count = simd_data(desc); \ 538 int i; \ 539 \ 540 for (i = 0; i < (128 / BITS); i++) { \ 541 const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i); \ 542 const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i); \ 543 const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i); \ 544 const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask); \ 545 \ 546 s390_vec_write_element##BITS(v1, i, d); \ 547 } \ 548 } 549 DEF_VERIM(8) 550 DEF_VERIM(16) 551 552 void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count, 553 uint32_t desc) 554 { 555 s390_vec_shl(v1, v2, count); 556 } 557 558 void HELPER(gvec_vsl_ve2)(void *v1, const void *v2, const void *v3, 559 uint32_t desc) 560 { 561 S390Vector tmp; 562 uint32_t sh, e0, e1 = 0; 563 int i; 564 565 for (i = 15; i >= 0; --i, e1 = e0) { 566 e0 = s390_vec_read_element8(v2, i); 567 sh = s390_vec_read_element8(v3, i) & 7; 568 569 s390_vec_write_element8(&tmp, i, rol32(e0 | (e1 << 24), sh)); 570 } 571 572 *(S390Vector *)v1 = tmp; 573 } 574 575 void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count, 576 uint32_t desc) 577 { 578 s390_vec_sar(v1, v2, count); 579 } 580 581 void HELPER(gvec_vsra_ve2)(void *v1, const void *v2, const void *v3, 582 uint32_t desc) 583 { 584 S390Vector tmp; 585 uint32_t sh, e0, e1 = 0; 586 int i = 0; 587 588 /* Byte 0 is special only. */ 589 e0 = (int32_t)(int8_t)s390_vec_read_element8(v2, i); 590 sh = s390_vec_read_element8(v3, i) & 7; 591 s390_vec_write_element8(&tmp, i, e0 >> sh); 592 593 e1 = e0; 594 for (i = 1; i < 16; ++i, e1 = e0) { 595 e0 = s390_vec_read_element8(v2, i); 596 sh = s390_vec_read_element8(v3, i) & 7; 597 s390_vec_write_element8(&tmp, i, (e0 | e1 << 8) >> sh); 598 } 599 600 *(S390Vector *)v1 = tmp; 601 } 602 603 void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count, 604 uint32_t desc) 605 { 606 s390_vec_shr(v1, v2, count); 607 } 608 609 void HELPER(gvec_vsrl_ve2)(void *v1, const void *v2, const void *v3, 610 uint32_t desc) 611 { 612 S390Vector tmp; 613 uint32_t sh, e0, e1 = 0; 614 615 for (int i = 0; i < 16; ++i, e1 = e0) { 616 e0 = s390_vec_read_element8(v2, i); 617 sh = s390_vec_read_element8(v3, i) & 7; 618 619 s390_vec_write_element8(&tmp, i, (e0 | (e1 << 8)) >> sh); 620 } 621 622 *(S390Vector *)v1 = tmp; 623 } 624 625 #define DEF_VSCBI(BITS) \ 626 void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3, \ 627 uint32_t desc) \ 628 { \ 629 int i; \ 630 \ 631 for (i = 0; i < (128 / BITS); i++) { \ 632 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 633 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 634 \ 635 s390_vec_write_element##BITS(v1, i, a >= b); \ 636 } \ 637 } 638 DEF_VSCBI(8) 639 DEF_VSCBI(16) 640 641 void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env, 642 uint32_t desc) 643 { 644 S390Vector tmp; 645 646 s390_vec_and(&tmp, v1, v2); 647 if (s390_vec_is_zero(&tmp)) { 648 /* Selected bits all zeros; or all mask bits zero */ 649 env->cc_op = 0; 650 } else if (s390_vec_equal(&tmp, v2)) { 651 /* Selected bits all ones */ 652 env->cc_op = 3; 653 } else { 654 /* Selected bits a mix of zeros and ones */ 655 env->cc_op = 1; 656 } 657 } 658