1 /* 2 * QEMU TCG support -- s390x vector integer instruction support 3 * 4 * Copyright (C) 2019 Red Hat Inc 5 * 6 * Authors: 7 * David Hildenbrand <david@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 #include "qemu/osdep.h" 13 #include "cpu.h" 14 #include "vec.h" 15 #include "exec/helper-proto.h" 16 #include "tcg/tcg-gvec-desc.h" 17 18 static bool s390_vec_is_zero(const S390Vector *v) 19 { 20 return !v->doubleword[0] && !v->doubleword[1]; 21 } 22 23 static void s390_vec_xor(S390Vector *res, const S390Vector *a, 24 const S390Vector *b) 25 { 26 res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0]; 27 res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1]; 28 } 29 30 static void s390_vec_and(S390Vector *res, const S390Vector *a, 31 const S390Vector *b) 32 { 33 res->doubleword[0] = a->doubleword[0] & b->doubleword[0]; 34 res->doubleword[1] = a->doubleword[1] & b->doubleword[1]; 35 } 36 37 static bool s390_vec_equal(const S390Vector *a, const S390Vector *b) 38 { 39 return a->doubleword[0] == b->doubleword[0] && 40 a->doubleword[1] == b->doubleword[1]; 41 } 42 43 static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count) 44 { 45 uint64_t tmp; 46 47 g_assert(count < 128); 48 if (count == 0) { 49 d->doubleword[0] = a->doubleword[0]; 50 d->doubleword[1] = a->doubleword[1]; 51 } else if (count == 64) { 52 d->doubleword[0] = a->doubleword[1]; 53 d->doubleword[1] = 0; 54 } else if (count < 64) { 55 tmp = extract64(a->doubleword[1], 64 - count, count); 56 d->doubleword[1] = a->doubleword[1] << count; 57 d->doubleword[0] = (a->doubleword[0] << count) | tmp; 58 } else { 59 d->doubleword[0] = a->doubleword[1] << (count - 64); 60 d->doubleword[1] = 0; 61 } 62 } 63 64 static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count) 65 { 66 uint64_t tmp; 67 68 if (count == 0) { 69 d->doubleword[0] = a->doubleword[0]; 70 d->doubleword[1] = a->doubleword[1]; 71 } else if (count == 64) { 72 tmp = (int64_t)a->doubleword[0] >> 63; 73 d->doubleword[1] = a->doubleword[0]; 74 d->doubleword[0] = tmp; 75 } else if (count < 64) { 76 tmp = a->doubleword[1] >> count; 77 d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 78 d->doubleword[0] = (int64_t)a->doubleword[0] >> count; 79 } else { 80 tmp = (int64_t)a->doubleword[0] >> 63; 81 d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64); 82 d->doubleword[0] = tmp; 83 } 84 } 85 86 static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count) 87 { 88 uint64_t tmp; 89 90 g_assert(count < 128); 91 if (count == 0) { 92 d->doubleword[0] = a->doubleword[0]; 93 d->doubleword[1] = a->doubleword[1]; 94 } else if (count == 64) { 95 d->doubleword[1] = a->doubleword[0]; 96 d->doubleword[0] = 0; 97 } else if (count < 64) { 98 tmp = a->doubleword[1] >> count; 99 d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 100 d->doubleword[0] = a->doubleword[0] >> count; 101 } else { 102 d->doubleword[1] = a->doubleword[0] >> (count - 64); 103 d->doubleword[0] = 0; 104 } 105 } 106 #define DEF_VAVG(BITS) \ 107 void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3, \ 108 uint32_t desc) \ 109 { \ 110 int i; \ 111 \ 112 for (i = 0; i < (128 / BITS); i++) { \ 113 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 114 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 115 \ 116 s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 117 } \ 118 } 119 DEF_VAVG(8) 120 DEF_VAVG(16) 121 122 #define DEF_VAVGL(BITS) \ 123 void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3, \ 124 uint32_t desc) \ 125 { \ 126 int i; \ 127 \ 128 for (i = 0; i < (128 / BITS); i++) { \ 129 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 130 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 131 \ 132 s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 133 } \ 134 } 135 DEF_VAVGL(8) 136 DEF_VAVGL(16) 137 138 #define DEF_VCLZ(BITS) \ 139 void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc) \ 140 { \ 141 int i; \ 142 \ 143 for (i = 0; i < (128 / BITS); i++) { \ 144 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 145 \ 146 s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS); \ 147 } \ 148 } 149 DEF_VCLZ(8) 150 DEF_VCLZ(16) 151 152 #define DEF_VCTZ(BITS) \ 153 void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc) \ 154 { \ 155 int i; \ 156 \ 157 for (i = 0; i < (128 / BITS); i++) { \ 158 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 159 \ 160 s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS); \ 161 } \ 162 } 163 DEF_VCTZ(8) 164 DEF_VCTZ(16) 165 166 /* like binary multiplication, but XOR instead of addition */ 167 #define DEF_GALOIS_MULTIPLY(BITS, TBITS) \ 168 static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a, \ 169 uint##TBITS##_t b) \ 170 { \ 171 uint##TBITS##_t res = 0; \ 172 \ 173 while (b) { \ 174 if (b & 0x1) { \ 175 res = res ^ a; \ 176 } \ 177 a = a << 1; \ 178 b = b >> 1; \ 179 } \ 180 return res; \ 181 } 182 DEF_GALOIS_MULTIPLY(8, 16) 183 DEF_GALOIS_MULTIPLY(16, 32) 184 DEF_GALOIS_MULTIPLY(32, 64) 185 186 static S390Vector galois_multiply64(uint64_t a, uint64_t b) 187 { 188 S390Vector res = {}; 189 S390Vector va = { 190 .doubleword[1] = a, 191 }; 192 S390Vector vb = { 193 .doubleword[1] = b, 194 }; 195 196 while (!s390_vec_is_zero(&vb)) { 197 if (vb.doubleword[1] & 0x1) { 198 s390_vec_xor(&res, &res, &va); 199 } 200 s390_vec_shl(&va, &va, 1); 201 s390_vec_shr(&vb, &vb, 1); 202 } 203 return res; 204 } 205 206 #define DEF_VGFM(BITS, TBITS) \ 207 void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3, \ 208 uint32_t desc) \ 209 { \ 210 int i; \ 211 \ 212 for (i = 0; i < (128 / TBITS); i++) { \ 213 uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \ 214 uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \ 215 uint##TBITS##_t d = galois_multiply##BITS(a, b); \ 216 \ 217 a = s390_vec_read_element##BITS(v2, i * 2 + 1); \ 218 b = s390_vec_read_element##BITS(v3, i * 2 + 1); \ 219 d = d ^ galois_multiply32(a, b); \ 220 s390_vec_write_element##TBITS(v1, i, d); \ 221 } \ 222 } 223 DEF_VGFM(8, 16) 224 DEF_VGFM(16, 32) 225 DEF_VGFM(32, 64) 226 227 void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3, 228 uint32_t desc) 229 { 230 S390Vector tmp1, tmp2; 231 uint64_t a, b; 232 233 a = s390_vec_read_element64(v2, 0); 234 b = s390_vec_read_element64(v3, 0); 235 tmp1 = galois_multiply64(a, b); 236 a = s390_vec_read_element64(v2, 1); 237 b = s390_vec_read_element64(v3, 1); 238 tmp2 = galois_multiply64(a, b); 239 s390_vec_xor(v1, &tmp1, &tmp2); 240 } 241 242 #define DEF_VGFMA(BITS, TBITS) \ 243 void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3, \ 244 const void *v4, uint32_t desc) \ 245 { \ 246 int i; \ 247 \ 248 for (i = 0; i < (128 / TBITS); i++) { \ 249 uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \ 250 uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \ 251 uint##TBITS##_t d = galois_multiply##BITS(a, b); \ 252 \ 253 a = s390_vec_read_element##BITS(v2, i * 2 + 1); \ 254 b = s390_vec_read_element##BITS(v3, i * 2 + 1); \ 255 d = d ^ galois_multiply32(a, b); \ 256 d = d ^ s390_vec_read_element##TBITS(v4, i); \ 257 s390_vec_write_element##TBITS(v1, i, d); \ 258 } \ 259 } 260 DEF_VGFMA(8, 16) 261 DEF_VGFMA(16, 32) 262 DEF_VGFMA(32, 64) 263 264 void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3, 265 const void *v4, uint32_t desc) 266 { 267 S390Vector tmp1, tmp2; 268 uint64_t a, b; 269 270 a = s390_vec_read_element64(v2, 0); 271 b = s390_vec_read_element64(v3, 0); 272 tmp1 = galois_multiply64(a, b); 273 a = s390_vec_read_element64(v2, 1); 274 b = s390_vec_read_element64(v3, 1); 275 tmp2 = galois_multiply64(a, b); 276 s390_vec_xor(&tmp1, &tmp1, &tmp2); 277 s390_vec_xor(v1, &tmp1, v4); 278 } 279 280 #define DEF_VMAL(BITS) \ 281 void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3, \ 282 const void *v4, uint32_t desc) \ 283 { \ 284 int i; \ 285 \ 286 for (i = 0; i < (128 / BITS); i++) { \ 287 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 288 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 289 const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 290 \ 291 s390_vec_write_element##BITS(v1, i, a * b + c); \ 292 } \ 293 } 294 DEF_VMAL(8) 295 DEF_VMAL(16) 296 297 #define DEF_VMAH(BITS) \ 298 void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3, \ 299 const void *v4, uint32_t desc) \ 300 { \ 301 int i; \ 302 \ 303 for (i = 0; i < (128 / BITS); i++) { \ 304 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 305 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 306 const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i); \ 307 \ 308 s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 309 } \ 310 } 311 DEF_VMAH(8) 312 DEF_VMAH(16) 313 314 #define DEF_VMALH(BITS) \ 315 void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3, \ 316 const void *v4, uint32_t desc) \ 317 { \ 318 int i; \ 319 \ 320 for (i = 0; i < (128 / BITS); i++) { \ 321 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 322 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 323 const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 324 \ 325 s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 326 } \ 327 } 328 DEF_VMALH(8) 329 DEF_VMALH(16) 330 331 #define DEF_VMAE(BITS, TBITS) \ 332 void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \ 333 const void *v4, uint32_t desc) \ 334 { \ 335 int i, j; \ 336 \ 337 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 338 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 339 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 340 int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 341 \ 342 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 343 } \ 344 } 345 DEF_VMAE(8, 16) 346 DEF_VMAE(16, 32) 347 DEF_VMAE(32, 64) 348 349 #define DEF_VMALE(BITS, TBITS) \ 350 void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \ 351 const void *v4, uint32_t desc) \ 352 { \ 353 int i, j; \ 354 \ 355 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 356 uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 357 uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 358 uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 359 \ 360 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 361 } \ 362 } 363 DEF_VMALE(8, 16) 364 DEF_VMALE(16, 32) 365 DEF_VMALE(32, 64) 366 367 #define DEF_VMAO(BITS, TBITS) \ 368 void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \ 369 const void *v4, uint32_t desc) \ 370 { \ 371 int i, j; \ 372 \ 373 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 374 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 375 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 376 int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 377 \ 378 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 379 } \ 380 } 381 DEF_VMAO(8, 16) 382 DEF_VMAO(16, 32) 383 DEF_VMAO(32, 64) 384 385 #define DEF_VMALO(BITS, TBITS) \ 386 void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \ 387 const void *v4, uint32_t desc) \ 388 { \ 389 int i, j; \ 390 \ 391 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 392 uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 393 uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 394 uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 395 \ 396 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 397 } \ 398 } 399 DEF_VMALO(8, 16) 400 DEF_VMALO(16, 32) 401 DEF_VMALO(32, 64) 402 403 #define DEF_VMH(BITS) \ 404 void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3, \ 405 uint32_t desc) \ 406 { \ 407 int i; \ 408 \ 409 for (i = 0; i < (128 / BITS); i++) { \ 410 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 411 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 412 \ 413 s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 414 } \ 415 } 416 DEF_VMH(8) 417 DEF_VMH(16) 418 419 #define DEF_VMLH(BITS) \ 420 void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3, \ 421 uint32_t desc) \ 422 { \ 423 int i; \ 424 \ 425 for (i = 0; i < (128 / BITS); i++) { \ 426 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 427 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 428 \ 429 s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 430 } \ 431 } 432 DEF_VMLH(8) 433 DEF_VMLH(16) 434 435 #define DEF_VME(BITS, TBITS) \ 436 void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3, \ 437 uint32_t desc) \ 438 { \ 439 int i, j; \ 440 \ 441 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 442 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 443 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 444 \ 445 s390_vec_write_element##TBITS(v1, i, a * b); \ 446 } \ 447 } 448 DEF_VME(8, 16) 449 DEF_VME(16, 32) 450 DEF_VME(32, 64) 451 452 #define DEF_VMLE(BITS, TBITS) \ 453 void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3, \ 454 uint32_t desc) \ 455 { \ 456 int i, j; \ 457 \ 458 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 459 const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 460 const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 461 \ 462 s390_vec_write_element##TBITS(v1, i, a * b); \ 463 } \ 464 } 465 DEF_VMLE(8, 16) 466 DEF_VMLE(16, 32) 467 DEF_VMLE(32, 64) 468 469 #define DEF_VMO(BITS, TBITS) \ 470 void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3, \ 471 uint32_t desc) \ 472 { \ 473 int i, j; \ 474 \ 475 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 476 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 477 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 478 \ 479 s390_vec_write_element##TBITS(v1, i, a * b); \ 480 } \ 481 } 482 DEF_VMO(8, 16) 483 DEF_VMO(16, 32) 484 DEF_VMO(32, 64) 485 486 #define DEF_VMLO(BITS, TBITS) \ 487 void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3, \ 488 uint32_t desc) \ 489 { \ 490 int i, j; \ 491 \ 492 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 493 const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 494 const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 495 \ 496 s390_vec_write_element##TBITS(v1, i, a * b); \ 497 } \ 498 } 499 DEF_VMLO(8, 16) 500 DEF_VMLO(16, 32) 501 DEF_VMLO(32, 64) 502 503 #define DEF_VPOPCT(BITS) \ 504 void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \ 505 { \ 506 int i; \ 507 \ 508 for (i = 0; i < (128 / BITS); i++) { \ 509 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 510 \ 511 s390_vec_write_element##BITS(v1, i, ctpop32(a)); \ 512 } \ 513 } 514 DEF_VPOPCT(8) 515 DEF_VPOPCT(16) 516 517 #define DEF_VERIM(BITS) \ 518 void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \ 519 uint32_t desc) \ 520 { \ 521 const uint8_t count = simd_data(desc); \ 522 int i; \ 523 \ 524 for (i = 0; i < (128 / BITS); i++) { \ 525 const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i); \ 526 const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i); \ 527 const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i); \ 528 const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask); \ 529 \ 530 s390_vec_write_element##BITS(v1, i, d); \ 531 } \ 532 } 533 DEF_VERIM(8) 534 DEF_VERIM(16) 535 536 void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count, 537 uint32_t desc) 538 { 539 s390_vec_shl(v1, v2, count); 540 } 541 542 void HELPER(gvec_vsl_ve2)(void *v1, const void *v2, const void *v3, 543 uint32_t desc) 544 { 545 S390Vector tmp; 546 uint32_t sh, e0, e1 = 0; 547 int i; 548 549 for (i = 15; i >= 0; --i, e1 = e0) { 550 e0 = s390_vec_read_element8(v2, i); 551 sh = s390_vec_read_element8(v3, i) & 7; 552 553 s390_vec_write_element8(&tmp, i, rol32(e0 | (e1 << 24), sh)); 554 } 555 556 *(S390Vector *)v1 = tmp; 557 } 558 559 void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count, 560 uint32_t desc) 561 { 562 s390_vec_sar(v1, v2, count); 563 } 564 565 void HELPER(gvec_vsra_ve2)(void *v1, const void *v2, const void *v3, 566 uint32_t desc) 567 { 568 S390Vector tmp; 569 uint32_t sh, e0, e1 = 0; 570 int i = 0; 571 572 /* Byte 0 is special only. */ 573 e0 = (int32_t)(int8_t)s390_vec_read_element8(v2, i); 574 sh = s390_vec_read_element8(v3, i) & 7; 575 s390_vec_write_element8(&tmp, i, e0 >> sh); 576 577 e1 = e0; 578 for (i = 1; i < 16; ++i, e1 = e0) { 579 e0 = s390_vec_read_element8(v2, i); 580 sh = s390_vec_read_element8(v3, i) & 7; 581 s390_vec_write_element8(&tmp, i, (e0 | e1 << 8) >> sh); 582 } 583 584 *(S390Vector *)v1 = tmp; 585 } 586 587 void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count, 588 uint32_t desc) 589 { 590 s390_vec_shr(v1, v2, count); 591 } 592 593 void HELPER(gvec_vsrl_ve2)(void *v1, const void *v2, const void *v3, 594 uint32_t desc) 595 { 596 S390Vector tmp; 597 uint32_t sh, e0, e1 = 0; 598 599 for (int i = 0; i < 16; ++i, e1 = e0) { 600 e0 = s390_vec_read_element8(v2, i); 601 sh = s390_vec_read_element8(v3, i) & 7; 602 603 s390_vec_write_element8(&tmp, i, (e0 | (e1 << 8)) >> sh); 604 } 605 606 *(S390Vector *)v1 = tmp; 607 } 608 609 #define DEF_VSCBI(BITS) \ 610 void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3, \ 611 uint32_t desc) \ 612 { \ 613 int i; \ 614 \ 615 for (i = 0; i < (128 / BITS); i++) { \ 616 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 617 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 618 \ 619 s390_vec_write_element##BITS(v1, i, a >= b); \ 620 } \ 621 } 622 DEF_VSCBI(8) 623 DEF_VSCBI(16) 624 625 void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env, 626 uint32_t desc) 627 { 628 S390Vector tmp; 629 630 s390_vec_and(&tmp, v1, v2); 631 if (s390_vec_is_zero(&tmp)) { 632 /* Selected bits all zeros; or all mask bits zero */ 633 env->cc_op = 0; 634 } else if (s390_vec_equal(&tmp, v2)) { 635 /* Selected bits all ones */ 636 env->cc_op = 3; 637 } else { 638 /* Selected bits a mix of zeros and ones */ 639 env->cc_op = 1; 640 } 641 } 642