1 /* 2 * QEMU TCG support -- s390x vector string instruction support 3 * 4 * Copyright (C) 2019 Red Hat Inc 5 * 6 * Authors: 7 * David Hildenbrand <david@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 #include "qemu/osdep.h" 13 #include "qemu-common.h" 14 #include "cpu.h" 15 #include "s390x-internal.h" 16 #include "vec.h" 17 #include "tcg/tcg.h" 18 #include "tcg/tcg-gvec-desc.h" 19 #include "exec/helper-proto.h" 20 21 /* 22 * Returns a bit set in the MSB of each element that is zero, 23 * as defined by the mask. 24 */ 25 static inline uint64_t zero_search(uint64_t a, uint64_t mask) 26 { 27 return ~(((a & mask) + mask) | a | mask); 28 } 29 30 /* 31 * Returns a bit set in the MSB of each element that is not zero, 32 * as defined by the mask. 33 */ 34 static inline uint64_t nonzero_search(uint64_t a, uint64_t mask) 35 { 36 return (((a & mask) + mask) | a) & ~mask; 37 } 38 39 /* 40 * Returns the byte offset for the first match, or 16 for no match. 41 */ 42 static inline int match_index(uint64_t c0, uint64_t c1) 43 { 44 return (c0 ? clz64(c0) : clz64(c1) + 64) >> 3; 45 } 46 47 /* 48 * Returns the number of bits composing one element. 49 */ 50 static uint8_t get_element_bits(uint8_t es) 51 { 52 return (1 << es) * BITS_PER_BYTE; 53 } 54 55 /* 56 * Returns the bitmask for a single element. 57 */ 58 static uint64_t get_single_element_mask(uint8_t es) 59 { 60 return -1ull >> (64 - get_element_bits(es)); 61 } 62 63 /* 64 * Returns the bitmask for a single element (excluding the MSB). 65 */ 66 static uint64_t get_single_element_lsbs_mask(uint8_t es) 67 { 68 return -1ull >> (65 - get_element_bits(es)); 69 } 70 71 /* 72 * Returns the bitmasks for multiple elements (excluding the MSBs). 73 */ 74 static uint64_t get_element_lsbs_mask(uint8_t es) 75 { 76 return dup_const(es, get_single_element_lsbs_mask(es)); 77 } 78 79 static int vfae(void *v1, const void *v2, const void *v3, bool in, 80 bool rt, bool zs, uint8_t es) 81 { 82 const uint64_t mask = get_element_lsbs_mask(es); 83 const int bits = get_element_bits(es); 84 uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1; 85 uint64_t first_zero = 16; 86 uint64_t first_equal; 87 int i; 88 89 a0 = s390_vec_read_element64(v2, 0); 90 a1 = s390_vec_read_element64(v2, 1); 91 b0 = s390_vec_read_element64(v3, 0); 92 b1 = s390_vec_read_element64(v3, 1); 93 e0 = 0; 94 e1 = 0; 95 /* compare against equality with every other element */ 96 for (i = 0; i < 64; i += bits) { 97 t0 = rol64(b0, i); 98 t1 = rol64(b1, i); 99 e0 |= zero_search(a0 ^ t0, mask); 100 e0 |= zero_search(a0 ^ t1, mask); 101 e1 |= zero_search(a1 ^ t0, mask); 102 e1 |= zero_search(a1 ^ t1, mask); 103 } 104 /* invert the result if requested - invert only the MSBs */ 105 if (in) { 106 e0 = ~e0 & ~mask; 107 e1 = ~e1 & ~mask; 108 } 109 first_equal = match_index(e0, e1); 110 111 if (zs) { 112 z0 = zero_search(a0, mask); 113 z1 = zero_search(a1, mask); 114 first_zero = match_index(z0, z1); 115 } 116 117 if (rt) { 118 e0 = (e0 >> (bits - 1)) * get_single_element_mask(es); 119 e1 = (e1 >> (bits - 1)) * get_single_element_mask(es); 120 s390_vec_write_element64(v1, 0, e0); 121 s390_vec_write_element64(v1, 1, e1); 122 } else { 123 s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero)); 124 s390_vec_write_element64(v1, 1, 0); 125 } 126 127 if (first_zero == 16 && first_equal == 16) { 128 return 3; /* no match */ 129 } else if (first_zero == 16) { 130 return 1; /* matching elements, no match for zero */ 131 } else if (first_equal < first_zero) { 132 return 2; /* matching elements before match for zero */ 133 } 134 return 0; /* match for zero */ 135 } 136 137 #define DEF_VFAE_HELPER(BITS) \ 138 void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3, \ 139 uint32_t desc) \ 140 { \ 141 const bool in = extract32(simd_data(desc), 3, 1); \ 142 const bool rt = extract32(simd_data(desc), 2, 1); \ 143 const bool zs = extract32(simd_data(desc), 1, 1); \ 144 \ 145 vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \ 146 } 147 DEF_VFAE_HELPER(8) 148 DEF_VFAE_HELPER(16) 149 DEF_VFAE_HELPER(32) 150 151 #define DEF_VFAE_CC_HELPER(BITS) \ 152 void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3, \ 153 CPUS390XState *env, uint32_t desc) \ 154 { \ 155 const bool in = extract32(simd_data(desc), 3, 1); \ 156 const bool rt = extract32(simd_data(desc), 2, 1); \ 157 const bool zs = extract32(simd_data(desc), 1, 1); \ 158 \ 159 env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \ 160 } 161 DEF_VFAE_CC_HELPER(8) 162 DEF_VFAE_CC_HELPER(16) 163 DEF_VFAE_CC_HELPER(32) 164 165 static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es) 166 { 167 const uint64_t mask = get_element_lsbs_mask(es); 168 uint64_t a0, a1, b0, b1, e0, e1, z0, z1; 169 uint64_t first_zero = 16; 170 uint64_t first_equal; 171 172 a0 = s390_vec_read_element64(v2, 0); 173 a1 = s390_vec_read_element64(v2, 1); 174 b0 = s390_vec_read_element64(v3, 0); 175 b1 = s390_vec_read_element64(v3, 1); 176 e0 = zero_search(a0 ^ b0, mask); 177 e1 = zero_search(a1 ^ b1, mask); 178 first_equal = match_index(e0, e1); 179 180 if (zs) { 181 z0 = zero_search(a0, mask); 182 z1 = zero_search(a1, mask); 183 first_zero = match_index(z0, z1); 184 } 185 186 s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero)); 187 s390_vec_write_element64(v1, 1, 0); 188 if (first_zero == 16 && first_equal == 16) { 189 return 3; /* no match */ 190 } else if (first_zero == 16) { 191 return 1; /* matching elements, no match for zero */ 192 } else if (first_equal < first_zero) { 193 return 2; /* matching elements before match for zero */ 194 } 195 return 0; /* match for zero */ 196 } 197 198 #define DEF_VFEE_HELPER(BITS) \ 199 void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3, \ 200 uint32_t desc) \ 201 { \ 202 const bool zs = extract32(simd_data(desc), 1, 1); \ 203 \ 204 vfee(v1, v2, v3, zs, MO_##BITS); \ 205 } 206 DEF_VFEE_HELPER(8) 207 DEF_VFEE_HELPER(16) 208 DEF_VFEE_HELPER(32) 209 210 #define DEF_VFEE_CC_HELPER(BITS) \ 211 void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3, \ 212 CPUS390XState *env, uint32_t desc) \ 213 { \ 214 const bool zs = extract32(simd_data(desc), 1, 1); \ 215 \ 216 env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS); \ 217 } 218 DEF_VFEE_CC_HELPER(8) 219 DEF_VFEE_CC_HELPER(16) 220 DEF_VFEE_CC_HELPER(32) 221 222 static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es) 223 { 224 const uint64_t mask = get_element_lsbs_mask(es); 225 uint64_t a0, a1, b0, b1, e0, e1, z0, z1; 226 uint64_t first_zero = 16; 227 uint64_t first_inequal; 228 bool smaller = false; 229 230 a0 = s390_vec_read_element64(v2, 0); 231 a1 = s390_vec_read_element64(v2, 1); 232 b0 = s390_vec_read_element64(v3, 0); 233 b1 = s390_vec_read_element64(v3, 1); 234 e0 = nonzero_search(a0 ^ b0, mask); 235 e1 = nonzero_search(a1 ^ b1, mask); 236 first_inequal = match_index(e0, e1); 237 238 /* identify the smaller element */ 239 if (first_inequal < 16) { 240 uint8_t enr = first_inequal / (1 << es); 241 uint32_t a = s390_vec_read_element(v2, enr, es); 242 uint32_t b = s390_vec_read_element(v3, enr, es); 243 244 smaller = a < b; 245 } 246 247 if (zs) { 248 z0 = zero_search(a0, mask); 249 z1 = zero_search(a1, mask); 250 first_zero = match_index(z0, z1); 251 } 252 253 s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero)); 254 s390_vec_write_element64(v1, 1, 0); 255 if (first_zero == 16 && first_inequal == 16) { 256 return 3; 257 } else if (first_zero < first_inequal) { 258 return 0; 259 } 260 return smaller ? 1 : 2; 261 } 262 263 #define DEF_VFENE_HELPER(BITS) \ 264 void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3, \ 265 uint32_t desc) \ 266 { \ 267 const bool zs = extract32(simd_data(desc), 1, 1); \ 268 \ 269 vfene(v1, v2, v3, zs, MO_##BITS); \ 270 } 271 DEF_VFENE_HELPER(8) 272 DEF_VFENE_HELPER(16) 273 DEF_VFENE_HELPER(32) 274 275 #define DEF_VFENE_CC_HELPER(BITS) \ 276 void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3, \ 277 CPUS390XState *env, uint32_t desc) \ 278 { \ 279 const bool zs = extract32(simd_data(desc), 1, 1); \ 280 \ 281 env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS); \ 282 } 283 DEF_VFENE_CC_HELPER(8) 284 DEF_VFENE_CC_HELPER(16) 285 DEF_VFENE_CC_HELPER(32) 286 287 static int vistr(void *v1, const void *v2, uint8_t es) 288 { 289 const uint64_t mask = get_element_lsbs_mask(es); 290 uint64_t a0 = s390_vec_read_element64(v2, 0); 291 uint64_t a1 = s390_vec_read_element64(v2, 1); 292 uint64_t z; 293 int cc = 3; 294 295 z = zero_search(a0, mask); 296 if (z) { 297 a0 &= ~(-1ull >> clz64(z)); 298 a1 = 0; 299 cc = 0; 300 } else { 301 z = zero_search(a1, mask); 302 if (z) { 303 a1 &= ~(-1ull >> clz64(z)); 304 cc = 0; 305 } 306 } 307 308 s390_vec_write_element64(v1, 0, a0); 309 s390_vec_write_element64(v1, 1, a1); 310 return cc; 311 } 312 313 #define DEF_VISTR_HELPER(BITS) \ 314 void HELPER(gvec_vistr##BITS)(void *v1, const void *v2, uint32_t desc) \ 315 { \ 316 vistr(v1, v2, MO_##BITS); \ 317 } 318 DEF_VISTR_HELPER(8) 319 DEF_VISTR_HELPER(16) 320 DEF_VISTR_HELPER(32) 321 322 #define DEF_VISTR_CC_HELPER(BITS) \ 323 void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \ 324 uint32_t desc) \ 325 { \ 326 env->cc_op = vistr(v1, v2, MO_##BITS); \ 327 } 328 DEF_VISTR_CC_HELPER(8) 329 DEF_VISTR_CC_HELPER(16) 330 DEF_VISTR_CC_HELPER(32) 331 332 static bool element_compare(uint32_t data, uint32_t l, uint8_t c) 333 { 334 const bool equal = extract32(c, 7, 1); 335 const bool lower = extract32(c, 6, 1); 336 const bool higher = extract32(c, 5, 1); 337 338 if (data < l) { 339 return lower; 340 } else if (data > l) { 341 return higher; 342 } 343 return equal; 344 } 345 346 static int vstrc(void *v1, const void *v2, const void *v3, const void *v4, 347 bool in, bool rt, bool zs, uint8_t es) 348 { 349 const uint64_t mask = get_element_lsbs_mask(es); 350 uint64_t a0 = s390_vec_read_element64(v2, 0); 351 uint64_t a1 = s390_vec_read_element64(v2, 1); 352 int first_zero = 16, first_match = 16; 353 S390Vector rt_result = {}; 354 uint64_t z0, z1; 355 int i, j; 356 357 if (zs) { 358 z0 = zero_search(a0, mask); 359 z1 = zero_search(a1, mask); 360 first_zero = match_index(z0, z1); 361 } 362 363 for (i = 0; i < 16 / (1 << es); i++) { 364 const uint32_t data = s390_vec_read_element(v2, i, es); 365 const int cur_byte = i * (1 << es); 366 bool any_match = false; 367 368 /* if we don't need a bit vector, we can stop early */ 369 if (cur_byte == first_zero && !rt) { 370 break; 371 } 372 373 for (j = 0; j < 16 / (1 << es); j += 2) { 374 const uint32_t l1 = s390_vec_read_element(v3, j, es); 375 const uint32_t l2 = s390_vec_read_element(v3, j + 1, es); 376 /* we are only interested in the highest byte of each element */ 377 const uint8_t c1 = s390_vec_read_element8(v4, j * (1 << es)); 378 const uint8_t c2 = s390_vec_read_element8(v4, (j + 1) * (1 << es)); 379 380 if (element_compare(data, l1, c1) && 381 element_compare(data, l2, c2)) { 382 any_match = true; 383 break; 384 } 385 } 386 /* invert the result if requested */ 387 any_match = in ^ any_match; 388 389 if (any_match) { 390 /* indicate bit vector if requested */ 391 if (rt) { 392 const uint64_t val = -1ull; 393 394 first_match = MIN(cur_byte, first_match); 395 s390_vec_write_element(&rt_result, i, es, val); 396 } else { 397 /* stop on the first match */ 398 first_match = cur_byte; 399 break; 400 } 401 } 402 } 403 404 if (rt) { 405 *(S390Vector *)v1 = rt_result; 406 } else { 407 s390_vec_write_element64(v1, 0, MIN(first_match, first_zero)); 408 s390_vec_write_element64(v1, 1, 0); 409 } 410 411 if (first_zero == 16 && first_match == 16) { 412 return 3; /* no match */ 413 } else if (first_zero == 16) { 414 return 1; /* matching elements, no match for zero */ 415 } else if (first_match < first_zero) { 416 return 2; /* matching elements before match for zero */ 417 } 418 return 0; /* match for zero */ 419 } 420 421 #define DEF_VSTRC_HELPER(BITS) \ 422 void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3, \ 423 const void *v4, uint32_t desc) \ 424 { \ 425 const bool in = extract32(simd_data(desc), 3, 1); \ 426 const bool zs = extract32(simd_data(desc), 1, 1); \ 427 \ 428 vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \ 429 } 430 DEF_VSTRC_HELPER(8) 431 DEF_VSTRC_HELPER(16) 432 DEF_VSTRC_HELPER(32) 433 434 #define DEF_VSTRC_RT_HELPER(BITS) \ 435 void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3, \ 436 const void *v4, uint32_t desc) \ 437 { \ 438 const bool in = extract32(simd_data(desc), 3, 1); \ 439 const bool zs = extract32(simd_data(desc), 1, 1); \ 440 \ 441 vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \ 442 } 443 DEF_VSTRC_RT_HELPER(8) 444 DEF_VSTRC_RT_HELPER(16) 445 DEF_VSTRC_RT_HELPER(32) 446 447 #define DEF_VSTRC_CC_HELPER(BITS) \ 448 void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3, \ 449 const void *v4, CPUS390XState *env, \ 450 uint32_t desc) \ 451 { \ 452 const bool in = extract32(simd_data(desc), 3, 1); \ 453 const bool zs = extract32(simd_data(desc), 1, 1); \ 454 \ 455 env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \ 456 } 457 DEF_VSTRC_CC_HELPER(8) 458 DEF_VSTRC_CC_HELPER(16) 459 DEF_VSTRC_CC_HELPER(32) 460 461 #define DEF_VSTRC_CC_RT_HELPER(BITS) \ 462 void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3, \ 463 const void *v4, CPUS390XState *env, \ 464 uint32_t desc) \ 465 { \ 466 const bool in = extract32(simd_data(desc), 3, 1); \ 467 const bool zs = extract32(simd_data(desc), 1, 1); \ 468 \ 469 env->cc_op = vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \ 470 } 471 DEF_VSTRC_CC_RT_HELPER(8) 472 DEF_VSTRC_CC_RT_HELPER(16) 473 DEF_VSTRC_CC_RT_HELPER(32) 474