1 /* 2 * Generic vectorized operation runtime 3 * 4 * Copyright (c) 2018 Linaro 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu/host-utils.h" 22 #include "cpu.h" 23 #include "exec/helper-proto.h" 24 #include "tcg/tcg-gvec-desc.h" 25 26 27 typedef uint8_t vec8; 28 typedef uint16_t vec16; 29 typedef uint32_t vec32; 30 typedef uint64_t vec64; 31 32 typedef int8_t svec8; 33 typedef int16_t svec16; 34 typedef int32_t svec32; 35 typedef int64_t svec64; 36 37 #define DUP16(X) X 38 #define DUP8(X) X 39 #define DUP4(X) X 40 #define DUP2(X) X 41 42 static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc) 43 { 44 intptr_t maxsz = simd_maxsz(desc); 45 intptr_t i; 46 47 if (unlikely(maxsz > oprsz)) { 48 for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) { 49 *(uint64_t *)(d + i) = 0; 50 } 51 } 52 } 53 54 void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc) 55 { 56 intptr_t oprsz = simd_oprsz(desc); 57 intptr_t i; 58 59 for (i = 0; i < oprsz; i += sizeof(vec8)) { 60 *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i); 61 } 62 clear_high(d, oprsz, desc); 63 } 64 65 void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc) 66 { 67 intptr_t oprsz = simd_oprsz(desc); 68 intptr_t i; 69 70 for (i = 0; i < oprsz; i += sizeof(vec16)) { 71 *(vec16 *)(d + i) = *(vec16 
*)(a + i) + *(vec16 *)(b + i); 72 } 73 clear_high(d, oprsz, desc); 74 } 75 76 void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc) 77 { 78 intptr_t oprsz = simd_oprsz(desc); 79 intptr_t i; 80 81 for (i = 0; i < oprsz; i += sizeof(vec32)) { 82 *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i); 83 } 84 clear_high(d, oprsz, desc); 85 } 86 87 void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc) 88 { 89 intptr_t oprsz = simd_oprsz(desc); 90 intptr_t i; 91 92 for (i = 0; i < oprsz; i += sizeof(vec64)) { 93 *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i); 94 } 95 clear_high(d, oprsz, desc); 96 } 97 98 void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc) 99 { 100 intptr_t oprsz = simd_oprsz(desc); 101 vec8 vecb = (vec8)DUP16(b); 102 intptr_t i; 103 104 for (i = 0; i < oprsz; i += sizeof(vec8)) { 105 *(vec8 *)(d + i) = *(vec8 *)(a + i) + vecb; 106 } 107 clear_high(d, oprsz, desc); 108 } 109 110 void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc) 111 { 112 intptr_t oprsz = simd_oprsz(desc); 113 vec16 vecb = (vec16)DUP8(b); 114 intptr_t i; 115 116 for (i = 0; i < oprsz; i += sizeof(vec16)) { 117 *(vec16 *)(d + i) = *(vec16 *)(a + i) + vecb; 118 } 119 clear_high(d, oprsz, desc); 120 } 121 122 void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc) 123 { 124 intptr_t oprsz = simd_oprsz(desc); 125 vec32 vecb = (vec32)DUP4(b); 126 intptr_t i; 127 128 for (i = 0; i < oprsz; i += sizeof(vec32)) { 129 *(vec32 *)(d + i) = *(vec32 *)(a + i) + vecb; 130 } 131 clear_high(d, oprsz, desc); 132 } 133 134 void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc) 135 { 136 intptr_t oprsz = simd_oprsz(desc); 137 vec64 vecb = (vec64)DUP2(b); 138 intptr_t i; 139 140 for (i = 0; i < oprsz; i += sizeof(vec64)) { 141 *(vec64 *)(d + i) = *(vec64 *)(a + i) + vecb; 142 } 143 clear_high(d, oprsz, desc); 144 } 145 146 void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc) 147 { 148 intptr_t 
oprsz = simd_oprsz(desc); 149 intptr_t i; 150 151 for (i = 0; i < oprsz; i += sizeof(vec8)) { 152 *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i); 153 } 154 clear_high(d, oprsz, desc); 155 } 156 157 void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc) 158 { 159 intptr_t oprsz = simd_oprsz(desc); 160 intptr_t i; 161 162 for (i = 0; i < oprsz; i += sizeof(vec16)) { 163 *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i); 164 } 165 clear_high(d, oprsz, desc); 166 } 167 168 void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc) 169 { 170 intptr_t oprsz = simd_oprsz(desc); 171 intptr_t i; 172 173 for (i = 0; i < oprsz; i += sizeof(vec32)) { 174 *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i); 175 } 176 clear_high(d, oprsz, desc); 177 } 178 179 void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc) 180 { 181 intptr_t oprsz = simd_oprsz(desc); 182 intptr_t i; 183 184 for (i = 0; i < oprsz; i += sizeof(vec64)) { 185 *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i); 186 } 187 clear_high(d, oprsz, desc); 188 } 189 190 void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc) 191 { 192 intptr_t oprsz = simd_oprsz(desc); 193 vec8 vecb = (vec8)DUP16(b); 194 intptr_t i; 195 196 for (i = 0; i < oprsz; i += sizeof(vec8)) { 197 *(vec8 *)(d + i) = *(vec8 *)(a + i) - vecb; 198 } 199 clear_high(d, oprsz, desc); 200 } 201 202 void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc) 203 { 204 intptr_t oprsz = simd_oprsz(desc); 205 vec16 vecb = (vec16)DUP8(b); 206 intptr_t i; 207 208 for (i = 0; i < oprsz; i += sizeof(vec16)) { 209 *(vec16 *)(d + i) = *(vec16 *)(a + i) - vecb; 210 } 211 clear_high(d, oprsz, desc); 212 } 213 214 void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc) 215 { 216 intptr_t oprsz = simd_oprsz(desc); 217 vec32 vecb = (vec32)DUP4(b); 218 intptr_t i; 219 220 for (i = 0; i < oprsz; i += sizeof(vec32)) { 221 *(vec32 *)(d + i) = *(vec32 *)(a + i) - vecb; 222 } 
223 clear_high(d, oprsz, desc); 224 } 225 226 void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc) 227 { 228 intptr_t oprsz = simd_oprsz(desc); 229 vec64 vecb = (vec64)DUP2(b); 230 intptr_t i; 231 232 for (i = 0; i < oprsz; i += sizeof(vec64)) { 233 *(vec64 *)(d + i) = *(vec64 *)(a + i) - vecb; 234 } 235 clear_high(d, oprsz, desc); 236 } 237 238 void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc) 239 { 240 intptr_t oprsz = simd_oprsz(desc); 241 intptr_t i; 242 243 for (i = 0; i < oprsz; i += sizeof(vec8)) { 244 *(vec8 *)(d + i) = *(vec8 *)(a + i) * *(vec8 *)(b + i); 245 } 246 clear_high(d, oprsz, desc); 247 } 248 249 void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc) 250 { 251 intptr_t oprsz = simd_oprsz(desc); 252 intptr_t i; 253 254 for (i = 0; i < oprsz; i += sizeof(vec16)) { 255 *(vec16 *)(d + i) = *(vec16 *)(a + i) * *(vec16 *)(b + i); 256 } 257 clear_high(d, oprsz, desc); 258 } 259 260 void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc) 261 { 262 intptr_t oprsz = simd_oprsz(desc); 263 intptr_t i; 264 265 for (i = 0; i < oprsz; i += sizeof(vec32)) { 266 *(vec32 *)(d + i) = *(vec32 *)(a + i) * *(vec32 *)(b + i); 267 } 268 clear_high(d, oprsz, desc); 269 } 270 271 void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc) 272 { 273 intptr_t oprsz = simd_oprsz(desc); 274 intptr_t i; 275 276 for (i = 0; i < oprsz; i += sizeof(vec64)) { 277 *(vec64 *)(d + i) = *(vec64 *)(a + i) * *(vec64 *)(b + i); 278 } 279 clear_high(d, oprsz, desc); 280 } 281 282 void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc) 283 { 284 intptr_t oprsz = simd_oprsz(desc); 285 vec8 vecb = (vec8)DUP16(b); 286 intptr_t i; 287 288 for (i = 0; i < oprsz; i += sizeof(vec8)) { 289 *(vec8 *)(d + i) = *(vec8 *)(a + i) * vecb; 290 } 291 clear_high(d, oprsz, desc); 292 } 293 294 void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc) 295 { 296 intptr_t oprsz = simd_oprsz(desc); 297 vec16 vecb = 
(vec16)DUP8(b); 298 intptr_t i; 299 300 for (i = 0; i < oprsz; i += sizeof(vec16)) { 301 *(vec16 *)(d + i) = *(vec16 *)(a + i) * vecb; 302 } 303 clear_high(d, oprsz, desc); 304 } 305 306 void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc) 307 { 308 intptr_t oprsz = simd_oprsz(desc); 309 vec32 vecb = (vec32)DUP4(b); 310 intptr_t i; 311 312 for (i = 0; i < oprsz; i += sizeof(vec32)) { 313 *(vec32 *)(d + i) = *(vec32 *)(a + i) * vecb; 314 } 315 clear_high(d, oprsz, desc); 316 } 317 318 void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc) 319 { 320 intptr_t oprsz = simd_oprsz(desc); 321 vec64 vecb = (vec64)DUP2(b); 322 intptr_t i; 323 324 for (i = 0; i < oprsz; i += sizeof(vec64)) { 325 *(vec64 *)(d + i) = *(vec64 *)(a + i) * vecb; 326 } 327 clear_high(d, oprsz, desc); 328 } 329 330 void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc) 331 { 332 intptr_t oprsz = simd_oprsz(desc); 333 intptr_t i; 334 335 for (i = 0; i < oprsz; i += sizeof(vec8)) { 336 *(vec8 *)(d + i) = -*(vec8 *)(a + i); 337 } 338 clear_high(d, oprsz, desc); 339 } 340 341 void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc) 342 { 343 intptr_t oprsz = simd_oprsz(desc); 344 intptr_t i; 345 346 for (i = 0; i < oprsz; i += sizeof(vec16)) { 347 *(vec16 *)(d + i) = -*(vec16 *)(a + i); 348 } 349 clear_high(d, oprsz, desc); 350 } 351 352 void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc) 353 { 354 intptr_t oprsz = simd_oprsz(desc); 355 intptr_t i; 356 357 for (i = 0; i < oprsz; i += sizeof(vec32)) { 358 *(vec32 *)(d + i) = -*(vec32 *)(a + i); 359 } 360 clear_high(d, oprsz, desc); 361 } 362 363 void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc) 364 { 365 intptr_t oprsz = simd_oprsz(desc); 366 intptr_t i; 367 368 for (i = 0; i < oprsz; i += sizeof(vec64)) { 369 *(vec64 *)(d + i) = -*(vec64 *)(a + i); 370 } 371 clear_high(d, oprsz, desc); 372 } 373 374 void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc) 375 { 376 intptr_t oprsz = simd_oprsz(desc); 377 
intptr_t i; 378 379 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 380 int8_t aa = *(int8_t *)(a + i); 381 *(int8_t *)(d + i) = aa < 0 ? -aa : aa; 382 } 383 clear_high(d, oprsz, desc); 384 } 385 386 void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc) 387 { 388 intptr_t oprsz = simd_oprsz(desc); 389 intptr_t i; 390 391 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 392 int16_t aa = *(int16_t *)(a + i); 393 *(int16_t *)(d + i) = aa < 0 ? -aa : aa; 394 } 395 clear_high(d, oprsz, desc); 396 } 397 398 void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc) 399 { 400 intptr_t oprsz = simd_oprsz(desc); 401 intptr_t i; 402 403 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 404 int32_t aa = *(int32_t *)(a + i); 405 *(int32_t *)(d + i) = aa < 0 ? -aa : aa; 406 } 407 clear_high(d, oprsz, desc); 408 } 409 410 void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc) 411 { 412 intptr_t oprsz = simd_oprsz(desc); 413 intptr_t i; 414 415 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 416 int64_t aa = *(int64_t *)(a + i); 417 *(int64_t *)(d + i) = aa < 0 ? 
-aa : aa; 418 } 419 clear_high(d, oprsz, desc); 420 } 421 422 void HELPER(gvec_mov)(void *d, void *a, uint32_t desc) 423 { 424 intptr_t oprsz = simd_oprsz(desc); 425 426 memcpy(d, a, oprsz); 427 clear_high(d, oprsz, desc); 428 } 429 430 void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c) 431 { 432 intptr_t oprsz = simd_oprsz(desc); 433 intptr_t i; 434 435 if (c == 0) { 436 oprsz = 0; 437 } else { 438 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 439 *(uint64_t *)(d + i) = c; 440 } 441 } 442 clear_high(d, oprsz, desc); 443 } 444 445 void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c) 446 { 447 intptr_t oprsz = simd_oprsz(desc); 448 intptr_t i; 449 450 if (c == 0) { 451 oprsz = 0; 452 } else { 453 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 454 *(uint32_t *)(d + i) = c; 455 } 456 } 457 clear_high(d, oprsz, desc); 458 } 459 460 void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c) 461 { 462 HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff)); 463 } 464 465 void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c) 466 { 467 HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff)); 468 } 469 470 void HELPER(gvec_not)(void *d, void *a, uint32_t desc) 471 { 472 intptr_t oprsz = simd_oprsz(desc); 473 intptr_t i; 474 475 for (i = 0; i < oprsz; i += sizeof(vec64)) { 476 *(vec64 *)(d + i) = ~*(vec64 *)(a + i); 477 } 478 clear_high(d, oprsz, desc); 479 } 480 481 void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc) 482 { 483 intptr_t oprsz = simd_oprsz(desc); 484 intptr_t i; 485 486 for (i = 0; i < oprsz; i += sizeof(vec64)) { 487 *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i); 488 } 489 clear_high(d, oprsz, desc); 490 } 491 492 void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc) 493 { 494 intptr_t oprsz = simd_oprsz(desc); 495 intptr_t i; 496 497 for (i = 0; i < oprsz; i += sizeof(vec64)) { 498 *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i); 499 } 500 clear_high(d, oprsz, desc); 501 } 502 503 void 
HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc) 504 { 505 intptr_t oprsz = simd_oprsz(desc); 506 intptr_t i; 507 508 for (i = 0; i < oprsz; i += sizeof(vec64)) { 509 *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i); 510 } 511 clear_high(d, oprsz, desc); 512 } 513 514 void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc) 515 { 516 intptr_t oprsz = simd_oprsz(desc); 517 intptr_t i; 518 519 for (i = 0; i < oprsz; i += sizeof(vec64)) { 520 *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i); 521 } 522 clear_high(d, oprsz, desc); 523 } 524 525 void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc) 526 { 527 intptr_t oprsz = simd_oprsz(desc); 528 intptr_t i; 529 530 for (i = 0; i < oprsz; i += sizeof(vec64)) { 531 *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i); 532 } 533 clear_high(d, oprsz, desc); 534 } 535 536 void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc) 537 { 538 intptr_t oprsz = simd_oprsz(desc); 539 intptr_t i; 540 541 for (i = 0; i < oprsz; i += sizeof(vec64)) { 542 *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) & *(vec64 *)(b + i)); 543 } 544 clear_high(d, oprsz, desc); 545 } 546 547 void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc) 548 { 549 intptr_t oprsz = simd_oprsz(desc); 550 intptr_t i; 551 552 for (i = 0; i < oprsz; i += sizeof(vec64)) { 553 *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) | *(vec64 *)(b + i)); 554 } 555 clear_high(d, oprsz, desc); 556 } 557 558 void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc) 559 { 560 intptr_t oprsz = simd_oprsz(desc); 561 intptr_t i; 562 563 for (i = 0; i < oprsz; i += sizeof(vec64)) { 564 *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) ^ *(vec64 *)(b + i)); 565 } 566 clear_high(d, oprsz, desc); 567 } 568 569 void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc) 570 { 571 intptr_t oprsz = simd_oprsz(desc); 572 vec64 vecb = (vec64)DUP2(b); 573 intptr_t i; 574 575 for (i = 0; i < oprsz; i += sizeof(vec64)) { 576 *(vec64 *)(d + 
i) = *(vec64 *)(a + i) & vecb; 577 } 578 clear_high(d, oprsz, desc); 579 } 580 581 void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc) 582 { 583 intptr_t oprsz = simd_oprsz(desc); 584 vec64 vecb = (vec64)DUP2(b); 585 intptr_t i; 586 587 for (i = 0; i < oprsz; i += sizeof(vec64)) { 588 *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ vecb; 589 } 590 clear_high(d, oprsz, desc); 591 } 592 593 void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc) 594 { 595 intptr_t oprsz = simd_oprsz(desc); 596 vec64 vecb = (vec64)DUP2(b); 597 intptr_t i; 598 599 for (i = 0; i < oprsz; i += sizeof(vec64)) { 600 *(vec64 *)(d + i) = *(vec64 *)(a + i) | vecb; 601 } 602 clear_high(d, oprsz, desc); 603 } 604 605 void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc) 606 { 607 intptr_t oprsz = simd_oprsz(desc); 608 int shift = simd_data(desc); 609 intptr_t i; 610 611 for (i = 0; i < oprsz; i += sizeof(vec8)) { 612 *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift; 613 } 614 clear_high(d, oprsz, desc); 615 } 616 617 void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc) 618 { 619 intptr_t oprsz = simd_oprsz(desc); 620 int shift = simd_data(desc); 621 intptr_t i; 622 623 for (i = 0; i < oprsz; i += sizeof(vec16)) { 624 *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift; 625 } 626 clear_high(d, oprsz, desc); 627 } 628 629 void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc) 630 { 631 intptr_t oprsz = simd_oprsz(desc); 632 int shift = simd_data(desc); 633 intptr_t i; 634 635 for (i = 0; i < oprsz; i += sizeof(vec32)) { 636 *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift; 637 } 638 clear_high(d, oprsz, desc); 639 } 640 641 void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc) 642 { 643 intptr_t oprsz = simd_oprsz(desc); 644 int shift = simd_data(desc); 645 intptr_t i; 646 647 for (i = 0; i < oprsz; i += sizeof(vec64)) { 648 *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift; 649 } 650 clear_high(d, oprsz, desc); 651 } 652 653 void HELPER(gvec_shr8i)(void *d, void *a, uint32_t 
desc) 654 { 655 intptr_t oprsz = simd_oprsz(desc); 656 int shift = simd_data(desc); 657 intptr_t i; 658 659 for (i = 0; i < oprsz; i += sizeof(vec8)) { 660 *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift; 661 } 662 clear_high(d, oprsz, desc); 663 } 664 665 void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc) 666 { 667 intptr_t oprsz = simd_oprsz(desc); 668 int shift = simd_data(desc); 669 intptr_t i; 670 671 for (i = 0; i < oprsz; i += sizeof(vec16)) { 672 *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift; 673 } 674 clear_high(d, oprsz, desc); 675 } 676 677 void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc) 678 { 679 intptr_t oprsz = simd_oprsz(desc); 680 int shift = simd_data(desc); 681 intptr_t i; 682 683 for (i = 0; i < oprsz; i += sizeof(vec32)) { 684 *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift; 685 } 686 clear_high(d, oprsz, desc); 687 } 688 689 void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc) 690 { 691 intptr_t oprsz = simd_oprsz(desc); 692 int shift = simd_data(desc); 693 intptr_t i; 694 695 for (i = 0; i < oprsz; i += sizeof(vec64)) { 696 *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift; 697 } 698 clear_high(d, oprsz, desc); 699 } 700 701 void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc) 702 { 703 intptr_t oprsz = simd_oprsz(desc); 704 int shift = simd_data(desc); 705 intptr_t i; 706 707 for (i = 0; i < oprsz; i += sizeof(vec8)) { 708 *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift; 709 } 710 clear_high(d, oprsz, desc); 711 } 712 713 void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc) 714 { 715 intptr_t oprsz = simd_oprsz(desc); 716 int shift = simd_data(desc); 717 intptr_t i; 718 719 for (i = 0; i < oprsz; i += sizeof(vec16)) { 720 *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift; 721 } 722 clear_high(d, oprsz, desc); 723 } 724 725 void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc) 726 { 727 intptr_t oprsz = simd_oprsz(desc); 728 int shift = simd_data(desc); 729 intptr_t i; 730 731 for (i = 0; i < oprsz; i += 
sizeof(vec32)) { 732 *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift; 733 } 734 clear_high(d, oprsz, desc); 735 } 736 737 void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) 738 { 739 intptr_t oprsz = simd_oprsz(desc); 740 int shift = simd_data(desc); 741 intptr_t i; 742 743 for (i = 0; i < oprsz; i += sizeof(vec64)) { 744 *(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift; 745 } 746 clear_high(d, oprsz, desc); 747 } 748 749 void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc) 750 { 751 intptr_t oprsz = simd_oprsz(desc); 752 intptr_t i; 753 754 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 755 uint8_t sh = *(uint8_t *)(b + i) & 7; 756 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh; 757 } 758 clear_high(d, oprsz, desc); 759 } 760 761 void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc) 762 { 763 intptr_t oprsz = simd_oprsz(desc); 764 intptr_t i; 765 766 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 767 uint8_t sh = *(uint16_t *)(b + i) & 15; 768 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh; 769 } 770 clear_high(d, oprsz, desc); 771 } 772 773 void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc) 774 { 775 intptr_t oprsz = simd_oprsz(desc); 776 intptr_t i; 777 778 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 779 uint8_t sh = *(uint32_t *)(b + i) & 31; 780 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh; 781 } 782 clear_high(d, oprsz, desc); 783 } 784 785 void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc) 786 { 787 intptr_t oprsz = simd_oprsz(desc); 788 intptr_t i; 789 790 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 791 uint8_t sh = *(uint64_t *)(b + i) & 63; 792 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh; 793 } 794 clear_high(d, oprsz, desc); 795 } 796 797 void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc) 798 { 799 intptr_t oprsz = simd_oprsz(desc); 800 intptr_t i; 801 802 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 803 uint8_t sh = *(uint8_t *)(b + i) & 7; 804 
*(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh; 805 } 806 clear_high(d, oprsz, desc); 807 } 808 809 void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc) 810 { 811 intptr_t oprsz = simd_oprsz(desc); 812 intptr_t i; 813 814 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 815 uint8_t sh = *(uint16_t *)(b + i) & 15; 816 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh; 817 } 818 clear_high(d, oprsz, desc); 819 } 820 821 void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc) 822 { 823 intptr_t oprsz = simd_oprsz(desc); 824 intptr_t i; 825 826 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 827 uint8_t sh = *(uint32_t *)(b + i) & 31; 828 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh; 829 } 830 clear_high(d, oprsz, desc); 831 } 832 833 void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc) 834 { 835 intptr_t oprsz = simd_oprsz(desc); 836 intptr_t i; 837 838 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 839 uint8_t sh = *(uint64_t *)(b + i) & 63; 840 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh; 841 } 842 clear_high(d, oprsz, desc); 843 } 844 845 void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc) 846 { 847 intptr_t oprsz = simd_oprsz(desc); 848 intptr_t i; 849 850 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 851 uint8_t sh = *(uint8_t *)(b + i) & 7; 852 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh; 853 } 854 clear_high(d, oprsz, desc); 855 } 856 857 void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc) 858 { 859 intptr_t oprsz = simd_oprsz(desc); 860 intptr_t i; 861 862 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 863 uint8_t sh = *(uint16_t *)(b + i) & 15; 864 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh; 865 } 866 clear_high(d, oprsz, desc); 867 } 868 869 void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc) 870 { 871 intptr_t oprsz = simd_oprsz(desc); 872 intptr_t i; 873 874 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 875 uint8_t sh = *(uint32_t *)(b + i) & 31; 876 
*(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh; 877 } 878 clear_high(d, oprsz, desc); 879 } 880 881 void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc) 882 { 883 intptr_t oprsz = simd_oprsz(desc); 884 intptr_t i; 885 886 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 887 uint8_t sh = *(uint64_t *)(b + i) & 63; 888 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh; 889 } 890 clear_high(d, oprsz, desc); 891 } 892 893 #define DO_CMP0(X) -(X) 894 895 #define DO_CMP1(NAME, TYPE, OP) \ 896 void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \ 897 { \ 898 intptr_t oprsz = simd_oprsz(desc); \ 899 intptr_t i; \ 900 for (i = 0; i < oprsz; i += sizeof(TYPE)) { \ 901 *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \ 902 } \ 903 clear_high(d, oprsz, desc); \ 904 } 905 906 #define DO_CMP2(SZ) \ 907 DO_CMP1(gvec_eq##SZ, vec##SZ, ==) \ 908 DO_CMP1(gvec_ne##SZ, vec##SZ, !=) \ 909 DO_CMP1(gvec_lt##SZ, svec##SZ, <) \ 910 DO_CMP1(gvec_le##SZ, svec##SZ, <=) \ 911 DO_CMP1(gvec_ltu##SZ, vec##SZ, <) \ 912 DO_CMP1(gvec_leu##SZ, vec##SZ, <=) 913 914 DO_CMP2(8) 915 DO_CMP2(16) 916 DO_CMP2(32) 917 DO_CMP2(64) 918 919 #undef DO_CMP0 920 #undef DO_CMP1 921 #undef DO_CMP2 922 923 void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc) 924 { 925 intptr_t oprsz = simd_oprsz(desc); 926 intptr_t i; 927 928 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 929 int r = *(int8_t *)(a + i) + *(int8_t *)(b + i); 930 if (r > INT8_MAX) { 931 r = INT8_MAX; 932 } else if (r < INT8_MIN) { 933 r = INT8_MIN; 934 } 935 *(int8_t *)(d + i) = r; 936 } 937 clear_high(d, oprsz, desc); 938 } 939 940 void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc) 941 { 942 intptr_t oprsz = simd_oprsz(desc); 943 intptr_t i; 944 945 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 946 int r = *(int16_t *)(a + i) + *(int16_t *)(b + i); 947 if (r > INT16_MAX) { 948 r = INT16_MAX; 949 } else if (r < INT16_MIN) { 950 r = INT16_MIN; 951 } 952 *(int16_t *)(d + i) = r; 953 } 954 
clear_high(d, oprsz, desc); 955 } 956 957 void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc) 958 { 959 intptr_t oprsz = simd_oprsz(desc); 960 intptr_t i; 961 962 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 963 int32_t ai = *(int32_t *)(a + i); 964 int32_t bi = *(int32_t *)(b + i); 965 int32_t di = ai + bi; 966 if (((di ^ ai) &~ (ai ^ bi)) < 0) { 967 /* Signed overflow. */ 968 di = (di < 0 ? INT32_MAX : INT32_MIN); 969 } 970 *(int32_t *)(d + i) = di; 971 } 972 clear_high(d, oprsz, desc); 973 } 974 975 void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc) 976 { 977 intptr_t oprsz = simd_oprsz(desc); 978 intptr_t i; 979 980 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 981 int64_t ai = *(int64_t *)(a + i); 982 int64_t bi = *(int64_t *)(b + i); 983 int64_t di = ai + bi; 984 if (((di ^ ai) &~ (ai ^ bi)) < 0) { 985 /* Signed overflow. */ 986 di = (di < 0 ? INT64_MAX : INT64_MIN); 987 } 988 *(int64_t *)(d + i) = di; 989 } 990 clear_high(d, oprsz, desc); 991 } 992 993 void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc) 994 { 995 intptr_t oprsz = simd_oprsz(desc); 996 intptr_t i; 997 998 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 999 int r = *(int8_t *)(a + i) - *(int8_t *)(b + i); 1000 if (r > INT8_MAX) { 1001 r = INT8_MAX; 1002 } else if (r < INT8_MIN) { 1003 r = INT8_MIN; 1004 } 1005 *(uint8_t *)(d + i) = r; 1006 } 1007 clear_high(d, oprsz, desc); 1008 } 1009 1010 void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc) 1011 { 1012 intptr_t oprsz = simd_oprsz(desc); 1013 intptr_t i; 1014 1015 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1016 int r = *(int16_t *)(a + i) - *(int16_t *)(b + i); 1017 if (r > INT16_MAX) { 1018 r = INT16_MAX; 1019 } else if (r < INT16_MIN) { 1020 r = INT16_MIN; 1021 } 1022 *(int16_t *)(d + i) = r; 1023 } 1024 clear_high(d, oprsz, desc); 1025 } 1026 1027 void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc) 1028 { 1029 intptr_t oprsz = simd_oprsz(desc); 1030 
intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
        int32_t ai = *(int32_t *)(a + i);
        int32_t bi = *(int32_t *)(b + i);
        int32_t di = ai - bi;
        /* Overflow iff the result sign differs from a while a, b differ.  */
        if (((di ^ ai) & (ai ^ bi)) < 0) {
            /* Signed overflow. */
            di = (di < 0 ? INT32_MAX : INT32_MIN);
        }
        *(int32_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}

/* Signed saturating subtraction, 64-bit elements; same sign-bit test.  */
void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
        int64_t ai = *(int64_t *)(a + i);
        int64_t bi = *(int64_t *)(b + i);
        int64_t di = ai - bi;
        if (((di ^ ai) & (ai ^ bi)) < 0) {
            /* Signed overflow. */
            di = (di < 0 ? INT64_MAX : INT64_MIN);
        }
        *(int64_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}

/*
 * Unsigned saturating addition, 8-bit elements.  The sum fits in a
 * full unsigned int, so saturation is a simple range check.
 */
void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
        if (r > UINT8_MAX) {
            r = UINT8_MAX;
        }
        *(uint8_t *)(d + i) = r;
    }
    clear_high(d, oprsz, desc);
}

/* Unsigned saturating addition, 16-bit elements.  */
void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
        if (r > UINT16_MAX) {
            r = UINT16_MAX;
        }
        *(uint16_t *)(d + i) = r;
    }
    clear_high(d, oprsz, desc);
}

/*
 * Unsigned saturating addition, 32-bit elements.  Unsigned addition
 * wraps, so overflow shows up as the sum being smaller than an input.
 */
void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        uint32_t ai = *(uint32_t *)(a + i);
        uint32_t bi = *(uint32_t *)(b + i);
        uint32_t di = ai + bi;
        if (di < ai) {
            di = UINT32_MAX;
        }
        *(uint32_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}

/* Unsigned saturating addition, 64-bit elements; same wrap test.  */
void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        uint64_t ai = *(uint64_t *)(a + i);
        uint64_t bi = *(uint64_t *)(b + i);
        uint64_t di = ai + bi;
        if (di < ai) {
            di = UINT64_MAX;
        }
        *(uint64_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}

/*
 * Unsigned saturating subtraction, 8-bit elements: the signed int
 * difference goes negative exactly on underflow, clamping to 0.
 */
void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
        if (r < 0) {
            r = 0;
        }
        *(uint8_t *)(d + i) = r;
    }
    clear_high(d, oprsz, desc);
}

/* Unsigned saturating subtraction, 16-bit elements.  */
void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
        if (r < 0) {
            r = 0;
        }
        *(uint16_t *)(d + i) = r;
    }
    clear_high(d, oprsz, desc);
}

/*
 * Unsigned saturating subtraction, 32-bit elements: compare the
 * operands directly to detect underflow.
 */
void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        uint32_t ai = *(uint32_t *)(a + i);
        uint32_t bi = *(uint32_t *)(b + i);
        uint32_t di = ai - bi;
        if (ai < bi) {
            di = 0;
        }
        *(uint32_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}

/* Unsigned saturating subtraction, 64-bit elements.  */
void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        uint64_t ai = *(uint64_t *)(a + i);
        uint64_t bi = *(uint64_t *)(b + i);
uint64_t di = ai - bi; 1183 if (ai < bi) { 1184 di = 0; 1185 } 1186 *(uint64_t *)(d + i) = di; 1187 } 1188 clear_high(d, oprsz, desc); 1189 } 1190 1191 void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc) 1192 { 1193 intptr_t oprsz = simd_oprsz(desc); 1194 intptr_t i; 1195 1196 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1197 int8_t aa = *(int8_t *)(a + i); 1198 int8_t bb = *(int8_t *)(b + i); 1199 int8_t dd = aa < bb ? aa : bb; 1200 *(int8_t *)(d + i) = dd; 1201 } 1202 clear_high(d, oprsz, desc); 1203 } 1204 1205 void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc) 1206 { 1207 intptr_t oprsz = simd_oprsz(desc); 1208 intptr_t i; 1209 1210 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1211 int16_t aa = *(int16_t *)(a + i); 1212 int16_t bb = *(int16_t *)(b + i); 1213 int16_t dd = aa < bb ? aa : bb; 1214 *(int16_t *)(d + i) = dd; 1215 } 1216 clear_high(d, oprsz, desc); 1217 } 1218 1219 void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc) 1220 { 1221 intptr_t oprsz = simd_oprsz(desc); 1222 intptr_t i; 1223 1224 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1225 int32_t aa = *(int32_t *)(a + i); 1226 int32_t bb = *(int32_t *)(b + i); 1227 int32_t dd = aa < bb ? aa : bb; 1228 *(int32_t *)(d + i) = dd; 1229 } 1230 clear_high(d, oprsz, desc); 1231 } 1232 1233 void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc) 1234 { 1235 intptr_t oprsz = simd_oprsz(desc); 1236 intptr_t i; 1237 1238 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1239 int64_t aa = *(int64_t *)(a + i); 1240 int64_t bb = *(int64_t *)(b + i); 1241 int64_t dd = aa < bb ? aa : bb; 1242 *(int64_t *)(d + i) = dd; 1243 } 1244 clear_high(d, oprsz, desc); 1245 } 1246 1247 void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc) 1248 { 1249 intptr_t oprsz = simd_oprsz(desc); 1250 intptr_t i; 1251 1252 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1253 int8_t aa = *(int8_t *)(a + i); 1254 int8_t bb = *(int8_t *)(b + i); 1255 int8_t dd = aa > bb ? 
aa : bb; 1256 *(int8_t *)(d + i) = dd; 1257 } 1258 clear_high(d, oprsz, desc); 1259 } 1260 1261 void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc) 1262 { 1263 intptr_t oprsz = simd_oprsz(desc); 1264 intptr_t i; 1265 1266 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1267 int16_t aa = *(int16_t *)(a + i); 1268 int16_t bb = *(int16_t *)(b + i); 1269 int16_t dd = aa > bb ? aa : bb; 1270 *(int16_t *)(d + i) = dd; 1271 } 1272 clear_high(d, oprsz, desc); 1273 } 1274 1275 void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc) 1276 { 1277 intptr_t oprsz = simd_oprsz(desc); 1278 intptr_t i; 1279 1280 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1281 int32_t aa = *(int32_t *)(a + i); 1282 int32_t bb = *(int32_t *)(b + i); 1283 int32_t dd = aa > bb ? aa : bb; 1284 *(int32_t *)(d + i) = dd; 1285 } 1286 clear_high(d, oprsz, desc); 1287 } 1288 1289 void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc) 1290 { 1291 intptr_t oprsz = simd_oprsz(desc); 1292 intptr_t i; 1293 1294 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1295 int64_t aa = *(int64_t *)(a + i); 1296 int64_t bb = *(int64_t *)(b + i); 1297 int64_t dd = aa > bb ? aa : bb; 1298 *(int64_t *)(d + i) = dd; 1299 } 1300 clear_high(d, oprsz, desc); 1301 } 1302 1303 void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc) 1304 { 1305 intptr_t oprsz = simd_oprsz(desc); 1306 intptr_t i; 1307 1308 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1309 uint8_t aa = *(uint8_t *)(a + i); 1310 uint8_t bb = *(uint8_t *)(b + i); 1311 uint8_t dd = aa < bb ? aa : bb; 1312 *(uint8_t *)(d + i) = dd; 1313 } 1314 clear_high(d, oprsz, desc); 1315 } 1316 1317 void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc) 1318 { 1319 intptr_t oprsz = simd_oprsz(desc); 1320 intptr_t i; 1321 1322 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1323 uint16_t aa = *(uint16_t *)(a + i); 1324 uint16_t bb = *(uint16_t *)(b + i); 1325 uint16_t dd = aa < bb ? 
aa : bb; 1326 *(uint16_t *)(d + i) = dd; 1327 } 1328 clear_high(d, oprsz, desc); 1329 } 1330 1331 void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc) 1332 { 1333 intptr_t oprsz = simd_oprsz(desc); 1334 intptr_t i; 1335 1336 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1337 uint32_t aa = *(uint32_t *)(a + i); 1338 uint32_t bb = *(uint32_t *)(b + i); 1339 uint32_t dd = aa < bb ? aa : bb; 1340 *(uint32_t *)(d + i) = dd; 1341 } 1342 clear_high(d, oprsz, desc); 1343 } 1344 1345 void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc) 1346 { 1347 intptr_t oprsz = simd_oprsz(desc); 1348 intptr_t i; 1349 1350 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1351 uint64_t aa = *(uint64_t *)(a + i); 1352 uint64_t bb = *(uint64_t *)(b + i); 1353 uint64_t dd = aa < bb ? aa : bb; 1354 *(uint64_t *)(d + i) = dd; 1355 } 1356 clear_high(d, oprsz, desc); 1357 } 1358 1359 void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc) 1360 { 1361 intptr_t oprsz = simd_oprsz(desc); 1362 intptr_t i; 1363 1364 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1365 uint8_t aa = *(uint8_t *)(a + i); 1366 uint8_t bb = *(uint8_t *)(b + i); 1367 uint8_t dd = aa > bb ? aa : bb; 1368 *(uint8_t *)(d + i) = dd; 1369 } 1370 clear_high(d, oprsz, desc); 1371 } 1372 1373 void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc) 1374 { 1375 intptr_t oprsz = simd_oprsz(desc); 1376 intptr_t i; 1377 1378 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1379 uint16_t aa = *(uint16_t *)(a + i); 1380 uint16_t bb = *(uint16_t *)(b + i); 1381 uint16_t dd = aa > bb ? aa : bb; 1382 *(uint16_t *)(d + i) = dd; 1383 } 1384 clear_high(d, oprsz, desc); 1385 } 1386 1387 void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc) 1388 { 1389 intptr_t oprsz = simd_oprsz(desc); 1390 intptr_t i; 1391 1392 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1393 uint32_t aa = *(uint32_t *)(a + i); 1394 uint32_t bb = *(uint32_t *)(b + i); 1395 uint32_t dd = aa > bb ? 
aa : bb; 1396 *(uint32_t *)(d + i) = dd; 1397 } 1398 clear_high(d, oprsz, desc); 1399 } 1400 1401 void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc) 1402 { 1403 intptr_t oprsz = simd_oprsz(desc); 1404 intptr_t i; 1405 1406 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1407 uint64_t aa = *(uint64_t *)(a + i); 1408 uint64_t bb = *(uint64_t *)(b + i); 1409 uint64_t dd = aa > bb ? aa : bb; 1410 *(uint64_t *)(d + i) = dd; 1411 } 1412 clear_high(d, oprsz, desc); 1413 } 1414 1415 void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc) 1416 { 1417 intptr_t oprsz = simd_oprsz(desc); 1418 intptr_t i; 1419 1420 for (i = 0; i < oprsz; i += sizeof(vec64)) { 1421 vec64 aa = *(vec64 *)(a + i); 1422 vec64 bb = *(vec64 *)(b + i); 1423 vec64 cc = *(vec64 *)(c + i); 1424 *(vec64 *)(d + i) = (bb & aa) | (cc & ~aa); 1425 } 1426 clear_high(d, oprsz, desc); 1427 } 1428