1 /* 2 * Generic vectorized operation runtime 3 * 4 * Copyright (c) 2018 Linaro 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu/host-utils.h" 22 #include "cpu.h" 23 #include "exec/helper-proto-common.h" 24 #include "tcg/tcg-gvec-desc.h" 25 26 27 static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc) 28 { 29 intptr_t maxsz = simd_maxsz(desc); 30 intptr_t i; 31 32 if (unlikely(maxsz > oprsz)) { 33 for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) { 34 *(uint64_t *)(d + i) = 0; 35 } 36 } 37 } 38 39 void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc) 40 { 41 intptr_t oprsz = simd_oprsz(desc); 42 intptr_t i; 43 44 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 45 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); 46 } 47 clear_high(d, oprsz, desc); 48 } 49 50 void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc) 51 { 52 intptr_t oprsz = simd_oprsz(desc); 53 intptr_t i; 54 55 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 56 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); 57 } 58 clear_high(d, oprsz, desc); 59 } 60 61 void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc) 62 { 63 intptr_t oprsz = simd_oprsz(desc); 64 intptr_t i; 65 66 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 67 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i); 68 } 69 clear_high(d, oprsz, desc); 70 } 71 72 void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc) 73 { 74 intptr_t oprsz = simd_oprsz(desc); 75 intptr_t i; 76 77 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 78 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i); 79 } 80 clear_high(d, oprsz, desc); 81 } 82 83 void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc) 84 { 85 intptr_t oprsz = simd_oprsz(desc); 86 intptr_t i; 87 88 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 89 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b; 90 } 91 clear_high(d, oprsz, desc); 92 } 93 94 void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc) 95 { 96 intptr_t oprsz = simd_oprsz(desc); 97 intptr_t i; 98 99 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 100 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b; 101 } 102 clear_high(d, oprsz, desc); 103 } 104 105 void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc) 106 { 107 intptr_t oprsz = simd_oprsz(desc); 108 intptr_t i; 109 110 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 111 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b; 112 } 113 clear_high(d, oprsz, desc); 114 } 115 116 void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc) 117 { 118 intptr_t oprsz = simd_oprsz(desc); 119 intptr_t i; 120 121 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 122 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b; 123 } 124 clear_high(d, oprsz, desc); 125 } 126 127 void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc) 128 { 129 intptr_t oprsz = simd_oprsz(desc); 130 intptr_t i; 131 132 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 133 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); 134 } 135 clear_high(d, oprsz, desc); 136 } 137 138 void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc) 139 { 140 intptr_t oprsz = simd_oprsz(desc); 141 intptr_t i; 142 143 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 144 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); 145 } 146 clear_high(d, oprsz, desc); 147 } 148 149 void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc) 150 { 151 intptr_t oprsz = simd_oprsz(desc); 152 intptr_t i; 153 154 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 155 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i); 156 } 157 clear_high(d, oprsz, desc); 158 } 159 160 void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc) 161 { 162 intptr_t oprsz = simd_oprsz(desc); 163 intptr_t i; 164 165 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 166 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i); 167 } 168 clear_high(d, oprsz, desc); 169 } 170 171 void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc) 172 { 173 intptr_t oprsz = simd_oprsz(desc); 174 intptr_t i; 175 176 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 177 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b; 178 } 179 clear_high(d, oprsz, desc); 180 } 181 182 void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc) 183 { 184 intptr_t oprsz = simd_oprsz(desc); 185 intptr_t i; 186 187 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 188 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b; 189 } 190 clear_high(d, oprsz, desc); 191 } 192 193 void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc) 194 { 195 intptr_t oprsz = simd_oprsz(desc); 196 intptr_t i; 197 198 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 199 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b; 200 } 201 clear_high(d, oprsz, desc); 202 } 203 204 void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc) 205 { 206 intptr_t oprsz = simd_oprsz(desc); 207 intptr_t i; 208 209 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 210 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b; 211 } 212 clear_high(d, oprsz, desc); 213 } 214 215 void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc) 216 { 217 intptr_t oprsz = simd_oprsz(desc); 218 intptr_t i; 219 220 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 221 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i); 222 } 223 clear_high(d, oprsz, desc); 224 } 225 226 void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc) 227 { 228 intptr_t oprsz = simd_oprsz(desc); 229 intptr_t i; 230 231 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 232 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i); 233 } 234 clear_high(d, oprsz, desc); 235 } 236 237 void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc) 238 { 239 intptr_t oprsz = simd_oprsz(desc); 240 intptr_t i; 241 242 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 243 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i); 244 } 245 clear_high(d, oprsz, desc); 246 } 247 248 void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc) 249 { 250 intptr_t oprsz = simd_oprsz(desc); 251 intptr_t i; 252 253 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 254 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i); 255 } 256 clear_high(d, oprsz, desc); 257 } 258 259 void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc) 260 { 261 intptr_t oprsz = simd_oprsz(desc); 262 intptr_t i; 263 264 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 265 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b; 266 } 267 clear_high(d, oprsz, desc); 268 } 269 270 void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc) 271 { 272 intptr_t oprsz = simd_oprsz(desc); 273 intptr_t i; 274 275 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 276 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b; 277 } 278 clear_high(d, oprsz, desc); 279 } 280 281 void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc) 282 { 283 intptr_t oprsz = simd_oprsz(desc); 284 intptr_t i; 285 286 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 287 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b; 288 } 289 clear_high(d, oprsz, desc); 290 } 291 292 void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc) 293 { 294 intptr_t oprsz = simd_oprsz(desc); 295 intptr_t i; 296 297 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 298 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b; 299 } 300 clear_high(d, oprsz, desc); 301 } 302 303 void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc) 304 { 305 intptr_t oprsz = simd_oprsz(desc); 306 intptr_t i; 307 308 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 309 *(uint8_t *)(d + i) = -*(uint8_t *)(a + i); 310 } 311 clear_high(d, oprsz, desc); 312 } 313 314 void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc) 315 { 316 intptr_t oprsz = simd_oprsz(desc); 317 intptr_t i; 318 319 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 320 *(uint16_t *)(d + i) = -*(uint16_t *)(a + i); 321 } 322 clear_high(d, oprsz, desc); 323 } 324 325 void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc) 326 { 327 intptr_t oprsz = simd_oprsz(desc); 328 intptr_t i; 329 330 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 331 *(uint32_t *)(d + i) = -*(uint32_t *)(a + i); 332 } 333 clear_high(d, oprsz, desc); 334 } 335 336 void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc) 337 { 338 intptr_t oprsz = simd_oprsz(desc); 339 intptr_t i; 340 341 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 342 *(uint64_t *)(d + i) = -*(uint64_t *)(a + i); 343 } 344 clear_high(d, oprsz, desc); 345 } 346 347 void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc) 348 { 349 intptr_t oprsz = simd_oprsz(desc); 350 intptr_t i; 351 352 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 353 int8_t aa = *(int8_t *)(a + i); 354 *(int8_t *)(d + i) = aa < 0 ? -aa : aa; 355 } 356 clear_high(d, oprsz, desc); 357 } 358 359 void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc) 360 { 361 intptr_t oprsz = simd_oprsz(desc); 362 intptr_t i; 363 364 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 365 int16_t aa = *(int16_t *)(a + i); 366 *(int16_t *)(d + i) = aa < 0 ? -aa : aa; 367 } 368 clear_high(d, oprsz, desc); 369 } 370 371 void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc) 372 { 373 intptr_t oprsz = simd_oprsz(desc); 374 intptr_t i; 375 376 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 377 int32_t aa = *(int32_t *)(a + i); 378 *(int32_t *)(d + i) = aa < 0 ? -aa : aa; 379 } 380 clear_high(d, oprsz, desc); 381 } 382 383 void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc) 384 { 385 intptr_t oprsz = simd_oprsz(desc); 386 intptr_t i; 387 388 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 389 int64_t aa = *(int64_t *)(a + i); 390 *(int64_t *)(d + i) = aa < 0 ? -aa : aa; 391 } 392 clear_high(d, oprsz, desc); 393 } 394 395 void HELPER(gvec_mov)(void *d, void *a, uint32_t desc) 396 { 397 intptr_t oprsz = simd_oprsz(desc); 398 399 memcpy(d, a, oprsz); 400 clear_high(d, oprsz, desc); 401 } 402 403 void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c) 404 { 405 intptr_t oprsz = simd_oprsz(desc); 406 intptr_t i; 407 408 if (c == 0) { 409 oprsz = 0; 410 } else { 411 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 412 *(uint64_t *)(d + i) = c; 413 } 414 } 415 clear_high(d, oprsz, desc); 416 } 417 418 void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c) 419 { 420 intptr_t oprsz = simd_oprsz(desc); 421 intptr_t i; 422 423 if (c == 0) { 424 oprsz = 0; 425 } else { 426 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 427 *(uint32_t *)(d + i) = c; 428 } 429 } 430 clear_high(d, oprsz, desc); 431 } 432 433 void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c) 434 { 435 HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff)); 436 } 437 438 void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c) 439 { 440 HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff)); 441 } 442 443 void HELPER(gvec_not)(void *d, void *a, uint32_t desc) 444 { 445 intptr_t oprsz = simd_oprsz(desc); 446 intptr_t i; 447 448 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 449 *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i); 450 } 451 clear_high(d, oprsz, desc); 452 } 453 454 void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc) 455 { 456 intptr_t oprsz = simd_oprsz(desc); 457 intptr_t i; 458 459 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 460 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i); 461 } 462 clear_high(d, oprsz, desc); 463 } 464 465 void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc) 466 { 467 intptr_t oprsz = simd_oprsz(desc); 468 intptr_t i; 469 470 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 471 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i); 472 } 473 clear_high(d, oprsz, desc); 474 } 475 476 void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc) 477 { 478 intptr_t oprsz = simd_oprsz(desc); 479 intptr_t i; 480 481 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 482 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i); 483 } 484 clear_high(d, oprsz, desc); 485 } 486 487 void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc) 488 { 489 intptr_t oprsz = simd_oprsz(desc); 490 intptr_t i; 491 492 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 493 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i); 494 } 495 clear_high(d, oprsz, desc); 496 } 497 498 void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc) 499 { 500 intptr_t oprsz = simd_oprsz(desc); 501 intptr_t i; 502 503 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 504 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i); 505 } 506 clear_high(d, oprsz, desc); 507 } 508 509 void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc) 510 { 511 intptr_t oprsz = simd_oprsz(desc); 512 intptr_t i; 513 514 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 515 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i)); 516 } 517 clear_high(d, oprsz, desc); 518 } 519 520 void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc) 521 { 522 intptr_t oprsz = simd_oprsz(desc); 523 intptr_t i; 524 525 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 526 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i)); 527 } 528 clear_high(d, oprsz, desc); 529 } 530 531 void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc) 532 { 533 intptr_t oprsz = simd_oprsz(desc); 534 intptr_t i; 535 536 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 537 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i)); 538 } 539 clear_high(d, oprsz, desc); 540 } 541 542 void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc) 543 { 544 intptr_t oprsz = simd_oprsz(desc); 545 intptr_t i; 546 547 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 548 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b; 549 } 550 clear_high(d, oprsz, desc); 551 } 552 553 void HELPER(gvec_andcs)(void *d, void *a, uint64_t b, uint32_t desc) 554 { 555 intptr_t oprsz = simd_oprsz(desc); 556 intptr_t i; 557 558 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 559 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & ~b; 560 } 561 clear_high(d, oprsz, desc); 562 } 563 564 void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc) 565 { 566 intptr_t oprsz = simd_oprsz(desc); 567 intptr_t i; 568 569 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 570 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b; 571 } 572 clear_high(d, oprsz, desc); 573 } 574 575 void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc) 576 { 577 intptr_t oprsz = simd_oprsz(desc); 578 intptr_t i; 579 580 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 581 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b; 582 } 583 clear_high(d, oprsz, desc); 584 } 585 586 void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc) 587 { 588 intptr_t oprsz = simd_oprsz(desc); 589 int shift = simd_data(desc); 590 intptr_t i; 591 592 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 593 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift; 594 } 595 clear_high(d, oprsz, desc); 596 } 597 598 void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc) 599 { 600 intptr_t oprsz = simd_oprsz(desc); 601 int shift = simd_data(desc); 602 intptr_t i; 603 604 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 605 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift; 606 } 607 clear_high(d, oprsz, desc); 608 } 609 610 void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc) 611 { 612 intptr_t oprsz = simd_oprsz(desc); 613 int shift = simd_data(desc); 614 intptr_t i; 615 616 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 617 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift; 618 } 619 clear_high(d, oprsz, desc); 620 } 621 622 void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc) 623 { 624 intptr_t oprsz = simd_oprsz(desc); 625 int shift = simd_data(desc); 626 intptr_t i; 627 628 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 629 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift; 630 } 631 clear_high(d, oprsz, desc); 632 } 633 634 void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc) 635 { 636 intptr_t oprsz = simd_oprsz(desc); 637 int shift = simd_data(desc); 638 intptr_t i; 639 640 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 641 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift; 642 } 643 clear_high(d, oprsz, desc); 644 } 645 646 void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc) 647 { 648 intptr_t oprsz = simd_oprsz(desc); 649 int shift = simd_data(desc); 650 intptr_t i; 651 652 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 653 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift; 654 } 655 clear_high(d, oprsz, desc); 656 } 657 658 void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc) 659 { 660 intptr_t oprsz = simd_oprsz(desc); 661 int shift = simd_data(desc); 662 intptr_t i; 663 664 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 665 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift; 666 } 667 clear_high(d, oprsz, desc); 668 } 669 670 void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc) 671 { 672 intptr_t oprsz = simd_oprsz(desc); 673 int shift = simd_data(desc); 674 intptr_t i; 675 676 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 677 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift; 678 } 679 clear_high(d, oprsz, desc); 680 } 681 682 void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc) 683 { 684 intptr_t oprsz = simd_oprsz(desc); 685 int shift = simd_data(desc); 686 intptr_t i; 687 688 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 689 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift; 690 } 691 clear_high(d, oprsz, desc); 692 } 693 694 void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc) 695 { 696 intptr_t oprsz = simd_oprsz(desc); 697 int shift = simd_data(desc); 698 intptr_t i; 699 700 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 701 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift; 702 } 703 clear_high(d, oprsz, desc); 704 } 705 706 void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc) 707 { 708 intptr_t oprsz = simd_oprsz(desc); 709 int shift = simd_data(desc); 710 intptr_t i; 711 712 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 713 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift; 714 } 715 clear_high(d, oprsz, desc); 716 } 717 718 void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) 719 { 720 intptr_t oprsz = simd_oprsz(desc); 721 int shift = simd_data(desc); 722 intptr_t i; 723 724 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 725 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift; 726 } 727 clear_high(d, oprsz, desc); 728 } 729 730 void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc) 731 { 732 intptr_t oprsz = simd_oprsz(desc); 733 int shift = simd_data(desc); 734 intptr_t i; 735 736 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 737 *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift); 738 } 739 clear_high(d, oprsz, desc); 740 } 741 742 void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc) 743 { 744 intptr_t oprsz = simd_oprsz(desc); 745 int shift = simd_data(desc); 746 intptr_t i; 747 748 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 749 *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift); 750 } 751 clear_high(d, oprsz, desc); 752 } 753 754 void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc) 755 { 756 intptr_t oprsz = simd_oprsz(desc); 757 int shift = simd_data(desc); 758 intptr_t i; 759 760 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 761 *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift); 762 } 763 clear_high(d, oprsz, desc); 764 } 765 766 void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc) 767 { 768 intptr_t oprsz = simd_oprsz(desc); 769 int shift = simd_data(desc); 770 intptr_t i; 771 772 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 773 *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift); 774 } 775 clear_high(d, oprsz, desc); 776 } 777 778 void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc) 779 { 780 intptr_t oprsz = simd_oprsz(desc); 781 intptr_t i; 782 783 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 784 uint8_t sh = *(uint8_t *)(b + i) & 7; 785 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh; 786 } 787 clear_high(d, oprsz, desc); 788 } 789 790 void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc) 791 { 792 intptr_t oprsz = simd_oprsz(desc); 793 intptr_t i; 794 795 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 796 uint8_t sh = *(uint16_t *)(b + i) & 15; 797 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh; 798 } 799 clear_high(d, oprsz, desc); 800 } 801 802 void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc) 803 { 804 intptr_t oprsz = simd_oprsz(desc); 805 intptr_t i; 806 807 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 808 uint8_t sh = *(uint32_t *)(b + i) & 31; 809 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh; 810 } 811 clear_high(d, oprsz, desc); 812 } 813 814 void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc) 815 { 816 intptr_t oprsz = simd_oprsz(desc); 817 intptr_t i; 818 819 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 820 uint8_t sh = *(uint64_t *)(b + i) & 63; 821 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh; 822 } 823 clear_high(d, oprsz, desc); 824 } 825 826 void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc) 827 { 828 intptr_t oprsz = simd_oprsz(desc); 829 intptr_t i; 830 831 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 832 uint8_t sh = *(uint8_t *)(b + i) & 7; 833 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh; 834 } 835 clear_high(d, oprsz, desc); 836 } 837 838 void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc) 839 { 840 intptr_t oprsz = simd_oprsz(desc); 841 intptr_t i; 842 843 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 844 uint8_t sh = *(uint16_t *)(b + i) & 15; 845 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh; 846 } 847 clear_high(d, oprsz, desc); 848 } 849 850 void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc) 851 { 852 intptr_t oprsz = simd_oprsz(desc); 853 intptr_t i; 854 855 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 856 uint8_t sh = *(uint32_t *)(b + i) & 31; 857 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh; 858 } 859 clear_high(d, oprsz, desc); 860 } 861 862 void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc) 863 { 864 intptr_t oprsz = simd_oprsz(desc); 865 intptr_t i; 866 867 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 868 uint8_t sh = *(uint64_t *)(b + i) & 63; 869 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh; 870 } 871 clear_high(d, oprsz, desc); 872 } 873 874 void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc) 875 { 876 intptr_t oprsz = simd_oprsz(desc); 877 intptr_t i; 878 879 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 880 uint8_t sh = *(uint8_t *)(b + i) & 7; 881 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh; 882 } 883 clear_high(d, oprsz, desc); 884 } 885 886 void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc) 887 { 888 intptr_t oprsz = simd_oprsz(desc); 889 intptr_t i; 890 891 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 892 uint8_t sh = *(uint16_t *)(b + i) & 15; 893 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh; 894 } 895 clear_high(d, oprsz, desc); 896 } 897 898 void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc) 899 { 900 intptr_t oprsz = simd_oprsz(desc); 901 intptr_t i; 902 903 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 904 uint8_t sh = *(uint32_t *)(b + i) & 31; 905 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh; 906 } 907 clear_high(d, oprsz, desc); 908 } 909 910 void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc) 911 { 912 intptr_t oprsz = simd_oprsz(desc); 913 intptr_t i; 914 915 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 916 uint8_t sh = *(uint64_t *)(b + i) & 63; 917 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh; 918 } 919 clear_high(d, oprsz, desc); 920 } 921 922 void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc) 923 { 924 intptr_t oprsz = simd_oprsz(desc); 925 intptr_t i; 926 927 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 928 uint8_t sh = *(uint8_t *)(b + i) & 7; 929 *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh); 930 } 931 clear_high(d, oprsz, desc); 932 } 933 934 void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc) 935 { 936 intptr_t oprsz = simd_oprsz(desc); 937 intptr_t i; 938 939 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 940 uint8_t sh = *(uint16_t *)(b + i) & 15; 941 *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh); 942 } 943 clear_high(d, oprsz, desc); 944 } 945 946 void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc) 947 { 948 intptr_t oprsz = simd_oprsz(desc); 949 intptr_t i; 950 951 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 952 uint8_t sh = *(uint32_t *)(b + i) & 31; 953 *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh); 954 } 955 clear_high(d, oprsz, desc); 956 } 957 958 void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc) 959 { 960 intptr_t oprsz = simd_oprsz(desc); 961 intptr_t i; 962 963 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 964 uint8_t sh = *(uint64_t *)(b + i) & 63; 965 *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh); 966 } 967 clear_high(d, oprsz, desc); 968 } 969 970 void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc) 971 { 972 intptr_t oprsz = simd_oprsz(desc); 973 intptr_t i; 974 975 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 976 uint8_t sh = *(uint8_t *)(b + i) & 7; 977 *(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh); 978 } 979 clear_high(d, oprsz, desc); 980 } 981 982 void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc) 983 { 984 intptr_t oprsz = simd_oprsz(desc); 985 intptr_t i; 986 987 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 988 uint8_t sh = *(uint16_t *)(b + i) & 15; 989 *(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh); 990 } 991 clear_high(d, oprsz, desc); 992 } 993 994 void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc) 995 { 996 intptr_t oprsz = simd_oprsz(desc); 997 intptr_t i; 998 999 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1000 uint8_t sh = *(uint32_t *)(b + i) & 31; 1001 *(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh); 1002 } 1003 clear_high(d, oprsz, desc); 1004 } 1005 1006 void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc) 1007 { 1008 intptr_t oprsz = simd_oprsz(desc); 1009 intptr_t i; 1010 1011 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1012 uint8_t sh = *(uint64_t *)(b + i) & 63; 1013 *(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh); 1014 } 1015 clear_high(d, oprsz, desc); 1016 } 1017 1018 #define DO_CMP1(NAME, TYPE, OP) \ 1019 void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \ 1020 { \ 1021 intptr_t oprsz = simd_oprsz(desc); \ 1022 intptr_t i; \ 1023 for (i = 0; i < oprsz; i += sizeof(TYPE)) { \ 1024 *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \ 1025 } \ 1026 clear_high(d, oprsz, desc); \ 1027 } 1028 1029 #define DO_CMP2(SZ) \ 1030 DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==) \ 1031 DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=) \ 1032 DO_CMP1(gvec_lt##SZ, int##SZ##_t, <) \ 1033 DO_CMP1(gvec_le##SZ, int##SZ##_t, <=) \ 1034 DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <) \ 1035 DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=) 1036 1037 DO_CMP2(8) 1038 DO_CMP2(16) 1039 DO_CMP2(32) 1040 DO_CMP2(64) 1041 1042 #undef DO_CMP1 1043 #undef DO_CMP2 1044 1045 #define DO_CMP1(NAME, TYPE, OP) \ 1046 void HELPER(NAME)(void *d, void *a, uint64_t b64, uint32_t desc) \ 1047 { \ 1048 intptr_t oprsz = simd_oprsz(desc); \ 1049 TYPE inv = simd_data(desc), b = b64; \ 1050 for (intptr_t i = 0; i < oprsz; i += sizeof(TYPE)) { \ 1051 *(TYPE *)(d + i) = -((*(TYPE *)(a + i) OP b) ^ inv); \ 1052 } \ 1053 clear_high(d, oprsz, desc); \ 1054 } 1055 1056 #define DO_CMP2(SZ) \ 1057 DO_CMP1(gvec_eqs##SZ, uint##SZ##_t, ==) \ 1058 DO_CMP1(gvec_lts##SZ, int##SZ##_t, <) \ 1059 DO_CMP1(gvec_les##SZ, int##SZ##_t, <=) \ 1060 DO_CMP1(gvec_ltus##SZ, uint##SZ##_t, <) \ 1061 DO_CMP1(gvec_leus##SZ, uint##SZ##_t, <=) 1062 1063 DO_CMP2(8) 1064 DO_CMP2(16) 1065 DO_CMP2(32) 1066 DO_CMP2(64) 1067 1068 #undef DO_CMP1 1069 #undef DO_CMP2 1070 1071 void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc) 1072 { 1073 intptr_t oprsz = simd_oprsz(desc); 1074 intptr_t i; 1075 1076 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1077 int r = *(int8_t *)(a + i) + *(int8_t *)(b + i); 1078 if (r > INT8_MAX) { 1079 r = INT8_MAX; 1080 } else if (r < INT8_MIN) { 1081 r = INT8_MIN; 1082 } 1083 *(int8_t *)(d + i) = r; 1084 } 1085 clear_high(d, oprsz, desc); 1086 } 1087 1088 void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc) 1089 { 1090 intptr_t oprsz = simd_oprsz(desc); 1091 intptr_t i; 1092 1093 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1094 int r = *(int16_t *)(a + i) + *(int16_t *)(b + i); 1095 if (r > INT16_MAX) { 1096 r = INT16_MAX; 1097 } else if (r < INT16_MIN) { 1098 r = INT16_MIN; 1099 } 1100 *(int16_t *)(d + i) = r; 1101 } 1102 clear_high(d, oprsz, desc); 1103 } 1104 1105 void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc) 1106 { 1107 intptr_t oprsz = simd_oprsz(desc); 1108 intptr_t i; 1109 1110 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1111 int32_t ai = *(int32_t *)(a + i); 1112 int32_t bi = *(int32_t *)(b + i); 1113 int32_t di; 1114 if (sadd32_overflow(ai, bi, &di)) { 1115 di = (di < 0 ? INT32_MAX : INT32_MIN); 1116 } 1117 *(int32_t *)(d + i) = di; 1118 } 1119 clear_high(d, oprsz, desc); 1120 } 1121 1122 void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc) 1123 { 1124 intptr_t oprsz = simd_oprsz(desc); 1125 intptr_t i; 1126 1127 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1128 int64_t ai = *(int64_t *)(a + i); 1129 int64_t bi = *(int64_t *)(b + i); 1130 int64_t di; 1131 if (sadd64_overflow(ai, bi, &di)) { 1132 di = (di < 0 ? INT64_MAX : INT64_MIN); 1133 } 1134 *(int64_t *)(d + i) = di; 1135 } 1136 clear_high(d, oprsz, desc); 1137 } 1138 1139 void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc) 1140 { 1141 intptr_t oprsz = simd_oprsz(desc); 1142 intptr_t i; 1143 1144 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1145 int r = *(int8_t *)(a + i) - *(int8_t *)(b + i); 1146 if (r > INT8_MAX) { 1147 r = INT8_MAX; 1148 } else if (r < INT8_MIN) { 1149 r = INT8_MIN; 1150 } 1151 *(uint8_t *)(d + i) = r; 1152 } 1153 clear_high(d, oprsz, desc); 1154 } 1155 1156 void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc) 1157 { 1158 intptr_t oprsz = simd_oprsz(desc); 1159 intptr_t i; 1160 1161 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1162 int r = *(int16_t *)(a + i) - *(int16_t *)(b + i); 1163 if (r > INT16_MAX) { 1164 r = INT16_MAX; 1165 } else if (r < INT16_MIN) { 1166 r = INT16_MIN; 1167 } 1168 *(int16_t *)(d + i) = r; 1169 } 1170 clear_high(d, oprsz, desc); 1171 } 1172 1173 void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc) 1174 { 1175 intptr_t oprsz = simd_oprsz(desc); 1176 intptr_t i; 1177 1178 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1179 int32_t ai = *(int32_t *)(a + i); 1180 int32_t bi = *(int32_t *)(b + i); 1181 int32_t di; 1182 if (ssub32_overflow(ai, bi, &di)) { 1183 di = (di < 0 ? INT32_MAX : INT32_MIN); 1184 } 1185 *(int32_t *)(d + i) = di; 1186 } 1187 clear_high(d, oprsz, desc); 1188 } 1189 1190 void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc) 1191 { 1192 intptr_t oprsz = simd_oprsz(desc); 1193 intptr_t i; 1194 1195 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1196 int64_t ai = *(int64_t *)(a + i); 1197 int64_t bi = *(int64_t *)(b + i); 1198 int64_t di; 1199 if (ssub64_overflow(ai, bi, &di)) { 1200 di = (di < 0 ? INT64_MAX : INT64_MIN); 1201 } 1202 *(int64_t *)(d + i) = di; 1203 } 1204 clear_high(d, oprsz, desc); 1205 } 1206 1207 void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc) 1208 { 1209 intptr_t oprsz = simd_oprsz(desc); 1210 intptr_t i; 1211 1212 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1213 unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); 1214 if (r > UINT8_MAX) { 1215 r = UINT8_MAX; 1216 } 1217 *(uint8_t *)(d + i) = r; 1218 } 1219 clear_high(d, oprsz, desc); 1220 } 1221 1222 void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc) 1223 { 1224 intptr_t oprsz = simd_oprsz(desc); 1225 intptr_t i; 1226 1227 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1228 unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); 1229 if (r > UINT16_MAX) { 1230 r = UINT16_MAX; 1231 } 1232 *(uint16_t *)(d + i) = r; 1233 } 1234 clear_high(d, oprsz, desc); 1235 } 1236 1237 void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc) 1238 { 1239 intptr_t oprsz = simd_oprsz(desc); 1240 intptr_t i; 1241 1242 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1243 uint32_t ai = *(uint32_t *)(a + i); 1244 uint32_t bi = *(uint32_t *)(b + i); 1245 uint32_t di; 1246 if (uadd32_overflow(ai, bi, &di)) { 1247 di = UINT32_MAX; 1248 } 1249 *(uint32_t *)(d + i) = di; 1250 } 1251 clear_high(d, oprsz, desc); 1252 } 1253 1254 void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc) 1255 { 1256 intptr_t oprsz = simd_oprsz(desc); 1257 intptr_t i; 1258 1259 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1260 uint64_t ai = *(uint64_t *)(a + i); 1261 uint64_t bi = *(uint64_t *)(b + i); 1262 uint64_t di; 1263 if (uadd64_overflow(ai, bi, &di)) { 1264 di = UINT64_MAX; 1265 } 1266 *(uint64_t *)(d + i) = di; 1267 } 1268 clear_high(d, oprsz, desc); 1269 } 1270 1271 void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc) 1272 { 1273 intptr_t oprsz = simd_oprsz(desc); 1274 intptr_t i; 1275 1276 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1277 int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); 1278 if (r < 0) { 1279 r = 0; 1280 } 1281 *(uint8_t *)(d + i) = r; 1282 } 1283 clear_high(d, oprsz, desc); 1284 } 1285 1286 void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc) 1287 { 1288 intptr_t oprsz = simd_oprsz(desc); 1289 intptr_t i; 1290 1291 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1292 int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); 1293 if (r < 0) { 1294 r = 0; 1295 } 1296 *(uint16_t *)(d + i) = r; 1297 } 1298 clear_high(d, oprsz, desc); 1299 } 1300 1301 void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc) 1302 { 1303 intptr_t oprsz = simd_oprsz(desc); 1304 intptr_t i; 1305 1306 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1307 uint32_t ai = *(uint32_t *)(a + i); 1308 uint32_t bi = *(uint32_t *)(b + i); 1309 uint32_t di; 1310 if (usub32_overflow(ai, bi, &di)) { 1311 di = 0; 1312 } 1313 *(uint32_t *)(d + i) = di; 1314 } 1315 clear_high(d, oprsz, desc); 1316 } 1317 1318 void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc) 1319 { 1320 intptr_t oprsz = simd_oprsz(desc); 1321 intptr_t i; 1322 1323 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1324 uint64_t ai = *(uint64_t *)(a + i); 1325 uint64_t bi = *(uint64_t *)(b + i); 1326 uint64_t di; 1327 if (usub64_overflow(ai, bi, &di)) { 1328 di = 0; 1329 } 1330 *(uint64_t *)(d + i) = di; 1331 } 1332 clear_high(d, oprsz, desc); 1333 } 1334 1335 void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc) 1336 { 1337 intptr_t oprsz = simd_oprsz(desc); 1338 intptr_t i; 1339 1340 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1341 int8_t aa = *(int8_t *)(a + i); 1342 int8_t bb = *(int8_t *)(b + i); 1343 int8_t dd = aa < bb ? aa : bb; 1344 *(int8_t *)(d + i) = dd; 1345 } 1346 clear_high(d, oprsz, desc); 1347 } 1348 1349 void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc) 1350 { 1351 intptr_t oprsz = simd_oprsz(desc); 1352 intptr_t i; 1353 1354 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1355 int16_t aa = *(int16_t *)(a + i); 1356 int16_t bb = *(int16_t *)(b + i); 1357 int16_t dd = aa < bb ? aa : bb; 1358 *(int16_t *)(d + i) = dd; 1359 } 1360 clear_high(d, oprsz, desc); 1361 } 1362 1363 void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc) 1364 { 1365 intptr_t oprsz = simd_oprsz(desc); 1366 intptr_t i; 1367 1368 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1369 int32_t aa = *(int32_t *)(a + i); 1370 int32_t bb = *(int32_t *)(b + i); 1371 int32_t dd = aa < bb ? aa : bb; 1372 *(int32_t *)(d + i) = dd; 1373 } 1374 clear_high(d, oprsz, desc); 1375 } 1376 1377 void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc) 1378 { 1379 intptr_t oprsz = simd_oprsz(desc); 1380 intptr_t i; 1381 1382 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1383 int64_t aa = *(int64_t *)(a + i); 1384 int64_t bb = *(int64_t *)(b + i); 1385 int64_t dd = aa < bb ? aa : bb; 1386 *(int64_t *)(d + i) = dd; 1387 } 1388 clear_high(d, oprsz, desc); 1389 } 1390 1391 void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc) 1392 { 1393 intptr_t oprsz = simd_oprsz(desc); 1394 intptr_t i; 1395 1396 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1397 int8_t aa = *(int8_t *)(a + i); 1398 int8_t bb = *(int8_t *)(b + i); 1399 int8_t dd = aa > bb ? aa : bb; 1400 *(int8_t *)(d + i) = dd; 1401 } 1402 clear_high(d, oprsz, desc); 1403 } 1404 1405 void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc) 1406 { 1407 intptr_t oprsz = simd_oprsz(desc); 1408 intptr_t i; 1409 1410 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1411 int16_t aa = *(int16_t *)(a + i); 1412 int16_t bb = *(int16_t *)(b + i); 1413 int16_t dd = aa > bb ? aa : bb; 1414 *(int16_t *)(d + i) = dd; 1415 } 1416 clear_high(d, oprsz, desc); 1417 } 1418 1419 void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc) 1420 { 1421 intptr_t oprsz = simd_oprsz(desc); 1422 intptr_t i; 1423 1424 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1425 int32_t aa = *(int32_t *)(a + i); 1426 int32_t bb = *(int32_t *)(b + i); 1427 int32_t dd = aa > bb ? aa : bb; 1428 *(int32_t *)(d + i) = dd; 1429 } 1430 clear_high(d, oprsz, desc); 1431 } 1432 1433 void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc) 1434 { 1435 intptr_t oprsz = simd_oprsz(desc); 1436 intptr_t i; 1437 1438 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1439 int64_t aa = *(int64_t *)(a + i); 1440 int64_t bb = *(int64_t *)(b + i); 1441 int64_t dd = aa > bb ? aa : bb; 1442 *(int64_t *)(d + i) = dd; 1443 } 1444 clear_high(d, oprsz, desc); 1445 } 1446 1447 void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc) 1448 { 1449 intptr_t oprsz = simd_oprsz(desc); 1450 intptr_t i; 1451 1452 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1453 uint8_t aa = *(uint8_t *)(a + i); 1454 uint8_t bb = *(uint8_t *)(b + i); 1455 uint8_t dd = aa < bb ? aa : bb; 1456 *(uint8_t *)(d + i) = dd; 1457 } 1458 clear_high(d, oprsz, desc); 1459 } 1460 1461 void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc) 1462 { 1463 intptr_t oprsz = simd_oprsz(desc); 1464 intptr_t i; 1465 1466 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1467 uint16_t aa = *(uint16_t *)(a + i); 1468 uint16_t bb = *(uint16_t *)(b + i); 1469 uint16_t dd = aa < bb ? aa : bb; 1470 *(uint16_t *)(d + i) = dd; 1471 } 1472 clear_high(d, oprsz, desc); 1473 } 1474 1475 void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc) 1476 { 1477 intptr_t oprsz = simd_oprsz(desc); 1478 intptr_t i; 1479 1480 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1481 uint32_t aa = *(uint32_t *)(a + i); 1482 uint32_t bb = *(uint32_t *)(b + i); 1483 uint32_t dd = aa < bb ? aa : bb; 1484 *(uint32_t *)(d + i) = dd; 1485 } 1486 clear_high(d, oprsz, desc); 1487 } 1488 1489 void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc) 1490 { 1491 intptr_t oprsz = simd_oprsz(desc); 1492 intptr_t i; 1493 1494 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1495 uint64_t aa = *(uint64_t *)(a + i); 1496 uint64_t bb = *(uint64_t *)(b + i); 1497 uint64_t dd = aa < bb ? aa : bb; 1498 *(uint64_t *)(d + i) = dd; 1499 } 1500 clear_high(d, oprsz, desc); 1501 } 1502 1503 void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc) 1504 { 1505 intptr_t oprsz = simd_oprsz(desc); 1506 intptr_t i; 1507 1508 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1509 uint8_t aa = *(uint8_t *)(a + i); 1510 uint8_t bb = *(uint8_t *)(b + i); 1511 uint8_t dd = aa > bb ? aa : bb; 1512 *(uint8_t *)(d + i) = dd; 1513 } 1514 clear_high(d, oprsz, desc); 1515 } 1516 1517 void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc) 1518 { 1519 intptr_t oprsz = simd_oprsz(desc); 1520 intptr_t i; 1521 1522 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1523 uint16_t aa = *(uint16_t *)(a + i); 1524 uint16_t bb = *(uint16_t *)(b + i); 1525 uint16_t dd = aa > bb ? aa : bb; 1526 *(uint16_t *)(d + i) = dd; 1527 } 1528 clear_high(d, oprsz, desc); 1529 } 1530 1531 void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc) 1532 { 1533 intptr_t oprsz = simd_oprsz(desc); 1534 intptr_t i; 1535 1536 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1537 uint32_t aa = *(uint32_t *)(a + i); 1538 uint32_t bb = *(uint32_t *)(b + i); 1539 uint32_t dd = aa > bb ? aa : bb; 1540 *(uint32_t *)(d + i) = dd; 1541 } 1542 clear_high(d, oprsz, desc); 1543 } 1544 1545 void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc) 1546 { 1547 intptr_t oprsz = simd_oprsz(desc); 1548 intptr_t i; 1549 1550 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1551 uint64_t aa = *(uint64_t *)(a + i); 1552 uint64_t bb = *(uint64_t *)(b + i); 1553 uint64_t dd = aa > bb ? aa : bb; 1554 *(uint64_t *)(d + i) = dd; 1555 } 1556 clear_high(d, oprsz, desc); 1557 } 1558 1559 void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc) 1560 { 1561 intptr_t oprsz = simd_oprsz(desc); 1562 intptr_t i; 1563 1564 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1565 uint64_t aa = *(uint64_t *)(a + i); 1566 uint64_t bb = *(uint64_t *)(b + i); 1567 uint64_t cc = *(uint64_t *)(c + i); 1568 *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa); 1569 } 1570 clear_high(d, oprsz, desc); 1571 } 1572