1 /* 2 * Generic vectorized operation runtime 3 * 4 * Copyright (c) 2018 Linaro 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu/host-utils.h" 22 #include "cpu.h" 23 #include "exec/helper-proto.h" 24 #include "tcg/tcg-gvec-desc.h" 25 26 27 static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc) 28 { 29 intptr_t maxsz = simd_maxsz(desc); 30 intptr_t i; 31 32 if (unlikely(maxsz > oprsz)) { 33 for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) { 34 *(uint64_t *)(d + i) = 0; 35 } 36 } 37 } 38 39 void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc) 40 { 41 intptr_t oprsz = simd_oprsz(desc); 42 intptr_t i; 43 44 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 45 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); 46 } 47 clear_high(d, oprsz, desc); 48 } 49 50 void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc) 51 { 52 intptr_t oprsz = simd_oprsz(desc); 53 intptr_t i; 54 55 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 56 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); 57 } 58 clear_high(d, oprsz, desc); 59 } 60 61 void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc) 62 { 63 intptr_t oprsz = simd_oprsz(desc); 64 intptr_t i; 65 66 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 67 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i); 68 } 69 clear_high(d, oprsz, desc); 70 } 71 72 void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc) 73 { 74 intptr_t oprsz = simd_oprsz(desc); 75 intptr_t i; 76 77 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 78 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i); 79 } 80 clear_high(d, oprsz, desc); 81 } 82 83 void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc) 84 { 85 intptr_t oprsz = simd_oprsz(desc); 86 intptr_t i; 87 88 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 89 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b; 90 } 91 clear_high(d, oprsz, desc); 92 } 93 94 void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc) 95 { 96 intptr_t oprsz = simd_oprsz(desc); 97 intptr_t i; 98 99 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 100 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b; 101 } 102 clear_high(d, oprsz, desc); 103 } 104 105 void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc) 106 { 107 intptr_t oprsz = simd_oprsz(desc); 108 intptr_t i; 109 110 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 111 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b; 112 } 113 clear_high(d, oprsz, desc); 114 } 115 116 void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc) 117 { 118 intptr_t oprsz = simd_oprsz(desc); 119 intptr_t i; 120 121 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 122 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b; 123 } 124 clear_high(d, oprsz, desc); 125 } 126 127 void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc) 128 { 129 intptr_t oprsz = simd_oprsz(desc); 130 intptr_t i; 131 132 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 133 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); 134 } 135 clear_high(d, oprsz, desc); 136 } 137 138 void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc) 139 { 140 intptr_t oprsz = simd_oprsz(desc); 141 intptr_t i; 142 143 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 144 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); 145 } 146 clear_high(d, oprsz, desc); 147 } 148 149 void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc) 150 { 151 intptr_t oprsz = simd_oprsz(desc); 152 intptr_t i; 153 154 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 155 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i); 156 } 157 clear_high(d, oprsz, desc); 158 } 159 160 void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc) 161 { 162 intptr_t oprsz = simd_oprsz(desc); 163 intptr_t i; 164 165 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 166 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i); 167 } 168 clear_high(d, oprsz, desc); 169 } 170 171 void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc) 172 { 173 intptr_t oprsz = simd_oprsz(desc); 174 intptr_t i; 175 176 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 177 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b; 178 } 179 clear_high(d, oprsz, desc); 180 } 181 182 void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc) 183 { 184 intptr_t oprsz = simd_oprsz(desc); 185 intptr_t i; 186 187 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 188 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b; 189 } 190 clear_high(d, oprsz, desc); 191 } 192 193 void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc) 194 { 195 intptr_t oprsz = simd_oprsz(desc); 196 intptr_t i; 197 198 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 199 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b; 200 } 201 clear_high(d, oprsz, desc); 202 } 203 204 void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc) 205 { 206 intptr_t oprsz = simd_oprsz(desc); 207 intptr_t i; 208 209 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 210 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b; 211 } 212 clear_high(d, oprsz, desc); 213 } 214 215 void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc) 216 { 217 intptr_t oprsz = simd_oprsz(desc); 218 intptr_t i; 219 220 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 221 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i); 222 } 223 clear_high(d, oprsz, desc); 224 } 225 226 void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc) 227 { 228 intptr_t oprsz = simd_oprsz(desc); 229 intptr_t i; 230 231 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 232 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i); 233 } 234 clear_high(d, oprsz, desc); 235 } 236 237 void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc) 238 { 239 intptr_t oprsz = simd_oprsz(desc); 240 intptr_t i; 241 242 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 243 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i); 244 } 245 clear_high(d, oprsz, desc); 246 } 247 248 void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc) 249 { 250 intptr_t oprsz = simd_oprsz(desc); 251 intptr_t i; 252 253 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 254 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i); 255 } 256 clear_high(d, oprsz, desc); 257 } 258 259 void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc) 260 { 261 intptr_t oprsz = simd_oprsz(desc); 262 intptr_t i; 263 264 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 265 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b; 266 } 267 clear_high(d, oprsz, desc); 268 } 269 270 void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc) 271 { 272 intptr_t oprsz = simd_oprsz(desc); 273 intptr_t i; 274 275 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 276 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b; 277 } 278 clear_high(d, oprsz, desc); 279 } 280 281 void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc) 282 { 283 intptr_t oprsz = simd_oprsz(desc); 284 intptr_t i; 285 286 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 287 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b; 288 } 289 clear_high(d, oprsz, desc); 290 } 291 292 void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc) 293 { 294 intptr_t oprsz = simd_oprsz(desc); 295 intptr_t i; 296 297 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 298 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b; 299 } 300 clear_high(d, oprsz, desc); 301 } 302 303 void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc) 304 { 305 intptr_t oprsz = simd_oprsz(desc); 306 intptr_t i; 307 308 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 309 *(uint8_t *)(d + i) = -*(uint8_t *)(a + i); 310 } 311 clear_high(d, oprsz, desc); 312 } 313 314 void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc) 315 { 316 intptr_t oprsz = simd_oprsz(desc); 317 intptr_t i; 318 319 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 320 *(uint16_t *)(d + i) = -*(uint16_t *)(a + i); 321 } 322 clear_high(d, oprsz, desc); 323 } 324 325 void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc) 326 { 327 intptr_t oprsz = simd_oprsz(desc); 328 intptr_t i; 329 330 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 331 *(uint32_t *)(d + i) = -*(uint32_t *)(a + i); 332 } 333 clear_high(d, oprsz, desc); 334 } 335 336 void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc) 337 { 338 intptr_t oprsz = simd_oprsz(desc); 339 intptr_t i; 340 341 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 342 *(uint64_t *)(d + i) = -*(uint64_t *)(a + i); 343 } 344 clear_high(d, oprsz, desc); 345 } 346 347 void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc) 348 { 349 intptr_t oprsz = simd_oprsz(desc); 350 intptr_t i; 351 352 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 353 int8_t aa = *(int8_t *)(a + i); 354 *(int8_t *)(d + i) = aa < 0 ? -aa : aa; 355 } 356 clear_high(d, oprsz, desc); 357 } 358 359 void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc) 360 { 361 intptr_t oprsz = simd_oprsz(desc); 362 intptr_t i; 363 364 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 365 int16_t aa = *(int16_t *)(a + i); 366 *(int16_t *)(d + i) = aa < 0 ? -aa : aa; 367 } 368 clear_high(d, oprsz, desc); 369 } 370 371 void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc) 372 { 373 intptr_t oprsz = simd_oprsz(desc); 374 intptr_t i; 375 376 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 377 int32_t aa = *(int32_t *)(a + i); 378 *(int32_t *)(d + i) = aa < 0 ? -aa : aa; 379 } 380 clear_high(d, oprsz, desc); 381 } 382 383 void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc) 384 { 385 intptr_t oprsz = simd_oprsz(desc); 386 intptr_t i; 387 388 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 389 int64_t aa = *(int64_t *)(a + i); 390 *(int64_t *)(d + i) = aa < 0 ? -aa : aa; 391 } 392 clear_high(d, oprsz, desc); 393 } 394 395 void HELPER(gvec_mov)(void *d, void *a, uint32_t desc) 396 { 397 intptr_t oprsz = simd_oprsz(desc); 398 399 memcpy(d, a, oprsz); 400 clear_high(d, oprsz, desc); 401 } 402 403 void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c) 404 { 405 intptr_t oprsz = simd_oprsz(desc); 406 intptr_t i; 407 408 if (c == 0) { 409 oprsz = 0; 410 } else { 411 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 412 *(uint64_t *)(d + i) = c; 413 } 414 } 415 clear_high(d, oprsz, desc); 416 } 417 418 void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c) 419 { 420 intptr_t oprsz = simd_oprsz(desc); 421 intptr_t i; 422 423 if (c == 0) { 424 oprsz = 0; 425 } else { 426 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 427 *(uint32_t *)(d + i) = c; 428 } 429 } 430 clear_high(d, oprsz, desc); 431 } 432 433 void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c) 434 { 435 HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff)); 436 } 437 438 void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c) 439 { 440 HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff)); 441 } 442 443 void HELPER(gvec_not)(void *d, void *a, uint32_t desc) 444 { 445 intptr_t oprsz = simd_oprsz(desc); 446 intptr_t i; 447 448 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 449 *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i); 450 } 451 clear_high(d, oprsz, desc); 452 } 453 454 void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc) 455 { 456 intptr_t oprsz = simd_oprsz(desc); 457 intptr_t i; 458 459 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 460 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i); 461 } 462 clear_high(d, oprsz, desc); 463 } 464 465 void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc) 466 { 467 intptr_t oprsz = simd_oprsz(desc); 468 intptr_t i; 469 470 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 471 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i); 472 } 473 clear_high(d, oprsz, desc); 474 } 475 476 void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc) 477 { 478 intptr_t oprsz = simd_oprsz(desc); 479 intptr_t i; 480 481 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 482 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i); 483 } 484 clear_high(d, oprsz, desc); 485 } 486 487 void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc) 488 { 489 intptr_t oprsz = simd_oprsz(desc); 490 intptr_t i; 491 492 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 493 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i); 494 } 495 clear_high(d, oprsz, desc); 496 } 497 498 void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc) 499 { 500 intptr_t oprsz = simd_oprsz(desc); 501 intptr_t i; 502 503 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 504 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i); 505 } 506 clear_high(d, oprsz, desc); 507 } 508 509 void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc) 510 { 511 intptr_t oprsz = simd_oprsz(desc); 512 intptr_t i; 513 514 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 515 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i)); 516 } 517 clear_high(d, oprsz, desc); 518 } 519 520 void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc) 521 { 522 intptr_t oprsz = simd_oprsz(desc); 523 intptr_t i; 524 525 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 526 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i)); 527 } 528 clear_high(d, oprsz, desc); 529 } 530 531 void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc) 532 { 533 intptr_t oprsz = simd_oprsz(desc); 534 intptr_t i; 535 536 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 537 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i)); 538 } 539 clear_high(d, oprsz, desc); 540 } 541 542 void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc) 543 { 544 intptr_t oprsz = simd_oprsz(desc); 545 intptr_t i; 546 547 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 548 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b; 549 } 550 clear_high(d, oprsz, desc); 551 } 552 553 void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc) 554 { 555 intptr_t oprsz = simd_oprsz(desc); 556 intptr_t i; 557 558 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 559 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b; 560 } 561 clear_high(d, oprsz, desc); 562 } 563 564 void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc) 565 { 566 intptr_t oprsz = simd_oprsz(desc); 567 intptr_t i; 568 569 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 570 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b; 571 } 572 clear_high(d, oprsz, desc); 573 } 574 575 void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc) 576 { 577 intptr_t oprsz = simd_oprsz(desc); 578 int shift = simd_data(desc); 579 intptr_t i; 580 581 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 582 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift; 583 } 584 clear_high(d, oprsz, desc); 585 } 586 587 void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc) 588 { 589 intptr_t oprsz = simd_oprsz(desc); 590 int shift = simd_data(desc); 591 intptr_t i; 592 593 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 594 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift; 595 } 596 clear_high(d, oprsz, desc); 597 } 598 599 void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc) 600 { 601 intptr_t oprsz = simd_oprsz(desc); 602 int shift = simd_data(desc); 603 intptr_t i; 604 605 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 606 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift; 607 } 608 clear_high(d, oprsz, desc); 609 } 610 611 void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc) 612 { 613 intptr_t oprsz = simd_oprsz(desc); 614 int shift = simd_data(desc); 615 intptr_t i; 616 617 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 618 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift; 619 } 620 clear_high(d, oprsz, desc); 621 } 622 623 void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc) 624 { 625 intptr_t oprsz = simd_oprsz(desc); 626 int shift = simd_data(desc); 627 intptr_t i; 628 629 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 630 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift; 631 } 632 clear_high(d, oprsz, desc); 633 } 634 635 void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc) 636 { 637 intptr_t oprsz = simd_oprsz(desc); 638 int shift = simd_data(desc); 639 intptr_t i; 640 641 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 642 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift; 643 } 644 clear_high(d, oprsz, desc); 645 } 646 647 void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc) 648 { 649 intptr_t oprsz = simd_oprsz(desc); 650 int shift = simd_data(desc); 651 intptr_t i; 652 653 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 654 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift; 655 } 656 clear_high(d, oprsz, desc); 657 } 658 659 void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc) 660 { 661 intptr_t oprsz = simd_oprsz(desc); 662 int shift = simd_data(desc); 663 intptr_t i; 664 665 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 666 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift; 667 } 668 clear_high(d, oprsz, desc); 669 } 670 671 void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc) 672 { 673 intptr_t oprsz = simd_oprsz(desc); 674 int shift = simd_data(desc); 675 intptr_t i; 676 677 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 678 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift; 679 } 680 clear_high(d, oprsz, desc); 681 } 682 683 void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc) 684 { 685 intptr_t oprsz = simd_oprsz(desc); 686 int shift = simd_data(desc); 687 intptr_t i; 688 689 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 690 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift; 691 } 692 clear_high(d, oprsz, desc); 693 } 694 695 void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc) 696 { 697 intptr_t oprsz = simd_oprsz(desc); 698 int shift = simd_data(desc); 699 intptr_t i; 700 701 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 702 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift; 703 } 704 clear_high(d, oprsz, desc); 705 } 706 707 void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) 708 { 709 intptr_t oprsz = simd_oprsz(desc); 710 int shift = simd_data(desc); 711 intptr_t i; 712 713 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 714 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift; 715 } 716 clear_high(d, oprsz, desc); 717 } 718 719 void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc) 720 { 721 intptr_t oprsz = simd_oprsz(desc); 722 intptr_t i; 723 724 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 725 uint8_t sh = *(uint8_t *)(b + i) & 7; 726 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh; 727 } 728 clear_high(d, oprsz, desc); 729 } 730 731 void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc) 732 { 733 intptr_t oprsz = simd_oprsz(desc); 734 intptr_t i; 735 736 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 737 uint8_t sh = *(uint16_t *)(b + i) & 15; 738 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh; 739 } 740 clear_high(d, oprsz, desc); 741 } 742 743 void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc) 744 { 745 intptr_t oprsz = simd_oprsz(desc); 746 intptr_t i; 747 748 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 749 uint8_t sh = *(uint32_t *)(b + i) & 31; 750 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh; 751 } 752 clear_high(d, oprsz, desc); 753 } 754 755 void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc) 756 { 757 intptr_t oprsz = simd_oprsz(desc); 758 intptr_t i; 759 760 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 761 uint8_t sh = *(uint64_t *)(b + i) & 63; 762 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh; 763 } 764 clear_high(d, oprsz, desc); 765 } 766 767 void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc) 768 { 769 intptr_t oprsz = simd_oprsz(desc); 770 intptr_t i; 771 772 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 773 uint8_t sh = *(uint8_t *)(b + i) & 7; 774 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh; 775 } 776 clear_high(d, oprsz, desc); 777 } 778 779 void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc) 780 { 781 intptr_t oprsz = simd_oprsz(desc); 782 intptr_t i; 783 784 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 785 uint8_t sh = *(uint16_t *)(b + i) & 15; 786 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh; 787 } 788 clear_high(d, oprsz, desc); 789 } 790 791 void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc) 792 { 793 intptr_t oprsz = simd_oprsz(desc); 794 intptr_t i; 795 796 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 797 uint8_t sh = *(uint32_t *)(b + i) & 31; 798 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh; 799 } 800 clear_high(d, oprsz, desc); 801 } 802 803 void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc) 804 { 805 intptr_t oprsz = simd_oprsz(desc); 806 intptr_t i; 807 808 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 809 uint8_t sh = *(uint64_t *)(b + i) & 63; 810 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh; 811 } 812 clear_high(d, oprsz, desc); 813 } 814 815 void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc) 816 { 817 intptr_t oprsz = simd_oprsz(desc); 818 intptr_t i; 819 820 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 821 uint8_t sh = *(uint8_t *)(b + i) & 7; 822 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh; 823 } 824 clear_high(d, oprsz, desc); 825 } 826 827 void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc) 828 { 829 intptr_t oprsz = simd_oprsz(desc); 830 intptr_t i; 831 832 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 833 uint8_t sh = *(uint16_t *)(b + i) & 15; 834 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh; 835 } 836 clear_high(d, oprsz, desc); 837 } 838 839 void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc) 840 { 841 intptr_t oprsz = simd_oprsz(desc); 842 intptr_t i; 843 844 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 845 uint8_t sh = *(uint32_t *)(b + i) & 31; 846 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh; 847 } 848 clear_high(d, oprsz, desc); 849 } 850 851 void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc) 852 { 853 intptr_t oprsz = simd_oprsz(desc); 854 intptr_t i; 855 856 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 857 uint8_t sh = *(uint64_t *)(b + i) & 63; 858 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh; 859 } 860 clear_high(d, oprsz, desc); 861 } 862 863 #define DO_CMP0(X) -(X) 864 865 #define DO_CMP1(NAME, TYPE, OP) \ 866 void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \ 867 { \ 868 intptr_t oprsz = simd_oprsz(desc); \ 869 intptr_t i; \ 870 for (i = 0; i < oprsz; i += sizeof(TYPE)) { \ 871 *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \ 872 } \ 873 clear_high(d, oprsz, desc); \ 874 } 875 876 #define DO_CMP2(SZ) \ 877 DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==) \ 878 DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=) \ 879 DO_CMP1(gvec_lt##SZ, int##SZ##_t, <) \ 880 DO_CMP1(gvec_le##SZ, int##SZ##_t, <=) \ 881 DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <) \ 882 DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=) 883 884 DO_CMP2(8) 885 DO_CMP2(16) 886 DO_CMP2(32) 887 DO_CMP2(64) 888 889 #undef DO_CMP0 890 #undef DO_CMP1 891 #undef DO_CMP2 892 893 void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc) 894 { 895 intptr_t oprsz = simd_oprsz(desc); 896 intptr_t i; 897 898 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 899 int r = *(int8_t *)(a + i) + *(int8_t *)(b + i); 900 if (r > INT8_MAX) { 901 r = INT8_MAX; 902 } else if (r < INT8_MIN) { 903 r = INT8_MIN; 904 } 905 *(int8_t *)(d + i) = r; 906 } 907 clear_high(d, oprsz, desc); 908 } 909 910 void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc) 911 { 912 intptr_t oprsz = simd_oprsz(desc); 913 intptr_t i; 914 915 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 916 int r = *(int16_t *)(a + i) + *(int16_t *)(b + i); 917 if (r > INT16_MAX) { 918 r = INT16_MAX; 919 } else if (r < INT16_MIN) { 920 r = INT16_MIN; 921 } 922 *(int16_t *)(d + i) = r; 923 } 924 clear_high(d, oprsz, desc); 925 } 926 927 void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc) 928 { 929 intptr_t oprsz = simd_oprsz(desc); 930 intptr_t i; 931 932 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 933 int32_t ai = *(int32_t *)(a + i); 934 int32_t bi = *(int32_t *)(b + i); 935 int32_t di = ai + bi; 936 if (((di ^ ai) &~ (ai ^ bi)) < 0) { 937 /* Signed overflow. */ 938 di = (di < 0 ? INT32_MAX : INT32_MIN); 939 } 940 *(int32_t *)(d + i) = di; 941 } 942 clear_high(d, oprsz, desc); 943 } 944 945 void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc) 946 { 947 intptr_t oprsz = simd_oprsz(desc); 948 intptr_t i; 949 950 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 951 int64_t ai = *(int64_t *)(a + i); 952 int64_t bi = *(int64_t *)(b + i); 953 int64_t di = ai + bi; 954 if (((di ^ ai) &~ (ai ^ bi)) < 0) { 955 /* Signed overflow. */ 956 di = (di < 0 ? INT64_MAX : INT64_MIN); 957 } 958 *(int64_t *)(d + i) = di; 959 } 960 clear_high(d, oprsz, desc); 961 } 962 963 void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc) 964 { 965 intptr_t oprsz = simd_oprsz(desc); 966 intptr_t i; 967 968 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 969 int r = *(int8_t *)(a + i) - *(int8_t *)(b + i); 970 if (r > INT8_MAX) { 971 r = INT8_MAX; 972 } else if (r < INT8_MIN) { 973 r = INT8_MIN; 974 } 975 *(uint8_t *)(d + i) = r; 976 } 977 clear_high(d, oprsz, desc); 978 } 979 980 void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc) 981 { 982 intptr_t oprsz = simd_oprsz(desc); 983 intptr_t i; 984 985 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 986 int r = *(int16_t *)(a + i) - *(int16_t *)(b + i); 987 if (r > INT16_MAX) { 988 r = INT16_MAX; 989 } else if (r < INT16_MIN) { 990 r = INT16_MIN; 991 } 992 *(int16_t *)(d + i) = r; 993 } 994 clear_high(d, oprsz, desc); 995 } 996 997 void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc) 998 { 999 intptr_t oprsz = simd_oprsz(desc); 1000 intptr_t i; 1001 1002 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1003 int32_t ai = *(int32_t *)(a + i); 1004 int32_t bi = *(int32_t *)(b + i); 1005 int32_t di = ai - bi; 1006 if (((di ^ ai) & (ai ^ bi)) < 0) { 1007 /* Signed overflow. */ 1008 di = (di < 0 ? INT32_MAX : INT32_MIN); 1009 } 1010 *(int32_t *)(d + i) = di; 1011 } 1012 clear_high(d, oprsz, desc); 1013 } 1014 1015 void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc) 1016 { 1017 intptr_t oprsz = simd_oprsz(desc); 1018 intptr_t i; 1019 1020 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1021 int64_t ai = *(int64_t *)(a + i); 1022 int64_t bi = *(int64_t *)(b + i); 1023 int64_t di = ai - bi; 1024 if (((di ^ ai) & (ai ^ bi)) < 0) { 1025 /* Signed overflow. */ 1026 di = (di < 0 ? INT64_MAX : INT64_MIN); 1027 } 1028 *(int64_t *)(d + i) = di; 1029 } 1030 clear_high(d, oprsz, desc); 1031 } 1032 1033 void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc) 1034 { 1035 intptr_t oprsz = simd_oprsz(desc); 1036 intptr_t i; 1037 1038 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1039 unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); 1040 if (r > UINT8_MAX) { 1041 r = UINT8_MAX; 1042 } 1043 *(uint8_t *)(d + i) = r; 1044 } 1045 clear_high(d, oprsz, desc); 1046 } 1047 1048 void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc) 1049 { 1050 intptr_t oprsz = simd_oprsz(desc); 1051 intptr_t i; 1052 1053 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1054 unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); 1055 if (r > UINT16_MAX) { 1056 r = UINT16_MAX; 1057 } 1058 *(uint16_t *)(d + i) = r; 1059 } 1060 clear_high(d, oprsz, desc); 1061 } 1062 1063 void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc) 1064 { 1065 intptr_t oprsz = simd_oprsz(desc); 1066 intptr_t i; 1067 1068 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1069 uint32_t ai = *(uint32_t *)(a + i); 1070 uint32_t bi = *(uint32_t *)(b + i); 1071 uint32_t di = ai + bi; 1072 if (di < ai) { 1073 di = UINT32_MAX; 1074 } 1075 *(uint32_t *)(d + i) = di; 1076 } 1077 clear_high(d, oprsz, desc); 1078 } 1079 1080 void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc) 1081 { 1082 intptr_t oprsz = simd_oprsz(desc); 1083 intptr_t i; 1084 1085 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1086 uint64_t ai = *(uint64_t *)(a + i); 1087 uint64_t bi = *(uint64_t *)(b + i); 1088 uint64_t di = ai + bi; 1089 if (di < ai) { 1090 di = UINT64_MAX; 1091 } 1092 *(uint64_t *)(d + i) = di; 1093 } 1094 clear_high(d, oprsz, desc); 1095 } 1096 1097 void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc) 1098 { 1099 intptr_t oprsz = simd_oprsz(desc); 1100 intptr_t i; 1101 1102 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1103 int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); 1104 if (r < 0) { 1105 r = 0; 1106 } 1107 *(uint8_t *)(d + i) = r; 1108 } 1109 clear_high(d, oprsz, desc); 1110 } 1111 1112 void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc) 1113 { 1114 intptr_t oprsz = simd_oprsz(desc); 1115 intptr_t i; 1116 1117 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1118 int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); 1119 if (r < 0) { 1120 r = 0; 1121 } 1122 *(uint16_t *)(d + i) = r; 1123 } 1124 clear_high(d, oprsz, desc); 1125 } 1126 1127 void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc) 1128 { 1129 intptr_t oprsz = simd_oprsz(desc); 1130 intptr_t i; 1131 1132 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1133 uint32_t ai = *(uint32_t *)(a + i); 1134 uint32_t bi = *(uint32_t *)(b + i); 1135 uint32_t di = ai - bi; 1136 if (ai < bi) { 1137 di = 0; 1138 } 1139 *(uint32_t *)(d + i) = di; 1140 } 1141 clear_high(d, oprsz, desc); 1142 } 1143 1144 void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc) 1145 { 1146 intptr_t oprsz = simd_oprsz(desc); 1147 intptr_t i; 1148 1149 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1150 uint64_t ai = *(uint64_t *)(a + i); 1151 uint64_t bi = *(uint64_t *)(b + i); 1152 uint64_t di = ai - bi; 1153 if (ai < bi) { 1154 di = 0; 1155 } 1156 *(uint64_t *)(d + i) = di; 1157 } 1158 clear_high(d, oprsz, desc); 1159 } 1160 1161 void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc) 1162 { 1163 intptr_t oprsz = simd_oprsz(desc); 1164 intptr_t i; 1165 1166 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1167 int8_t aa = *(int8_t *)(a + i); 1168 int8_t bb = *(int8_t *)(b + i); 1169 int8_t dd = aa < bb ? aa : bb; 1170 *(int8_t *)(d + i) = dd; 1171 } 1172 clear_high(d, oprsz, desc); 1173 } 1174 1175 void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc) 1176 { 1177 intptr_t oprsz = simd_oprsz(desc); 1178 intptr_t i; 1179 1180 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1181 int16_t aa = *(int16_t *)(a + i); 1182 int16_t bb = *(int16_t *)(b + i); 1183 int16_t dd = aa < bb ? aa : bb; 1184 *(int16_t *)(d + i) = dd; 1185 } 1186 clear_high(d, oprsz, desc); 1187 } 1188 1189 void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc) 1190 { 1191 intptr_t oprsz = simd_oprsz(desc); 1192 intptr_t i; 1193 1194 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1195 int32_t aa = *(int32_t *)(a + i); 1196 int32_t bb = *(int32_t *)(b + i); 1197 int32_t dd = aa < bb ? aa : bb; 1198 *(int32_t *)(d + i) = dd; 1199 } 1200 clear_high(d, oprsz, desc); 1201 } 1202 1203 void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc) 1204 { 1205 intptr_t oprsz = simd_oprsz(desc); 1206 intptr_t i; 1207 1208 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1209 int64_t aa = *(int64_t *)(a + i); 1210 int64_t bb = *(int64_t *)(b + i); 1211 int64_t dd = aa < bb ? aa : bb; 1212 *(int64_t *)(d + i) = dd; 1213 } 1214 clear_high(d, oprsz, desc); 1215 } 1216 1217 void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc) 1218 { 1219 intptr_t oprsz = simd_oprsz(desc); 1220 intptr_t i; 1221 1222 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1223 int8_t aa = *(int8_t *)(a + i); 1224 int8_t bb = *(int8_t *)(b + i); 1225 int8_t dd = aa > bb ? aa : bb; 1226 *(int8_t *)(d + i) = dd; 1227 } 1228 clear_high(d, oprsz, desc); 1229 } 1230 1231 void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc) 1232 { 1233 intptr_t oprsz = simd_oprsz(desc); 1234 intptr_t i; 1235 1236 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1237 int16_t aa = *(int16_t *)(a + i); 1238 int16_t bb = *(int16_t *)(b + i); 1239 int16_t dd = aa > bb ? aa : bb; 1240 *(int16_t *)(d + i) = dd; 1241 } 1242 clear_high(d, oprsz, desc); 1243 } 1244 1245 void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc) 1246 { 1247 intptr_t oprsz = simd_oprsz(desc); 1248 intptr_t i; 1249 1250 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1251 int32_t aa = *(int32_t *)(a + i); 1252 int32_t bb = *(int32_t *)(b + i); 1253 int32_t dd = aa > bb ? aa : bb; 1254 *(int32_t *)(d + i) = dd; 1255 } 1256 clear_high(d, oprsz, desc); 1257 } 1258 1259 void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc) 1260 { 1261 intptr_t oprsz = simd_oprsz(desc); 1262 intptr_t i; 1263 1264 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1265 int64_t aa = *(int64_t *)(a + i); 1266 int64_t bb = *(int64_t *)(b + i); 1267 int64_t dd = aa > bb ? aa : bb; 1268 *(int64_t *)(d + i) = dd; 1269 } 1270 clear_high(d, oprsz, desc); 1271 } 1272 1273 void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc) 1274 { 1275 intptr_t oprsz = simd_oprsz(desc); 1276 intptr_t i; 1277 1278 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1279 uint8_t aa = *(uint8_t *)(a + i); 1280 uint8_t bb = *(uint8_t *)(b + i); 1281 uint8_t dd = aa < bb ? aa : bb; 1282 *(uint8_t *)(d + i) = dd; 1283 } 1284 clear_high(d, oprsz, desc); 1285 } 1286 1287 void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc) 1288 { 1289 intptr_t oprsz = simd_oprsz(desc); 1290 intptr_t i; 1291 1292 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1293 uint16_t aa = *(uint16_t *)(a + i); 1294 uint16_t bb = *(uint16_t *)(b + i); 1295 uint16_t dd = aa < bb ? aa : bb; 1296 *(uint16_t *)(d + i) = dd; 1297 } 1298 clear_high(d, oprsz, desc); 1299 } 1300 1301 void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc) 1302 { 1303 intptr_t oprsz = simd_oprsz(desc); 1304 intptr_t i; 1305 1306 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1307 uint32_t aa = *(uint32_t *)(a + i); 1308 uint32_t bb = *(uint32_t *)(b + i); 1309 uint32_t dd = aa < bb ? aa : bb; 1310 *(uint32_t *)(d + i) = dd; 1311 } 1312 clear_high(d, oprsz, desc); 1313 } 1314 1315 void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc) 1316 { 1317 intptr_t oprsz = simd_oprsz(desc); 1318 intptr_t i; 1319 1320 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1321 uint64_t aa = *(uint64_t *)(a + i); 1322 uint64_t bb = *(uint64_t *)(b + i); 1323 uint64_t dd = aa < bb ? aa : bb; 1324 *(uint64_t *)(d + i) = dd; 1325 } 1326 clear_high(d, oprsz, desc); 1327 } 1328 1329 void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc) 1330 { 1331 intptr_t oprsz = simd_oprsz(desc); 1332 intptr_t i; 1333 1334 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1335 uint8_t aa = *(uint8_t *)(a + i); 1336 uint8_t bb = *(uint8_t *)(b + i); 1337 uint8_t dd = aa > bb ? aa : bb; 1338 *(uint8_t *)(d + i) = dd; 1339 } 1340 clear_high(d, oprsz, desc); 1341 } 1342 1343 void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc) 1344 { 1345 intptr_t oprsz = simd_oprsz(desc); 1346 intptr_t i; 1347 1348 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1349 uint16_t aa = *(uint16_t *)(a + i); 1350 uint16_t bb = *(uint16_t *)(b + i); 1351 uint16_t dd = aa > bb ? aa : bb; 1352 *(uint16_t *)(d + i) = dd; 1353 } 1354 clear_high(d, oprsz, desc); 1355 } 1356 1357 void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc) 1358 { 1359 intptr_t oprsz = simd_oprsz(desc); 1360 intptr_t i; 1361 1362 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1363 uint32_t aa = *(uint32_t *)(a + i); 1364 uint32_t bb = *(uint32_t *)(b + i); 1365 uint32_t dd = aa > bb ? aa : bb; 1366 *(uint32_t *)(d + i) = dd; 1367 } 1368 clear_high(d, oprsz, desc); 1369 } 1370 1371 void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc) 1372 { 1373 intptr_t oprsz = simd_oprsz(desc); 1374 intptr_t i; 1375 1376 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1377 uint64_t aa = *(uint64_t *)(a + i); 1378 uint64_t bb = *(uint64_t *)(b + i); 1379 uint64_t dd = aa > bb ? aa : bb; 1380 *(uint64_t *)(d + i) = dd; 1381 } 1382 clear_high(d, oprsz, desc); 1383 } 1384 1385 void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc) 1386 { 1387 intptr_t oprsz = simd_oprsz(desc); 1388 intptr_t i; 1389 1390 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1391 uint64_t aa = *(uint64_t *)(a + i); 1392 uint64_t bb = *(uint64_t *)(b + i); 1393 uint64_t cc = *(uint64_t *)(c + i); 1394 *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa); 1395 } 1396 clear_high(d, oprsz, desc); 1397 } 1398