1 /* 2 * Generic vectorized operation runtime 3 * 4 * Copyright (c) 2018 Linaro 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu/host-utils.h" 22 #include "cpu.h" 23 #include "exec/helper-proto.h" 24 #include "tcg/tcg-gvec-desc.h" 25 26 27 /* Virtually all hosts support 16-byte vectors. Those that don't can emulate 28 * them via GCC's generic vector extension. This turns out to be simpler and 29 * more reliable than getting the compiler to autovectorize. 30 * 31 * In tcg-op-gvec.c, we asserted that both the size and alignment of the data 32 * are multiples of 16. 33 * 34 * When the compiler does not support all of the operations we require, the 35 * loops are written so that we can always fall back on the base types. 36 */ 37 #ifdef CONFIG_VECTOR16 38 typedef uint8_t vec8 __attribute__((vector_size(16))); 39 typedef uint16_t vec16 __attribute__((vector_size(16))); 40 typedef uint32_t vec32 __attribute__((vector_size(16))); 41 typedef uint64_t vec64 __attribute__((vector_size(16))); 42 43 typedef int8_t svec8 __attribute__((vector_size(16))); 44 typedef int16_t svec16 __attribute__((vector_size(16))); 45 typedef int32_t svec32 __attribute__((vector_size(16))); 46 typedef int64_t svec64 __attribute__((vector_size(16))); 47 48 #define DUP16(X) { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X } 49 #define DUP8(X) { X, X, X, X, X, X, X, X } 50 #define DUP4(X) { X, X, X, X } 51 #define DUP2(X) { X, X } 52 #else 53 typedef uint8_t vec8; 54 typedef uint16_t vec16; 55 typedef uint32_t vec32; 56 typedef uint64_t vec64; 57 58 typedef int8_t svec8; 59 typedef int16_t svec16; 60 typedef int32_t svec32; 61 typedef int64_t svec64; 62 63 #define DUP16(X) X 64 #define DUP8(X) X 65 #define DUP4(X) X 66 #define DUP2(X) X 67 #endif /* CONFIG_VECTOR16 */ 68 69 static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc) 70 { 71 intptr_t maxsz = simd_maxsz(desc); 72 intptr_t i; 73 74 if (unlikely(maxsz > oprsz)) { 75 for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) { 76 *(uint64_t *)(d + i) = 0; 77 } 78 } 79 } 80 81 void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc) 82 { 83 intptr_t oprsz = simd_oprsz(desc); 84 intptr_t i; 85 86 for (i = 0; i < oprsz; i += sizeof(vec8)) { 87 *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i); 88 } 89 clear_high(d, oprsz, desc); 90 } 91 92 void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc) 93 { 94 intptr_t oprsz = simd_oprsz(desc); 95 intptr_t i; 96 97 for (i = 0; i < oprsz; i += sizeof(vec16)) { 98 *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i); 99 } 100 clear_high(d, oprsz, desc); 101 } 102 103 void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc) 104 { 105 intptr_t oprsz = simd_oprsz(desc); 106 intptr_t i; 107 108 for (i = 0; i < oprsz; i += sizeof(vec32)) { 109 *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i); 110 } 111 clear_high(d, oprsz, desc); 112 } 113 114 void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc) 115 { 116 intptr_t oprsz = simd_oprsz(desc); 117 intptr_t i; 118 119 for (i = 0; i < oprsz; i += sizeof(vec64)) { 120 *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i); 121 } 122 clear_high(d, oprsz, desc); 123 } 124 125 void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc) 126 { 127 intptr_t oprsz = simd_oprsz(desc); 128 vec8 vecb = (vec8)DUP16(b); 129 intptr_t i; 130 131 for (i = 0; i < oprsz; i += sizeof(vec8)) { 132 *(vec8 *)(d + i) = *(vec8 *)(a + i) + vecb; 133 } 134 clear_high(d, oprsz, desc); 135 } 136 137 void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc) 138 { 139 intptr_t oprsz = simd_oprsz(desc); 140 vec16 vecb = (vec16)DUP8(b); 141 intptr_t i; 142 143 for (i = 0; i < oprsz; i += sizeof(vec16)) { 144 *(vec16 *)(d + i) = *(vec16 *)(a + i) + vecb; 145 } 146 clear_high(d, oprsz, desc); 147 } 148 149 void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc) 150 { 151 intptr_t oprsz = simd_oprsz(desc); 152 vec32 vecb = (vec32)DUP4(b); 153 intptr_t i; 154 155 for (i = 0; i < oprsz; i += sizeof(vec32)) { 156 *(vec32 *)(d + i) = *(vec32 *)(a + i) + vecb; 157 } 158 clear_high(d, oprsz, desc); 159 } 160 161 void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc) 162 { 163 intptr_t oprsz = simd_oprsz(desc); 164 vec64 vecb = (vec64)DUP2(b); 165 intptr_t i; 166 167 for (i = 0; i < oprsz; i += sizeof(vec64)) { 168 *(vec64 *)(d + i) = *(vec64 *)(a + i) + vecb; 169 } 170 clear_high(d, oprsz, desc); 171 } 172 173 void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc) 174 { 175 intptr_t oprsz = simd_oprsz(desc); 176 intptr_t i; 177 178 for (i = 0; i < oprsz; i += sizeof(vec8)) { 179 *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i); 180 } 181 clear_high(d, oprsz, desc); 182 } 183 184 void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc) 185 { 186 intptr_t oprsz = simd_oprsz(desc); 187 intptr_t i; 188 189 for (i = 0; i < oprsz; i += sizeof(vec16)) { 190 *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i); 191 } 192 clear_high(d, oprsz, desc); 193 } 194 195 void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc) 196 { 197 intptr_t oprsz = simd_oprsz(desc); 198 intptr_t i; 199 200 for (i = 0; i < oprsz; i += sizeof(vec32)) { 201 *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i); 202 } 203 clear_high(d, oprsz, desc); 204 } 205 206 void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc) 207 { 208 intptr_t oprsz = simd_oprsz(desc); 209 intptr_t i; 210 211 for (i = 0; i < oprsz; i += sizeof(vec64)) { 212 *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i); 213 } 214 clear_high(d, oprsz, desc); 215 } 216 217 void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc) 218 { 219 intptr_t oprsz = simd_oprsz(desc); 220 vec8 vecb = (vec8)DUP16(b); 221 intptr_t i; 222 223 for (i = 0; i < oprsz; i += sizeof(vec8)) { 224 *(vec8 *)(d + i) = *(vec8 *)(a + i) - vecb; 225 } 226 clear_high(d, oprsz, desc); 227 } 228 229 void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc) 230 { 231 intptr_t oprsz = simd_oprsz(desc); 232 vec16 vecb = (vec16)DUP8(b); 233 intptr_t i; 234 235 for (i = 0; i < oprsz; i += sizeof(vec16)) { 236 *(vec16 *)(d + i) = *(vec16 *)(a + i) - vecb; 237 } 238 clear_high(d, oprsz, desc); 239 } 240 241 void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc) 242 { 243 intptr_t oprsz = simd_oprsz(desc); 244 vec32 vecb = (vec32)DUP4(b); 245 intptr_t i; 246 247 for (i = 0; i < oprsz; i += sizeof(vec32)) { 248 *(vec32 *)(d + i) = *(vec32 *)(a + i) - vecb; 249 } 250 clear_high(d, oprsz, desc); 251 } 252 253 void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc) 254 { 255 intptr_t oprsz = simd_oprsz(desc); 256 vec64 vecb = (vec64)DUP2(b); 257 intptr_t i; 258 259 for (i = 0; i < oprsz; i += sizeof(vec64)) { 260 *(vec64 *)(d + i) = *(vec64 *)(a + i) - vecb; 261 } 262 clear_high(d, oprsz, desc); 263 } 264 265 void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc) 266 { 267 intptr_t oprsz = simd_oprsz(desc); 268 intptr_t i; 269 270 for (i = 0; i < oprsz; i += sizeof(vec8)) { 271 *(vec8 *)(d + i) = *(vec8 *)(a + i) * *(vec8 *)(b + i); 272 } 273 clear_high(d, oprsz, desc); 274 } 275 276 void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc) 277 { 278 intptr_t oprsz = simd_oprsz(desc); 279 intptr_t i; 280 281 for (i = 0; i < oprsz; i += sizeof(vec16)) { 282 *(vec16 *)(d + i) = *(vec16 *)(a + i) * *(vec16 *)(b + i); 283 } 284 clear_high(d, oprsz, desc); 285 } 286 287 void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc) 288 { 289 intptr_t oprsz = simd_oprsz(desc); 290 intptr_t i; 291 292 for (i = 0; i < oprsz; i += sizeof(vec32)) { 293 *(vec32 *)(d + i) = *(vec32 *)(a + i) * *(vec32 *)(b + i); 294 } 295 clear_high(d, oprsz, desc); 296 } 297 298 void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc) 299 { 300 intptr_t oprsz = simd_oprsz(desc); 301 intptr_t i; 302 303 for (i = 0; i < oprsz; i += sizeof(vec64)) { 304 *(vec64 *)(d + i) = *(vec64 *)(a + i) * *(vec64 *)(b + i); 305 } 306 clear_high(d, oprsz, desc); 307 } 308 309 void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc) 310 { 311 intptr_t oprsz = simd_oprsz(desc); 312 vec8 vecb = (vec8)DUP16(b); 313 intptr_t i; 314 315 for (i = 0; i < oprsz; i += sizeof(vec8)) { 316 *(vec8 *)(d + i) = *(vec8 *)(a + i) * vecb; 317 } 318 clear_high(d, oprsz, desc); 319 } 320 321 void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc) 322 { 323 intptr_t oprsz = simd_oprsz(desc); 324 vec16 vecb = (vec16)DUP8(b); 325 intptr_t i; 326 327 for (i = 0; i < oprsz; i += sizeof(vec16)) { 328 *(vec16 *)(d + i) = *(vec16 *)(a + i) * vecb; 329 } 330 clear_high(d, oprsz, desc); 331 } 332 333 void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc) 334 { 335 intptr_t oprsz = simd_oprsz(desc); 336 vec32 vecb = (vec32)DUP4(b); 337 intptr_t i; 338 339 for (i = 0; i < oprsz; i += sizeof(vec32)) { 340 *(vec32 *)(d + i) = *(vec32 *)(a + i) * vecb; 341 } 342 clear_high(d, oprsz, desc); 343 } 344 345 void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc) 346 { 347 intptr_t oprsz = simd_oprsz(desc); 348 vec64 vecb = (vec64)DUP2(b); 349 intptr_t i; 350 351 for (i = 0; i < oprsz; i += sizeof(vec64)) { 352 *(vec64 *)(d + i) = *(vec64 *)(a + i) * vecb; 353 } 354 clear_high(d, oprsz, desc); 355 } 356 357 void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc) 358 { 359 intptr_t oprsz = simd_oprsz(desc); 360 intptr_t i; 361 362 for (i = 0; i < oprsz; i += sizeof(vec8)) { 363 *(vec8 *)(d + i) = -*(vec8 *)(a + i); 364 } 365 clear_high(d, oprsz, desc); 366 } 367 368 void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc) 369 { 370 intptr_t oprsz = simd_oprsz(desc); 371 intptr_t i; 372 373 for (i = 0; i < oprsz; i += sizeof(vec16)) { 374 *(vec16 *)(d + i) = -*(vec16 *)(a + i); 375 } 376 clear_high(d, oprsz, desc); 377 } 378 379 void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc) 380 { 381 intptr_t oprsz = simd_oprsz(desc); 382 intptr_t i; 383 384 for (i = 0; i < oprsz; i += sizeof(vec32)) { 385 *(vec32 *)(d + i) = -*(vec32 *)(a + i); 386 } 387 clear_high(d, oprsz, desc); 388 } 389 390 void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc) 391 { 392 intptr_t oprsz = simd_oprsz(desc); 393 intptr_t i; 394 395 for (i = 0; i < oprsz; i += sizeof(vec64)) { 396 *(vec64 *)(d + i) = -*(vec64 *)(a + i); 397 } 398 clear_high(d, oprsz, desc); 399 } 400 401 void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc) 402 { 403 intptr_t oprsz = simd_oprsz(desc); 404 intptr_t i; 405 406 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 407 int8_t aa = *(int8_t *)(a + i); 408 *(int8_t *)(d + i) = aa < 0 ? -aa : aa; 409 } 410 clear_high(d, oprsz, desc); 411 } 412 413 void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc) 414 { 415 intptr_t oprsz = simd_oprsz(desc); 416 intptr_t i; 417 418 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 419 int16_t aa = *(int16_t *)(a + i); 420 *(int16_t *)(d + i) = aa < 0 ? -aa : aa; 421 } 422 clear_high(d, oprsz, desc); 423 } 424 425 void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc) 426 { 427 intptr_t oprsz = simd_oprsz(desc); 428 intptr_t i; 429 430 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 431 int32_t aa = *(int32_t *)(a + i); 432 *(int32_t *)(d + i) = aa < 0 ? -aa : aa; 433 } 434 clear_high(d, oprsz, desc); 435 } 436 437 void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc) 438 { 439 intptr_t oprsz = simd_oprsz(desc); 440 intptr_t i; 441 442 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 443 int64_t aa = *(int64_t *)(a + i); 444 *(int64_t *)(d + i) = aa < 0 ? -aa : aa; 445 } 446 clear_high(d, oprsz, desc); 447 } 448 449 void HELPER(gvec_mov)(void *d, void *a, uint32_t desc) 450 { 451 intptr_t oprsz = simd_oprsz(desc); 452 453 memcpy(d, a, oprsz); 454 clear_high(d, oprsz, desc); 455 } 456 457 void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c) 458 { 459 intptr_t oprsz = simd_oprsz(desc); 460 intptr_t i; 461 462 if (c == 0) { 463 oprsz = 0; 464 } else { 465 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 466 *(uint64_t *)(d + i) = c; 467 } 468 } 469 clear_high(d, oprsz, desc); 470 } 471 472 void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c) 473 { 474 intptr_t oprsz = simd_oprsz(desc); 475 intptr_t i; 476 477 if (c == 0) { 478 oprsz = 0; 479 } else { 480 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 481 *(uint32_t *)(d + i) = c; 482 } 483 } 484 clear_high(d, oprsz, desc); 485 } 486 487 void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c) 488 { 489 HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff)); 490 } 491 492 void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c) 493 { 494 HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff)); 495 } 496 497 void HELPER(gvec_not)(void *d, void *a, uint32_t desc) 498 { 499 intptr_t oprsz = simd_oprsz(desc); 500 intptr_t i; 501 502 for (i = 0; i < oprsz; i += sizeof(vec64)) { 503 *(vec64 *)(d + i) = ~*(vec64 *)(a + i); 504 } 505 clear_high(d, oprsz, desc); 506 } 507 508 void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc) 509 { 510 intptr_t oprsz = simd_oprsz(desc); 511 intptr_t i; 512 513 for (i = 0; i < oprsz; i += sizeof(vec64)) { 514 *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i); 515 } 516 clear_high(d, oprsz, desc); 517 } 518 519 void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc) 520 { 521 intptr_t oprsz = simd_oprsz(desc); 522 intptr_t i; 523 524 for (i = 0; i < oprsz; i += sizeof(vec64)) { 525 *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i); 526 } 527 clear_high(d, oprsz, desc); 528 } 529 530 void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc) 531 { 532 intptr_t oprsz = simd_oprsz(desc); 533 intptr_t i; 534 535 for (i = 0; i < oprsz; i += sizeof(vec64)) { 536 *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i); 537 } 538 clear_high(d, oprsz, desc); 539 } 540 541 void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc) 542 { 543 intptr_t oprsz = simd_oprsz(desc); 544 intptr_t i; 545 546 for (i = 0; i < oprsz; i += sizeof(vec64)) { 547 *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i); 548 } 549 clear_high(d, oprsz, desc); 550 } 551 552 void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc) 553 { 554 intptr_t oprsz = simd_oprsz(desc); 555 intptr_t i; 556 557 for (i = 0; i < oprsz; i += sizeof(vec64)) { 558 *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i); 559 } 560 clear_high(d, oprsz, desc); 561 } 562 563 void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc) 564 { 565 intptr_t oprsz = simd_oprsz(desc); 566 intptr_t i; 567 568 for (i = 0; i < oprsz; i += sizeof(vec64)) { 569 *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) & *(vec64 *)(b + i)); 570 } 571 clear_high(d, oprsz, desc); 572 } 573 574 void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc) 575 { 576 intptr_t oprsz = simd_oprsz(desc); 577 intptr_t i; 578 579 for (i = 0; i < oprsz; i += sizeof(vec64)) { 580 *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) | *(vec64 *)(b + i)); 581 } 582 clear_high(d, oprsz, desc); 583 } 584 585 void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc) 586 { 587 intptr_t oprsz = simd_oprsz(desc); 588 intptr_t i; 589 590 for (i = 0; i < oprsz; i += sizeof(vec64)) { 591 *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) ^ *(vec64 *)(b + i)); 592 } 593 clear_high(d, oprsz, desc); 594 } 595 596 void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc) 597 { 598 intptr_t oprsz = simd_oprsz(desc); 599 vec64 vecb = (vec64)DUP2(b); 600 intptr_t i; 601 602 for (i = 0; i < oprsz; i += sizeof(vec64)) { 603 *(vec64 *)(d + i) = *(vec64 *)(a + i) & vecb; 604 } 605 clear_high(d, oprsz, desc); 606 } 607 608 void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc) 609 { 610 intptr_t oprsz = simd_oprsz(desc); 611 vec64 vecb = (vec64)DUP2(b); 612 intptr_t i; 613 614 for (i = 0; i < oprsz; i += sizeof(vec64)) { 615 *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ vecb; 616 } 617 clear_high(d, oprsz, desc); 618 } 619 620 void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc) 621 { 622 intptr_t oprsz = simd_oprsz(desc); 623 vec64 vecb = (vec64)DUP2(b); 624 intptr_t i; 625 626 for (i = 0; i < oprsz; i += sizeof(vec64)) { 627 *(vec64 *)(d + i) = *(vec64 *)(a + i) | vecb; 628 } 629 clear_high(d, oprsz, desc); 630 } 631 632 void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc) 633 { 634 intptr_t oprsz = simd_oprsz(desc); 635 int shift = simd_data(desc); 636 intptr_t i; 637 638 for (i = 0; i < oprsz; i += sizeof(vec8)) { 639 *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift; 640 } 641 clear_high(d, oprsz, desc); 642 } 643 644 void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc) 645 { 646 intptr_t oprsz = simd_oprsz(desc); 647 int shift = simd_data(desc); 648 intptr_t i; 649 650 for (i = 0; i < oprsz; i += sizeof(vec16)) { 651 *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift; 652 } 653 clear_high(d, oprsz, desc); 654 } 655 656 void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc) 657 { 658 intptr_t oprsz = simd_oprsz(desc); 659 int shift = simd_data(desc); 660 intptr_t i; 661 662 for (i = 0; i < oprsz; i += sizeof(vec32)) { 663 *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift; 664 } 665 clear_high(d, oprsz, desc); 666 } 667 668 void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc) 669 { 670 intptr_t oprsz = simd_oprsz(desc); 671 int shift = simd_data(desc); 672 intptr_t i; 673 674 for (i = 0; i < oprsz; i += sizeof(vec64)) { 675 *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift; 676 } 677 clear_high(d, oprsz, desc); 678 } 679 680 void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc) 681 { 682 intptr_t oprsz = simd_oprsz(desc); 683 int shift = simd_data(desc); 684 intptr_t i; 685 686 for (i = 0; i < oprsz; i += sizeof(vec8)) { 687 *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift; 688 } 689 clear_high(d, oprsz, desc); 690 } 691 692 void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc) 693 { 694 intptr_t oprsz = simd_oprsz(desc); 695 int shift = simd_data(desc); 696 intptr_t i; 697 698 for (i = 0; i < oprsz; i += sizeof(vec16)) { 699 *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift; 700 } 701 clear_high(d, oprsz, desc); 702 } 703 704 void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc) 705 { 706 intptr_t oprsz = simd_oprsz(desc); 707 int shift = simd_data(desc); 708 intptr_t i; 709 710 for (i = 0; i < oprsz; i += sizeof(vec32)) { 711 *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift; 712 } 713 clear_high(d, oprsz, desc); 714 } 715 716 void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc) 717 { 718 intptr_t oprsz = simd_oprsz(desc); 719 int shift = simd_data(desc); 720 intptr_t i; 721 722 for (i = 0; i < oprsz; i += sizeof(vec64)) { 723 *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift; 724 } 725 clear_high(d, oprsz, desc); 726 } 727 728 void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc) 729 { 730 intptr_t oprsz = simd_oprsz(desc); 731 int shift = simd_data(desc); 732 intptr_t i; 733 734 for (i = 0; i < oprsz; i += sizeof(vec8)) { 735 *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift; 736 } 737 clear_high(d, oprsz, desc); 738 } 739 740 void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc) 741 { 742 intptr_t oprsz = simd_oprsz(desc); 743 int shift = simd_data(desc); 744 intptr_t i; 745 746 for (i = 0; i < oprsz; i += sizeof(vec16)) { 747 *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift; 748 } 749 clear_high(d, oprsz, desc); 750 } 751 752 void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc) 753 { 754 intptr_t oprsz = simd_oprsz(desc); 755 int shift = simd_data(desc); 756 intptr_t i; 757 758 for (i = 0; i < oprsz; i += sizeof(vec32)) { 759 *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift; 760 } 761 clear_high(d, oprsz, desc); 762 } 763 764 void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) 765 { 766 intptr_t oprsz = simd_oprsz(desc); 767 int shift = simd_data(desc); 768 intptr_t i; 769 770 for (i = 0; i < oprsz; i += sizeof(vec64)) { 771 *(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift; 772 } 773 clear_high(d, oprsz, desc); 774 } 775 776 void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc) 777 { 778 intptr_t oprsz = simd_oprsz(desc); 779 intptr_t i; 780 781 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 782 uint8_t sh = *(uint8_t *)(b + i) & 7; 783 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh; 784 } 785 clear_high(d, oprsz, desc); 786 } 787 788 void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc) 789 { 790 intptr_t oprsz = simd_oprsz(desc); 791 intptr_t i; 792 793 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 794 uint8_t sh = *(uint16_t *)(b + i) & 15; 795 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh; 796 } 797 clear_high(d, oprsz, desc); 798 } 799 800 void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc) 801 { 802 intptr_t oprsz = simd_oprsz(desc); 803 intptr_t i; 804 805 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 806 uint8_t sh = *(uint32_t *)(b + i) & 31; 807 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh; 808 } 809 clear_high(d, oprsz, desc); 810 } 811 812 void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc) 813 { 814 intptr_t oprsz = simd_oprsz(desc); 815 intptr_t i; 816 817 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 818 uint8_t sh = *(uint64_t *)(b + i) & 63; 819 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh; 820 } 821 clear_high(d, oprsz, desc); 822 } 823 824 void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc) 825 { 826 intptr_t oprsz = simd_oprsz(desc); 827 intptr_t i; 828 829 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 830 uint8_t sh = *(uint8_t *)(b + i) & 7; 831 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh; 832 } 833 clear_high(d, oprsz, desc); 834 } 835 836 void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc) 837 { 838 intptr_t oprsz = simd_oprsz(desc); 839 intptr_t i; 840 841 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 842 uint8_t sh = *(uint16_t *)(b + i) & 15; 843 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh; 844 } 845 clear_high(d, oprsz, desc); 846 } 847 848 void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc) 849 { 850 intptr_t oprsz = simd_oprsz(desc); 851 intptr_t i; 852 853 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 854 uint8_t sh = *(uint32_t *)(b + i) & 31; 855 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh; 856 } 857 clear_high(d, oprsz, desc); 858 } 859 860 void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc) 861 { 862 intptr_t oprsz = simd_oprsz(desc); 863 intptr_t i; 864 865 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 866 uint8_t sh = *(uint64_t *)(b + i) & 63; 867 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh; 868 } 869 clear_high(d, oprsz, desc); 870 } 871 872 void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc) 873 { 874 intptr_t oprsz = simd_oprsz(desc); 875 intptr_t i; 876 877 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 878 uint8_t sh = *(uint8_t *)(b + i) & 7; 879 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh; 880 } 881 clear_high(d, oprsz, desc); 882 } 883 884 void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc) 885 { 886 intptr_t oprsz = simd_oprsz(desc); 887 intptr_t i; 888 889 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 890 uint8_t sh = *(uint16_t *)(b + i) & 15; 891 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh; 892 } 893 clear_high(d, oprsz, desc); 894 } 895 896 void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc) 897 { 898 intptr_t oprsz = simd_oprsz(desc); 899 intptr_t i; 900 901 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 902 uint8_t sh = *(uint32_t *)(b + i) & 31; 903 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh; 904 } 905 clear_high(d, oprsz, desc); 906 } 907 908 void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc) 909 { 910 intptr_t oprsz = simd_oprsz(desc); 911 intptr_t i; 912 913 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 914 uint8_t sh = *(uint64_t *)(b + i) & 63; 915 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh; 916 } 917 clear_high(d, oprsz, desc); 918 } 919 920 /* If vectors are enabled, the compiler fills in -1 for true. 921 Otherwise, we must take care of this by hand. */ 922 #ifdef CONFIG_VECTOR16 923 # define DO_CMP0(X) X 924 #else 925 # define DO_CMP0(X) -(X) 926 #endif 927 928 #define DO_CMP1(NAME, TYPE, OP) \ 929 void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \ 930 { \ 931 intptr_t oprsz = simd_oprsz(desc); \ 932 intptr_t i; \ 933 for (i = 0; i < oprsz; i += sizeof(TYPE)) { \ 934 *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \ 935 } \ 936 clear_high(d, oprsz, desc); \ 937 } 938 939 #define DO_CMP2(SZ) \ 940 DO_CMP1(gvec_eq##SZ, vec##SZ, ==) \ 941 DO_CMP1(gvec_ne##SZ, vec##SZ, !=) \ 942 DO_CMP1(gvec_lt##SZ, svec##SZ, <) \ 943 DO_CMP1(gvec_le##SZ, svec##SZ, <=) \ 944 DO_CMP1(gvec_ltu##SZ, vec##SZ, <) \ 945 DO_CMP1(gvec_leu##SZ, vec##SZ, <=) 946 947 DO_CMP2(8) 948 DO_CMP2(16) 949 DO_CMP2(32) 950 DO_CMP2(64) 951 952 #undef DO_CMP0 953 #undef DO_CMP1 954 #undef DO_CMP2 955 956 void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc) 957 { 958 intptr_t oprsz = simd_oprsz(desc); 959 intptr_t i; 960 961 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 962 int r = *(int8_t *)(a + i) + *(int8_t *)(b + i); 963 if (r > INT8_MAX) { 964 r = INT8_MAX; 965 } else if (r < INT8_MIN) { 966 r = INT8_MIN; 967 } 968 *(int8_t *)(d + i) = r; 969 } 970 clear_high(d, oprsz, desc); 971 } 972 973 void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc) 974 { 975 intptr_t oprsz = simd_oprsz(desc); 976 intptr_t i; 977 978 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 979 int r = *(int16_t *)(a + i) + *(int16_t *)(b + i); 980 if (r > INT16_MAX) { 981 r = INT16_MAX; 982 } else if (r < INT16_MIN) { 983 r = INT16_MIN; 984 } 985 *(int16_t *)(d + i) = r; 986 } 987 clear_high(d, oprsz, desc); 988 } 989 990 void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc) 991 { 992 intptr_t oprsz = simd_oprsz(desc); 993 intptr_t i; 994 995 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 996 int32_t ai = *(int32_t *)(a + i); 997 int32_t bi = *(int32_t *)(b + i); 998 int32_t di = ai + bi; 999 if (((di ^ ai) &~ (ai ^ bi)) < 0) { 1000 /* Signed overflow. */ 1001 di = (di < 0 ? INT32_MAX : INT32_MIN); 1002 } 1003 *(int32_t *)(d + i) = di; 1004 } 1005 clear_high(d, oprsz, desc); 1006 } 1007 1008 void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc) 1009 { 1010 intptr_t oprsz = simd_oprsz(desc); 1011 intptr_t i; 1012 1013 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1014 int64_t ai = *(int64_t *)(a + i); 1015 int64_t bi = *(int64_t *)(b + i); 1016 int64_t di = ai + bi; 1017 if (((di ^ ai) &~ (ai ^ bi)) < 0) { 1018 /* Signed overflow. */ 1019 di = (di < 0 ? INT64_MAX : INT64_MIN); 1020 } 1021 *(int64_t *)(d + i) = di; 1022 } 1023 clear_high(d, oprsz, desc); 1024 } 1025 1026 void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc) 1027 { 1028 intptr_t oprsz = simd_oprsz(desc); 1029 intptr_t i; 1030 1031 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1032 int r = *(int8_t *)(a + i) - *(int8_t *)(b + i); 1033 if (r > INT8_MAX) { 1034 r = INT8_MAX; 1035 } else if (r < INT8_MIN) { 1036 r = INT8_MIN; 1037 } 1038 *(uint8_t *)(d + i) = r; 1039 } 1040 clear_high(d, oprsz, desc); 1041 } 1042 1043 void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc) 1044 { 1045 intptr_t oprsz = simd_oprsz(desc); 1046 intptr_t i; 1047 1048 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1049 int r = *(int16_t *)(a + i) - *(int16_t *)(b + i); 1050 if (r > INT16_MAX) { 1051 r = INT16_MAX; 1052 } else if (r < INT16_MIN) { 1053 r = INT16_MIN; 1054 } 1055 *(int16_t *)(d + i) = r; 1056 } 1057 clear_high(d, oprsz, desc); 1058 } 1059 1060 void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc) 1061 { 1062 intptr_t oprsz = simd_oprsz(desc); 1063 intptr_t i; 1064 1065 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1066 int32_t ai = *(int32_t *)(a + i); 1067 int32_t bi = *(int32_t *)(b + i); 1068 int32_t di = ai - bi; 1069 if (((di ^ ai) & (ai ^ bi)) < 0) { 1070 /* Signed overflow. */ 1071 di = (di < 0 ? INT32_MAX : INT32_MIN); 1072 } 1073 *(int32_t *)(d + i) = di; 1074 } 1075 clear_high(d, oprsz, desc); 1076 } 1077 1078 void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc) 1079 { 1080 intptr_t oprsz = simd_oprsz(desc); 1081 intptr_t i; 1082 1083 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1084 int64_t ai = *(int64_t *)(a + i); 1085 int64_t bi = *(int64_t *)(b + i); 1086 int64_t di = ai - bi; 1087 if (((di ^ ai) & (ai ^ bi)) < 0) { 1088 /* Signed overflow. */ 1089 di = (di < 0 ? INT64_MAX : INT64_MIN); 1090 } 1091 *(int64_t *)(d + i) = di; 1092 } 1093 clear_high(d, oprsz, desc); 1094 } 1095 1096 void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc) 1097 { 1098 intptr_t oprsz = simd_oprsz(desc); 1099 intptr_t i; 1100 1101 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1102 unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); 1103 if (r > UINT8_MAX) { 1104 r = UINT8_MAX; 1105 } 1106 *(uint8_t *)(d + i) = r; 1107 } 1108 clear_high(d, oprsz, desc); 1109 } 1110 1111 void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc) 1112 { 1113 intptr_t oprsz = simd_oprsz(desc); 1114 intptr_t i; 1115 1116 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1117 unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); 1118 if (r > UINT16_MAX) { 1119 r = UINT16_MAX; 1120 } 1121 *(uint16_t *)(d + i) = r; 1122 } 1123 clear_high(d, oprsz, desc); 1124 } 1125 1126 void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc) 1127 { 1128 intptr_t oprsz = simd_oprsz(desc); 1129 intptr_t i; 1130 1131 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1132 uint32_t ai = *(uint32_t *)(a + i); 1133 uint32_t bi = *(uint32_t *)(b + i); 1134 uint32_t di = ai + bi; 1135 if (di < ai) { 1136 di = UINT32_MAX; 1137 } 1138 *(uint32_t *)(d + i) = di; 1139 } 1140 clear_high(d, oprsz, desc); 1141 } 1142 1143 void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc) 1144 { 1145 intptr_t oprsz = simd_oprsz(desc); 1146 intptr_t i; 1147 1148 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1149 uint64_t ai = *(uint64_t *)(a + i); 1150 uint64_t bi = *(uint64_t *)(b + i); 1151 uint64_t di = ai + bi; 1152 if (di < ai) { 1153 di = UINT64_MAX; 1154 } 1155 *(uint64_t *)(d + i) = di; 1156 } 1157 clear_high(d, oprsz, desc); 1158 } 1159 1160 void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc) 1161 { 1162 intptr_t oprsz = simd_oprsz(desc); 1163 intptr_t i; 1164 1165 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1166 int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); 1167 if (r < 0) { 1168 r = 0; 1169 } 1170 *(uint8_t *)(d + i) = r; 1171 } 1172 clear_high(d, oprsz, desc); 1173 } 1174 1175 void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc) 1176 { 1177 intptr_t oprsz = simd_oprsz(desc); 1178 intptr_t i; 1179 1180 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1181 int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); 1182 if (r < 0) { 1183 r = 0; 1184 } 1185 *(uint16_t *)(d + i) = r; 1186 } 1187 clear_high(d, oprsz, desc); 1188 } 1189 1190 void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc) 1191 { 1192 intptr_t oprsz = simd_oprsz(desc); 1193 intptr_t i; 1194 1195 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1196 uint32_t ai = *(uint32_t *)(a + i); 1197 uint32_t bi = *(uint32_t *)(b + i); 1198 uint32_t di = ai - bi; 1199 if (ai < bi) { 1200 di = 0; 1201 } 1202 *(uint32_t *)(d + i) = di; 1203 } 1204 clear_high(d, oprsz, desc); 1205 } 1206 1207 void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc) 1208 { 1209 intptr_t oprsz = simd_oprsz(desc); 1210 intptr_t i; 1211 1212 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1213 uint64_t ai = *(uint64_t *)(a + i); 1214 uint64_t bi = *(uint64_t *)(b + i); 1215 uint64_t di = ai - bi; 1216 if (ai < bi) { 1217 di = 0; 1218 } 1219 *(uint64_t *)(d + i) = di; 1220 } 1221 clear_high(d, oprsz, desc); 1222 } 1223 1224 void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc) 1225 { 1226 intptr_t oprsz = simd_oprsz(desc); 1227 intptr_t i; 1228 1229 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1230 int8_t aa = *(int8_t *)(a + i); 1231 int8_t bb = *(int8_t *)(b + i); 1232 int8_t dd = aa < bb ? aa : bb; 1233 *(int8_t *)(d + i) = dd; 1234 } 1235 clear_high(d, oprsz, desc); 1236 } 1237 1238 void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc) 1239 { 1240 intptr_t oprsz = simd_oprsz(desc); 1241 intptr_t i; 1242 1243 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1244 int16_t aa = *(int16_t *)(a + i); 1245 int16_t bb = *(int16_t *)(b + i); 1246 int16_t dd = aa < bb ? aa : bb; 1247 *(int16_t *)(d + i) = dd; 1248 } 1249 clear_high(d, oprsz, desc); 1250 } 1251 1252 void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc) 1253 { 1254 intptr_t oprsz = simd_oprsz(desc); 1255 intptr_t i; 1256 1257 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1258 int32_t aa = *(int32_t *)(a + i); 1259 int32_t bb = *(int32_t *)(b + i); 1260 int32_t dd = aa < bb ? aa : bb; 1261 *(int32_t *)(d + i) = dd; 1262 } 1263 clear_high(d, oprsz, desc); 1264 } 1265 1266 void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc) 1267 { 1268 intptr_t oprsz = simd_oprsz(desc); 1269 intptr_t i; 1270 1271 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1272 int64_t aa = *(int64_t *)(a + i); 1273 int64_t bb = *(int64_t *)(b + i); 1274 int64_t dd = aa < bb ? aa : bb; 1275 *(int64_t *)(d + i) = dd; 1276 } 1277 clear_high(d, oprsz, desc); 1278 } 1279 1280 void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc) 1281 { 1282 intptr_t oprsz = simd_oprsz(desc); 1283 intptr_t i; 1284 1285 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1286 int8_t aa = *(int8_t *)(a + i); 1287 int8_t bb = *(int8_t *)(b + i); 1288 int8_t dd = aa > bb ? aa : bb; 1289 *(int8_t *)(d + i) = dd; 1290 } 1291 clear_high(d, oprsz, desc); 1292 } 1293 1294 void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc) 1295 { 1296 intptr_t oprsz = simd_oprsz(desc); 1297 intptr_t i; 1298 1299 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1300 int16_t aa = *(int16_t *)(a + i); 1301 int16_t bb = *(int16_t *)(b + i); 1302 int16_t dd = aa > bb ? aa : bb; 1303 *(int16_t *)(d + i) = dd; 1304 } 1305 clear_high(d, oprsz, desc); 1306 } 1307 1308 void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc) 1309 { 1310 intptr_t oprsz = simd_oprsz(desc); 1311 intptr_t i; 1312 1313 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1314 int32_t aa = *(int32_t *)(a + i); 1315 int32_t bb = *(int32_t *)(b + i); 1316 int32_t dd = aa > bb ? aa : bb; 1317 *(int32_t *)(d + i) = dd; 1318 } 1319 clear_high(d, oprsz, desc); 1320 } 1321 1322 void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc) 1323 { 1324 intptr_t oprsz = simd_oprsz(desc); 1325 intptr_t i; 1326 1327 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1328 int64_t aa = *(int64_t *)(a + i); 1329 int64_t bb = *(int64_t *)(b + i); 1330 int64_t dd = aa > bb ? aa : bb; 1331 *(int64_t *)(d + i) = dd; 1332 } 1333 clear_high(d, oprsz, desc); 1334 } 1335 1336 void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc) 1337 { 1338 intptr_t oprsz = simd_oprsz(desc); 1339 intptr_t i; 1340 1341 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1342 uint8_t aa = *(uint8_t *)(a + i); 1343 uint8_t bb = *(uint8_t *)(b + i); 1344 uint8_t dd = aa < bb ? aa : bb; 1345 *(uint8_t *)(d + i) = dd; 1346 } 1347 clear_high(d, oprsz, desc); 1348 } 1349 1350 void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc) 1351 { 1352 intptr_t oprsz = simd_oprsz(desc); 1353 intptr_t i; 1354 1355 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1356 uint16_t aa = *(uint16_t *)(a + i); 1357 uint16_t bb = *(uint16_t *)(b + i); 1358 uint16_t dd = aa < bb ? aa : bb; 1359 *(uint16_t *)(d + i) = dd; 1360 } 1361 clear_high(d, oprsz, desc); 1362 } 1363 1364 void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc) 1365 { 1366 intptr_t oprsz = simd_oprsz(desc); 1367 intptr_t i; 1368 1369 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1370 uint32_t aa = *(uint32_t *)(a + i); 1371 uint32_t bb = *(uint32_t *)(b + i); 1372 uint32_t dd = aa < bb ? aa : bb; 1373 *(uint32_t *)(d + i) = dd; 1374 } 1375 clear_high(d, oprsz, desc); 1376 } 1377 1378 void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc) 1379 { 1380 intptr_t oprsz = simd_oprsz(desc); 1381 intptr_t i; 1382 1383 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1384 uint64_t aa = *(uint64_t *)(a + i); 1385 uint64_t bb = *(uint64_t *)(b + i); 1386 uint64_t dd = aa < bb ? aa : bb; 1387 *(uint64_t *)(d + i) = dd; 1388 } 1389 clear_high(d, oprsz, desc); 1390 } 1391 1392 void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc) 1393 { 1394 intptr_t oprsz = simd_oprsz(desc); 1395 intptr_t i; 1396 1397 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1398 uint8_t aa = *(uint8_t *)(a + i); 1399 uint8_t bb = *(uint8_t *)(b + i); 1400 uint8_t dd = aa > bb ? aa : bb; 1401 *(uint8_t *)(d + i) = dd; 1402 } 1403 clear_high(d, oprsz, desc); 1404 } 1405 1406 void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc) 1407 { 1408 intptr_t oprsz = simd_oprsz(desc); 1409 intptr_t i; 1410 1411 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1412 uint16_t aa = *(uint16_t *)(a + i); 1413 uint16_t bb = *(uint16_t *)(b + i); 1414 uint16_t dd = aa > bb ? aa : bb; 1415 *(uint16_t *)(d + i) = dd; 1416 } 1417 clear_high(d, oprsz, desc); 1418 } 1419 1420 void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc) 1421 { 1422 intptr_t oprsz = simd_oprsz(desc); 1423 intptr_t i; 1424 1425 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1426 uint32_t aa = *(uint32_t *)(a + i); 1427 uint32_t bb = *(uint32_t *)(b + i); 1428 uint32_t dd = aa > bb ? aa : bb; 1429 *(uint32_t *)(d + i) = dd; 1430 } 1431 clear_high(d, oprsz, desc); 1432 } 1433 1434 void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc) 1435 { 1436 intptr_t oprsz = simd_oprsz(desc); 1437 intptr_t i; 1438 1439 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1440 uint64_t aa = *(uint64_t *)(a + i); 1441 uint64_t bb = *(uint64_t *)(b + i); 1442 uint64_t dd = aa > bb ? aa : bb; 1443 *(uint64_t *)(d + i) = dd; 1444 } 1445 clear_high(d, oprsz, desc); 1446 } 1447 1448 void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc) 1449 { 1450 intptr_t oprsz = simd_oprsz(desc); 1451 intptr_t i; 1452 1453 for (i = 0; i < oprsz; i += sizeof(vec64)) { 1454 vec64 aa = *(vec64 *)(a + i); 1455 vec64 bb = *(vec64 *)(b + i); 1456 vec64 cc = *(vec64 *)(c + i); 1457 *(vec64 *)(d + i) = (bb & aa) | (cc & ~aa); 1458 } 1459 clear_high(d, oprsz, desc); 1460 } 1461