/*
 * Generic vectorized operation runtime
 *
 * Copyright (c) 2018 Linaro
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "cpu.h"
#include "exec/helper-proto.h"
#include "tcg-gvec-desc.h"


/* Virtually all hosts support 16-byte vectors.  Those that don't can emulate
 * them via GCC's generic vector extension.  This turns out to be simpler and
 * more reliable than getting the compiler to autovectorize.
 *
 * In tcg-op-gvec.c, we asserted that both the size and alignment of the data
 * are multiples of 16.
 *
 * When the compiler does not support all of the operations we require, the
 * loops are written so that we can always fall back on the base types.
 */
#ifdef CONFIG_VECTOR16
typedef uint8_t vec8 __attribute__((vector_size(16)));
typedef uint16_t vec16 __attribute__((vector_size(16)));
typedef uint32_t vec32 __attribute__((vector_size(16)));
typedef uint64_t vec64 __attribute__((vector_size(16)));

typedef int8_t svec8 __attribute__((vector_size(16)));
typedef int16_t svec16 __attribute__((vector_size(16)));
typedef int32_t svec32 __attribute__((vector_size(16)));
typedef int64_t svec64 __attribute__((vector_size(16)));

/* DUPn replicates a scalar into all n lanes of a 16-byte vector
 * initializer; the lane count implies the element width.
 */
#define DUP16(X) { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X }
#define DUP8(X)  { X, X, X, X, X, X, X, X }
#define DUP4(X)  { X, X, X, X }
#define DUP2(X)  { X, X }
#else
/* Scalar fallback: the "vector" types are single elements and DUPn is
 * the identity, so the same loop bodies work element-by-element.
 */
typedef uint8_t vec8;
typedef uint16_t vec16;
typedef uint32_t vec32;
typedef uint64_t vec64;

typedef int8_t svec8;
typedef int16_t svec16;
typedef int32_t svec32;
typedef int64_t svec64;

#define DUP16(X) X
#define DUP8(X)  X
#define DUP4(X)  X
#define DUP2(X)  X
#endif /* CONFIG_VECTOR16 */

/* Zero the bytes of d between the operation size and the maximum size
 * encoded in desc.  Called after every helper so that the unused tail
 * of the destination register is well-defined.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    intptr_t maxsz = simd_maxsz(desc);
    intptr_t i;

    if (unlikely(maxsz > oprsz)) {
        for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) {
            *(uint64_t *)(d + i) = 0;
        }
    }
}

/* Element-wise addition, d = a + b, for 8/16/32/64-bit elements. */
void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

/* Element-wise addition of a scalar, d = a + dup(b).
 * NOTE(review): the (vecN)DUPn(b) cast truncates the uint64_t b to the
 * element width in each lane; callers presumably pass b already
 * replicated or in-range — confirm against tcg-op-gvec.c.
 */
void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec8 vecb = (vec8)DUP16(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) + vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec16 vecb = (vec16)DUP8(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) + vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec32 vecb = (vec32)DUP4(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) + vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec64 vecb = (vec64)DUP2(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) + vecb;
    }
    clear_high(d, oprsz, desc);
}

/* Element-wise subtraction, d = a - b. */
void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

/* Element-wise subtraction of a scalar, d = a - dup(b). */
void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec8 vecb = (vec8)DUP16(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) - vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec16 vecb = (vec16)DUP8(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) - vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec32 vecb = (vec32)DUP4(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) - vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec64 vecb = (vec64)DUP2(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) - vecb;
    }
    clear_high(d, oprsz, desc);
}

/* Element-wise multiplication, d = a * b. */
void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) * *(vec8 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) * *(vec16 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) * *(vec32 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) * *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

/* Element-wise multiplication by a scalar, d = a * dup(b). */
void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec8 vecb = (vec8)DUP16(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) * vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec16 vecb = (vec16)DUP8(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) * vecb;
    }
    clear_high(d, oprsz, desc);
}
332 333 void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc) 334 { 335 intptr_t oprsz = simd_oprsz(desc); 336 vec32 vecb = (vec32)DUP4(b); 337 intptr_t i; 338 339 for (i = 0; i < oprsz; i += sizeof(vec32)) { 340 *(vec32 *)(d + i) = *(vec32 *)(a + i) * vecb; 341 } 342 clear_high(d, oprsz, desc); 343 } 344 345 void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc) 346 { 347 intptr_t oprsz = simd_oprsz(desc); 348 vec64 vecb = (vec64)DUP2(b); 349 intptr_t i; 350 351 for (i = 0; i < oprsz; i += sizeof(vec64)) { 352 *(vec64 *)(d + i) = *(vec64 *)(a + i) * vecb; 353 } 354 clear_high(d, oprsz, desc); 355 } 356 357 void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc) 358 { 359 intptr_t oprsz = simd_oprsz(desc); 360 intptr_t i; 361 362 for (i = 0; i < oprsz; i += sizeof(vec8)) { 363 *(vec8 *)(d + i) = -*(vec8 *)(a + i); 364 } 365 clear_high(d, oprsz, desc); 366 } 367 368 void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc) 369 { 370 intptr_t oprsz = simd_oprsz(desc); 371 intptr_t i; 372 373 for (i = 0; i < oprsz; i += sizeof(vec16)) { 374 *(vec16 *)(d + i) = -*(vec16 *)(a + i); 375 } 376 clear_high(d, oprsz, desc); 377 } 378 379 void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc) 380 { 381 intptr_t oprsz = simd_oprsz(desc); 382 intptr_t i; 383 384 for (i = 0; i < oprsz; i += sizeof(vec32)) { 385 *(vec32 *)(d + i) = -*(vec32 *)(a + i); 386 } 387 clear_high(d, oprsz, desc); 388 } 389 390 void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc) 391 { 392 intptr_t oprsz = simd_oprsz(desc); 393 intptr_t i; 394 395 for (i = 0; i < oprsz; i += sizeof(vec64)) { 396 *(vec64 *)(d + i) = -*(vec64 *)(a + i); 397 } 398 clear_high(d, oprsz, desc); 399 } 400 401 void HELPER(gvec_mov)(void *d, void *a, uint32_t desc) 402 { 403 intptr_t oprsz = simd_oprsz(desc); 404 405 memcpy(d, a, oprsz); 406 clear_high(d, oprsz, desc); 407 } 408 409 void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c) 410 { 411 intptr_t oprsz = simd_oprsz(desc); 412 
intptr_t i; 413 414 if (c == 0) { 415 oprsz = 0; 416 } else { 417 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 418 *(uint64_t *)(d + i) = c; 419 } 420 } 421 clear_high(d, oprsz, desc); 422 } 423 424 void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c) 425 { 426 intptr_t oprsz = simd_oprsz(desc); 427 intptr_t i; 428 429 if (c == 0) { 430 oprsz = 0; 431 } else { 432 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 433 *(uint32_t *)(d + i) = c; 434 } 435 } 436 clear_high(d, oprsz, desc); 437 } 438 439 void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c) 440 { 441 HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff)); 442 } 443 444 void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c) 445 { 446 HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff)); 447 } 448 449 void HELPER(gvec_not)(void *d, void *a, uint32_t desc) 450 { 451 intptr_t oprsz = simd_oprsz(desc); 452 intptr_t i; 453 454 for (i = 0; i < oprsz; i += sizeof(vec64)) { 455 *(vec64 *)(d + i) = ~*(vec64 *)(a + i); 456 } 457 clear_high(d, oprsz, desc); 458 } 459 460 void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc) 461 { 462 intptr_t oprsz = simd_oprsz(desc); 463 intptr_t i; 464 465 for (i = 0; i < oprsz; i += sizeof(vec64)) { 466 *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i); 467 } 468 clear_high(d, oprsz, desc); 469 } 470 471 void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc) 472 { 473 intptr_t oprsz = simd_oprsz(desc); 474 intptr_t i; 475 476 for (i = 0; i < oprsz; i += sizeof(vec64)) { 477 *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i); 478 } 479 clear_high(d, oprsz, desc); 480 } 481 482 void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc) 483 { 484 intptr_t oprsz = simd_oprsz(desc); 485 intptr_t i; 486 487 for (i = 0; i < oprsz; i += sizeof(vec64)) { 488 *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i); 489 } 490 clear_high(d, oprsz, desc); 491 } 492 493 void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc) 
494 { 495 intptr_t oprsz = simd_oprsz(desc); 496 intptr_t i; 497 498 for (i = 0; i < oprsz; i += sizeof(vec64)) { 499 *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i); 500 } 501 clear_high(d, oprsz, desc); 502 } 503 504 void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc) 505 { 506 intptr_t oprsz = simd_oprsz(desc); 507 intptr_t i; 508 509 for (i = 0; i < oprsz; i += sizeof(vec64)) { 510 *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i); 511 } 512 clear_high(d, oprsz, desc); 513 } 514 515 void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc) 516 { 517 intptr_t oprsz = simd_oprsz(desc); 518 intptr_t i; 519 520 for (i = 0; i < oprsz; i += sizeof(vec64)) { 521 *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) & *(vec64 *)(b + i)); 522 } 523 clear_high(d, oprsz, desc); 524 } 525 526 void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc) 527 { 528 intptr_t oprsz = simd_oprsz(desc); 529 intptr_t i; 530 531 for (i = 0; i < oprsz; i += sizeof(vec64)) { 532 *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) | *(vec64 *)(b + i)); 533 } 534 clear_high(d, oprsz, desc); 535 } 536 537 void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc) 538 { 539 intptr_t oprsz = simd_oprsz(desc); 540 intptr_t i; 541 542 for (i = 0; i < oprsz; i += sizeof(vec64)) { 543 *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) ^ *(vec64 *)(b + i)); 544 } 545 clear_high(d, oprsz, desc); 546 } 547 548 void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc) 549 { 550 intptr_t oprsz = simd_oprsz(desc); 551 vec64 vecb = (vec64)DUP2(b); 552 intptr_t i; 553 554 for (i = 0; i < oprsz; i += sizeof(vec64)) { 555 *(vec64 *)(d + i) = *(vec64 *)(a + i) & vecb; 556 } 557 clear_high(d, oprsz, desc); 558 } 559 560 void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc) 561 { 562 intptr_t oprsz = simd_oprsz(desc); 563 vec64 vecb = (vec64)DUP2(b); 564 intptr_t i; 565 566 for (i = 0; i < oprsz; i += sizeof(vec64)) { 567 *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ vecb; 568 
} 569 clear_high(d, oprsz, desc); 570 } 571 572 void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc) 573 { 574 intptr_t oprsz = simd_oprsz(desc); 575 vec64 vecb = (vec64)DUP2(b); 576 intptr_t i; 577 578 for (i = 0; i < oprsz; i += sizeof(vec64)) { 579 *(vec64 *)(d + i) = *(vec64 *)(a + i) | vecb; 580 } 581 clear_high(d, oprsz, desc); 582 } 583 584 void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc) 585 { 586 intptr_t oprsz = simd_oprsz(desc); 587 int shift = simd_data(desc); 588 intptr_t i; 589 590 for (i = 0; i < oprsz; i += sizeof(vec8)) { 591 *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift; 592 } 593 clear_high(d, oprsz, desc); 594 } 595 596 void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc) 597 { 598 intptr_t oprsz = simd_oprsz(desc); 599 int shift = simd_data(desc); 600 intptr_t i; 601 602 for (i = 0; i < oprsz; i += sizeof(vec16)) { 603 *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift; 604 } 605 clear_high(d, oprsz, desc); 606 } 607 608 void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc) 609 { 610 intptr_t oprsz = simd_oprsz(desc); 611 int shift = simd_data(desc); 612 intptr_t i; 613 614 for (i = 0; i < oprsz; i += sizeof(vec32)) { 615 *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift; 616 } 617 clear_high(d, oprsz, desc); 618 } 619 620 void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc) 621 { 622 intptr_t oprsz = simd_oprsz(desc); 623 int shift = simd_data(desc); 624 intptr_t i; 625 626 for (i = 0; i < oprsz; i += sizeof(vec64)) { 627 *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift; 628 } 629 clear_high(d, oprsz, desc); 630 } 631 632 void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc) 633 { 634 intptr_t oprsz = simd_oprsz(desc); 635 int shift = simd_data(desc); 636 intptr_t i; 637 638 for (i = 0; i < oprsz; i += sizeof(vec8)) { 639 *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift; 640 } 641 clear_high(d, oprsz, desc); 642 } 643 644 void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc) 645 { 646 intptr_t oprsz = 
simd_oprsz(desc); 647 int shift = simd_data(desc); 648 intptr_t i; 649 650 for (i = 0; i < oprsz; i += sizeof(vec16)) { 651 *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift; 652 } 653 clear_high(d, oprsz, desc); 654 } 655 656 void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc) 657 { 658 intptr_t oprsz = simd_oprsz(desc); 659 int shift = simd_data(desc); 660 intptr_t i; 661 662 for (i = 0; i < oprsz; i += sizeof(vec32)) { 663 *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift; 664 } 665 clear_high(d, oprsz, desc); 666 } 667 668 void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc) 669 { 670 intptr_t oprsz = simd_oprsz(desc); 671 int shift = simd_data(desc); 672 intptr_t i; 673 674 for (i = 0; i < oprsz; i += sizeof(vec64)) { 675 *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift; 676 } 677 clear_high(d, oprsz, desc); 678 } 679 680 void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc) 681 { 682 intptr_t oprsz = simd_oprsz(desc); 683 int shift = simd_data(desc); 684 intptr_t i; 685 686 for (i = 0; i < oprsz; i += sizeof(vec8)) { 687 *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift; 688 } 689 clear_high(d, oprsz, desc); 690 } 691 692 void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc) 693 { 694 intptr_t oprsz = simd_oprsz(desc); 695 int shift = simd_data(desc); 696 intptr_t i; 697 698 for (i = 0; i < oprsz; i += sizeof(vec16)) { 699 *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift; 700 } 701 clear_high(d, oprsz, desc); 702 } 703 704 void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc) 705 { 706 intptr_t oprsz = simd_oprsz(desc); 707 int shift = simd_data(desc); 708 intptr_t i; 709 710 for (i = 0; i < oprsz; i += sizeof(vec32)) { 711 *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift; 712 } 713 clear_high(d, oprsz, desc); 714 } 715 716 void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) 717 { 718 intptr_t oprsz = simd_oprsz(desc); 719 int shift = simd_data(desc); 720 intptr_t i; 721 722 for (i = 0; i < oprsz; i += sizeof(vec64)) { 723 *(svec64 *)(d + i) 
= *(svec64 *)(a + i) >> shift; 724 } 725 clear_high(d, oprsz, desc); 726 } 727 728 void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc) 729 { 730 intptr_t oprsz = simd_oprsz(desc); 731 intptr_t i; 732 733 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 734 uint8_t sh = *(uint8_t *)(b + i) & 7; 735 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh; 736 } 737 clear_high(d, oprsz, desc); 738 } 739 740 void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc) 741 { 742 intptr_t oprsz = simd_oprsz(desc); 743 intptr_t i; 744 745 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 746 uint8_t sh = *(uint16_t *)(b + i) & 15; 747 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh; 748 } 749 clear_high(d, oprsz, desc); 750 } 751 752 void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc) 753 { 754 intptr_t oprsz = simd_oprsz(desc); 755 intptr_t i; 756 757 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 758 uint8_t sh = *(uint32_t *)(b + i) & 31; 759 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh; 760 } 761 clear_high(d, oprsz, desc); 762 } 763 764 void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc) 765 { 766 intptr_t oprsz = simd_oprsz(desc); 767 intptr_t i; 768 769 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 770 uint8_t sh = *(uint64_t *)(b + i) & 63; 771 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh; 772 } 773 clear_high(d, oprsz, desc); 774 } 775 776 void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc) 777 { 778 intptr_t oprsz = simd_oprsz(desc); 779 intptr_t i; 780 781 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 782 uint8_t sh = *(uint8_t *)(b + i) & 7; 783 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh; 784 } 785 clear_high(d, oprsz, desc); 786 } 787 788 void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc) 789 { 790 intptr_t oprsz = simd_oprsz(desc); 791 intptr_t i; 792 793 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 794 uint8_t sh = *(uint16_t *)(b + i) & 15; 795 *(uint16_t *)(d + i) 
= *(uint16_t *)(a + i) >> sh; 796 } 797 clear_high(d, oprsz, desc); 798 } 799 800 void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc) 801 { 802 intptr_t oprsz = simd_oprsz(desc); 803 intptr_t i; 804 805 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 806 uint8_t sh = *(uint32_t *)(b + i) & 31; 807 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh; 808 } 809 clear_high(d, oprsz, desc); 810 } 811 812 void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc) 813 { 814 intptr_t oprsz = simd_oprsz(desc); 815 intptr_t i; 816 817 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 818 uint8_t sh = *(uint64_t *)(b + i) & 63; 819 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh; 820 } 821 clear_high(d, oprsz, desc); 822 } 823 824 void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc) 825 { 826 intptr_t oprsz = simd_oprsz(desc); 827 intptr_t i; 828 829 for (i = 0; i < oprsz; i += sizeof(vec8)) { 830 uint8_t sh = *(uint8_t *)(b + i) & 7; 831 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh; 832 } 833 clear_high(d, oprsz, desc); 834 } 835 836 void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc) 837 { 838 intptr_t oprsz = simd_oprsz(desc); 839 intptr_t i; 840 841 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 842 uint8_t sh = *(uint16_t *)(b + i) & 15; 843 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh; 844 } 845 clear_high(d, oprsz, desc); 846 } 847 848 void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc) 849 { 850 intptr_t oprsz = simd_oprsz(desc); 851 intptr_t i; 852 853 for (i = 0; i < oprsz; i += sizeof(vec32)) { 854 uint8_t sh = *(uint32_t *)(b + i) & 31; 855 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh; 856 } 857 clear_high(d, oprsz, desc); 858 } 859 860 void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc) 861 { 862 intptr_t oprsz = simd_oprsz(desc); 863 intptr_t i; 864 865 for (i = 0; i < oprsz; i += sizeof(vec64)) { 866 uint8_t sh = *(uint64_t *)(b + i) & 63; 867 *(int64_t *)(d + i) = *(int64_t 
*)(a + i) >> sh; 868 } 869 clear_high(d, oprsz, desc); 870 } 871 872 /* If vectors are enabled, the compiler fills in -1 for true. 873 Otherwise, we must take care of this by hand. */ 874 #ifdef CONFIG_VECTOR16 875 # define DO_CMP0(X) X 876 #else 877 # define DO_CMP0(X) -(X) 878 #endif 879 880 #define DO_CMP1(NAME, TYPE, OP) \ 881 void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \ 882 { \ 883 intptr_t oprsz = simd_oprsz(desc); \ 884 intptr_t i; \ 885 for (i = 0; i < oprsz; i += sizeof(TYPE)) { \ 886 *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \ 887 } \ 888 clear_high(d, oprsz, desc); \ 889 } 890 891 #define DO_CMP2(SZ) \ 892 DO_CMP1(gvec_eq##SZ, vec##SZ, ==) \ 893 DO_CMP1(gvec_ne##SZ, vec##SZ, !=) \ 894 DO_CMP1(gvec_lt##SZ, svec##SZ, <) \ 895 DO_CMP1(gvec_le##SZ, svec##SZ, <=) \ 896 DO_CMP1(gvec_ltu##SZ, vec##SZ, <) \ 897 DO_CMP1(gvec_leu##SZ, vec##SZ, <=) 898 899 DO_CMP2(8) 900 DO_CMP2(16) 901 DO_CMP2(32) 902 DO_CMP2(64) 903 904 #undef DO_CMP0 905 #undef DO_CMP1 906 #undef DO_CMP2 907 908 void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc) 909 { 910 intptr_t oprsz = simd_oprsz(desc); 911 intptr_t i; 912 913 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 914 int r = *(int8_t *)(a + i) + *(int8_t *)(b + i); 915 if (r > INT8_MAX) { 916 r = INT8_MAX; 917 } else if (r < INT8_MIN) { 918 r = INT8_MIN; 919 } 920 *(int8_t *)(d + i) = r; 921 } 922 clear_high(d, oprsz, desc); 923 } 924 925 void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc) 926 { 927 intptr_t oprsz = simd_oprsz(desc); 928 intptr_t i; 929 930 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 931 int r = *(int16_t *)(a + i) + *(int16_t *)(b + i); 932 if (r > INT16_MAX) { 933 r = INT16_MAX; 934 } else if (r < INT16_MIN) { 935 r = INT16_MIN; 936 } 937 *(int16_t *)(d + i) = r; 938 } 939 clear_high(d, oprsz, desc); 940 } 941 942 void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc) 943 { 944 intptr_t oprsz = simd_oprsz(desc); 945 intptr_t i; 
946 947 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 948 int32_t ai = *(int32_t *)(a + i); 949 int32_t bi = *(int32_t *)(b + i); 950 int32_t di = ai + bi; 951 if (((di ^ ai) &~ (ai ^ bi)) < 0) { 952 /* Signed overflow. */ 953 di = (di < 0 ? INT32_MAX : INT32_MIN); 954 } 955 *(int32_t *)(d + i) = di; 956 } 957 clear_high(d, oprsz, desc); 958 } 959 960 void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc) 961 { 962 intptr_t oprsz = simd_oprsz(desc); 963 intptr_t i; 964 965 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 966 int64_t ai = *(int64_t *)(a + i); 967 int64_t bi = *(int64_t *)(b + i); 968 int64_t di = ai + bi; 969 if (((di ^ ai) &~ (ai ^ bi)) < 0) { 970 /* Signed overflow. */ 971 di = (di < 0 ? INT64_MAX : INT64_MIN); 972 } 973 *(int64_t *)(d + i) = di; 974 } 975 clear_high(d, oprsz, desc); 976 } 977 978 void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc) 979 { 980 intptr_t oprsz = simd_oprsz(desc); 981 intptr_t i; 982 983 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 984 int r = *(int8_t *)(a + i) - *(int8_t *)(b + i); 985 if (r > INT8_MAX) { 986 r = INT8_MAX; 987 } else if (r < INT8_MIN) { 988 r = INT8_MIN; 989 } 990 *(uint8_t *)(d + i) = r; 991 } 992 clear_high(d, oprsz, desc); 993 } 994 995 void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc) 996 { 997 intptr_t oprsz = simd_oprsz(desc); 998 intptr_t i; 999 1000 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1001 int r = *(int16_t *)(a + i) - *(int16_t *)(b + i); 1002 if (r > INT16_MAX) { 1003 r = INT16_MAX; 1004 } else if (r < INT16_MIN) { 1005 r = INT16_MIN; 1006 } 1007 *(int16_t *)(d + i) = r; 1008 } 1009 clear_high(d, oprsz, desc); 1010 } 1011 1012 void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc) 1013 { 1014 intptr_t oprsz = simd_oprsz(desc); 1015 intptr_t i; 1016 1017 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1018 int32_t ai = *(int32_t *)(a + i); 1019 int32_t bi = *(int32_t *)(b + i); 1020 int32_t di = ai - bi; 1021 if (((di ^ 
ai) & (ai ^ bi)) < 0) { 1022 /* Signed overflow. */ 1023 di = (di < 0 ? INT32_MAX : INT32_MIN); 1024 } 1025 *(int32_t *)(d + i) = di; 1026 } 1027 clear_high(d, oprsz, desc); 1028 } 1029 1030 void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc) 1031 { 1032 intptr_t oprsz = simd_oprsz(desc); 1033 intptr_t i; 1034 1035 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1036 int64_t ai = *(int64_t *)(a + i); 1037 int64_t bi = *(int64_t *)(b + i); 1038 int64_t di = ai - bi; 1039 if (((di ^ ai) & (ai ^ bi)) < 0) { 1040 /* Signed overflow. */ 1041 di = (di < 0 ? INT64_MAX : INT64_MIN); 1042 } 1043 *(int64_t *)(d + i) = di; 1044 } 1045 clear_high(d, oprsz, desc); 1046 } 1047 1048 void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc) 1049 { 1050 intptr_t oprsz = simd_oprsz(desc); 1051 intptr_t i; 1052 1053 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1054 unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); 1055 if (r > UINT8_MAX) { 1056 r = UINT8_MAX; 1057 } 1058 *(uint8_t *)(d + i) = r; 1059 } 1060 clear_high(d, oprsz, desc); 1061 } 1062 1063 void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc) 1064 { 1065 intptr_t oprsz = simd_oprsz(desc); 1066 intptr_t i; 1067 1068 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1069 unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); 1070 if (r > UINT16_MAX) { 1071 r = UINT16_MAX; 1072 } 1073 *(uint16_t *)(d + i) = r; 1074 } 1075 clear_high(d, oprsz, desc); 1076 } 1077 1078 void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc) 1079 { 1080 intptr_t oprsz = simd_oprsz(desc); 1081 intptr_t i; 1082 1083 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1084 uint32_t ai = *(uint32_t *)(a + i); 1085 uint32_t bi = *(uint32_t *)(b + i); 1086 uint32_t di = ai + bi; 1087 if (di < ai) { 1088 di = UINT32_MAX; 1089 } 1090 *(uint32_t *)(d + i) = di; 1091 } 1092 clear_high(d, oprsz, desc); 1093 } 1094 1095 void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc) 1096 { 1097 
intptr_t oprsz = simd_oprsz(desc); 1098 intptr_t i; 1099 1100 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1101 uint64_t ai = *(uint64_t *)(a + i); 1102 uint64_t bi = *(uint64_t *)(b + i); 1103 uint64_t di = ai + bi; 1104 if (di < ai) { 1105 di = UINT64_MAX; 1106 } 1107 *(uint64_t *)(d + i) = di; 1108 } 1109 clear_high(d, oprsz, desc); 1110 } 1111 1112 void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc) 1113 { 1114 intptr_t oprsz = simd_oprsz(desc); 1115 intptr_t i; 1116 1117 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1118 int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); 1119 if (r < 0) { 1120 r = 0; 1121 } 1122 *(uint8_t *)(d + i) = r; 1123 } 1124 clear_high(d, oprsz, desc); 1125 } 1126 1127 void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc) 1128 { 1129 intptr_t oprsz = simd_oprsz(desc); 1130 intptr_t i; 1131 1132 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1133 int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); 1134 if (r < 0) { 1135 r = 0; 1136 } 1137 *(uint16_t *)(d + i) = r; 1138 } 1139 clear_high(d, oprsz, desc); 1140 } 1141 1142 void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc) 1143 { 1144 intptr_t oprsz = simd_oprsz(desc); 1145 intptr_t i; 1146 1147 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1148 uint32_t ai = *(uint32_t *)(a + i); 1149 uint32_t bi = *(uint32_t *)(b + i); 1150 uint32_t di = ai - bi; 1151 if (ai < bi) { 1152 di = 0; 1153 } 1154 *(uint32_t *)(d + i) = di; 1155 } 1156 clear_high(d, oprsz, desc); 1157 } 1158 1159 void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc) 1160 { 1161 intptr_t oprsz = simd_oprsz(desc); 1162 intptr_t i; 1163 1164 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1165 uint64_t ai = *(uint64_t *)(a + i); 1166 uint64_t bi = *(uint64_t *)(b + i); 1167 uint64_t di = ai - bi; 1168 if (ai < bi) { 1169 di = 0; 1170 } 1171 *(uint64_t *)(d + i) = di; 1172 } 1173 clear_high(d, oprsz, desc); 1174 } 1175 1176 void HELPER(gvec_smin8)(void *d, void *a, 
void *b, uint32_t desc) 1177 { 1178 intptr_t oprsz = simd_oprsz(desc); 1179 intptr_t i; 1180 1181 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1182 int8_t aa = *(int8_t *)(a + i); 1183 int8_t bb = *(int8_t *)(b + i); 1184 int8_t dd = aa < bb ? aa : bb; 1185 *(int8_t *)(d + i) = dd; 1186 } 1187 clear_high(d, oprsz, desc); 1188 } 1189 1190 void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc) 1191 { 1192 intptr_t oprsz = simd_oprsz(desc); 1193 intptr_t i; 1194 1195 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1196 int16_t aa = *(int16_t *)(a + i); 1197 int16_t bb = *(int16_t *)(b + i); 1198 int16_t dd = aa < bb ? aa : bb; 1199 *(int16_t *)(d + i) = dd; 1200 } 1201 clear_high(d, oprsz, desc); 1202 } 1203 1204 void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc) 1205 { 1206 intptr_t oprsz = simd_oprsz(desc); 1207 intptr_t i; 1208 1209 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1210 int32_t aa = *(int32_t *)(a + i); 1211 int32_t bb = *(int32_t *)(b + i); 1212 int32_t dd = aa < bb ? aa : bb; 1213 *(int32_t *)(d + i) = dd; 1214 } 1215 clear_high(d, oprsz, desc); 1216 } 1217 1218 void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc) 1219 { 1220 intptr_t oprsz = simd_oprsz(desc); 1221 intptr_t i; 1222 1223 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1224 int64_t aa = *(int64_t *)(a + i); 1225 int64_t bb = *(int64_t *)(b + i); 1226 int64_t dd = aa < bb ? aa : bb; 1227 *(int64_t *)(d + i) = dd; 1228 } 1229 clear_high(d, oprsz, desc); 1230 } 1231 1232 void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc) 1233 { 1234 intptr_t oprsz = simd_oprsz(desc); 1235 intptr_t i; 1236 1237 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1238 int8_t aa = *(int8_t *)(a + i); 1239 int8_t bb = *(int8_t *)(b + i); 1240 int8_t dd = aa > bb ? 
aa : bb; 1241 *(int8_t *)(d + i) = dd; 1242 } 1243 clear_high(d, oprsz, desc); 1244 } 1245 1246 void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc) 1247 { 1248 intptr_t oprsz = simd_oprsz(desc); 1249 intptr_t i; 1250 1251 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1252 int16_t aa = *(int16_t *)(a + i); 1253 int16_t bb = *(int16_t *)(b + i); 1254 int16_t dd = aa > bb ? aa : bb; 1255 *(int16_t *)(d + i) = dd; 1256 } 1257 clear_high(d, oprsz, desc); 1258 } 1259 1260 void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc) 1261 { 1262 intptr_t oprsz = simd_oprsz(desc); 1263 intptr_t i; 1264 1265 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1266 int32_t aa = *(int32_t *)(a + i); 1267 int32_t bb = *(int32_t *)(b + i); 1268 int32_t dd = aa > bb ? aa : bb; 1269 *(int32_t *)(d + i) = dd; 1270 } 1271 clear_high(d, oprsz, desc); 1272 } 1273 1274 void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc) 1275 { 1276 intptr_t oprsz = simd_oprsz(desc); 1277 intptr_t i; 1278 1279 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1280 int64_t aa = *(int64_t *)(a + i); 1281 int64_t bb = *(int64_t *)(b + i); 1282 int64_t dd = aa > bb ? aa : bb; 1283 *(int64_t *)(d + i) = dd; 1284 } 1285 clear_high(d, oprsz, desc); 1286 } 1287 1288 void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc) 1289 { 1290 intptr_t oprsz = simd_oprsz(desc); 1291 intptr_t i; 1292 1293 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1294 uint8_t aa = *(uint8_t *)(a + i); 1295 uint8_t bb = *(uint8_t *)(b + i); 1296 uint8_t dd = aa < bb ? aa : bb; 1297 *(uint8_t *)(d + i) = dd; 1298 } 1299 clear_high(d, oprsz, desc); 1300 } 1301 1302 void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc) 1303 { 1304 intptr_t oprsz = simd_oprsz(desc); 1305 intptr_t i; 1306 1307 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1308 uint16_t aa = *(uint16_t *)(a + i); 1309 uint16_t bb = *(uint16_t *)(b + i); 1310 uint16_t dd = aa < bb ? 
aa : bb; 1311 *(uint16_t *)(d + i) = dd; 1312 } 1313 clear_high(d, oprsz, desc); 1314 } 1315 1316 void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc) 1317 { 1318 intptr_t oprsz = simd_oprsz(desc); 1319 intptr_t i; 1320 1321 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1322 uint32_t aa = *(uint32_t *)(a + i); 1323 uint32_t bb = *(uint32_t *)(b + i); 1324 uint32_t dd = aa < bb ? aa : bb; 1325 *(uint32_t *)(d + i) = dd; 1326 } 1327 clear_high(d, oprsz, desc); 1328 } 1329 1330 void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc) 1331 { 1332 intptr_t oprsz = simd_oprsz(desc); 1333 intptr_t i; 1334 1335 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1336 uint64_t aa = *(uint64_t *)(a + i); 1337 uint64_t bb = *(uint64_t *)(b + i); 1338 uint64_t dd = aa < bb ? aa : bb; 1339 *(uint64_t *)(d + i) = dd; 1340 } 1341 clear_high(d, oprsz, desc); 1342 } 1343 1344 void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc) 1345 { 1346 intptr_t oprsz = simd_oprsz(desc); 1347 intptr_t i; 1348 1349 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1350 uint8_t aa = *(uint8_t *)(a + i); 1351 uint8_t bb = *(uint8_t *)(b + i); 1352 uint8_t dd = aa > bb ? aa : bb; 1353 *(uint8_t *)(d + i) = dd; 1354 } 1355 clear_high(d, oprsz, desc); 1356 } 1357 1358 void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc) 1359 { 1360 intptr_t oprsz = simd_oprsz(desc); 1361 intptr_t i; 1362 1363 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1364 uint16_t aa = *(uint16_t *)(a + i); 1365 uint16_t bb = *(uint16_t *)(b + i); 1366 uint16_t dd = aa > bb ? aa : bb; 1367 *(uint16_t *)(d + i) = dd; 1368 } 1369 clear_high(d, oprsz, desc); 1370 } 1371 1372 void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc) 1373 { 1374 intptr_t oprsz = simd_oprsz(desc); 1375 intptr_t i; 1376 1377 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1378 uint32_t aa = *(uint32_t *)(a + i); 1379 uint32_t bb = *(uint32_t *)(b + i); 1380 uint32_t dd = aa > bb ? 
aa : bb; 1381 *(uint32_t *)(d + i) = dd; 1382 } 1383 clear_high(d, oprsz, desc); 1384 } 1385 1386 void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc) 1387 { 1388 intptr_t oprsz = simd_oprsz(desc); 1389 intptr_t i; 1390 1391 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1392 uint64_t aa = *(uint64_t *)(a + i); 1393 uint64_t bb = *(uint64_t *)(b + i); 1394 uint64_t dd = aa > bb ? aa : bb; 1395 *(uint64_t *)(d + i) = dd; 1396 } 1397 clear_high(d, oprsz, desc); 1398 } 1399