1 /* 2 * PowerPC integer and vector emulation helpers for QEMU. 3 * 4 * Copyright (c) 2003-2007 Jocelyn Mayer 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "cpu.h" 22 #include "internal.h" 23 #include "qemu/host-utils.h" 24 #include "qemu/log.h" 25 #include "exec/helper-proto.h" 26 #include "crypto/aes.h" 27 #include "crypto/aes-round.h" 28 #include "crypto/clmul.h" 29 #include "fpu/softfloat.h" 30 #include "qapi/error.h" 31 #include "qemu/guest-random.h" 32 #include "tcg/tcg-gvec-desc.h" 33 34 #include "helper_regs.h" 35 /*****************************************************************************/ 36 /* Fixed point operations helpers */ 37 38 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov) 39 { 40 if (unlikely(ov)) { 41 env->so = env->ov = env->ov32 = 1; 42 } else { 43 env->ov = env->ov32 = 0; 44 } 45 } 46 47 target_ulong helper_DIVWEU(CPUPPCState *env, target_ulong ra, target_ulong rb, 48 uint32_t oe) 49 { 50 uint64_t rt = 0; 51 int overflow = 0; 52 53 uint64_t dividend = (uint64_t)ra << 32; 54 uint64_t divisor = (uint32_t)rb; 55 56 if (unlikely(divisor == 0)) { 57 overflow = 1; 58 } else { 59 rt = dividend / divisor; 60 overflow = rt > UINT32_MAX; 61 } 62 63 if (unlikely(overflow)) { 64 rt = 0; /* Undefined */ 65 } 66 67 if (oe) { 68 helper_update_ov_legacy(env, overflow); 69 } 70 71 return (target_ulong)rt; 72 } 73 74 target_ulong helper_DIVWE(CPUPPCState *env, target_ulong ra, target_ulong rb, 75 uint32_t oe) 76 { 77 int64_t rt = 0; 78 int overflow = 0; 79 80 int64_t dividend = (int64_t)ra << 32; 81 int64_t divisor = (int64_t)((int32_t)rb); 82 83 if (unlikely((divisor == 0) || 84 ((divisor == -1ull) && (dividend == INT64_MIN)))) { 85 overflow = 1; 86 } else { 87 rt = dividend / divisor; 88 overflow = rt != (int32_t)rt; 89 } 90 91 if (unlikely(overflow)) { 92 rt = 0; /* Undefined */ 93 } 94 95 if (oe) { 96 helper_update_ov_legacy(env, overflow); 97 } 98 99 return (target_ulong)rt; 100 } 101 102 #if defined(TARGET_PPC64) 103 104 uint64_t helper_DIVDEU(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) 105 { 106 uint64_t rt = 0; 107 int overflow = 0; 108 109 if (unlikely(rb == 0 || ra >= rb)) { 110 overflow = 1; 111 rt = 0; /* Undefined */ 112 } else { 113 divu128(&rt, &ra, rb); 114 } 115 116 if (oe) { 117 helper_update_ov_legacy(env, overflow); 118 } 119 120 return rt; 121 } 122 123 uint64_t helper_DIVDE(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) 124 { 125 uint64_t rt = 0; 126 int64_t ra = (int64_t)rau; 127 int64_t rb = (int64_t)rbu; 128 int overflow = 0; 129 130 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) { 131 overflow = 1; 132 rt = 0; /* Undefined */ 133 } else { 134 divs128(&rt, &ra, rb); 135 } 136 137 if (oe) { 138 helper_update_ov_legacy(env, overflow); 139 } 140 141 return rt; 142 } 143 144 #endif 145 146 147 #if defined(TARGET_PPC64) 148 /* if x = 
0xab, returns 0xababababababababa */ 149 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff)) 150 151 /* 152 * subtract 1 from each byte, and with inverse, check if MSB is set at each 153 * byte. 154 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80 155 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 156 */ 157 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 158 159 /* When you XOR the pattern and there is a match, that byte will be zero */ 160 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 161 162 uint32_t helper_CMPEQB(target_ulong ra, target_ulong rb) 163 { 164 return hasvalue(rb, ra) ? CRF_GT : 0; 165 } 166 167 #undef pattern 168 #undef haszero 169 #undef hasvalue 170 171 /* 172 * Return a random number. 173 */ 174 uint64_t helper_DARN32(void) 175 { 176 Error *err = NULL; 177 uint32_t ret; 178 179 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 180 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 181 error_get_pretty(err)); 182 error_free(err); 183 return -1; 184 } 185 186 return ret; 187 } 188 189 uint64_t helper_DARN64(void) 190 { 191 Error *err = NULL; 192 uint64_t ret; 193 194 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 195 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 196 error_get_pretty(err)); 197 error_free(err); 198 return -1; 199 } 200 201 return ret; 202 } 203 204 uint64_t helper_BPERMD(uint64_t rs, uint64_t rb) 205 { 206 int i; 207 uint64_t ra = 0; 208 209 for (i = 0; i < 8; i++) { 210 int index = (rs >> (i * 8)) & 0xFF; 211 if (index < 64) { 212 if (rb & PPC_BIT(index)) { 213 ra |= 1 << i; 214 } 215 } 216 } 217 return ra; 218 } 219 220 #endif 221 222 target_ulong helper_CMPB(target_ulong rs, target_ulong rb) 223 { 224 target_ulong mask = 0xff; 225 target_ulong ra = 0; 226 int i; 227 228 for (i = 0; i < sizeof(target_ulong); i++) { 229 if ((rs & mask) == (rb & mask)) { 230 ra |= mask; 231 } 232 mask <<= 8; 233 } 234 return ra; 235 } 236 237 /* shift right arithmetic helper */ 238 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 239 target_ulong shift) 240 { 241 int32_t ret; 242 243 if (likely(!(shift & 0x20))) { 244 if (likely((uint32_t)shift != 0)) { 245 shift &= 0x1f; 246 ret = (int32_t)value >> shift; 247 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 248 env->ca32 = env->ca = 0; 249 } else { 250 env->ca32 = env->ca = 1; 251 } 252 } else { 253 ret = (int32_t)value; 254 env->ca32 = env->ca = 0; 255 } 256 } else { 257 ret = (int32_t)value >> 31; 258 env->ca32 = env->ca = (ret != 0); 259 } 260 return (target_long)ret; 261 } 262 263 #if defined(TARGET_PPC64) 264 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 265 target_ulong shift) 266 { 267 int64_t ret; 268 269 if (likely(!(shift & 0x40))) { 270 if (likely((uint64_t)shift != 0)) { 271 shift &= 0x3f; 272 ret = (int64_t)value >> shift; 273 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 274 env->ca32 = env->ca = 0; 275 } else { 276 env->ca32 = env->ca = 1; 277 } 278 } else { 279 ret = (int64_t)value; 280 env->ca32 = env->ca = 0; 281 } 282 } else { 283 ret = (int64_t)value >> 63; 284 env->ca32 = env->ca = (ret != 0); 285 } 286 return ret; 287 } 288 #endif 289 290 #if defined(TARGET_PPC64) 291 target_ulong helper_POPCNTB(target_ulong val) 292 { 293 /* Note that we don't fold past bytes */ 294 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 295 0x5555555555555555ULL); 296 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 297 0x3333333333333333ULL); 298 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 299 
0x0f0f0f0f0f0f0f0fULL); 300 return val; 301 } 302 303 target_ulong helper_POPCNTW(target_ulong val) 304 { 305 /* Note that we don't fold past words. */ 306 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 307 0x5555555555555555ULL); 308 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 309 0x3333333333333333ULL); 310 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 311 0x0f0f0f0f0f0f0f0fULL); 312 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 313 0x00ff00ff00ff00ffULL); 314 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 315 0x0000ffff0000ffffULL); 316 return val; 317 } 318 #else 319 target_ulong helper_POPCNTB(target_ulong val) 320 { 321 /* Note that we don't fold past bytes */ 322 val = (val & 0x55555555) + ((val >> 1) & 0x55555555); 323 val = (val & 0x33333333) + ((val >> 2) & 0x33333333); 324 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); 325 return val; 326 } 327 #endif 328 329 uint64_t helper_CFUGED(uint64_t src, uint64_t mask) 330 { 331 /* 332 * Instead of processing the mask bit-by-bit from the most significant to 333 * the least significant bit, as described in PowerISA, we'll handle it in 334 * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use 335 * ctz or cto, we negate the mask at the end of the loop. 336 */ 337 target_ulong m, left = 0, right = 0; 338 unsigned int n, i = 64; 339 bool bit = false; /* tracks if we are processing zeros or ones */ 340 341 if (mask == 0 || mask == -1) { 342 return src; 343 } 344 345 /* Processes the mask in blocks, from LSB to MSB */ 346 while (i) { 347 /* Find how many bits we should take */ 348 n = ctz64(mask); 349 if (n > i) { 350 n = i; 351 } 352 353 /* 354 * Extracts 'n' trailing bits of src and put them on the leading 'n' 355 * bits of 'right' or 'left', pushing down the previously extracted 356 * values. 357 */ 358 m = (1ll << n) - 1; 359 if (bit) { 360 right = ror64(right | (src & m), n); 361 } else { 362 left = ror64(left | (src & m), n); 363 } 364 365 /* 366 * Discards the processed bits from 'src' and 'mask'. Note that we are 367 * removing 'n' trailing zeros from 'mask', but the logical shift will 368 * add 'n' leading zeros back, so the population count of 'mask' is kept 369 * the same. 370 */ 371 src >>= n; 372 mask >>= n; 373 i -= n; 374 bit = !bit; 375 mask = ~mask; 376 } 377 378 /* 379 * At the end, right was ror'ed ctpop(mask) times. To put it back in place, 380 * we'll shift it more 64-ctpop(mask) times. 
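     *
     * Worked example for the helper as a whole: src = 0xAABBCCDD with
     * mask = 0x00FF00FF gathers the bits under mask ones (bytes BB and DD)
     * to the low end and the remaining bits (..., AA, CC) above them,
     * returning 0x00000000AACCBBDD.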
381 */ 382 if (bit) { 383 n = ctpop64(mask); 384 } else { 385 n = 64 - ctpop64(mask); 386 } 387 388 return left | (right >> n); 389 } 390 391 uint64_t helper_PDEPD(uint64_t src, uint64_t mask) 392 { 393 int i, o; 394 uint64_t result = 0; 395 396 if (mask == -1) { 397 return src; 398 } 399 400 for (i = 0; mask != 0; i++) { 401 o = ctz64(mask); 402 mask &= mask - 1; 403 result |= ((src >> i) & 1) << o; 404 } 405 406 return result; 407 } 408 409 uint64_t helper_PEXTD(uint64_t src, uint64_t mask) 410 { 411 int i, o; 412 uint64_t result = 0; 413 414 if (mask == -1) { 415 return src; 416 } 417 418 for (o = 0; mask != 0; o++) { 419 i = ctz64(mask); 420 mask &= mask - 1; 421 result |= ((src >> i) & 1) << o; 422 } 423 424 return result; 425 } 426 427 /*****************************************************************************/ 428 /* Altivec extension helpers */ 429 #if HOST_BIG_ENDIAN 430 #define VECTOR_FOR_INORDER_I(index, element) \ 431 for (index = 0; index < ARRAY_SIZE(r->element); index++) 432 #else 433 #define VECTOR_FOR_INORDER_I(index, element) \ 434 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--) 435 #endif 436 437 /* Saturating arithmetic helpers. */ 438 #define SATCVT(from, to, from_type, to_type, min, max) \ 439 static inline to_type cvt##from##to(from_type x, int *sat) \ 440 { \ 441 to_type r; \ 442 \ 443 if (x < (from_type)min) { \ 444 r = min; \ 445 *sat = 1; \ 446 } else if (x > (from_type)max) { \ 447 r = max; \ 448 *sat = 1; \ 449 } else { \ 450 r = x; \ 451 } \ 452 return r; \ 453 } 454 #define SATCVTU(from, to, from_type, to_type, min, max) \ 455 static inline to_type cvt##from##to(from_type x, int *sat) \ 456 { \ 457 to_type r; \ 458 \ 459 if (x > (from_type)max) { \ 460 r = max; \ 461 *sat = 1; \ 462 } else { \ 463 r = x; \ 464 } \ 465 return r; \ 466 } 467 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX) 468 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX) 469 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX) 470 471 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX) 472 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX) 473 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX) 474 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX) 475 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX) 476 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX) 477 #undef SATCVT 478 #undef SATCVTU 479 480 void helper_mtvscr(CPUPPCState *env, uint32_t vscr) 481 { 482 ppc_store_vscr(env, vscr); 483 } 484 485 uint32_t helper_mfvscr(CPUPPCState *env) 486 { 487 return ppc_get_vscr(env); 488 } 489 490 static inline void set_vscr_sat(CPUPPCState *env) 491 { 492 /* The choice of non-zero value is arbitrary. 
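       Readers of VSCR (helper_mfvscr via ppc_get_vscr) only check whether
       vscr_sat is zero or non-zero, so setting a single lane suffices.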
*/ 493 env->vscr_sat.u32[0] = 1; 494 } 495 496 /* vprtybq */ 497 void helper_VPRTYBQ(ppc_avr_t *r, ppc_avr_t *b, uint32_t v) 498 { 499 uint64_t res = b->u64[0] ^ b->u64[1]; 500 res ^= res >> 32; 501 res ^= res >> 16; 502 res ^= res >> 8; 503 r->VsrD(1) = res & 1; 504 r->VsrD(0) = 0; 505 } 506 507 #define VARITHFP(suffix, func) \ 508 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 509 ppc_avr_t *b) \ 510 { \ 511 int i; \ 512 \ 513 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 514 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 515 } \ 516 } 517 VARITHFP(addfp, float32_add) 518 VARITHFP(subfp, float32_sub) 519 VARITHFP(minfp, float32_min) 520 VARITHFP(maxfp, float32_max) 521 #undef VARITHFP 522 523 #define VARITHFPFMA(suffix, type) \ 524 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 525 ppc_avr_t *b, ppc_avr_t *c) \ 526 { \ 527 int i; \ 528 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 529 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 530 type, &env->vec_status); \ 531 } \ 532 } 533 VARITHFPFMA(maddfp, 0); 534 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 535 #undef VARITHFPFMA 536 537 #define VARITHSAT_CASE(type, op, cvt, element) \ 538 { \ 539 type result = (type)a->element[i] op (type)b->element[i]; \ 540 r->element[i] = cvt(result, &sat); \ 541 } 542 543 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 544 void helper_V##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 545 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 546 { \ 547 int sat = 0; \ 548 int i; \ 549 \ 550 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 551 VARITHSAT_CASE(optype, op, cvt, element); \ 552 } \ 553 if (sat) { \ 554 vscr_sat->u32[0] = 1; \ 555 } \ 556 } 557 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 558 VARITHSAT_DO(ADDS##suffix##S, +, optype, cvt, element) \ 559 VARITHSAT_DO(SUBS##suffix##S, -, optype, cvt, element) 560 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 561 VARITHSAT_DO(ADDU##suffix##S, +, optype, cvt, element) \ 562 VARITHSAT_DO(SUBU##suffix##S, -, optype, cvt, element) 563 VARITHSAT_SIGNED(B, s8, int16_t, cvtshsb) 564 VARITHSAT_SIGNED(H, s16, int32_t, cvtswsh) 565 VARITHSAT_SIGNED(W, s32, int64_t, cvtsdsw) 566 VARITHSAT_UNSIGNED(B, u8, uint16_t, cvtshub) 567 VARITHSAT_UNSIGNED(H, u16, uint32_t, cvtswuh) 568 VARITHSAT_UNSIGNED(W, u32, uint64_t, cvtsduw) 569 #undef VARITHSAT_CASE 570 #undef VARITHSAT_DO 571 #undef VARITHSAT_SIGNED 572 #undef VARITHSAT_UNSIGNED 573 574 #define VAVG(name, element, etype) \ 575 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\ 576 { \ 577 int i; \ 578 \ 579 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 580 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 581 r->element[i] = x >> 1; \ 582 } \ 583 } 584 585 VAVG(VAVGSB, s8, int16_t) 586 VAVG(VAVGUB, u8, uint16_t) 587 VAVG(VAVGSH, s16, int32_t) 588 VAVG(VAVGUH, u16, uint32_t) 589 VAVG(VAVGSW, s32, int64_t) 590 VAVG(VAVGUW, u32, uint64_t) 591 #undef VAVG 592 593 #define VABSDU(name, element) \ 594 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\ 595 { \ 596 int i; \ 597 \ 598 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 599 r->element[i] = (a->element[i] > b->element[i]) ? 
\ 600 (a->element[i] - b->element[i]) : \ 601 (b->element[i] - a->element[i]); \ 602 } \ 603 } 604 605 /* 606 * VABSDU - Vector absolute difference unsigned 607 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 608 * element - element type to access from vector 609 */ 610 VABSDU(VABSDUB, u8) 611 VABSDU(VABSDUH, u16) 612 VABSDU(VABSDUW, u32) 613 #undef VABSDU 614 615 #define VCF(suffix, cvt, element) \ 616 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 617 ppc_avr_t *b, uint32_t uim) \ 618 { \ 619 int i; \ 620 \ 621 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 622 float32 t = cvt(b->element[i], &env->vec_status); \ 623 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 624 } \ 625 } 626 VCF(ux, uint32_to_float32, u32) 627 VCF(sx, int32_to_float32, s32) 628 #undef VCF 629 630 #define VCMPNEZ(NAME, ELEM) \ 631 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \ 632 { \ 633 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \ 634 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \ 635 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \ 636 } \ 637 } 638 VCMPNEZ(VCMPNEZB, u8) 639 VCMPNEZ(VCMPNEZH, u16) 640 VCMPNEZ(VCMPNEZW, u32) 641 #undef VCMPNEZ 642 643 #define VCMPFP_DO(suffix, compare, order, record) \ 644 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 645 ppc_avr_t *a, ppc_avr_t *b) \ 646 { \ 647 uint32_t ones = (uint32_t)-1; \ 648 uint32_t all = ones; \ 649 uint32_t none = 0; \ 650 int i; \ 651 \ 652 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 653 uint32_t result; \ 654 FloatRelation rel = \ 655 float32_compare_quiet(a->f32[i], b->f32[i], \ 656 &env->vec_status); \ 657 if (rel == float_relation_unordered) { \ 658 result = 0; \ 659 } else if (rel compare order) { \ 660 result = ones; \ 661 } else { \ 662 result = 0; \ 663 } \ 664 r->u32[i] = result; \ 665 all &= result; \ 666 none |= result; \ 667 } \ 668 if (record) { \ 669 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 670 } \ 671 } 672 #define VCMPFP(suffix, compare, order) \ 673 VCMPFP_DO(suffix, compare, order, 0) \ 674 VCMPFP_DO(suffix##_dot, compare, order, 1) 675 VCMPFP(eqfp, ==, float_relation_equal) 676 VCMPFP(gefp, !=, float_relation_less) 677 VCMPFP(gtfp, ==, float_relation_greater) 678 #undef VCMPFP_DO 679 #undef VCMPFP 680 681 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 682 ppc_avr_t *a, ppc_avr_t *b, int record) 683 { 684 int i; 685 int all_in = 0; 686 687 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 688 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 689 &env->vec_status); 690 if (le_rel == float_relation_unordered) { 691 r->u32[i] = 0xc0000000; 692 all_in = 1; 693 } else { 694 float32 bneg = float32_chs(b->f32[i]); 695 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, 696 &env->vec_status); 697 int le = le_rel != float_relation_greater; 698 int ge = ge_rel != float_relation_less; 699 700 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 701 all_in |= (!le | !ge); 702 } 703 } 704 if (record) { 705 env->crf[6] = (all_in == 0) << 1; 706 } 707 } 708 709 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 710 { 711 vcmpbfp_internal(env, r, a, b, 0); 712 } 713 714 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 715 ppc_avr_t *b) 716 { 717 vcmpbfp_internal(env, r, a, b, 1); 718 } 719 720 #define VCT(suffix, satcvt, element) \ 721 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 722 ppc_avr_t *b, uint32_t uim) \ 723 { \ 724 int i; \ 725 int sat = 0; \ 
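        /* vctuxs/vctsxs: scale by 2**uim, truncate toward zero, saturate; NaN inputs yield 0 */ \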
726 float_status s = env->vec_status; \ 727 \ 728 set_float_rounding_mode(float_round_to_zero, &s); \ 729 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 730 if (float32_is_any_nan(b->f32[i])) { \ 731 r->element[i] = 0; \ 732 } else { \ 733 float64 t = float32_to_float64(b->f32[i], &s); \ 734 int64_t j; \ 735 \ 736 t = float64_scalbn(t, uim, &s); \ 737 j = float64_to_int64(t, &s); \ 738 r->element[i] = satcvt(j, &sat); \ 739 } \ 740 } \ 741 if (sat) { \ 742 set_vscr_sat(env); \ 743 } \ 744 } 745 VCT(uxs, cvtsduw, u32) 746 VCT(sxs, cvtsdsw, s32) 747 #undef VCT 748 749 typedef int64_t do_ger(uint32_t, uint32_t, uint32_t); 750 751 static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask) 752 { 753 int64_t psum = 0; 754 for (int i = 0; i < 8; i++, mask >>= 1) { 755 if (mask & 1) { 756 psum += (int64_t)sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4); 757 } 758 } 759 return psum; 760 } 761 762 static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask) 763 { 764 int64_t psum = 0; 765 for (int i = 0; i < 4; i++, mask >>= 1) { 766 if (mask & 1) { 767 psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8); 768 } 769 } 770 return psum; 771 } 772 773 static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask) 774 { 775 int64_t psum = 0; 776 for (int i = 0; i < 2; i++, mask >>= 1) { 777 if (mask & 1) { 778 psum += (int64_t)sextract32(a, 16 * i, 16) * 779 sextract32(b, 16 * i, 16); 780 } 781 } 782 return psum; 783 } 784 785 static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at, 786 uint32_t mask, bool sat, bool acc, do_ger ger) 787 { 788 uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK), 789 xmsk = FIELD_EX32(mask, GER_MSK, XMSK), 790 ymsk = FIELD_EX32(mask, GER_MSK, YMSK); 791 uint8_t xmsk_bit, ymsk_bit; 792 int64_t psum; 793 int i, j; 794 for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) { 795 for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) { 796 if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) { 797 psum = ger(a->VsrW(i), b->VsrW(j), pmsk); 798 if (acc) { 799 psum += at[i].VsrSW(j); 800 } 801 if (sat && psum > INT32_MAX) { 802 set_vscr_sat(env); 803 at[i].VsrSW(j) = INT32_MAX; 804 } else if (sat && psum < INT32_MIN) { 805 set_vscr_sat(env); 806 at[i].VsrSW(j) = INT32_MIN; 807 } else { 808 at[i].VsrSW(j) = (int32_t) psum; 809 } 810 } else { 811 at[i].VsrSW(j) = 0; 812 } 813 } 814 } 815 } 816 817 QEMU_FLATTEN 818 void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 819 ppc_acc_t *at, uint32_t mask) 820 { 821 xviger(env, a, b, at, mask, false, false, ger_rank8); 822 } 823 824 QEMU_FLATTEN 825 void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 826 ppc_acc_t *at, uint32_t mask) 827 { 828 xviger(env, a, b, at, mask, false, true, ger_rank8); 829 } 830 831 QEMU_FLATTEN 832 void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 833 ppc_acc_t *at, uint32_t mask) 834 { 835 xviger(env, a, b, at, mask, false, false, ger_rank4); 836 } 837 838 QEMU_FLATTEN 839 void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 840 ppc_acc_t *at, uint32_t mask) 841 { 842 xviger(env, a, b, at, mask, false, true, ger_rank4); 843 } 844 845 QEMU_FLATTEN 846 void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 847 ppc_acc_t *at, uint32_t mask) 848 { 849 xviger(env, a, b, at, mask, true, true, ger_rank4); 850 } 851 852 QEMU_FLATTEN 853 void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 854 ppc_acc_t *at, uint32_t mask) 855 { 856 xviger(env, a, b, at, mask, false, false, ger_rank2); 
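    /* sat=false, acc=false; the S/PP/SPP variants below flip these flags */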
857 } 858 859 QEMU_FLATTEN 860 void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 861 ppc_acc_t *at, uint32_t mask) 862 { 863 xviger(env, a, b, at, mask, true, false, ger_rank2); 864 } 865 866 QEMU_FLATTEN 867 void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 868 ppc_acc_t *at, uint32_t mask) 869 { 870 xviger(env, a, b, at, mask, false, true, ger_rank2); 871 } 872 873 QEMU_FLATTEN 874 void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 875 ppc_acc_t *at, uint32_t mask) 876 { 877 xviger(env, a, b, at, mask, true, true, ger_rank2); 878 } 879 880 target_ulong helper_vclzlsbb(ppc_avr_t *r) 881 { 882 target_ulong count = 0; 883 int i; 884 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 885 if (r->VsrB(i) & 0x01) { 886 break; 887 } 888 count++; 889 } 890 return count; 891 } 892 893 target_ulong helper_vctzlsbb(ppc_avr_t *r) 894 { 895 target_ulong count = 0; 896 int i; 897 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 898 if (r->VsrB(i) & 0x01) { 899 break; 900 } 901 count++; 902 } 903 return count; 904 } 905 906 void helper_VMHADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 907 ppc_avr_t *b, ppc_avr_t *c) 908 { 909 int sat = 0; 910 int i; 911 912 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 913 int32_t prod = a->s16[i] * b->s16[i]; 914 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 915 916 r->s16[i] = cvtswsh(t, &sat); 917 } 918 919 if (sat) { 920 set_vscr_sat(env); 921 } 922 } 923 924 void helper_VMHRADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 925 ppc_avr_t *b, ppc_avr_t *c) 926 { 927 int sat = 0; 928 int i; 929 930 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 931 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 932 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 933 r->s16[i] = cvtswsh(t, &sat); 934 } 935 936 if (sat) { 937 set_vscr_sat(env); 938 } 939 } 940 941 void helper_VMLADDUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c, 942 uint32_t v) 943 { 944 int i; 945 946 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 947 int32_t prod = a->s16[i] * b->s16[i]; 948 r->s16[i] = (int16_t) (prod + c->s16[i]); 949 } 950 } 951 952 #define VMRG_DO(name, element, access, ofs) \ 953 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 954 { \ 955 ppc_avr_t result; \ 956 int i, half = ARRAY_SIZE(r->element) / 2; \ 957 \ 958 for (i = 0; i < half; i++) { \ 959 result.access(i * 2 + 0) = a->access(i + ofs); \ 960 result.access(i * 2 + 1) = b->access(i + ofs); \ 961 } \ 962 *r = result; \ 963 } 964 965 #define VMRG(suffix, element, access) \ 966 VMRG_DO(mrgl##suffix, element, access, half) \ 967 VMRG_DO(mrgh##suffix, element, access, 0) 968 VMRG(b, u8, VsrB) 969 VMRG(h, u16, VsrH) 970 VMRG(w, u32, VsrW) 971 #undef VMRG_DO 972 #undef VMRG 973 974 void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 975 { 976 int32_t prod[16]; 977 int i; 978 979 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 980 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 981 } 982 983 VECTOR_FOR_INORDER_I(i, s32) { 984 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 985 prod[4 * i + 2] + prod[4 * i + 3]; 986 } 987 } 988 989 void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 990 { 991 int32_t prod[8]; 992 int i; 993 994 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 995 prod[i] = a->s16[i] * b->s16[i]; 996 } 997 998 VECTOR_FOR_INORDER_I(i, s32) { 999 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1000 } 1001 } 1002 1003 void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1004 ppc_avr_t *b, ppc_avr_t 
*c) 1005 { 1006 int32_t prod[8]; 1007 int i; 1008 int sat = 0; 1009 1010 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1011 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1012 } 1013 1014 VECTOR_FOR_INORDER_I(i, s32) { 1015 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1016 1017 r->u32[i] = cvtsdsw(t, &sat); 1018 } 1019 1020 if (sat) { 1021 set_vscr_sat(env); 1022 } 1023 } 1024 1025 void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1026 { 1027 uint16_t prod[16]; 1028 int i; 1029 1030 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1031 prod[i] = a->u8[i] * b->u8[i]; 1032 } 1033 1034 VECTOR_FOR_INORDER_I(i, u32) { 1035 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1036 prod[4 * i + 2] + prod[4 * i + 3]; 1037 } 1038 } 1039 1040 void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1041 { 1042 uint32_t prod[8]; 1043 int i; 1044 1045 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1046 prod[i] = a->u16[i] * b->u16[i]; 1047 } 1048 1049 VECTOR_FOR_INORDER_I(i, u32) { 1050 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1051 } 1052 } 1053 1054 void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1055 ppc_avr_t *b, ppc_avr_t *c) 1056 { 1057 uint32_t prod[8]; 1058 int i; 1059 int sat = 0; 1060 1061 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1062 prod[i] = a->u16[i] * b->u16[i]; 1063 } 1064 1065 VECTOR_FOR_INORDER_I(i, s32) { 1066 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1067 1068 r->u32[i] = cvtuduw(t, &sat); 1069 } 1070 1071 if (sat) { 1072 set_vscr_sat(env); 1073 } 1074 } 1075 1076 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 1077 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1078 { \ 1079 int i; \ 1080 \ 1081 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1082 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 1083 (cast)b->mul_access(i); \ 1084 } \ 1085 } 1086 1087 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 1088 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1089 { \ 1090 int i; \ 1091 \ 1092 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1093 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1094 (cast)b->mul_access(i + 1); \ 1095 } \ 1096 } 1097 1098 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1099 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \ 1100 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast) 1101 VMUL(SB, s8, VsrSB, VsrSH, int16_t) 1102 VMUL(SH, s16, VsrSH, VsrSW, int32_t) 1103 VMUL(SW, s32, VsrSW, VsrSD, int64_t) 1104 VMUL(UB, u8, VsrB, VsrH, uint16_t) 1105 VMUL(UH, u16, VsrH, VsrW, uint32_t) 1106 VMUL(UW, u32, VsrW, VsrD, uint64_t) 1107 #undef VMUL_DO_EVN 1108 #undef VMUL_DO_ODD 1109 #undef VMUL 1110 1111 void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv, 1112 target_ulong uim) 1113 { 1114 int i, idx; 1115 ppc_vsr_t tmp = { .u64 = {0, 0} }; 1116 1117 for (i = 0; i < ARRAY_SIZE(t->u8); i++) { 1118 if ((pcv->VsrB(i) >> 5) == uim) { 1119 idx = pcv->VsrB(i) & 0x1f; 1120 if (idx < ARRAY_SIZE(t->u8)) { 1121 tmp.VsrB(i) = s0->VsrB(idx); 1122 } else { 1123 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8)); 1124 } 1125 } 1126 } 1127 1128 *t = tmp; 1129 } 1130 1131 void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1132 { 1133 Int128 neg1 = int128_makes64(-1); 1134 Int128 int128_min = int128_make128(0, INT64_MIN); 1135 if (likely(int128_nz(b->s128) && 1136 
(int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) { 1137 t->s128 = int128_divs(a->s128, b->s128); 1138 } else { 1139 t->s128 = a->s128; /* Undefined behavior */ 1140 } 1141 } 1142 1143 void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1144 { 1145 if (int128_nz(b->s128)) { 1146 t->s128 = int128_divu(a->s128, b->s128); 1147 } else { 1148 t->s128 = a->s128; /* Undefined behavior */ 1149 } 1150 } 1151 1152 void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1153 { 1154 int i; 1155 int64_t high; 1156 uint64_t low; 1157 for (i = 0; i < 2; i++) { 1158 high = a->s64[i]; 1159 low = 0; 1160 if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) { 1161 t->s64[i] = a->s64[i]; /* Undefined behavior */ 1162 } else { 1163 divs128(&low, &high, b->s64[i]); 1164 t->s64[i] = low; 1165 } 1166 } 1167 } 1168 1169 void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1170 { 1171 int i; 1172 uint64_t high, low; 1173 for (i = 0; i < 2; i++) { 1174 high = a->u64[i]; 1175 low = 0; 1176 if (unlikely(!b->u64[i])) { 1177 t->u64[i] = a->u64[i]; /* Undefined behavior */ 1178 } else { 1179 divu128(&low, &high, b->u64[i]); 1180 t->u64[i] = low; 1181 } 1182 } 1183 } 1184 1185 void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1186 { 1187 Int128 high, low; 1188 Int128 int128_min = int128_make128(0, INT64_MIN); 1189 Int128 neg1 = int128_makes64(-1); 1190 1191 high = a->s128; 1192 low = int128_zero(); 1193 if (unlikely(!int128_nz(b->s128) || 1194 (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) { 1195 t->s128 = a->s128; /* Undefined behavior */ 1196 } else { 1197 divs256(&low, &high, b->s128); 1198 t->s128 = low; 1199 } 1200 } 1201 1202 void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1203 { 1204 Int128 high, low; 1205 1206 high = a->s128; 1207 low = int128_zero(); 1208 if (unlikely(!int128_nz(b->s128))) { 1209 t->s128 = a->s128; /* Undefined behavior */ 1210 } else { 1211 divu256(&low, &high, b->s128); 1212 t->s128 = low; 1213 } 1214 } 1215 1216 void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1217 { 1218 Int128 neg1 = int128_makes64(-1); 1219 Int128 int128_min = int128_make128(0, INT64_MIN); 1220 if (likely(int128_nz(b->s128) && 1221 (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) { 1222 t->s128 = int128_rems(a->s128, b->s128); 1223 } else { 1224 t->s128 = int128_zero(); /* Undefined behavior */ 1225 } 1226 } 1227 1228 void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1229 { 1230 if (likely(int128_nz(b->s128))) { 1231 t->s128 = int128_remu(a->s128, b->s128); 1232 } else { 1233 t->s128 = int128_zero(); /* Undefined behavior */ 1234 } 1235 } 1236 1237 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1238 { 1239 ppc_avr_t result; 1240 int i; 1241 1242 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1243 int s = c->VsrB(i) & 0x1f; 1244 int index = s & 0xf; 1245 1246 if (s & 0x10) { 1247 result.VsrB(i) = b->VsrB(index); 1248 } else { 1249 result.VsrB(i) = a->VsrB(index); 1250 } 1251 } 1252 *r = result; 1253 } 1254 1255 void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1256 { 1257 ppc_avr_t result; 1258 int i; 1259 1260 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1261 int s = c->VsrB(i) & 0x1f; 1262 int index = 15 - (s & 0xf); 1263 1264 if (s & 0x10) { 1265 result.VsrB(i) = a->VsrB(index); 1266 } else { 1267 result.VsrB(i) = b->VsrB(index); 1268 } 1269 } 1270 *r = result; 1271 } 1272 1273 #define XXGENPCV_BE_EXP(NAME, SZ) \ 1274 void glue(helper_, glue(NAME, 
_be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ 1275 { \ 1276 ppc_vsr_t tmp; \ 1277 \ 1278 /* Initialize tmp with the result of an all-zeros mask */ \ 1279 tmp.VsrD(0) = 0x1011121314151617; \ 1280 tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \ 1281 \ 1282 /* Iterate over the most significant byte of each element */ \ 1283 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1284 if (b->VsrB(i) & 0x80) { \ 1285 /* Update each byte of the element */ \ 1286 for (int k = 0; k < SZ; k++) { \ 1287 tmp.VsrB(i + k) = j + k; \ 1288 } \ 1289 j += SZ; \ 1290 } \ 1291 } \ 1292 \ 1293 *t = tmp; \ 1294 } 1295 1296 #define XXGENPCV_BE_COMP(NAME, SZ) \ 1297 void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ 1298 { \ 1299 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ 1300 \ 1301 /* Iterate over the most significant byte of each element */ \ 1302 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1303 if (b->VsrB(i) & 0x80) { \ 1304 /* Update each byte of the element */ \ 1305 for (int k = 0; k < SZ; k++) { \ 1306 tmp.VsrB(j + k) = i + k; \ 1307 } \ 1308 j += SZ; \ 1309 } \ 1310 } \ 1311 \ 1312 *t = tmp; \ 1313 } 1314 1315 #define XXGENPCV_LE_EXP(NAME, SZ) \ 1316 void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ 1317 { \ 1318 ppc_vsr_t tmp; \ 1319 \ 1320 /* Initialize tmp with the result of an all-zeros mask */ \ 1321 tmp.VsrD(0) = 0x1F1E1D1C1B1A1918; \ 1322 tmp.VsrD(1) = 0x1716151413121110; \ 1323 \ 1324 /* Iterate over the most significant byte of each element */ \ 1325 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1326 /* Reverse indexing of "i" */ \ 1327 const int idx = ARRAY_SIZE(b->u8) - i - SZ; \ 1328 if (b->VsrB(idx) & 0x80) { \ 1329 /* Update each byte of the element */ \ 1330 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \ 1331 tmp.VsrB(idx + rk) = j + k; \ 1332 } \ 1333 j += SZ; \ 1334 } \ 1335 } \ 1336 \ 1337 *t = tmp; \ 1338 } 1339 1340 #define XXGENPCV_LE_COMP(NAME, SZ) \ 1341 void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ 1342 { \ 1343 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ 1344 \ 1345 /* Iterate over the most significant byte of each element */ \ 1346 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1347 if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) { \ 1348 /* Update each byte of the element */ \ 1349 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \ 1350 /* Reverse indexing of "j" */ \ 1351 const int idx = ARRAY_SIZE(b->u8) - j - SZ; \ 1352 tmp.VsrB(idx + rk) = i + k; \ 1353 } \ 1354 j += SZ; \ 1355 } \ 1356 } \ 1357 \ 1358 *t = tmp; \ 1359 } 1360 1361 #define XXGENPCV(NAME, SZ) \ 1362 XXGENPCV_BE_EXP(NAME, SZ) \ 1363 XXGENPCV_BE_COMP(NAME, SZ) \ 1364 XXGENPCV_LE_EXP(NAME, SZ) \ 1365 XXGENPCV_LE_COMP(NAME, SZ) \ 1366 1367 XXGENPCV(XXGENPCVBM, 1) 1368 XXGENPCV(XXGENPCVHM, 2) 1369 XXGENPCV(XXGENPCVWM, 4) 1370 XXGENPCV(XXGENPCVDM, 8) 1371 1372 #undef XXGENPCV_BE_EXP 1373 #undef XXGENPCV_BE_COMP 1374 #undef XXGENPCV_LE_EXP 1375 #undef XXGENPCV_LE_COMP 1376 #undef XXGENPCV 1377 1378 #if HOST_BIG_ENDIAN 1379 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1380 #define VBPERMD_INDEX(i) (i) 1381 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1382 #else 1383 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1384 #define VBPERMD_INDEX(i) (1 - i) 1385 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1386 #endif 1387 #define EXTRACT_BIT(avr, i, index) \ 1388 (extract64((avr)->VsrD(i), 63 - index, 1)) 1389 1390 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1391 { 1392 int i, j; 1393 ppc_avr_t result = 
{ .u64 = { 0, 0 } }; 1394 VECTOR_FOR_INORDER_I(i, u64) { 1395 for (j = 0; j < 8; j++) { 1396 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1397 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1398 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1399 } 1400 } 1401 } 1402 *r = result; 1403 } 1404 1405 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1406 { 1407 int i; 1408 uint64_t perm = 0; 1409 1410 VECTOR_FOR_INORDER_I(i, u8) { 1411 int index = VBPERMQ_INDEX(b, i); 1412 1413 if (index < 128) { 1414 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1415 if (a->u64[VBPERMQ_DW(index)] & mask) { 1416 perm |= (0x8000 >> i); 1417 } 1418 } 1419 } 1420 1421 r->VsrD(0) = perm; 1422 r->VsrD(1) = 0; 1423 } 1424 1425 #undef VBPERMQ_INDEX 1426 #undef VBPERMQ_DW 1427 1428 /* 1429 * There is no carry across the two doublewords, so their order does 1430 * not matter. Nor is there partial overlap between registers. 1431 */ 1432 void helper_vpmsumb(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1433 { 1434 for (int i = 0; i < 2; ++i) { 1435 uint64_t aa = a->u64[i], bb = b->u64[i]; 1436 r->u64[i] = clmul_8x4_even(aa, bb) ^ clmul_8x4_odd(aa, bb); 1437 } 1438 } 1439 1440 void helper_vpmsumh(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1441 { 1442 for (int i = 0; i < 2; ++i) { 1443 uint64_t aa = a->u64[i], bb = b->u64[i]; 1444 r->u64[i] = clmul_16x2_even(aa, bb) ^ clmul_16x2_odd(aa, bb); 1445 } 1446 } 1447 1448 void helper_vpmsumw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1449 { 1450 for (int i = 0; i < 2; ++i) { 1451 uint64_t aa = a->u64[i], bb = b->u64[i]; 1452 r->u64[i] = clmul_32(aa, bb) ^ clmul_32(aa >> 32, bb >> 32); 1453 } 1454 } 1455 1456 void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1457 { 1458 Int128 e = clmul_64(a->u64[0], b->u64[0]); 1459 Int128 o = clmul_64(a->u64[1], b->u64[1]); 1460 r->s128 = int128_xor(e, o); 1461 } 1462 1463 #if HOST_BIG_ENDIAN 1464 #define PKBIG 1 1465 #else 1466 #define PKBIG 0 1467 #endif 1468 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1469 { 1470 int i, j; 1471 ppc_avr_t result; 1472 #if HOST_BIG_ENDIAN 1473 const ppc_avr_t *x[2] = { a, b }; 1474 #else 1475 const ppc_avr_t *x[2] = { b, a }; 1476 #endif 1477 1478 VECTOR_FOR_INORDER_I(i, u64) { 1479 VECTOR_FOR_INORDER_I(j, u32) { 1480 uint32_t e = x[i]->u32[j]; 1481 1482 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1483 ((e >> 6) & 0x3e0) | 1484 ((e >> 3) & 0x1f)); 1485 } 1486 } 1487 *r = result; 1488 } 1489 1490 #define VPK(suffix, from, to, cvt, dosat) \ 1491 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1492 ppc_avr_t *a, ppc_avr_t *b) \ 1493 { \ 1494 int i; \ 1495 int sat = 0; \ 1496 ppc_avr_t result; \ 1497 ppc_avr_t *a0 = PKBIG ? a : b; \ 1498 ppc_avr_t *a1 = PKBIG ? 
b : a; \ 1499 \ 1500 VECTOR_FOR_INORDER_I(i, from) { \ 1501 result.to[i] = cvt(a0->from[i], &sat); \ 1502 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1503 } \ 1504 *r = result; \ 1505 if (dosat && sat) { \ 1506 set_vscr_sat(env); \ 1507 } \ 1508 } 1509 #define I(x, y) (x) 1510 VPK(shss, s16, s8, cvtshsb, 1) 1511 VPK(shus, s16, u8, cvtshub, 1) 1512 VPK(swss, s32, s16, cvtswsh, 1) 1513 VPK(swus, s32, u16, cvtswuh, 1) 1514 VPK(sdss, s64, s32, cvtsdsw, 1) 1515 VPK(sdus, s64, u32, cvtsduw, 1) 1516 VPK(uhus, u16, u8, cvtuhub, 1) 1517 VPK(uwus, u32, u16, cvtuwuh, 1) 1518 VPK(udus, u64, u32, cvtuduw, 1) 1519 VPK(uhum, u16, u8, I, 0) 1520 VPK(uwum, u32, u16, I, 0) 1521 VPK(udum, u64, u32, I, 0) 1522 #undef I 1523 #undef VPK 1524 #undef PKBIG 1525 1526 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1527 { 1528 int i; 1529 1530 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1531 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1532 } 1533 } 1534 1535 #define VRFI(suffix, rounding) \ 1536 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1537 ppc_avr_t *b) \ 1538 { \ 1539 int i; \ 1540 float_status s = env->vec_status; \ 1541 \ 1542 set_float_rounding_mode(rounding, &s); \ 1543 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1544 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1545 } \ 1546 } 1547 VRFI(n, float_round_nearest_even) 1548 VRFI(m, float_round_down) 1549 VRFI(p, float_round_up) 1550 VRFI(z, float_round_to_zero) 1551 #undef VRFI 1552 1553 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1554 { 1555 int i; 1556 1557 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1558 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1559 1560 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1561 } 1562 } 1563 1564 #define VRLMI(name, size, element, insert) \ 1565 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 1566 { \ 1567 int i; \ 1568 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1569 uint##size##_t src1 = a->element[i]; \ 1570 uint##size##_t src2 = b->element[i]; \ 1571 uint##size##_t src3 = r->element[i]; \ 1572 uint##size##_t begin, end, shift, mask, rot_val; \ 1573 \ 1574 shift = extract##size(src2, 0, 6); \ 1575 end = extract##size(src2, 8, 6); \ 1576 begin = extract##size(src2, 16, 6); \ 1577 rot_val = rol##size(src1, shift); \ 1578 mask = mask_u##size(begin, end); \ 1579 if (insert) { \ 1580 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1581 } else { \ 1582 r->element[i] = (rot_val & mask); \ 1583 } \ 1584 } \ 1585 } 1586 1587 VRLMI(VRLDMI, 64, u64, 1); 1588 VRLMI(VRLWMI, 32, u32, 1); 1589 VRLMI(VRLDNM, 64, u64, 0); 1590 VRLMI(VRLWNM, 32, u32, 0); 1591 1592 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1593 { 1594 int i; 1595 1596 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1597 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status); 1598 } 1599 } 1600 1601 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1602 { 1603 int i; 1604 1605 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1606 r->f32[i] = float32_log2(b->f32[i], &env->vec_status); 1607 } 1608 } 1609 1610 #define VEXTU_X_DO(name, size, left) \ 1611 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1612 { \ 1613 int index = (a & 0xf) * 8; \ 1614 if (left) { \ 1615 index = 128 - index - size; \ 1616 } \ 1617 return int128_getlo(int128_rshift(b->s128, index)) & \ 1618 MAKE_64BIT_MASK(0, size); \ 1619 } 1620 VEXTU_X_DO(vextublx, 8, 1) 1621 VEXTU_X_DO(vextuhlx, 16, 1) 1622 
VEXTU_X_DO(vextuwlx, 32, 1) 1623 VEXTU_X_DO(vextubrx, 8, 0) 1624 VEXTU_X_DO(vextuhrx, 16, 0) 1625 VEXTU_X_DO(vextuwrx, 32, 0) 1626 #undef VEXTU_X_DO 1627 1628 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1629 { 1630 int i; 1631 unsigned int shift, bytes, size; 1632 1633 size = ARRAY_SIZE(r->u8); 1634 for (i = 0; i < size; i++) { 1635 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1636 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */ 1637 (((i + 1) < size) ? a->VsrB(i + 1) : 0); 1638 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */ 1639 } 1640 } 1641 1642 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1643 { 1644 int i; 1645 unsigned int shift, bytes; 1646 1647 /* 1648 * Use reverse order, as destination and source register can be 1649 * same. Its being modified in place saving temporary, reverse 1650 * order will guarantee that computed result is not fed back. 1651 */ 1652 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1653 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1654 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i); 1655 /* extract adjacent bytes */ 1656 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */ 1657 } 1658 } 1659 1660 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1661 { 1662 int sh = shift & 0xf; 1663 int i; 1664 ppc_avr_t result; 1665 1666 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1667 int index = sh + i; 1668 if (index > 0xf) { 1669 result.VsrB(i) = b->VsrB(index - 0x10); 1670 } else { 1671 result.VsrB(i) = a->VsrB(index); 1672 } 1673 } 1674 *r = result; 1675 } 1676 1677 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1678 { 1679 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1680 1681 #if HOST_BIG_ENDIAN 1682 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1683 memset(&r->u8[16 - sh], 0, sh); 1684 #else 1685 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1686 memset(&r->u8[0], 0, sh); 1687 #endif 1688 } 1689 1690 #if HOST_BIG_ENDIAN 1691 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX]) 1692 #else 1693 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1) 1694 #endif 1695 1696 #define VINSX(SUFFIX, TYPE) \ 1697 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \ 1698 uint64_t val, target_ulong index) \ 1699 { \ 1700 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \ 1701 target_long idx = index; \ 1702 \ 1703 if (idx < 0 || idx > maxidx) { \ 1704 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \ 1705 qemu_log_mask(LOG_GUEST_ERROR, \ 1706 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \ 1707 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \ 1708 } else { \ 1709 TYPE src = val; \ 1710 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \ 1711 } \ 1712 } 1713 VINSX(B, uint8_t) 1714 VINSX(H, uint16_t) 1715 VINSX(W, uint32_t) 1716 VINSX(D, uint64_t) 1717 #undef ELEM_ADDR 1718 #undef VINSX 1719 #if HOST_BIG_ENDIAN 1720 #define VEXTDVLX(NAME, SIZE) \ 1721 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1722 target_ulong index) \ 1723 { \ 1724 const target_long idx = index; \ 1725 ppc_avr_t tmp[2] = { *a, *b }; \ 1726 memset(t, 0, sizeof(*t)); \ 1727 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1728 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \ 1729 } else { \ 1730 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1731 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1732 env->nip, idx < 0 ? 
SIZE - idx : idx, 32 - SIZE); \ 1733 } \ 1734 } 1735 #else 1736 #define VEXTDVLX(NAME, SIZE) \ 1737 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1738 target_ulong index) \ 1739 { \ 1740 const target_long idx = index; \ 1741 ppc_avr_t tmp[2] = { *b, *a }; \ 1742 memset(t, 0, sizeof(*t)); \ 1743 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1744 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \ 1745 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \ 1746 } else { \ 1747 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1748 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1749 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \ 1750 } \ 1751 } 1752 #endif 1753 VEXTDVLX(VEXTDUBVLX, 1) 1754 VEXTDVLX(VEXTDUHVLX, 2) 1755 VEXTDVLX(VEXTDUWVLX, 4) 1756 VEXTDVLX(VEXTDDVLX, 8) 1757 #undef VEXTDVLX 1758 #if HOST_BIG_ENDIAN 1759 #define VEXTRACT(suffix, element) \ 1760 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1761 { \ 1762 uint32_t es = sizeof(r->element[0]); \ 1763 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1764 memset(&r->u8[8], 0, 8); \ 1765 memset(&r->u8[0], 0, 8 - es); \ 1766 } 1767 #else 1768 #define VEXTRACT(suffix, element) \ 1769 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1770 { \ 1771 uint32_t es = sizeof(r->element[0]); \ 1772 uint32_t s = (16 - index) - es; \ 1773 memmove(&r->u8[8], &b->u8[s], es); \ 1774 memset(&r->u8[0], 0, 8); \ 1775 memset(&r->u8[8 + es], 0, 8 - es); \ 1776 } 1777 #endif 1778 VEXTRACT(ub, u8) 1779 VEXTRACT(uh, u16) 1780 VEXTRACT(uw, u32) 1781 VEXTRACT(d, u64) 1782 #undef VEXTRACT 1783 1784 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \ 1785 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \ 1786 { \ 1787 int i, idx, crf = 0; \ 1788 \ 1789 for (i = 0; i < NUM_ELEMS; i++) { \ 1790 idx = LEFT ? i : NUM_ELEMS - i - 1; \ 1791 if (b->Vsr##ELEM(idx)) { \ 1792 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \ 1793 } else { \ 1794 crf = 0b0010; \ 1795 break; \ 1796 } \ 1797 } \ 1798 \ 1799 for (; i < NUM_ELEMS; i++) { \ 1800 idx = LEFT ? i : NUM_ELEMS - i - 1; \ 1801 t->Vsr##ELEM(idx) = 0; \ 1802 } \ 1803 \ 1804 return crf; \ 1805 } 1806 VSTRI(VSTRIBL, B, 16, true) 1807 VSTRI(VSTRIBR, B, 16, false) 1808 VSTRI(VSTRIHL, H, 8, true) 1809 VSTRI(VSTRIHR, H, 8, false) 1810 #undef VSTRI 1811 1812 void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index) 1813 { 1814 ppc_vsr_t t = { }; 1815 size_t es = sizeof(uint32_t); 1816 uint32_t ext_index; 1817 int i; 1818 1819 ext_index = index; 1820 for (i = 0; i < es; i++, ext_index++) { 1821 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16); 1822 } 1823 1824 *xt = t; 1825 } 1826 1827 void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index) 1828 { 1829 ppc_vsr_t t = *xt; 1830 size_t es = sizeof(uint32_t); 1831 int ins_index, i = 0; 1832 1833 ins_index = index; 1834 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1835 t.VsrB(ins_index) = xb->VsrB(8 - es + i); 1836 } 1837 1838 *xt = t; 1839 } 1840 1841 void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c, 1842 uint32_t desc) 1843 { 1844 /* 1845 * Instead of processing imm bit-by-bit, we'll skip the computation of 1846 * conjunctions whose corresponding bit is unset. 1847 */ 1848 int bit, imm = simd_data(desc); 1849 Int128 conj, disj = int128_zero(); 1850 1851 /* Iterate over set bits from the least to the most significant bit */ 1852 while (imm) { 1853 /* 1854 * Get the next bit to be processed with ctz64. 
Invert the result of 1855 * ctz64 to match the indexing used by PowerISA. 1856 */ 1857 bit = 7 - ctzl(imm); 1858 if (bit & 0x4) { 1859 conj = a->s128; 1860 } else { 1861 conj = int128_not(a->s128); 1862 } 1863 if (bit & 0x2) { 1864 conj = int128_and(conj, b->s128); 1865 } else { 1866 conj = int128_and(conj, int128_not(b->s128)); 1867 } 1868 if (bit & 0x1) { 1869 conj = int128_and(conj, c->s128); 1870 } else { 1871 conj = int128_and(conj, int128_not(c->s128)); 1872 } 1873 disj = int128_or(disj, conj); 1874 1875 /* Unset the least significant bit that is set */ 1876 imm &= imm - 1; 1877 } 1878 1879 t->s128 = disj; 1880 } 1881 1882 #define XXBLEND(name, sz) \ 1883 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1884 ppc_avr_t *c, uint32_t desc) \ 1885 { \ 1886 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \ 1887 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \ 1888 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \ 1889 } \ 1890 } 1891 XXBLEND(B, 8) 1892 XXBLEND(H, 16) 1893 XXBLEND(W, 32) 1894 XXBLEND(D, 64) 1895 #undef XXBLEND 1896 1897 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1898 { 1899 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1900 1901 #if HOST_BIG_ENDIAN 1902 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1903 memset(&r->u8[0], 0, sh); 1904 #else 1905 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1906 memset(&r->u8[16 - sh], 0, sh); 1907 #endif 1908 } 1909 1910 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1911 { 1912 int64_t t; 1913 int i, upper; 1914 ppc_avr_t result; 1915 int sat = 0; 1916 1917 upper = ARRAY_SIZE(r->s32) - 1; 1918 t = (int64_t)b->VsrSW(upper); 1919 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1920 t += a->VsrSW(i); 1921 result.VsrSW(i) = 0; 1922 } 1923 result.VsrSW(upper) = cvtsdsw(t, &sat); 1924 *r = result; 1925 1926 if (sat) { 1927 set_vscr_sat(env); 1928 } 1929 } 1930 1931 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1932 { 1933 int i, j, upper; 1934 ppc_avr_t result; 1935 int sat = 0; 1936 1937 upper = 1; 1938 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1939 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 1940 1941 result.VsrD(i) = 0; 1942 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 1943 t += a->VsrSW(2 * i + j); 1944 } 1945 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 1946 } 1947 1948 *r = result; 1949 if (sat) { 1950 set_vscr_sat(env); 1951 } 1952 } 1953 1954 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1955 { 1956 int i, j; 1957 int sat = 0; 1958 1959 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1960 int64_t t = (int64_t)b->s32[i]; 1961 1962 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 1963 t += a->s8[4 * i + j]; 1964 } 1965 r->s32[i] = cvtsdsw(t, &sat); 1966 } 1967 1968 if (sat) { 1969 set_vscr_sat(env); 1970 } 1971 } 1972 1973 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1974 { 1975 int sat = 0; 1976 int i; 1977 1978 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1979 int64_t t = (int64_t)b->s32[i]; 1980 1981 t += a->s16[2 * i] + a->s16[2 * i + 1]; 1982 r->s32[i] = cvtsdsw(t, &sat); 1983 } 1984 1985 if (sat) { 1986 set_vscr_sat(env); 1987 } 1988 } 1989 1990 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1991 { 1992 int i, j; 1993 int sat = 0; 1994 1995 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1996 uint64_t t = (uint64_t)b->u32[i]; 1997 1998 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 1999 t += a->u8[4 * i + j]; 2000 } 2001 r->u32[i] = cvtuduw(t, &sat); 2002 } 
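    /*
     * vsum4ubs: each word of the result is the corresponding word of b plus
     * the unsigned sum of four bytes of a, clamped to UINT32_MAX; any lane
     * that clamps raises VSCR[SAT] below.
     */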
2003 2004 if (sat) { 2005 set_vscr_sat(env); 2006 } 2007 } 2008 2009 #if HOST_BIG_ENDIAN 2010 #define UPKHI 1 2011 #define UPKLO 0 2012 #else 2013 #define UPKHI 0 2014 #define UPKLO 1 2015 #endif 2016 #define VUPKPX(suffix, hi) \ 2017 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2018 { \ 2019 int i; \ 2020 ppc_avr_t result; \ 2021 \ 2022 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 2023 uint16_t _e = b->u16[hi ? i : i + 4]; \ 2024 uint8_t _a = (_e >> 15) ? 0xff : 0; \ 2025 uint8_t _r = (_e >> 10) & 0x1f; \ 2026 uint8_t _g = (_e >> 5) & 0x1f; \ 2027 uint8_t _b = _e & 0x1f; \ 2028 \ 2029 result.u32[i] = (_a << 24) | (_r << 16) | (_g << 8) | _b; \ 2030 } \ 2031 *r = result; \ 2032 } 2033 VUPKPX(lpx, UPKLO) 2034 VUPKPX(hpx, UPKHI) 2035 #undef VUPKPX 2036 2037 #define VUPK(suffix, unpacked, packee, hi) \ 2038 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2039 { \ 2040 int i; \ 2041 ppc_avr_t result; \ 2042 \ 2043 if (hi) { \ 2044 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 2045 result.unpacked[i] = b->packee[i]; \ 2046 } \ 2047 } else { \ 2048 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 2049 i++) { \ 2050 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 2051 } \ 2052 } \ 2053 *r = result; \ 2054 } 2055 VUPK(hsb, s16, s8, UPKHI) 2056 VUPK(hsh, s32, s16, UPKHI) 2057 VUPK(hsw, s64, s32, UPKHI) 2058 VUPK(lsb, s16, s8, UPKLO) 2059 VUPK(lsh, s32, s16, UPKLO) 2060 VUPK(lsw, s64, s32, UPKLO) 2061 #undef VUPK 2062 #undef UPKHI 2063 #undef UPKLO 2064 2065 #define VGENERIC_DO(name, element) \ 2066 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 2067 { \ 2068 int i; \ 2069 \ 2070 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 2071 r->element[i] = name(b->element[i]); \ 2072 } \ 2073 } 2074 2075 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 2076 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 2077 2078 VGENERIC_DO(clzb, u8) 2079 VGENERIC_DO(clzh, u16) 2080 2081 #undef clzb 2082 #undef clzh 2083 2084 #define ctzb(v) ((v) ? ctz32(v) : 8) 2085 #define ctzh(v) ((v) ? 
ctz32(v) : 16) 2086 #define ctzw(v) ctz32((v)) 2087 #define ctzd(v) ctz64((v)) 2088 2089 VGENERIC_DO(ctzb, u8) 2090 VGENERIC_DO(ctzh, u16) 2091 VGENERIC_DO(ctzw, u32) 2092 VGENERIC_DO(ctzd, u64) 2093 2094 #undef ctzb 2095 #undef ctzh 2096 #undef ctzw 2097 #undef ctzd 2098 2099 #define popcntb(v) ctpop8(v) 2100 #define popcnth(v) ctpop16(v) 2101 #define popcntw(v) ctpop32(v) 2102 #define popcntd(v) ctpop64(v) 2103 2104 VGENERIC_DO(popcntb, u8) 2105 VGENERIC_DO(popcnth, u16) 2106 VGENERIC_DO(popcntw, u32) 2107 VGENERIC_DO(popcntd, u64) 2108 2109 #undef popcntb 2110 #undef popcnth 2111 #undef popcntw 2112 #undef popcntd 2113 2114 #undef VGENERIC_DO 2115 2116 void helper_VADDUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2117 { 2118 r->s128 = int128_add(a->s128, b->s128); 2119 } 2120 2121 void helper_VADDEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2122 { 2123 r->s128 = int128_add(int128_add(a->s128, b->s128), 2124 int128_make64(int128_getlo(c->s128) & 1)); 2125 } 2126 2127 void helper_VADDCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2128 { 2129 r->VsrD(1) = int128_ult(int128_not(a->s128), b->s128); 2130 r->VsrD(0) = 0; 2131 } 2132 2133 void helper_VADDECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2134 { 2135 bool carry_out = int128_ult(int128_not(a->s128), b->s128), 2136 carry_in = int128_getlo(c->s128) & 1; 2137 2138 if (!carry_out && carry_in) { 2139 carry_out = (int128_nz(a->s128) || int128_nz(b->s128)) && 2140 int128_eq(int128_add(a->s128, b->s128), int128_makes64(-1)); 2141 } 2142 2143 r->VsrD(0) = 0; 2144 r->VsrD(1) = carry_out; 2145 } 2146 2147 void helper_VSUBUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2148 { 2149 r->s128 = int128_sub(a->s128, b->s128); 2150 } 2151 2152 void helper_VSUBEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2153 { 2154 r->s128 = int128_add(int128_add(a->s128, int128_not(b->s128)), 2155 int128_make64(int128_getlo(c->s128) & 1)); 2156 } 2157 2158 void helper_VSUBCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2159 { 2160 Int128 tmp = int128_not(b->s128); 2161 2162 r->VsrD(1) = int128_ult(int128_not(a->s128), tmp) || 2163 int128_eq(int128_add(a->s128, tmp), int128_makes64(-1)); 2164 r->VsrD(0) = 0; 2165 } 2166 2167 void helper_VSUBECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2168 { 2169 Int128 tmp = int128_not(b->s128); 2170 bool carry_out = int128_ult(int128_not(a->s128), tmp), 2171 carry_in = int128_getlo(c->s128) & 1; 2172 2173 r->VsrD(1) = carry_out || (carry_in && int128_eq(int128_add(a->s128, tmp), 2174 int128_makes64(-1))); 2175 r->VsrD(0) = 0; 2176 } 2177 2178 #define BCD_PLUS_PREF_1 0xC 2179 #define BCD_PLUS_PREF_2 0xF 2180 #define BCD_PLUS_ALT_1 0xA 2181 #define BCD_NEG_PREF 0xD 2182 #define BCD_NEG_ALT 0xB 2183 #define BCD_PLUS_ALT_2 0xE 2184 #define NATIONAL_PLUS 0x2B 2185 #define NATIONAL_NEG 0x2D 2186 2187 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2188 2189 static int bcd_get_sgn(ppc_avr_t *bcd) 2190 { 2191 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) { 2192 case BCD_PLUS_PREF_1: 2193 case BCD_PLUS_PREF_2: 2194 case BCD_PLUS_ALT_1: 2195 case BCD_PLUS_ALT_2: 2196 { 2197 return 1; 2198 } 2199 2200 case BCD_NEG_PREF: 2201 case BCD_NEG_ALT: 2202 { 2203 return -1; 2204 } 2205 2206 default: 2207 { 2208 return 0; 2209 } 2210 } 2211 } 2212 2213 static int bcd_preferred_sgn(int sgn, int ps) 2214 { 2215 if (sgn >= 0) { 2216 return (ps == 0) ? 
BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2217 } else { 2218 return BCD_NEG_PREF; 2219 } 2220 } 2221 2222 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2223 { 2224 uint8_t result; 2225 if (n & 1) { 2226 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4; 2227 } else { 2228 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF; 2229 } 2230 2231 if (unlikely(result > 9)) { 2232 *invalid = true; 2233 } 2234 return result; 2235 } 2236 2237 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2238 { 2239 if (n & 1) { 2240 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F; 2241 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4); 2242 } else { 2243 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0; 2244 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit; 2245 } 2246 } 2247 2248 static bool bcd_is_valid(ppc_avr_t *bcd) 2249 { 2250 int i; 2251 int invalid = 0; 2252 2253 if (bcd_get_sgn(bcd) == 0) { 2254 return false; 2255 } 2256 2257 for (i = 1; i < 32; i++) { 2258 bcd_get_digit(bcd, i, &invalid); 2259 if (unlikely(invalid)) { 2260 return false; 2261 } 2262 } 2263 return true; 2264 } 2265 2266 static int bcd_cmp_zero(ppc_avr_t *bcd) 2267 { 2268 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) { 2269 return CRF_EQ; 2270 } else { 2271 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT; 2272 } 2273 } 2274 2275 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2276 { 2277 return reg->VsrH(7 - n); 2278 } 2279 2280 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2281 { 2282 reg->VsrH(7 - n) = val; 2283 } 2284 2285 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2286 { 2287 int i; 2288 int invalid = 0; 2289 for (i = 31; i > 0; i--) { 2290 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2291 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2292 if (unlikely(invalid)) { 2293 return 0; /* doesn't matter */ 2294 } else if (dig_a > dig_b) { 2295 return 1; 2296 } else if (dig_a < dig_b) { 2297 return -1; 2298 } 2299 } 2300 2301 return 0; 2302 } 2303 2304 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2305 int *overflow) 2306 { 2307 int carry = 0; 2308 int i; 2309 int is_zero = 1; 2310 2311 for (i = 1; i <= 31; i++) { 2312 uint8_t digit = bcd_get_digit(a, i, invalid) + 2313 bcd_get_digit(b, i, invalid) + carry; 2314 is_zero &= (digit == 0); 2315 if (digit > 9) { 2316 carry = 1; 2317 digit -= 10; 2318 } else { 2319 carry = 0; 2320 } 2321 2322 bcd_put_digit(t, digit, i); 2323 } 2324 2325 *overflow = carry; 2326 return is_zero; 2327 } 2328 2329 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2330 int *overflow) 2331 { 2332 int carry = 0; 2333 int i; 2334 2335 for (i = 1; i <= 31; i++) { 2336 uint8_t digit = bcd_get_digit(a, i, invalid) - 2337 bcd_get_digit(b, i, invalid) + carry; 2338 if (digit & 0x80) { 2339 carry = -1; 2340 digit += 10; 2341 } else { 2342 carry = 0; 2343 } 2344 2345 bcd_put_digit(t, digit, i); 2346 } 2347 2348 *overflow = carry; 2349 } 2350 2351 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2352 { 2353 2354 int sgna = bcd_get_sgn(a); 2355 int sgnb = bcd_get_sgn(b); 2356 int invalid = (sgna == 0) || (sgnb == 0); 2357 int overflow = 0; 2358 int zero = 0; 2359 uint32_t cr = 0; 2360 ppc_avr_t result = { .u64 = { 0, 0 } }; 2361 2362 if (!invalid) { 2363 if (sgna == sgnb) { 2364 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2365 zero = bcd_add_mag(&result, a, b, &invalid, &overflow); 2366 cr = (sgna > 0) ? 
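/*
 * bcdadd in outline: when the operands carry the same sign their digit
 * magnitudes are added and the common sign is kept in preferred form;
 * when the signs differ the magnitudes are compared and the smaller is
 * subtracted from the larger, the result taking the sign of the larger
 * operand.  The returned CR field gets LT or GT from the sign of the
 * result, EQ or'ed in for a zero result, and SO for overflow or an
 * invalid encoding.  For example (+123) + (-27): the signs differ and
 * |123| > |27|, so the result is +(123 - 27) = +96 and GT is set.
 */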
CRF_GT : CRF_LT; 2367 } else { 2368 int magnitude = bcd_cmp_mag(a, b); 2369 if (magnitude > 0) { 2370 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2371 bcd_sub_mag(&result, a, b, &invalid, &overflow); 2372 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2373 } else if (magnitude < 0) { 2374 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps); 2375 bcd_sub_mag(&result, b, a, &invalid, &overflow); 2376 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2377 } else { 2378 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps); 2379 cr = CRF_EQ; 2380 } 2381 } 2382 } 2383 2384 if (unlikely(invalid)) { 2385 result.VsrD(0) = result.VsrD(1) = -1; 2386 cr = CRF_SO; 2387 } else if (overflow) { 2388 cr |= CRF_SO; 2389 } else if (zero) { 2390 cr |= CRF_EQ; 2391 } 2392 2393 *r = result; 2394 2395 return cr; 2396 } 2397 2398 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2399 { 2400 ppc_avr_t bcopy = *b; 2401 int sgnb = bcd_get_sgn(b); 2402 if (sgnb < 0) { 2403 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2404 } else if (sgnb > 0) { 2405 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2406 } 2407 /* else invalid ... defer to bcdadd code for proper handling */ 2408 2409 return helper_bcdadd(r, a, &bcopy, ps); 2410 } 2411 2412 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2413 { 2414 int i; 2415 int cr = 0; 2416 uint16_t national = 0; 2417 uint16_t sgnb = get_national_digit(b, 0); 2418 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2419 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2420 2421 for (i = 1; i < 8; i++) { 2422 national = get_national_digit(b, i); 2423 if (unlikely(national < 0x30 || national > 0x39)) { 2424 invalid = 1; 2425 break; 2426 } 2427 2428 bcd_put_digit(&ret, national & 0xf, i); 2429 } 2430 2431 if (sgnb == NATIONAL_PLUS) { 2432 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2433 } else { 2434 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2435 } 2436 2437 cr = bcd_cmp_zero(&ret); 2438 2439 if (unlikely(invalid)) { 2440 cr = CRF_SO; 2441 } 2442 2443 *r = ret; 2444 2445 return cr; 2446 } 2447 2448 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2449 { 2450 int i; 2451 int cr = 0; 2452 int sgnb = bcd_get_sgn(b); 2453 int invalid = (sgnb == 0); 2454 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2455 2456 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0); 2457 2458 for (i = 1; i < 8; i++) { 2459 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2460 2461 if (unlikely(invalid)) { 2462 break; 2463 } 2464 } 2465 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2466 2467 cr = bcd_cmp_zero(b); 2468 2469 if (ox_flag) { 2470 cr |= CRF_SO; 2471 } 2472 2473 if (unlikely(invalid)) { 2474 cr = CRF_SO; 2475 } 2476 2477 *r = ret; 2478 2479 return cr; 2480 } 2481 2482 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2483 { 2484 int i; 2485 int cr = 0; 2486 int invalid = 0; 2487 int zone_digit = 0; 2488 int zone_lead = ps ? 0xF : 0x3; 2489 int digit = 0; 2490 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2491 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4; 2492 2493 if (unlikely((sgnb < 0xA) && ps)) { 2494 invalid = 1; 2495 } 2496 2497 for (i = 0; i < 16; i++) { 2498 zone_digit = i ? 
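/*
 * bcdcfz converts from zoned decimal: one byte per digit with the
 * decimal digit in the low nibble and a "zone" in the high nibble
 * (0x3, the ASCII convention, when PS=0; 0xF, the EBCDIC convention,
 * when PS=1).  The zone of the least significant byte carries the sign
 * instead and is not checked against the zone value: with PS=0 a zone
 * with bit 0x4 set (e.g. 0x7) means minus, with PS=1 the codes 0xB and
 * 0xD mean minus.  For instance, with PS=0 the bytes 0x30 ... 0x30
 * 0x32 0x35 ("0...025") convert to +25, while a final byte of 0x75
 * would make it -25.
 */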
b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead; 2499 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF; 2500 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2501 invalid = 1; 2502 break; 2503 } 2504 2505 bcd_put_digit(&ret, digit, i + 1); 2506 } 2507 2508 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2509 (!ps && (sgnb & 0x4))) { 2510 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2511 } else { 2512 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2513 } 2514 2515 cr = bcd_cmp_zero(&ret); 2516 2517 if (unlikely(invalid)) { 2518 cr = CRF_SO; 2519 } 2520 2521 *r = ret; 2522 2523 return cr; 2524 } 2525 2526 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2527 { 2528 int i; 2529 int cr = 0; 2530 uint8_t digit = 0; 2531 int sgnb = bcd_get_sgn(b); 2532 int zone_lead = (ps) ? 0xF0 : 0x30; 2533 int invalid = (sgnb == 0); 2534 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2535 2536 int ox_flag = ((b->VsrD(0) >> 4) != 0); 2537 2538 for (i = 0; i < 16; i++) { 2539 digit = bcd_get_digit(b, i + 1, &invalid); 2540 2541 if (unlikely(invalid)) { 2542 break; 2543 } 2544 2545 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit; 2546 } 2547 2548 if (ps) { 2549 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2550 } else { 2551 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1); 2552 } 2553 2554 cr = bcd_cmp_zero(b); 2555 2556 if (ox_flag) { 2557 cr |= CRF_SO; 2558 } 2559 2560 if (unlikely(invalid)) { 2561 cr = CRF_SO; 2562 } 2563 2564 *r = ret; 2565 2566 return cr; 2567 } 2568 2569 /** 2570 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs 2571 * 2572 * Returns: 2573 * > 0 if ahi|alo > bhi|blo, 2574 * 0 if ahi|alo == bhi|blo, 2575 * < 0 if ahi|alo < bhi|blo 2576 */ 2577 static inline int ucmp128(uint64_t alo, uint64_t ahi, 2578 uint64_t blo, uint64_t bhi) 2579 { 2580 return (ahi == bhi) ? 2581 (alo > blo ? 1 : (alo == blo ? 0 : -1)) : 2582 (ahi > bhi ? 1 : -1); 2583 } 2584 2585 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2586 { 2587 int i; 2588 int cr; 2589 uint64_t lo_value; 2590 uint64_t hi_value; 2591 uint64_t rem; 2592 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2593 2594 if (b->VsrSD(0) < 0) { 2595 lo_value = -b->VsrSD(1); 2596 hi_value = ~b->VsrD(0) + !lo_value; 2597 bcd_put_digit(&ret, 0xD, 0); 2598 2599 cr = CRF_LT; 2600 } else { 2601 lo_value = b->VsrD(1); 2602 hi_value = b->VsrD(0); 2603 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); 2604 2605 if (hi_value == 0 && lo_value == 0) { 2606 cr = CRF_EQ; 2607 } else { 2608 cr = CRF_GT; 2609 } 2610 } 2611 2612 /* 2613 * Check src limits: abs(src) <= 10^31 - 1 2614 * 2615 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff 2616 */ 2617 if (ucmp128(lo_value, hi_value, 2618 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) { 2619 cr |= CRF_SO; 2620 2621 /* 2622 * According to the ISA, if src wouldn't fit in the destination 2623 * register, the result is undefined. 2624 * In that case, we leave r unchanged. 
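     *
     * Otherwise the 128-bit magnitude is split by 10^15: divu128()
     * leaves the quotient in hi_value:lo_value and returns the
     * remainder, the remainder supplying packed digits 1-15 and the
     * quotient (always below 10^16 here, so it fits in lo_value)
     * supplying digits 16-31.  For example, src = 1234567890123456789
     * splits into remainder 567890123456789 (digits 1-15) and
     * quotient 1234 (digits 16-31).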
2625 */ 2626 } else { 2627 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL); 2628 2629 for (i = 1; i < 16; rem /= 10, i++) { 2630 bcd_put_digit(&ret, rem % 10, i); 2631 } 2632 2633 for (; i < 32; lo_value /= 10, i++) { 2634 bcd_put_digit(&ret, lo_value % 10, i); 2635 } 2636 2637 *r = ret; 2638 } 2639 2640 return cr; 2641 } 2642 2643 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2644 { 2645 uint8_t i; 2646 int cr; 2647 uint64_t carry; 2648 uint64_t unused; 2649 uint64_t lo_value; 2650 uint64_t hi_value = 0; 2651 int sgnb = bcd_get_sgn(b); 2652 int invalid = (sgnb == 0); 2653 2654 lo_value = bcd_get_digit(b, 31, &invalid); 2655 for (i = 30; i > 0; i--) { 2656 mulu64(&lo_value, &carry, lo_value, 10ULL); 2657 mulu64(&hi_value, &unused, hi_value, 10ULL); 2658 lo_value += bcd_get_digit(b, i, &invalid); 2659 hi_value += carry; 2660 2661 if (unlikely(invalid)) { 2662 break; 2663 } 2664 } 2665 2666 if (sgnb == -1) { 2667 r->VsrSD(1) = -lo_value; 2668 r->VsrSD(0) = ~hi_value + !r->VsrSD(1); 2669 } else { 2670 r->VsrSD(1) = lo_value; 2671 r->VsrSD(0) = hi_value; 2672 } 2673 2674 cr = bcd_cmp_zero(b); 2675 2676 if (unlikely(invalid)) { 2677 cr = CRF_SO; 2678 } 2679 2680 return cr; 2681 } 2682 2683 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2684 { 2685 int i; 2686 int invalid = 0; 2687 2688 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 2689 return CRF_SO; 2690 } 2691 2692 *r = *a; 2693 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0); 2694 2695 for (i = 1; i < 32; i++) { 2696 bcd_get_digit(a, i, &invalid); 2697 bcd_get_digit(b, i, &invalid); 2698 if (unlikely(invalid)) { 2699 return CRF_SO; 2700 } 2701 } 2702 2703 return bcd_cmp_zero(r); 2704 } 2705 2706 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2707 { 2708 int sgnb = bcd_get_sgn(b); 2709 2710 *r = *b; 2711 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 2712 2713 if (bcd_is_valid(b) == false) { 2714 return CRF_SO; 2715 } 2716 2717 return bcd_cmp_zero(r); 2718 } 2719 2720 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2721 { 2722 int cr; 2723 int i = a->VsrSB(7); 2724 bool ox_flag = false; 2725 int sgnb = bcd_get_sgn(b); 2726 ppc_avr_t ret = *b; 2727 ret.VsrD(1) &= ~0xf; 2728 2729 if (bcd_is_valid(b) == false) { 2730 return CRF_SO; 2731 } 2732 2733 if (unlikely(i > 31)) { 2734 i = 31; 2735 } else if (unlikely(i < -31)) { 2736 i = -31; 2737 } 2738 2739 if (i > 0) { 2740 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2741 } else { 2742 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2743 } 2744 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2745 2746 *r = ret; 2747 2748 cr = bcd_cmp_zero(r); 2749 if (ox_flag) { 2750 cr |= CRF_SO; 2751 } 2752 2753 return cr; 2754 } 2755 2756 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2757 { 2758 int cr; 2759 int i; 2760 int invalid = 0; 2761 bool ox_flag = false; 2762 ppc_avr_t ret = *b; 2763 2764 for (i = 0; i < 32; i++) { 2765 bcd_get_digit(b, i, &invalid); 2766 2767 if (unlikely(invalid)) { 2768 return CRF_SO; 2769 } 2770 } 2771 2772 i = a->VsrSB(7); 2773 if (i >= 32) { 2774 ox_flag = true; 2775 ret.VsrD(1) = ret.VsrD(0) = 0; 2776 } else if (i <= -32) { 2777 ret.VsrD(1) = ret.VsrD(0) = 0; 2778 } else if (i > 0) { 2779 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2780 } else { 2781 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2782 } 2783 *r = ret; 2784 2785 cr = bcd_cmp_zero(r); 2786 if (ox_flag) { 2787 cr |= CRF_SO; 2788 } 2789 2790 return 
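/*
 * For both bcds and bcdus the shift count is the signed byte element 7
 * of ra, expressed in decimal digits: a positive count shifts towards
 * the most significant digit (one power of ten per step) and a
 * negative count shifts right, truncating low digits, hence the i * 4
 * bit shifts.  For bcds, e.g., shifting +123 left by two digits gives
 * +12300: 0x...123C has its sign nibble stripped, is shifted to
 * 0x...123000 and gets the sign re-inserted as 0x...12300C.
 */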
cr; 2791 } 2792 2793 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2794 { 2795 int cr; 2796 int unused = 0; 2797 int invalid = 0; 2798 bool ox_flag = false; 2799 int sgnb = bcd_get_sgn(b); 2800 ppc_avr_t ret = *b; 2801 ret.VsrD(1) &= ~0xf; 2802 2803 int i = a->VsrSB(7); 2804 ppc_avr_t bcd_one; 2805 2806 bcd_one.VsrD(0) = 0; 2807 bcd_one.VsrD(1) = 0x10; 2808 2809 if (bcd_is_valid(b) == false) { 2810 return CRF_SO; 2811 } 2812 2813 if (unlikely(i > 31)) { 2814 i = 31; 2815 } else if (unlikely(i < -31)) { 2816 i = -31; 2817 } 2818 2819 if (i > 0) { 2820 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2821 } else { 2822 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2823 2824 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 2825 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 2826 } 2827 } 2828 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2829 2830 cr = bcd_cmp_zero(&ret); 2831 if (ox_flag) { 2832 cr |= CRF_SO; 2833 } 2834 *r = ret; 2835 2836 return cr; 2837 } 2838 2839 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2840 { 2841 uint64_t mask; 2842 uint32_t ox_flag = 0; 2843 int i = a->VsrSH(3) + 1; 2844 ppc_avr_t ret = *b; 2845 2846 if (bcd_is_valid(b) == false) { 2847 return CRF_SO; 2848 } 2849 2850 if (i > 16 && i < 32) { 2851 mask = (uint64_t)-1 >> (128 - i * 4); 2852 if (ret.VsrD(0) & ~mask) { 2853 ox_flag = CRF_SO; 2854 } 2855 2856 ret.VsrD(0) &= mask; 2857 } else if (i >= 0 && i <= 16) { 2858 mask = (uint64_t)-1 >> (64 - i * 4); 2859 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 2860 ox_flag = CRF_SO; 2861 } 2862 2863 ret.VsrD(1) &= mask; 2864 ret.VsrD(0) = 0; 2865 } 2866 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 2867 *r = ret; 2868 2869 return bcd_cmp_zero(&ret) | ox_flag; 2870 } 2871 2872 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2873 { 2874 int i; 2875 uint64_t mask; 2876 uint32_t ox_flag = 0; 2877 int invalid = 0; 2878 ppc_avr_t ret = *b; 2879 2880 for (i = 0; i < 32; i++) { 2881 bcd_get_digit(b, i, &invalid); 2882 2883 if (unlikely(invalid)) { 2884 return CRF_SO; 2885 } 2886 } 2887 2888 i = a->VsrSH(3); 2889 if (i > 16 && i < 33) { 2890 mask = (uint64_t)-1 >> (128 - i * 4); 2891 if (ret.VsrD(0) & ~mask) { 2892 ox_flag = CRF_SO; 2893 } 2894 2895 ret.VsrD(0) &= mask; 2896 } else if (i > 0 && i <= 16) { 2897 mask = (uint64_t)-1 >> (64 - i * 4); 2898 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 2899 ox_flag = CRF_SO; 2900 } 2901 2902 ret.VsrD(1) &= mask; 2903 ret.VsrD(0) = 0; 2904 } else if (i == 0) { 2905 if (ret.VsrD(0) || ret.VsrD(1)) { 2906 ox_flag = CRF_SO; 2907 } 2908 ret.VsrD(0) = ret.VsrD(1) = 0; 2909 } 2910 2911 *r = ret; 2912 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) { 2913 return ox_flag | CRF_EQ; 2914 } 2915 2916 return ox_flag | CRF_GT; 2917 } 2918 2919 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 2920 { 2921 int i; 2922 VECTOR_FOR_INORDER_I(i, u8) { 2923 r->u8[i] = AES_sbox[a->u8[i]]; 2924 } 2925 } 2926 2927 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2928 { 2929 AESState *ad = (AESState *)r; 2930 AESState *st = (AESState *)a; 2931 AESState *rk = (AESState *)b; 2932 2933 aesenc_SB_SR_MC_AK(ad, st, rk, true); 2934 } 2935 2936 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2937 { 2938 aesenc_SB_SR_AK((AESState *)r, (AESState *)a, (AESState *)b, true); 2939 } 2940 2941 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2942 { 2943 AESState *ad = (AESState *)r; 2944 AESState *st = (AESState *)a; 
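/*
 * These AES helpers are thin wrappers around the generic routines from
 * crypto/aes-round.h: vcipher is one middle encryption round
 * (SubBytes, ShiftRows, MixColumns, AddRoundKey) and vcipherlast drops
 * MixColumns for the final round.  As the callee names spell out,
 * vncipher applies InvSubBytes/InvShiftRows, adds the round key and
 * only then applies InvMixColumns, while vncipherlast omits the
 * InvMixColumns step altogether.
 */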
2945 AESState *rk = (AESState *)b; 2946 2947 aesdec_ISB_ISR_AK_IMC(ad, st, rk, true); 2948 } 2949 2950 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2951 { 2952 aesdec_ISB_ISR_AK((AESState *)r, (AESState *)a, (AESState *)b, true); 2953 } 2954 2955 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 2956 { 2957 int st = (st_six & 0x10) != 0; 2958 int six = st_six & 0xF; 2959 int i; 2960 2961 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2962 if (st == 0) { 2963 if ((six & (0x8 >> i)) == 0) { 2964 r->VsrW(i) = ror32(a->VsrW(i), 7) ^ 2965 ror32(a->VsrW(i), 18) ^ 2966 (a->VsrW(i) >> 3); 2967 } else { /* six.bit[i] == 1 */ 2968 r->VsrW(i) = ror32(a->VsrW(i), 17) ^ 2969 ror32(a->VsrW(i), 19) ^ 2970 (a->VsrW(i) >> 10); 2971 } 2972 } else { /* st == 1 */ 2973 if ((six & (0x8 >> i)) == 0) { 2974 r->VsrW(i) = ror32(a->VsrW(i), 2) ^ 2975 ror32(a->VsrW(i), 13) ^ 2976 ror32(a->VsrW(i), 22); 2977 } else { /* six.bit[i] == 1 */ 2978 r->VsrW(i) = ror32(a->VsrW(i), 6) ^ 2979 ror32(a->VsrW(i), 11) ^ 2980 ror32(a->VsrW(i), 25); 2981 } 2982 } 2983 } 2984 } 2985 2986 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 2987 { 2988 int st = (st_six & 0x10) != 0; 2989 int six = st_six & 0xF; 2990 int i; 2991 2992 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 2993 if (st == 0) { 2994 if ((six & (0x8 >> (2 * i))) == 0) { 2995 r->VsrD(i) = ror64(a->VsrD(i), 1) ^ 2996 ror64(a->VsrD(i), 8) ^ 2997 (a->VsrD(i) >> 7); 2998 } else { /* six.bit[2*i] == 1 */ 2999 r->VsrD(i) = ror64(a->VsrD(i), 19) ^ 3000 ror64(a->VsrD(i), 61) ^ 3001 (a->VsrD(i) >> 6); 3002 } 3003 } else { /* st == 1 */ 3004 if ((six & (0x8 >> (2 * i))) == 0) { 3005 r->VsrD(i) = ror64(a->VsrD(i), 28) ^ 3006 ror64(a->VsrD(i), 34) ^ 3007 ror64(a->VsrD(i), 39); 3008 } else { /* six.bit[2*i] == 1 */ 3009 r->VsrD(i) = ror64(a->VsrD(i), 14) ^ 3010 ror64(a->VsrD(i), 18) ^ 3011 ror64(a->VsrD(i), 41); 3012 } 3013 } 3014 } 3015 } 3016 3017 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 3018 { 3019 ppc_avr_t result; 3020 int i; 3021 3022 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 3023 int indexA = c->VsrB(i) >> 4; 3024 int indexB = c->VsrB(i) & 0xF; 3025 3026 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB); 3027 } 3028 *r = result; 3029 } 3030 3031 #undef VECTOR_FOR_INORDER_I 3032 3033 /*****************************************************************************/ 3034 /* SPE extension helpers */ 3035 /* Use a table to make this quicker */ 3036 static const uint8_t hbrev[16] = { 3037 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE, 3038 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF, 3039 }; 3040 3041 static inline uint8_t byte_reverse(uint8_t val) 3042 { 3043 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4); 3044 } 3045 3046 static inline uint32_t word_reverse(uint32_t val) 3047 { 3048 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) | 3049 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24); 3050 } 3051 3052 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */ 3053 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2) 3054 { 3055 uint32_t a, b, d, mask; 3056 3057 mask = UINT32_MAX >> (32 - MASKBITS); 3058 a = arg1 & mask; 3059 b = arg2 & mask; 3060 d = word_reverse(1 + word_reverse(a | ~b)); 3061 return (arg1 & ~mask) | (d & b); 3062 } 3063 3064 uint32_t helper_cntlsw32(uint32_t val) 3065 { 3066 if (val & 0x80000000) { 3067 return clz32(~val); 3068 } else { 3069 return clz32(val); 3070 } 3071 } 3072 3073 uint32_t helper_cntlzw32(uint32_t val) 3074 
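/*
 * brinc above is the SPE bit-reversed increment, typically used for
 * FFT style addressing: the low MASKBITS bits selected by arg2 are
 * bit-reversed, incremented by one and reversed back, while bits of
 * arg1 above that window pass through unchanged.  For example, with a
 * 3-bit mask (arg2 = 0x7): the reversal of 4 (0b100) is 1, adding one
 * gives 2, whose reversal is 2 again, so helper_brinc(4, 7) == 2.
 * helper_cntlsw32 counts how many leading bits equal the sign bit,
 * and helper_cntlzw32 below is a plain leading-zero count.
 */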
{ 3075 return clz32(val); 3076 } 3077 3078 /* 440 specific */ 3079 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 3080 target_ulong low, uint32_t update_Rc) 3081 { 3082 target_ulong mask; 3083 int i; 3084 3085 i = 1; 3086 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3087 if ((high & mask) == 0) { 3088 if (update_Rc) { 3089 env->crf[0] = 0x4; 3090 } 3091 goto done; 3092 } 3093 i++; 3094 } 3095 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3096 if ((low & mask) == 0) { 3097 if (update_Rc) { 3098 env->crf[0] = 0x8; 3099 } 3100 goto done; 3101 } 3102 i++; 3103 } 3104 i = 8; 3105 if (update_Rc) { 3106 env->crf[0] = 0x2; 3107 } 3108 done: 3109 env->xer = (env->xer & ~0x7F) | i; 3110 if (update_Rc) { 3111 env->crf[0] |= xer_so; 3112 } 3113 return i; 3114 } 3115
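/*
 * helper_dlmzb scans the eight bytes of high:low starting at the most
 * significant byte of "high" and returns the 1-based position of the
 * first zero byte, or 8 if there is none; the same count is written to
 * the low seven bits of XER.  When update_Rc is set, CR0 is 0b0100 if
 * the zero byte lies in "high", 0b1000 if it lies in "low" and 0b0010
 * if no zero byte was found, OR'ed with XER[SO].  For example,
 * high = 0x41420043 ('A', 'B', '\0', 'C') stops at the third byte, so
 * the helper returns 3 and CR0, if updated, is 0b0100.  This is the
 * 440's "determine leftmost zero byte" primitive, intended for string
 * length searches.
 */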