/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "crypto/aes-round.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"
#include "tcg/tcg-gvec-desc.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = env->ov32 = 1;
    } else {
        env->ov = env->ov32 = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}
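
/*
 * Worked example (illustrative values, not from the original source):
 * divweu forms the dividend from (RA) in the high word and zeroes in the
 * low word, so divweu(ra = 1, rb = 2) computes 0x100000000 / 2 =
 * 0x80000000, which still fits in 32 bits and does not overflow.  With
 * ra = 1 and rb = 1 the quotient would be 0x100000000, which does not
 * fit, so the result is undefined and OV is set when OE is in effect.
 */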

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    if (unlikely(rb == 0 || ra >= rb)) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divu128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    uint64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = 0;

    if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divs128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/*
 * subtract 1 from each byte, and with inverse, check if MSB is set at each
 * byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n)  (haszero((x) ^ pattern(n)))

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue
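
/*
 * Worked example (illustrative values, not from the original source):
 * helper_cmpeqb(0x66, 0x1122334455667788) XORs the doubleword with
 * pattern(0x66) = 0x6666666666666666, giving 0x77445522330011ee.  The
 * zero byte produced by the matching 0x66 is then caught by haszero(),
 * since (0x00 - 0x01) & ~0x00 keeps the 0x80 bit of that byte, so the
 * helper returns CRF_GT.
 */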

/*
 * Return a random number.
 */
uint64_t helper_darn32(void)
{
    Error *err = NULL;
    uint32_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_darn64(void)
{
    Error *err = NULL;
    uint64_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & PPC_BIT(index)) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca32 = env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca32 = env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words. */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}
#endif

uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
{
    /*
     * Instead of processing the mask bit-by-bit from the most significant to
     * the least significant bit, as described in PowerISA, we'll handle it in
     * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
     * ctz or cto, we negate the mask at the end of the loop.
     */
    target_ulong m, left = 0, right = 0;
    unsigned int n, i = 64;
    bool bit = false;    /* tracks if we are processing zeros or ones */

    if (mask == 0 || mask == -1) {
        return src;
    }

    /* Processes the mask in blocks, from LSB to MSB */
    while (i) {
        /* Find how many bits we should take */
        n = ctz64(mask);
        if (n > i) {
            n = i;
        }

        /*
         * Extracts 'n' trailing bits of src and put them on the leading 'n'
         * bits of 'right' or 'left', pushing down the previously extracted
         * values.
         */
        m = (1ll << n) - 1;
        if (bit) {
            right = ror64(right | (src & m), n);
        } else {
            left = ror64(left | (src & m), n);
        }

        /*
         * Discards the processed bits from 'src' and 'mask'. Note that we are
         * removing 'n' trailing zeros from 'mask', but the logical shift will
         * add 'n' leading zeros back, so the population count of 'mask' is
         * kept the same.
         */
        src >>= n;
        mask >>= n;
        i -= n;
        bit = !bit;
        mask = ~mask;
    }

    /*
     * At the end, right was ror'ed ctpop(mask) times. To put it back in place,
     * we'll shift it more 64-ctpop(mask) times.
     */
    if (bit) {
        n = ctpop64(mask);
    } else {
        n = 64 - ctpop64(mask);
    }

    return left | (right >> n);
}
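
/*
 * Worked example (illustrative values, not from the original source):
 * with src = 0xab (0b10101011) and mask = 0xf0, the four src bits selected
 * by the mask (0b1010) are gathered at the low end of the result and the
 * remaining bits (... 0b1011) are gathered above them in order, so
 * helper_CFUGED returns 0xba.
 */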

uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
{
    int i, o;
    uint64_t result = 0;

    if (mask == -1) {
        return src;
    }

    for (i = 0; mask != 0; i++) {
        o = ctz64(mask);
        mask &= mask - 1;
        result |= ((src >> i) & 1) << o;
    }

    return result;
}

uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
{
    int i, o;
    uint64_t result = 0;

    if (mask == -1) {
        return src;
    }

    for (o = 0; mask != 0; o++) {
        i = ctz64(mask);
        mask &= mask - 1;
        result |= ((src >> i) & 1) << o;
    }

    return result;
}

/*****************************************************************************/
/* Altivec extension helpers */
#if HOST_BIG_ENDIAN
#define VECTOR_FOR_INORDER_I(index, element)            \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)            \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers. */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
{
    ppc_store_vscr(env, vscr);
}

uint32_t helper_mfvscr(CPUPPCState *env)
{
    return ppc_get_vscr(env);
}

static inline void set_vscr_sat(CPUPPCState *env)
{
    /* The choice of non-zero value is arbitrary.
*/ 493 env->vscr_sat.u32[0] = 1; 494 } 495 496 /* vprtybq */ 497 void helper_VPRTYBQ(ppc_avr_t *r, ppc_avr_t *b, uint32_t v) 498 { 499 uint64_t res = b->u64[0] ^ b->u64[1]; 500 res ^= res >> 32; 501 res ^= res >> 16; 502 res ^= res >> 8; 503 r->VsrD(1) = res & 1; 504 r->VsrD(0) = 0; 505 } 506 507 #define VARITHFP(suffix, func) \ 508 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 509 ppc_avr_t *b) \ 510 { \ 511 int i; \ 512 \ 513 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 514 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 515 } \ 516 } 517 VARITHFP(addfp, float32_add) 518 VARITHFP(subfp, float32_sub) 519 VARITHFP(minfp, float32_min) 520 VARITHFP(maxfp, float32_max) 521 #undef VARITHFP 522 523 #define VARITHFPFMA(suffix, type) \ 524 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 525 ppc_avr_t *b, ppc_avr_t *c) \ 526 { \ 527 int i; \ 528 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 529 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 530 type, &env->vec_status); \ 531 } \ 532 } 533 VARITHFPFMA(maddfp, 0); 534 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 535 #undef VARITHFPFMA 536 537 #define VARITHSAT_CASE(type, op, cvt, element) \ 538 { \ 539 type result = (type)a->element[i] op (type)b->element[i]; \ 540 r->element[i] = cvt(result, &sat); \ 541 } 542 543 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 544 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 545 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 546 { \ 547 int sat = 0; \ 548 int i; \ 549 \ 550 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 551 VARITHSAT_CASE(optype, op, cvt, element); \ 552 } \ 553 if (sat) { \ 554 vscr_sat->u32[0] = 1; \ 555 } \ 556 } 557 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 558 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 559 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 560 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 561 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 562 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 563 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 564 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 565 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 566 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 567 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 568 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 569 #undef VARITHSAT_CASE 570 #undef VARITHSAT_DO 571 #undef VARITHSAT_SIGNED 572 #undef VARITHSAT_UNSIGNED 573 574 #define VAVG(name, element, etype) \ 575 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\ 576 { \ 577 int i; \ 578 \ 579 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 580 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 581 r->element[i] = x >> 1; \ 582 } \ 583 } 584 585 VAVG(VAVGSB, s8, int16_t) 586 VAVG(VAVGUB, u8, uint16_t) 587 VAVG(VAVGSH, s16, int32_t) 588 VAVG(VAVGUH, u16, uint32_t) 589 VAVG(VAVGSW, s32, int64_t) 590 VAVG(VAVGUW, u32, uint64_t) 591 #undef VAVG 592 593 #define VABSDU(name, element) \ 594 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\ 595 { \ 596 int i; \ 597 \ 598 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 599 r->element[i] = (a->element[i] > b->element[i]) ? 
\ 600 (a->element[i] - b->element[i]) : \ 601 (b->element[i] - a->element[i]); \ 602 } \ 603 } 604 605 /* 606 * VABSDU - Vector absolute difference unsigned 607 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 608 * element - element type to access from vector 609 */ 610 VABSDU(VABSDUB, u8) 611 VABSDU(VABSDUH, u16) 612 VABSDU(VABSDUW, u32) 613 #undef VABSDU 614 615 #define VCF(suffix, cvt, element) \ 616 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 617 ppc_avr_t *b, uint32_t uim) \ 618 { \ 619 int i; \ 620 \ 621 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 622 float32 t = cvt(b->element[i], &env->vec_status); \ 623 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 624 } \ 625 } 626 VCF(ux, uint32_to_float32, u32) 627 VCF(sx, int32_to_float32, s32) 628 #undef VCF 629 630 #define VCMPNEZ(NAME, ELEM) \ 631 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \ 632 { \ 633 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \ 634 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \ 635 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \ 636 } \ 637 } 638 VCMPNEZ(VCMPNEZB, u8) 639 VCMPNEZ(VCMPNEZH, u16) 640 VCMPNEZ(VCMPNEZW, u32) 641 #undef VCMPNEZ 642 643 #define VCMPFP_DO(suffix, compare, order, record) \ 644 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 645 ppc_avr_t *a, ppc_avr_t *b) \ 646 { \ 647 uint32_t ones = (uint32_t)-1; \ 648 uint32_t all = ones; \ 649 uint32_t none = 0; \ 650 int i; \ 651 \ 652 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 653 uint32_t result; \ 654 FloatRelation rel = \ 655 float32_compare_quiet(a->f32[i], b->f32[i], \ 656 &env->vec_status); \ 657 if (rel == float_relation_unordered) { \ 658 result = 0; \ 659 } else if (rel compare order) { \ 660 result = ones; \ 661 } else { \ 662 result = 0; \ 663 } \ 664 r->u32[i] = result; \ 665 all &= result; \ 666 none |= result; \ 667 } \ 668 if (record) { \ 669 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 670 } \ 671 } 672 #define VCMPFP(suffix, compare, order) \ 673 VCMPFP_DO(suffix, compare, order, 0) \ 674 VCMPFP_DO(suffix##_dot, compare, order, 1) 675 VCMPFP(eqfp, ==, float_relation_equal) 676 VCMPFP(gefp, !=, float_relation_less) 677 VCMPFP(gtfp, ==, float_relation_greater) 678 #undef VCMPFP_DO 679 #undef VCMPFP 680 681 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 682 ppc_avr_t *a, ppc_avr_t *b, int record) 683 { 684 int i; 685 int all_in = 0; 686 687 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 688 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 689 &env->vec_status); 690 if (le_rel == float_relation_unordered) { 691 r->u32[i] = 0xc0000000; 692 all_in = 1; 693 } else { 694 float32 bneg = float32_chs(b->f32[i]); 695 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, 696 &env->vec_status); 697 int le = le_rel != float_relation_greater; 698 int ge = ge_rel != float_relation_less; 699 700 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 701 all_in |= (!le | !ge); 702 } 703 } 704 if (record) { 705 env->crf[6] = (all_in == 0) << 1; 706 } 707 } 708 709 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 710 { 711 vcmpbfp_internal(env, r, a, b, 0); 712 } 713 714 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 715 ppc_avr_t *b) 716 { 717 vcmpbfp_internal(env, r, a, b, 1); 718 } 719 720 #define VCT(suffix, satcvt, element) \ 721 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 722 ppc_avr_t *b, uint32_t uim) \ 723 { \ 724 int i; \ 725 int sat = 0; \ 
726 float_status s = env->vec_status; \ 727 \ 728 set_float_rounding_mode(float_round_to_zero, &s); \ 729 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 730 if (float32_is_any_nan(b->f32[i])) { \ 731 r->element[i] = 0; \ 732 } else { \ 733 float64 t = float32_to_float64(b->f32[i], &s); \ 734 int64_t j; \ 735 \ 736 t = float64_scalbn(t, uim, &s); \ 737 j = float64_to_int64(t, &s); \ 738 r->element[i] = satcvt(j, &sat); \ 739 } \ 740 } \ 741 if (sat) { \ 742 set_vscr_sat(env); \ 743 } \ 744 } 745 VCT(uxs, cvtsduw, u32) 746 VCT(sxs, cvtsdsw, s32) 747 #undef VCT 748 749 typedef int64_t do_ger(uint32_t, uint32_t, uint32_t); 750 751 static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask) 752 { 753 int64_t psum = 0; 754 for (int i = 0; i < 8; i++, mask >>= 1) { 755 if (mask & 1) { 756 psum += (int64_t)sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4); 757 } 758 } 759 return psum; 760 } 761 762 static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask) 763 { 764 int64_t psum = 0; 765 for (int i = 0; i < 4; i++, mask >>= 1) { 766 if (mask & 1) { 767 psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8); 768 } 769 } 770 return psum; 771 } 772 773 static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask) 774 { 775 int64_t psum = 0; 776 for (int i = 0; i < 2; i++, mask >>= 1) { 777 if (mask & 1) { 778 psum += (int64_t)sextract32(a, 16 * i, 16) * 779 sextract32(b, 16 * i, 16); 780 } 781 } 782 return psum; 783 } 784 785 static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at, 786 uint32_t mask, bool sat, bool acc, do_ger ger) 787 { 788 uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK), 789 xmsk = FIELD_EX32(mask, GER_MSK, XMSK), 790 ymsk = FIELD_EX32(mask, GER_MSK, YMSK); 791 uint8_t xmsk_bit, ymsk_bit; 792 int64_t psum; 793 int i, j; 794 for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) { 795 for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) { 796 if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) { 797 psum = ger(a->VsrW(i), b->VsrW(j), pmsk); 798 if (acc) { 799 psum += at[i].VsrSW(j); 800 } 801 if (sat && psum > INT32_MAX) { 802 set_vscr_sat(env); 803 at[i].VsrSW(j) = INT32_MAX; 804 } else if (sat && psum < INT32_MIN) { 805 set_vscr_sat(env); 806 at[i].VsrSW(j) = INT32_MIN; 807 } else { 808 at[i].VsrSW(j) = (int32_t) psum; 809 } 810 } else { 811 at[i].VsrSW(j) = 0; 812 } 813 } 814 } 815 } 816 817 QEMU_FLATTEN 818 void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 819 ppc_acc_t *at, uint32_t mask) 820 { 821 xviger(env, a, b, at, mask, false, false, ger_rank8); 822 } 823 824 QEMU_FLATTEN 825 void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 826 ppc_acc_t *at, uint32_t mask) 827 { 828 xviger(env, a, b, at, mask, false, true, ger_rank8); 829 } 830 831 QEMU_FLATTEN 832 void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 833 ppc_acc_t *at, uint32_t mask) 834 { 835 xviger(env, a, b, at, mask, false, false, ger_rank4); 836 } 837 838 QEMU_FLATTEN 839 void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 840 ppc_acc_t *at, uint32_t mask) 841 { 842 xviger(env, a, b, at, mask, false, true, ger_rank4); 843 } 844 845 QEMU_FLATTEN 846 void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 847 ppc_acc_t *at, uint32_t mask) 848 { 849 xviger(env, a, b, at, mask, true, true, ger_rank4); 850 } 851 852 QEMU_FLATTEN 853 void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 854 ppc_acc_t *at, uint32_t mask) 855 { 856 xviger(env, a, b, at, mask, false, false, ger_rank2); 
857 } 858 859 QEMU_FLATTEN 860 void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 861 ppc_acc_t *at, uint32_t mask) 862 { 863 xviger(env, a, b, at, mask, true, false, ger_rank2); 864 } 865 866 QEMU_FLATTEN 867 void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 868 ppc_acc_t *at, uint32_t mask) 869 { 870 xviger(env, a, b, at, mask, false, true, ger_rank2); 871 } 872 873 QEMU_FLATTEN 874 void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 875 ppc_acc_t *at, uint32_t mask) 876 { 877 xviger(env, a, b, at, mask, true, true, ger_rank2); 878 } 879 880 target_ulong helper_vclzlsbb(ppc_avr_t *r) 881 { 882 target_ulong count = 0; 883 int i; 884 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 885 if (r->VsrB(i) & 0x01) { 886 break; 887 } 888 count++; 889 } 890 return count; 891 } 892 893 target_ulong helper_vctzlsbb(ppc_avr_t *r) 894 { 895 target_ulong count = 0; 896 int i; 897 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 898 if (r->VsrB(i) & 0x01) { 899 break; 900 } 901 count++; 902 } 903 return count; 904 } 905 906 void helper_VMHADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 907 ppc_avr_t *b, ppc_avr_t *c) 908 { 909 int sat = 0; 910 int i; 911 912 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 913 int32_t prod = a->s16[i] * b->s16[i]; 914 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 915 916 r->s16[i] = cvtswsh(t, &sat); 917 } 918 919 if (sat) { 920 set_vscr_sat(env); 921 } 922 } 923 924 void helper_VMHRADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 925 ppc_avr_t *b, ppc_avr_t *c) 926 { 927 int sat = 0; 928 int i; 929 930 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 931 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 932 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 933 r->s16[i] = cvtswsh(t, &sat); 934 } 935 936 if (sat) { 937 set_vscr_sat(env); 938 } 939 } 940 941 void helper_VMLADDUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c, 942 uint32_t v) 943 { 944 int i; 945 946 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 947 int32_t prod = a->s16[i] * b->s16[i]; 948 r->s16[i] = (int16_t) (prod + c->s16[i]); 949 } 950 } 951 952 #define VMRG_DO(name, element, access, ofs) \ 953 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 954 { \ 955 ppc_avr_t result; \ 956 int i, half = ARRAY_SIZE(r->element) / 2; \ 957 \ 958 for (i = 0; i < half; i++) { \ 959 result.access(i * 2 + 0) = a->access(i + ofs); \ 960 result.access(i * 2 + 1) = b->access(i + ofs); \ 961 } \ 962 *r = result; \ 963 } 964 965 #define VMRG(suffix, element, access) \ 966 VMRG_DO(mrgl##suffix, element, access, half) \ 967 VMRG_DO(mrgh##suffix, element, access, 0) 968 VMRG(b, u8, VsrB) 969 VMRG(h, u16, VsrH) 970 VMRG(w, u32, VsrW) 971 #undef VMRG_DO 972 #undef VMRG 973 974 void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 975 { 976 int32_t prod[16]; 977 int i; 978 979 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 980 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 981 } 982 983 VECTOR_FOR_INORDER_I(i, s32) { 984 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 985 prod[4 * i + 2] + prod[4 * i + 3]; 986 } 987 } 988 989 void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 990 { 991 int32_t prod[8]; 992 int i; 993 994 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 995 prod[i] = a->s16[i] * b->s16[i]; 996 } 997 998 VECTOR_FOR_INORDER_I(i, s32) { 999 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1000 } 1001 } 1002 1003 void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1004 ppc_avr_t *b, ppc_avr_t 
*c) 1005 { 1006 int32_t prod[8]; 1007 int i; 1008 int sat = 0; 1009 1010 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1011 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1012 } 1013 1014 VECTOR_FOR_INORDER_I(i, s32) { 1015 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1016 1017 r->u32[i] = cvtsdsw(t, &sat); 1018 } 1019 1020 if (sat) { 1021 set_vscr_sat(env); 1022 } 1023 } 1024 1025 void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1026 { 1027 uint16_t prod[16]; 1028 int i; 1029 1030 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1031 prod[i] = a->u8[i] * b->u8[i]; 1032 } 1033 1034 VECTOR_FOR_INORDER_I(i, u32) { 1035 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1036 prod[4 * i + 2] + prod[4 * i + 3]; 1037 } 1038 } 1039 1040 void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1041 { 1042 uint32_t prod[8]; 1043 int i; 1044 1045 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1046 prod[i] = a->u16[i] * b->u16[i]; 1047 } 1048 1049 VECTOR_FOR_INORDER_I(i, u32) { 1050 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1051 } 1052 } 1053 1054 void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1055 ppc_avr_t *b, ppc_avr_t *c) 1056 { 1057 uint32_t prod[8]; 1058 int i; 1059 int sat = 0; 1060 1061 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1062 prod[i] = a->u16[i] * b->u16[i]; 1063 } 1064 1065 VECTOR_FOR_INORDER_I(i, s32) { 1066 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1067 1068 r->u32[i] = cvtuduw(t, &sat); 1069 } 1070 1071 if (sat) { 1072 set_vscr_sat(env); 1073 } 1074 } 1075 1076 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 1077 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1078 { \ 1079 int i; \ 1080 \ 1081 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1082 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 1083 (cast)b->mul_access(i); \ 1084 } \ 1085 } 1086 1087 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 1088 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1089 { \ 1090 int i; \ 1091 \ 1092 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1093 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1094 (cast)b->mul_access(i + 1); \ 1095 } \ 1096 } 1097 1098 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1099 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \ 1100 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast) 1101 VMUL(SB, s8, VsrSB, VsrSH, int16_t) 1102 VMUL(SH, s16, VsrSH, VsrSW, int32_t) 1103 VMUL(SW, s32, VsrSW, VsrSD, int64_t) 1104 VMUL(UB, u8, VsrB, VsrH, uint16_t) 1105 VMUL(UH, u16, VsrH, VsrW, uint32_t) 1106 VMUL(UW, u32, VsrW, VsrD, uint64_t) 1107 #undef VMUL_DO_EVN 1108 #undef VMUL_DO_ODD 1109 #undef VMUL 1110 1111 void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv, 1112 target_ulong uim) 1113 { 1114 int i, idx; 1115 ppc_vsr_t tmp = { .u64 = {0, 0} }; 1116 1117 for (i = 0; i < ARRAY_SIZE(t->u8); i++) { 1118 if ((pcv->VsrB(i) >> 5) == uim) { 1119 idx = pcv->VsrB(i) & 0x1f; 1120 if (idx < ARRAY_SIZE(t->u8)) { 1121 tmp.VsrB(i) = s0->VsrB(idx); 1122 } else { 1123 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8)); 1124 } 1125 } 1126 } 1127 1128 *t = tmp; 1129 } 1130 1131 void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1132 { 1133 Int128 neg1 = int128_makes64(-1); 1134 Int128 int128_min = int128_make128(0, INT64_MIN); 1135 if (likely(int128_nz(b->s128) && 1136 
(int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) { 1137 t->s128 = int128_divs(a->s128, b->s128); 1138 } else { 1139 t->s128 = a->s128; /* Undefined behavior */ 1140 } 1141 } 1142 1143 void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1144 { 1145 if (int128_nz(b->s128)) { 1146 t->s128 = int128_divu(a->s128, b->s128); 1147 } else { 1148 t->s128 = a->s128; /* Undefined behavior */ 1149 } 1150 } 1151 1152 void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1153 { 1154 int i; 1155 int64_t high; 1156 uint64_t low; 1157 for (i = 0; i < 2; i++) { 1158 high = a->s64[i]; 1159 low = 0; 1160 if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) { 1161 t->s64[i] = a->s64[i]; /* Undefined behavior */ 1162 } else { 1163 divs128(&low, &high, b->s64[i]); 1164 t->s64[i] = low; 1165 } 1166 } 1167 } 1168 1169 void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1170 { 1171 int i; 1172 uint64_t high, low; 1173 for (i = 0; i < 2; i++) { 1174 high = a->u64[i]; 1175 low = 0; 1176 if (unlikely(!b->u64[i])) { 1177 t->u64[i] = a->u64[i]; /* Undefined behavior */ 1178 } else { 1179 divu128(&low, &high, b->u64[i]); 1180 t->u64[i] = low; 1181 } 1182 } 1183 } 1184 1185 void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1186 { 1187 Int128 high, low; 1188 Int128 int128_min = int128_make128(0, INT64_MIN); 1189 Int128 neg1 = int128_makes64(-1); 1190 1191 high = a->s128; 1192 low = int128_zero(); 1193 if (unlikely(!int128_nz(b->s128) || 1194 (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) { 1195 t->s128 = a->s128; /* Undefined behavior */ 1196 } else { 1197 divs256(&low, &high, b->s128); 1198 t->s128 = low; 1199 } 1200 } 1201 1202 void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1203 { 1204 Int128 high, low; 1205 1206 high = a->s128; 1207 low = int128_zero(); 1208 if (unlikely(!int128_nz(b->s128))) { 1209 t->s128 = a->s128; /* Undefined behavior */ 1210 } else { 1211 divu256(&low, &high, b->s128); 1212 t->s128 = low; 1213 } 1214 } 1215 1216 void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1217 { 1218 Int128 neg1 = int128_makes64(-1); 1219 Int128 int128_min = int128_make128(0, INT64_MIN); 1220 if (likely(int128_nz(b->s128) && 1221 (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) { 1222 t->s128 = int128_rems(a->s128, b->s128); 1223 } else { 1224 t->s128 = int128_zero(); /* Undefined behavior */ 1225 } 1226 } 1227 1228 void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1229 { 1230 if (likely(int128_nz(b->s128))) { 1231 t->s128 = int128_remu(a->s128, b->s128); 1232 } else { 1233 t->s128 = int128_zero(); /* Undefined behavior */ 1234 } 1235 } 1236 1237 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1238 { 1239 ppc_avr_t result; 1240 int i; 1241 1242 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1243 int s = c->VsrB(i) & 0x1f; 1244 int index = s & 0xf; 1245 1246 if (s & 0x10) { 1247 result.VsrB(i) = b->VsrB(index); 1248 } else { 1249 result.VsrB(i) = a->VsrB(index); 1250 } 1251 } 1252 *r = result; 1253 } 1254 1255 void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1256 { 1257 ppc_avr_t result; 1258 int i; 1259 1260 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1261 int s = c->VsrB(i) & 0x1f; 1262 int index = 15 - (s & 0xf); 1263 1264 if (s & 0x10) { 1265 result.VsrB(i) = a->VsrB(index); 1266 } else { 1267 result.VsrB(i) = b->VsrB(index); 1268 } 1269 } 1270 *r = result; 1271 } 1272 1273 #define XXGENPCV_BE_EXP(NAME, SZ) \ 1274 void glue(helper_, glue(NAME, 
_be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ 1275 { \ 1276 ppc_vsr_t tmp; \ 1277 \ 1278 /* Initialize tmp with the result of an all-zeros mask */ \ 1279 tmp.VsrD(0) = 0x1011121314151617; \ 1280 tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \ 1281 \ 1282 /* Iterate over the most significant byte of each element */ \ 1283 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1284 if (b->VsrB(i) & 0x80) { \ 1285 /* Update each byte of the element */ \ 1286 for (int k = 0; k < SZ; k++) { \ 1287 tmp.VsrB(i + k) = j + k; \ 1288 } \ 1289 j += SZ; \ 1290 } \ 1291 } \ 1292 \ 1293 *t = tmp; \ 1294 } 1295 1296 #define XXGENPCV_BE_COMP(NAME, SZ) \ 1297 void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ 1298 { \ 1299 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ 1300 \ 1301 /* Iterate over the most significant byte of each element */ \ 1302 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1303 if (b->VsrB(i) & 0x80) { \ 1304 /* Update each byte of the element */ \ 1305 for (int k = 0; k < SZ; k++) { \ 1306 tmp.VsrB(j + k) = i + k; \ 1307 } \ 1308 j += SZ; \ 1309 } \ 1310 } \ 1311 \ 1312 *t = tmp; \ 1313 } 1314 1315 #define XXGENPCV_LE_EXP(NAME, SZ) \ 1316 void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ 1317 { \ 1318 ppc_vsr_t tmp; \ 1319 \ 1320 /* Initialize tmp with the result of an all-zeros mask */ \ 1321 tmp.VsrD(0) = 0x1F1E1D1C1B1A1918; \ 1322 tmp.VsrD(1) = 0x1716151413121110; \ 1323 \ 1324 /* Iterate over the most significant byte of each element */ \ 1325 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1326 /* Reverse indexing of "i" */ \ 1327 const int idx = ARRAY_SIZE(b->u8) - i - SZ; \ 1328 if (b->VsrB(idx) & 0x80) { \ 1329 /* Update each byte of the element */ \ 1330 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \ 1331 tmp.VsrB(idx + rk) = j + k; \ 1332 } \ 1333 j += SZ; \ 1334 } \ 1335 } \ 1336 \ 1337 *t = tmp; \ 1338 } 1339 1340 #define XXGENPCV_LE_COMP(NAME, SZ) \ 1341 void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ 1342 { \ 1343 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ 1344 \ 1345 /* Iterate over the most significant byte of each element */ \ 1346 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1347 if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) { \ 1348 /* Update each byte of the element */ \ 1349 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \ 1350 /* Reverse indexing of "j" */ \ 1351 const int idx = ARRAY_SIZE(b->u8) - j - SZ; \ 1352 tmp.VsrB(idx + rk) = i + k; \ 1353 } \ 1354 j += SZ; \ 1355 } \ 1356 } \ 1357 \ 1358 *t = tmp; \ 1359 } 1360 1361 #define XXGENPCV(NAME, SZ) \ 1362 XXGENPCV_BE_EXP(NAME, SZ) \ 1363 XXGENPCV_BE_COMP(NAME, SZ) \ 1364 XXGENPCV_LE_EXP(NAME, SZ) \ 1365 XXGENPCV_LE_COMP(NAME, SZ) \ 1366 1367 XXGENPCV(XXGENPCVBM, 1) 1368 XXGENPCV(XXGENPCVHM, 2) 1369 XXGENPCV(XXGENPCVWM, 4) 1370 XXGENPCV(XXGENPCVDM, 8) 1371 1372 #undef XXGENPCV_BE_EXP 1373 #undef XXGENPCV_BE_COMP 1374 #undef XXGENPCV_LE_EXP 1375 #undef XXGENPCV_LE_COMP 1376 #undef XXGENPCV 1377 1378 #if HOST_BIG_ENDIAN 1379 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1380 #define VBPERMD_INDEX(i) (i) 1381 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1382 #else 1383 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1384 #define VBPERMD_INDEX(i) (1 - i) 1385 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1386 #endif 1387 #define EXTRACT_BIT(avr, i, index) \ 1388 (extract64((avr)->VsrD(i), 63 - index, 1)) 1389 1390 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1391 { 1392 int i, j; 1393 ppc_avr_t result = 
{ .u64 = { 0, 0 } }; 1394 VECTOR_FOR_INORDER_I(i, u64) { 1395 for (j = 0; j < 8; j++) { 1396 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1397 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1398 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1399 } 1400 } 1401 } 1402 *r = result; 1403 } 1404 1405 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1406 { 1407 int i; 1408 uint64_t perm = 0; 1409 1410 VECTOR_FOR_INORDER_I(i, u8) { 1411 int index = VBPERMQ_INDEX(b, i); 1412 1413 if (index < 128) { 1414 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1415 if (a->u64[VBPERMQ_DW(index)] & mask) { 1416 perm |= (0x8000 >> i); 1417 } 1418 } 1419 } 1420 1421 r->VsrD(0) = perm; 1422 r->VsrD(1) = 0; 1423 } 1424 1425 #undef VBPERMQ_INDEX 1426 #undef VBPERMQ_DW 1427 1428 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1429 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1430 { \ 1431 int i, j; \ 1432 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ 1433 \ 1434 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1435 prod[i] = 0; \ 1436 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1437 if (a->srcfld[i] & (1ull << j)) { \ 1438 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1439 } \ 1440 } \ 1441 } \ 1442 \ 1443 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1444 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ 1445 } \ 1446 } 1447 1448 PMSUM(vpmsumb, u8, u16, uint16_t) 1449 PMSUM(vpmsumh, u16, u32, uint32_t) 1450 PMSUM(vpmsumw, u32, u64, uint64_t) 1451 1452 void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1453 { 1454 int i, j; 1455 Int128 tmp, prod[2] = {int128_zero(), int128_zero()}; 1456 1457 for (j = 0; j < 64; j++) { 1458 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1459 if (a->VsrD(i) & (1ull << j)) { 1460 tmp = int128_make64(b->VsrD(i)); 1461 tmp = int128_lshift(tmp, j); 1462 prod[i] = int128_xor(prod[i], tmp); 1463 } 1464 } 1465 } 1466 1467 r->s128 = int128_xor(prod[0], prod[1]); 1468 } 1469 1470 #if HOST_BIG_ENDIAN 1471 #define PKBIG 1 1472 #else 1473 #define PKBIG 0 1474 #endif 1475 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1476 { 1477 int i, j; 1478 ppc_avr_t result; 1479 #if HOST_BIG_ENDIAN 1480 const ppc_avr_t *x[2] = { a, b }; 1481 #else 1482 const ppc_avr_t *x[2] = { b, a }; 1483 #endif 1484 1485 VECTOR_FOR_INORDER_I(i, u64) { 1486 VECTOR_FOR_INORDER_I(j, u32) { 1487 uint32_t e = x[i]->u32[j]; 1488 1489 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1490 ((e >> 6) & 0x3e0) | 1491 ((e >> 3) & 0x1f)); 1492 } 1493 } 1494 *r = result; 1495 } 1496 1497 #define VPK(suffix, from, to, cvt, dosat) \ 1498 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1499 ppc_avr_t *a, ppc_avr_t *b) \ 1500 { \ 1501 int i; \ 1502 int sat = 0; \ 1503 ppc_avr_t result; \ 1504 ppc_avr_t *a0 = PKBIG ? a : b; \ 1505 ppc_avr_t *a1 = PKBIG ? 
b : a; \ 1506 \ 1507 VECTOR_FOR_INORDER_I(i, from) { \ 1508 result.to[i] = cvt(a0->from[i], &sat); \ 1509 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1510 } \ 1511 *r = result; \ 1512 if (dosat && sat) { \ 1513 set_vscr_sat(env); \ 1514 } \ 1515 } 1516 #define I(x, y) (x) 1517 VPK(shss, s16, s8, cvtshsb, 1) 1518 VPK(shus, s16, u8, cvtshub, 1) 1519 VPK(swss, s32, s16, cvtswsh, 1) 1520 VPK(swus, s32, u16, cvtswuh, 1) 1521 VPK(sdss, s64, s32, cvtsdsw, 1) 1522 VPK(sdus, s64, u32, cvtsduw, 1) 1523 VPK(uhus, u16, u8, cvtuhub, 1) 1524 VPK(uwus, u32, u16, cvtuwuh, 1) 1525 VPK(udus, u64, u32, cvtuduw, 1) 1526 VPK(uhum, u16, u8, I, 0) 1527 VPK(uwum, u32, u16, I, 0) 1528 VPK(udum, u64, u32, I, 0) 1529 #undef I 1530 #undef VPK 1531 #undef PKBIG 1532 1533 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1534 { 1535 int i; 1536 1537 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1538 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1539 } 1540 } 1541 1542 #define VRFI(suffix, rounding) \ 1543 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1544 ppc_avr_t *b) \ 1545 { \ 1546 int i; \ 1547 float_status s = env->vec_status; \ 1548 \ 1549 set_float_rounding_mode(rounding, &s); \ 1550 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1551 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1552 } \ 1553 } 1554 VRFI(n, float_round_nearest_even) 1555 VRFI(m, float_round_down) 1556 VRFI(p, float_round_up) 1557 VRFI(z, float_round_to_zero) 1558 #undef VRFI 1559 1560 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1561 { 1562 int i; 1563 1564 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1565 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1566 1567 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1568 } 1569 } 1570 1571 #define VRLMI(name, size, element, insert) \ 1572 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 1573 { \ 1574 int i; \ 1575 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1576 uint##size##_t src1 = a->element[i]; \ 1577 uint##size##_t src2 = b->element[i]; \ 1578 uint##size##_t src3 = r->element[i]; \ 1579 uint##size##_t begin, end, shift, mask, rot_val; \ 1580 \ 1581 shift = extract##size(src2, 0, 6); \ 1582 end = extract##size(src2, 8, 6); \ 1583 begin = extract##size(src2, 16, 6); \ 1584 rot_val = rol##size(src1, shift); \ 1585 mask = mask_u##size(begin, end); \ 1586 if (insert) { \ 1587 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1588 } else { \ 1589 r->element[i] = (rot_val & mask); \ 1590 } \ 1591 } \ 1592 } 1593 1594 VRLMI(VRLDMI, 64, u64, 1); 1595 VRLMI(VRLWMI, 32, u32, 1); 1596 VRLMI(VRLDNM, 64, u64, 0); 1597 VRLMI(VRLWNM, 32, u32, 0); 1598 1599 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1600 { 1601 int i; 1602 1603 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1604 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status); 1605 } 1606 } 1607 1608 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1609 { 1610 int i; 1611 1612 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1613 r->f32[i] = float32_log2(b->f32[i], &env->vec_status); 1614 } 1615 } 1616 1617 #define VEXTU_X_DO(name, size, left) \ 1618 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1619 { \ 1620 int index = (a & 0xf) * 8; \ 1621 if (left) { \ 1622 index = 128 - index - size; \ 1623 } \ 1624 return int128_getlo(int128_rshift(b->s128, index)) & \ 1625 MAKE_64BIT_MASK(0, size); \ 1626 } 1627 VEXTU_X_DO(vextublx, 8, 1) 1628 VEXTU_X_DO(vextuhlx, 16, 1) 1629 
VEXTU_X_DO(vextuwlx, 32, 1) 1630 VEXTU_X_DO(vextubrx, 8, 0) 1631 VEXTU_X_DO(vextuhrx, 16, 0) 1632 VEXTU_X_DO(vextuwrx, 32, 0) 1633 #undef VEXTU_X_DO 1634 1635 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1636 { 1637 int i; 1638 unsigned int shift, bytes, size; 1639 1640 size = ARRAY_SIZE(r->u8); 1641 for (i = 0; i < size; i++) { 1642 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1643 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */ 1644 (((i + 1) < size) ? a->VsrB(i + 1) : 0); 1645 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */ 1646 } 1647 } 1648 1649 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1650 { 1651 int i; 1652 unsigned int shift, bytes; 1653 1654 /* 1655 * Use reverse order, as destination and source register can be 1656 * same. Its being modified in place saving temporary, reverse 1657 * order will guarantee that computed result is not fed back. 1658 */ 1659 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1660 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1661 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i); 1662 /* extract adjacent bytes */ 1663 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */ 1664 } 1665 } 1666 1667 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1668 { 1669 int sh = shift & 0xf; 1670 int i; 1671 ppc_avr_t result; 1672 1673 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1674 int index = sh + i; 1675 if (index > 0xf) { 1676 result.VsrB(i) = b->VsrB(index - 0x10); 1677 } else { 1678 result.VsrB(i) = a->VsrB(index); 1679 } 1680 } 1681 *r = result; 1682 } 1683 1684 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1685 { 1686 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1687 1688 #if HOST_BIG_ENDIAN 1689 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1690 memset(&r->u8[16 - sh], 0, sh); 1691 #else 1692 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1693 memset(&r->u8[0], 0, sh); 1694 #endif 1695 } 1696 1697 #if HOST_BIG_ENDIAN 1698 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX]) 1699 #else 1700 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1) 1701 #endif 1702 1703 #define VINSX(SUFFIX, TYPE) \ 1704 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \ 1705 uint64_t val, target_ulong index) \ 1706 { \ 1707 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \ 1708 target_long idx = index; \ 1709 \ 1710 if (idx < 0 || idx > maxidx) { \ 1711 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \ 1712 qemu_log_mask(LOG_GUEST_ERROR, \ 1713 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \ 1714 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \ 1715 } else { \ 1716 TYPE src = val; \ 1717 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \ 1718 } \ 1719 } 1720 VINSX(B, uint8_t) 1721 VINSX(H, uint16_t) 1722 VINSX(W, uint32_t) 1723 VINSX(D, uint64_t) 1724 #undef ELEM_ADDR 1725 #undef VINSX 1726 #if HOST_BIG_ENDIAN 1727 #define VEXTDVLX(NAME, SIZE) \ 1728 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1729 target_ulong index) \ 1730 { \ 1731 const target_long idx = index; \ 1732 ppc_avr_t tmp[2] = { *a, *b }; \ 1733 memset(t, 0, sizeof(*t)); \ 1734 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1735 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \ 1736 } else { \ 1737 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1738 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1739 env->nip, idx < 0 ? 
SIZE - idx : idx, 32 - SIZE); \ 1740 } \ 1741 } 1742 #else 1743 #define VEXTDVLX(NAME, SIZE) \ 1744 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1745 target_ulong index) \ 1746 { \ 1747 const target_long idx = index; \ 1748 ppc_avr_t tmp[2] = { *b, *a }; \ 1749 memset(t, 0, sizeof(*t)); \ 1750 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1751 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \ 1752 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \ 1753 } else { \ 1754 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1755 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1756 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \ 1757 } \ 1758 } 1759 #endif 1760 VEXTDVLX(VEXTDUBVLX, 1) 1761 VEXTDVLX(VEXTDUHVLX, 2) 1762 VEXTDVLX(VEXTDUWVLX, 4) 1763 VEXTDVLX(VEXTDDVLX, 8) 1764 #undef VEXTDVLX 1765 #if HOST_BIG_ENDIAN 1766 #define VEXTRACT(suffix, element) \ 1767 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1768 { \ 1769 uint32_t es = sizeof(r->element[0]); \ 1770 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1771 memset(&r->u8[8], 0, 8); \ 1772 memset(&r->u8[0], 0, 8 - es); \ 1773 } 1774 #else 1775 #define VEXTRACT(suffix, element) \ 1776 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1777 { \ 1778 uint32_t es = sizeof(r->element[0]); \ 1779 uint32_t s = (16 - index) - es; \ 1780 memmove(&r->u8[8], &b->u8[s], es); \ 1781 memset(&r->u8[0], 0, 8); \ 1782 memset(&r->u8[8 + es], 0, 8 - es); \ 1783 } 1784 #endif 1785 VEXTRACT(ub, u8) 1786 VEXTRACT(uh, u16) 1787 VEXTRACT(uw, u32) 1788 VEXTRACT(d, u64) 1789 #undef VEXTRACT 1790 1791 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \ 1792 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \ 1793 { \ 1794 int i, idx, crf = 0; \ 1795 \ 1796 for (i = 0; i < NUM_ELEMS; i++) { \ 1797 idx = LEFT ? i : NUM_ELEMS - i - 1; \ 1798 if (b->Vsr##ELEM(idx)) { \ 1799 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \ 1800 } else { \ 1801 crf = 0b0010; \ 1802 break; \ 1803 } \ 1804 } \ 1805 \ 1806 for (; i < NUM_ELEMS; i++) { \ 1807 idx = LEFT ? i : NUM_ELEMS - i - 1; \ 1808 t->Vsr##ELEM(idx) = 0; \ 1809 } \ 1810 \ 1811 return crf; \ 1812 } 1813 VSTRI(VSTRIBL, B, 16, true) 1814 VSTRI(VSTRIBR, B, 16, false) 1815 VSTRI(VSTRIHL, H, 8, true) 1816 VSTRI(VSTRIHR, H, 8, false) 1817 #undef VSTRI 1818 1819 void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index) 1820 { 1821 ppc_vsr_t t = { }; 1822 size_t es = sizeof(uint32_t); 1823 uint32_t ext_index; 1824 int i; 1825 1826 ext_index = index; 1827 for (i = 0; i < es; i++, ext_index++) { 1828 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16); 1829 } 1830 1831 *xt = t; 1832 } 1833 1834 void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index) 1835 { 1836 ppc_vsr_t t = *xt; 1837 size_t es = sizeof(uint32_t); 1838 int ins_index, i = 0; 1839 1840 ins_index = index; 1841 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1842 t.VsrB(ins_index) = xb->VsrB(8 - es + i); 1843 } 1844 1845 *xt = t; 1846 } 1847 1848 void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c, 1849 uint32_t desc) 1850 { 1851 /* 1852 * Instead of processing imm bit-by-bit, we'll skip the computation of 1853 * conjunctions whose corresponding bit is unset. 1854 */ 1855 int bit, imm = simd_data(desc); 1856 Int128 conj, disj = int128_zero(); 1857 1858 /* Iterate over set bits from the least to the most significant bit */ 1859 while (imm) { 1860 /* 1861 * Get the next bit to be processed with ctz64. 
Invert the result of 1862 * ctz64 to match the indexing used by PowerISA. 1863 */ 1864 bit = 7 - ctzl(imm); 1865 if (bit & 0x4) { 1866 conj = a->s128; 1867 } else { 1868 conj = int128_not(a->s128); 1869 } 1870 if (bit & 0x2) { 1871 conj = int128_and(conj, b->s128); 1872 } else { 1873 conj = int128_and(conj, int128_not(b->s128)); 1874 } 1875 if (bit & 0x1) { 1876 conj = int128_and(conj, c->s128); 1877 } else { 1878 conj = int128_and(conj, int128_not(c->s128)); 1879 } 1880 disj = int128_or(disj, conj); 1881 1882 /* Unset the least significant bit that is set */ 1883 imm &= imm - 1; 1884 } 1885 1886 t->s128 = disj; 1887 } 1888 1889 #define XXBLEND(name, sz) \ 1890 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1891 ppc_avr_t *c, uint32_t desc) \ 1892 { \ 1893 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \ 1894 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \ 1895 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \ 1896 } \ 1897 } 1898 XXBLEND(B, 8) 1899 XXBLEND(H, 16) 1900 XXBLEND(W, 32) 1901 XXBLEND(D, 64) 1902 #undef XXBLEND 1903 1904 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1905 { 1906 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1907 1908 #if HOST_BIG_ENDIAN 1909 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1910 memset(&r->u8[0], 0, sh); 1911 #else 1912 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1913 memset(&r->u8[16 - sh], 0, sh); 1914 #endif 1915 } 1916 1917 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1918 { 1919 int64_t t; 1920 int i, upper; 1921 ppc_avr_t result; 1922 int sat = 0; 1923 1924 upper = ARRAY_SIZE(r->s32) - 1; 1925 t = (int64_t)b->VsrSW(upper); 1926 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1927 t += a->VsrSW(i); 1928 result.VsrSW(i) = 0; 1929 } 1930 result.VsrSW(upper) = cvtsdsw(t, &sat); 1931 *r = result; 1932 1933 if (sat) { 1934 set_vscr_sat(env); 1935 } 1936 } 1937 1938 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1939 { 1940 int i, j, upper; 1941 ppc_avr_t result; 1942 int sat = 0; 1943 1944 upper = 1; 1945 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1946 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 1947 1948 result.VsrD(i) = 0; 1949 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 1950 t += a->VsrSW(2 * i + j); 1951 } 1952 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 1953 } 1954 1955 *r = result; 1956 if (sat) { 1957 set_vscr_sat(env); 1958 } 1959 } 1960 1961 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1962 { 1963 int i, j; 1964 int sat = 0; 1965 1966 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1967 int64_t t = (int64_t)b->s32[i]; 1968 1969 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 1970 t += a->s8[4 * i + j]; 1971 } 1972 r->s32[i] = cvtsdsw(t, &sat); 1973 } 1974 1975 if (sat) { 1976 set_vscr_sat(env); 1977 } 1978 } 1979 1980 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1981 { 1982 int sat = 0; 1983 int i; 1984 1985 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1986 int64_t t = (int64_t)b->s32[i]; 1987 1988 t += a->s16[2 * i] + a->s16[2 * i + 1]; 1989 r->s32[i] = cvtsdsw(t, &sat); 1990 } 1991 1992 if (sat) { 1993 set_vscr_sat(env); 1994 } 1995 } 1996 1997 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1998 { 1999 int i, j; 2000 int sat = 0; 2001 2002 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2003 uint64_t t = (uint64_t)b->u32[i]; 2004 2005 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 2006 t += a->u8[4 * i + j]; 2007 } 2008 r->u32[i] = cvtuduw(t, &sat); 2009 } 
2010 2011 if (sat) { 2012 set_vscr_sat(env); 2013 } 2014 } 2015 2016 #if HOST_BIG_ENDIAN 2017 #define UPKHI 1 2018 #define UPKLO 0 2019 #else 2020 #define UPKHI 0 2021 #define UPKLO 1 2022 #endif 2023 #define VUPKPX(suffix, hi) \ 2024 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2025 { \ 2026 int i; \ 2027 ppc_avr_t result; \ 2028 \ 2029 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 2030 uint16_t e = b->u16[hi ? i : i + 4]; \ 2031 uint8_t a = (e >> 15) ? 0xff : 0; \ 2032 uint8_t r = (e >> 10) & 0x1f; \ 2033 uint8_t g = (e >> 5) & 0x1f; \ 2034 uint8_t b = e & 0x1f; \ 2035 \ 2036 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 2037 } \ 2038 *r = result; \ 2039 } 2040 VUPKPX(lpx, UPKLO) 2041 VUPKPX(hpx, UPKHI) 2042 #undef VUPKPX 2043 2044 #define VUPK(suffix, unpacked, packee, hi) \ 2045 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2046 { \ 2047 int i; \ 2048 ppc_avr_t result; \ 2049 \ 2050 if (hi) { \ 2051 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 2052 result.unpacked[i] = b->packee[i]; \ 2053 } \ 2054 } else { \ 2055 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 2056 i++) { \ 2057 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 2058 } \ 2059 } \ 2060 *r = result; \ 2061 } 2062 VUPK(hsb, s16, s8, UPKHI) 2063 VUPK(hsh, s32, s16, UPKHI) 2064 VUPK(hsw, s64, s32, UPKHI) 2065 VUPK(lsb, s16, s8, UPKLO) 2066 VUPK(lsh, s32, s16, UPKLO) 2067 VUPK(lsw, s64, s32, UPKLO) 2068 #undef VUPK 2069 #undef UPKHI 2070 #undef UPKLO 2071 2072 #define VGENERIC_DO(name, element) \ 2073 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 2074 { \ 2075 int i; \ 2076 \ 2077 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 2078 r->element[i] = name(b->element[i]); \ 2079 } \ 2080 } 2081 2082 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 2083 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 2084 2085 VGENERIC_DO(clzb, u8) 2086 VGENERIC_DO(clzh, u16) 2087 2088 #undef clzb 2089 #undef clzh 2090 2091 #define ctzb(v) ((v) ? ctz32(v) : 8) 2092 #define ctzh(v) ((v) ? 
ctz32(v) : 16) 2093 #define ctzw(v) ctz32((v)) 2094 #define ctzd(v) ctz64((v)) 2095 2096 VGENERIC_DO(ctzb, u8) 2097 VGENERIC_DO(ctzh, u16) 2098 VGENERIC_DO(ctzw, u32) 2099 VGENERIC_DO(ctzd, u64) 2100 2101 #undef ctzb 2102 #undef ctzh 2103 #undef ctzw 2104 #undef ctzd 2105 2106 #define popcntb(v) ctpop8(v) 2107 #define popcnth(v) ctpop16(v) 2108 #define popcntw(v) ctpop32(v) 2109 #define popcntd(v) ctpop64(v) 2110 2111 VGENERIC_DO(popcntb, u8) 2112 VGENERIC_DO(popcnth, u16) 2113 VGENERIC_DO(popcntw, u32) 2114 VGENERIC_DO(popcntd, u64) 2115 2116 #undef popcntb 2117 #undef popcnth 2118 #undef popcntw 2119 #undef popcntd 2120 2121 #undef VGENERIC_DO 2122 2123 void helper_VADDUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2124 { 2125 r->s128 = int128_add(a->s128, b->s128); 2126 } 2127 2128 void helper_VADDEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2129 { 2130 r->s128 = int128_add(int128_add(a->s128, b->s128), 2131 int128_make64(int128_getlo(c->s128) & 1)); 2132 } 2133 2134 void helper_VADDCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2135 { 2136 r->VsrD(1) = int128_ult(int128_not(a->s128), b->s128); 2137 r->VsrD(0) = 0; 2138 } 2139 2140 void helper_VADDECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2141 { 2142 bool carry_out = int128_ult(int128_not(a->s128), b->s128), 2143 carry_in = int128_getlo(c->s128) & 1; 2144 2145 if (!carry_out && carry_in) { 2146 carry_out = (int128_nz(a->s128) || int128_nz(b->s128)) && 2147 int128_eq(int128_add(a->s128, b->s128), int128_makes64(-1)); 2148 } 2149 2150 r->VsrD(0) = 0; 2151 r->VsrD(1) = carry_out; 2152 } 2153 2154 void helper_VSUBUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2155 { 2156 r->s128 = int128_sub(a->s128, b->s128); 2157 } 2158 2159 void helper_VSUBEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2160 { 2161 r->s128 = int128_add(int128_add(a->s128, int128_not(b->s128)), 2162 int128_make64(int128_getlo(c->s128) & 1)); 2163 } 2164 2165 void helper_VSUBCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2166 { 2167 Int128 tmp = int128_not(b->s128); 2168 2169 r->VsrD(1) = int128_ult(int128_not(a->s128), tmp) || 2170 int128_eq(int128_add(a->s128, tmp), int128_makes64(-1)); 2171 r->VsrD(0) = 0; 2172 } 2173 2174 void helper_VSUBECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2175 { 2176 Int128 tmp = int128_not(b->s128); 2177 bool carry_out = int128_ult(int128_not(a->s128), tmp), 2178 carry_in = int128_getlo(c->s128) & 1; 2179 2180 r->VsrD(1) = carry_out || (carry_in && int128_eq(int128_add(a->s128, tmp), 2181 int128_makes64(-1))); 2182 r->VsrD(0) = 0; 2183 } 2184 2185 #define BCD_PLUS_PREF_1 0xC 2186 #define BCD_PLUS_PREF_2 0xF 2187 #define BCD_PLUS_ALT_1 0xA 2188 #define BCD_NEG_PREF 0xD 2189 #define BCD_NEG_ALT 0xB 2190 #define BCD_PLUS_ALT_2 0xE 2191 #define NATIONAL_PLUS 0x2B 2192 #define NATIONAL_NEG 0x2D 2193 2194 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2195 2196 static int bcd_get_sgn(ppc_avr_t *bcd) 2197 { 2198 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) { 2199 case BCD_PLUS_PREF_1: 2200 case BCD_PLUS_PREF_2: 2201 case BCD_PLUS_ALT_1: 2202 case BCD_PLUS_ALT_2: 2203 { 2204 return 1; 2205 } 2206 2207 case BCD_NEG_PREF: 2208 case BCD_NEG_ALT: 2209 { 2210 return -1; 2211 } 2212 2213 default: 2214 { 2215 return 0; 2216 } 2217 } 2218 } 2219 2220 static int bcd_preferred_sgn(int sgn, int ps) 2221 { 2222 if (sgn >= 0) { 2223 return (ps == 0) ? 
static int bcd_get_sgn(ppc_avr_t *bcd)
{
    switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
    case BCD_PLUS_PREF_1:
    case BCD_PLUS_PREF_2:
    case BCD_PLUS_ALT_1:
    case BCD_PLUS_ALT_2:
    {
        return 1;
    }

    case BCD_NEG_PREF:
    case BCD_NEG_ALT:
    {
        return -1;
    }

    default:
    {
        return 0;
    }
    }
}

static int bcd_preferred_sgn(int sgn, int ps)
{
    if (sgn >= 0) {
        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
    } else {
        return BCD_NEG_PREF;
    }
}

static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
{
    uint8_t result;
    if (n & 1) {
        result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
    } else {
        result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
    }

    if (unlikely(result > 9)) {
        *invalid = true;
    }
    return result;
}

static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
{
    if (n & 1) {
        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
        bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
    } else {
        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
        bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
    }
}

static bool bcd_is_valid(ppc_avr_t *bcd)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(bcd) == 0) {
        return false;
    }

    for (i = 1; i < 32; i++) {
        bcd_get_digit(bcd, i, &invalid);
        if (unlikely(invalid)) {
            return false;
        }
    }
    return true;
}

static int bcd_cmp_zero(ppc_avr_t *bcd)
{
    if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
        return CRF_EQ;
    } else {
        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
    }
}

static uint16_t get_national_digit(ppc_avr_t *reg, int n)
{
    return reg->VsrH(7 - n);
}

static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
{
    reg->VsrH(7 - n) = val;
}

static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;
    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}

static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
    return is_zero;
}

static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                        int *overflow)
{
    int carry = 0;
    int i;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
                        bcd_get_digit(b, i, invalid) + carry;
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
}

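/*
 * bcdadd/bcdsub work on the sign-and-magnitude form: when the signs match,
 * the magnitudes are added and a carry out of digit 31 is an overflow;
 * when they differ, the smaller magnitude is subtracted from the larger
 * and the result takes the sign of the larger operand.  The returned CR
 * field encodes the outcome: LT/GT for a negative/positive result, EQ for
 * zero, and SO for an invalid operand or an overflow.
 */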
uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{

    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    int zero = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? CRF_GT : CRF_LT;
        } else {
            int magnitude = bcd_cmp_mag(a, b);
            if (magnitude > 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
                bcd_sub_mag(&result, a, b, &invalid, &overflow);
                cr = (sgna > 0) ? CRF_GT : CRF_LT;
            } else if (magnitude < 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
                bcd_sub_mag(&result, b, a, &invalid, &overflow);
                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
            } else {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
                cr = CRF_EQ;
            }
        }
    }

    if (unlikely(invalid)) {
        result.VsrD(0) = result.VsrD(1) = -1;
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    } else if (zero) {
        cr |= CRF_EQ;
    }

    *r = result;

    return cr;
}

uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

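/*
 * Zoned decimal conversions (bcdcfz/bcdctz): each of the 16 bytes holds a
 * digit in its low nibble and a zone in its high nibble.  With PS=0 the
 * zone is 0x3 (ASCII digits) and a negative value is flagged by setting
 * bit 0x4 in the zone of the rightmost byte (0x7 vs 0x3); with PS=1 the
 * zone is 0xF and the rightmost zone carries an explicit sign code
 * (0xB or 0xD for minus).
 */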
uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
        digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
        (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->VsrD(0) >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

/**
 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
 *
 * Returns:
 * > 0 if ahi|alo > bhi|blo,
 *   0 if ahi|alo == bhi|blo,
 * < 0 if ahi|alo < bhi|blo
 */
static inline int ucmp128(uint64_t alo, uint64_t ahi,
                          uint64_t blo, uint64_t bhi)
{
    return (ahi == bhi) ?
        (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
        (ahi > bhi ? 1 : -1);
}

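/*
 * bcdcfsq converts a signed 128-bit binary integer to packed decimal.
 * A negative source is first negated in two's complement, the magnitude is
 * checked against 10^31 - 1, and the value is then split with a division
 * by 10^15 so that the quotient and remainder each fit in 64 bits before
 * their digits are written out.
 */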
uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr;
    uint64_t lo_value;
    uint64_t hi_value;
    uint64_t rem;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->VsrSD(0) < 0) {
        lo_value = -b->VsrSD(1);
        hi_value = ~b->VsrD(0) + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);

        cr = CRF_LT;
    } else {
        lo_value = b->VsrD(1);
        hi_value = b->VsrD(0);
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);

        if (hi_value == 0 && lo_value == 0) {
            cr = CRF_EQ;
        } else {
            cr = CRF_GT;
        }
    }

    /*
     * Check src limits: abs(src) <= 10^31 - 1
     *
     * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
     */
    if (ucmp128(lo_value, hi_value,
                0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
        cr |= CRF_SO;

        /*
         * According to the ISA, if src wouldn't fit in the destination
         * register, the result is undefined.
         * In that case, we leave r unchanged.
         */
    } else {
        rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);

        for (i = 1; i < 16; rem /= 10, i++) {
            bcd_put_digit(&ret, rem % 10, i);
        }

        for (; i < 32; lo_value /= 10, i++) {
            bcd_put_digit(&ret, lo_value % 10, i);
        }

        *r = ret;
    }

    return cr;
}

uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->VsrSD(1) = -lo_value;
        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
    } else {
        r->VsrSD(1) = lo_value;
        r->VsrSD(0) = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);

    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i = a->VsrSB(7);
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSB(7);
    if (i >= 32) {
        ox_flag = true;
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i <= -32) {
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

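/*
 * bcdsr differs from bcds/bcdus in that a right shift rounds the result:
 * if the most significant digit shifted out (left in digit position 0
 * after the shift) is 5 or greater, one is added to the remaining
 * magnitude via bcd_add_mag() with a constant BCD "1".
 */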
uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    int i = a->VsrSB(7);
    ppc_avr_t bcd_one;

    bcd_one.VsrD(0) = 0;
    bcd_one.VsrD(1) = 0x10;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);

        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}

uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
    int i = a->VsrSH(3) + 1;
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i >= 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}

uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSH(3);
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    } else if (i == 0) {
        if (ret.VsrD(0) || ret.VsrD(1)) {
            ox_flag = CRF_SO;
        }
        ret.VsrD(0) = ret.VsrD(1) = 0;
    }

    *r = ret;
    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}

void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    AESState *ad = (AESState *)r;
    AESState *st = (AESState *)a;
    AESState *rk = (AESState *)b;

    aesenc_SB_SR_MC_AK(ad, st, rk, true);
}

void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    aesenc_SB_SR_AK((AESState *)r, (AESState *)a, (AESState *)b, true);
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    AESState *ad = (AESState *)r;
    AESState *st = (AESState *)a;
    AESState *rk = (AESState *)b;

    aesdec_ISB_ISR_AK_IMC(ad, st, rk, true);
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    aesdec_ISB_ISR_AK((AESState *)r, (AESState *)a, (AESState *)b, true);
}

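/*
 * vshasigmaw/vshasigmad implement the SHA-256/SHA-512 sigma functions.
 * st_six packs the ST bit (0x10) and the four-bit SIX field: ST selects
 * the "big" Sigma (rotate-only) variants over the "small" sigma
 * (rotate + shift) variants, and the SIX bit for each word/doubleword
 * selects between the 0 and 1 flavour of the chosen function.
 */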
void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}

void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
        (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

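/*
 * brinc produces a bit-reversed increment, typically used to generate
 * bit-reversed (FFT-style) addresses: the bits of arg1 selected by the
 * mask in arg2 are reversed, incremented by one, reversed back and merged
 * with the untouched upper bits of arg1.  MASKBITS bounds how many
 * low-order mask bits this implementation honours.
 */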
#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}