/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"
#include "tcg/tcg-gvec-desc.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

/*
 * Update the legacy XER overflow bits: on overflow, OV, OV32 and the
 * sticky SO bit are all set; otherwise OV and OV32 are cleared and SO
 * is left unchanged (SO is sticky until explicitly cleared).
 */
static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = env->ov32 = 1;
    } else {
        env->ov = env->ov32 = 0;
    }
}

/*
 * divweu (Divide Word Extended Unsigned): computes
 * ((uint64_t)ra << 32) / (uint32_t)rb.  The result is undefined per the
 * ISA (we return 0) when the divisor is zero or the quotient does not
 * fit in 32 bits.  When 'oe' is non-zero, the legacy overflow flags are
 * updated via helper_update_ov_legacy().
 */
target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

/*
 * divwe (Divide Word Extended, signed): computes
 * ((int64_t)ra << 32) / (int32_t)rb.  Overflow is flagged when the
 * divisor is zero, when the division would trap on the host
 * (INT64_MIN / -1), or when the quotient does not fit in a signed
 * 32-bit value.  The result is then undefined per the ISA (we return 0).
 */
target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

/*
 * divdeu (Divide Doubleword Extended Unsigned): divides the 128-bit
 * value (ra:0) by rb using divu128().  ra >= rb means the 64-bit
 * quotient would overflow, so the result is undefined (0) in that case
 * or when rb is zero.
 */
uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    if (unlikely(rb == 0 || ra >= rb)) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divu128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

/*
 * divde (Divide Doubleword Extended, signed): divides the 128-bit value
 * (ra:0) by rb using divs128().  |ra| >= |rb| means the quotient cannot
 * be represented in 64 bits, so the result is undefined (0) in that
 * case or when rb is zero.
 */
uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    uint64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = 0;

    if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divs128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab (x replicated into every byte) */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/*
 * subtract 1 from each byte, and with inverse, check if MSB is set at each
 * byte.
 * i.e. 
((0x00 - 0x01) & ~(0x00)) & 0x80 154 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 155 */ 156 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 157 158 /* When you XOR the pattern and there is a match, that byte will be zero */ 159 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 160 161 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 162 { 163 return hasvalue(rb, ra) ? CRF_GT : 0; 164 } 165 166 #undef pattern 167 #undef haszero 168 #undef hasvalue 169 170 /* 171 * Return a random number. 172 */ 173 uint64_t helper_darn32(void) 174 { 175 Error *err = NULL; 176 uint32_t ret; 177 178 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 179 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 180 error_get_pretty(err)); 181 error_free(err); 182 return -1; 183 } 184 185 return ret; 186 } 187 188 uint64_t helper_darn64(void) 189 { 190 Error *err = NULL; 191 uint64_t ret; 192 193 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 194 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 195 error_get_pretty(err)); 196 error_free(err); 197 return -1; 198 } 199 200 return ret; 201 } 202 203 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 204 { 205 int i; 206 uint64_t ra = 0; 207 208 for (i = 0; i < 8; i++) { 209 int index = (rs >> (i * 8)) & 0xFF; 210 if (index < 64) { 211 if (rb & PPC_BIT(index)) { 212 ra |= 1 << i; 213 } 214 } 215 } 216 return ra; 217 } 218 219 #endif 220 221 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 222 { 223 target_ulong mask = 0xff; 224 target_ulong ra = 0; 225 int i; 226 227 for (i = 0; i < sizeof(target_ulong); i++) { 228 if ((rs & mask) == (rb & mask)) { 229 ra |= mask; 230 } 231 mask <<= 8; 232 } 233 return ra; 234 } 235 236 /* shift right arithmetic helper */ 237 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 238 target_ulong shift) 239 { 240 int32_t ret; 241 242 if (likely(!(shift & 0x20))) { 243 if (likely((uint32_t)shift != 0)) { 244 shift &= 0x1f; 245 ret = (int32_t)value >> 
shift; 246 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 247 env->ca32 = env->ca = 0; 248 } else { 249 env->ca32 = env->ca = 1; 250 } 251 } else { 252 ret = (int32_t)value; 253 env->ca32 = env->ca = 0; 254 } 255 } else { 256 ret = (int32_t)value >> 31; 257 env->ca32 = env->ca = (ret != 0); 258 } 259 return (target_long)ret; 260 } 261 262 #if defined(TARGET_PPC64) 263 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 264 target_ulong shift) 265 { 266 int64_t ret; 267 268 if (likely(!(shift & 0x40))) { 269 if (likely((uint64_t)shift != 0)) { 270 shift &= 0x3f; 271 ret = (int64_t)value >> shift; 272 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 273 env->ca32 = env->ca = 0; 274 } else { 275 env->ca32 = env->ca = 1; 276 } 277 } else { 278 ret = (int64_t)value; 279 env->ca32 = env->ca = 0; 280 } 281 } else { 282 ret = (int64_t)value >> 63; 283 env->ca32 = env->ca = (ret != 0); 284 } 285 return ret; 286 } 287 #endif 288 289 #if defined(TARGET_PPC64) 290 target_ulong helper_popcntb(target_ulong val) 291 { 292 /* Note that we don't fold past bytes */ 293 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 294 0x5555555555555555ULL); 295 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 296 0x3333333333333333ULL); 297 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 298 0x0f0f0f0f0f0f0f0fULL); 299 return val; 300 } 301 302 target_ulong helper_popcntw(target_ulong val) 303 { 304 /* Note that we don't fold past words. 
 */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
/* 32-bit target variant: population count folded per byte only. */
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}
#endif

/*
 * CFUGED (Centrifuge Doubleword): bits of 'src' selected by ones in
 * 'mask' are gathered into the low-order end of the result, bits
 * selected by zeros into the high-order end; each group keeps the
 * original relative order of its bits.
 */
uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
{
    /*
     * Instead of processing the mask bit-by-bit from the most significant to
     * the least significant bit, as described in PowerISA, we'll handle it in
     * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
     * ctz or cto, we negate the mask at the end of the loop.
     */
    target_ulong m, left = 0, right = 0;
    unsigned int n, i = 64;
    bool bit = false; /* tracks if we are processing zeros or ones */

    /* An all-zeros or all-ones mask puts every bit in one group: no-op. */
    if (mask == 0 || mask == -1) {
        return src;
    }

    /* Processes the mask in blocks, from LSB to MSB */
    while (i) {
        /* Find how many bits we should take */
        n = ctz64(mask);
        if (n > i) {
            n = i;
        }

        /*
         * Extracts 'n' trailing bits of src and put them on the leading 'n'
         * bits of 'right' or 'left', pushing down the previously extracted
         * values.
         */
        m = (1ll << n) - 1;
        if (bit) {
            right = ror64(right | (src & m), n);
        } else {
            left = ror64(left | (src & m), n);
        }

        /*
         * Discards the processed bits from 'src' and 'mask'. Note that we are
         * removing 'n' trailing zeros from 'mask', but the logical shift will
         * add 'n' leading zeros back, so the population count of 'mask' is kept
         * the same.
         */
        src >>= n;
        mask >>= n;
        i -= n;
        bit = !bit;
        mask = ~mask;
    }

    /*
     * At the end, right was ror'ed ctpop(mask) times. To put it back in place,
     * we'll shift it more 64-ctpop(mask) times.
     */
    if (bit) {
        n = ctpop64(mask);
    } else {
        n = 64 - ctpop64(mask);
    }

    return left | (right >> n);
}

/*
 * PDEPD (parallel bits deposit): successive low-order bits of 'src' are
 * scattered to the bit positions marked by ones in 'mask'.
 */
uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
{
    int i, o;
    uint64_t result = 0;

    if (mask == -1) {
        return src;
    }

    for (i = 0; mask != 0; i++) {
        o = ctz64(mask);
        mask &= mask - 1; /* clear the lowest set bit */
        result |= ((src >> i) & 1) << o;
    }

    return result;
}

/*
 * PEXTD (parallel bits extract): the bits of 'src' at positions marked
 * by ones in 'mask' are packed into the low-order bits of the result.
 */
uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
{
    int i, o;
    uint64_t result = 0;

    if (mask == -1) {
        return src;
    }

    for (o = 0; mask != 0; o++) {
        i = ctz64(mask);
        mask &= mask - 1; /* clear the lowest set bit */
        result |= ((src >> i) & 1) << o;
    }

    return result;
}

/*****************************************************************************/
/* Altivec extension helpers */
/* Visit the elements of vector 'r' in the same order on either host. */
#if HOST_BIG_ENDIAN
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers. 
*/ 437 #define SATCVT(from, to, from_type, to_type, min, max) \ 438 static inline to_type cvt##from##to(from_type x, int *sat) \ 439 { \ 440 to_type r; \ 441 \ 442 if (x < (from_type)min) { \ 443 r = min; \ 444 *sat = 1; \ 445 } else if (x > (from_type)max) { \ 446 r = max; \ 447 *sat = 1; \ 448 } else { \ 449 r = x; \ 450 } \ 451 return r; \ 452 } 453 #define SATCVTU(from, to, from_type, to_type, min, max) \ 454 static inline to_type cvt##from##to(from_type x, int *sat) \ 455 { \ 456 to_type r; \ 457 \ 458 if (x > (from_type)max) { \ 459 r = max; \ 460 *sat = 1; \ 461 } else { \ 462 r = x; \ 463 } \ 464 return r; \ 465 } 466 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX) 467 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX) 468 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX) 469 470 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX) 471 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX) 472 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX) 473 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX) 474 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX) 475 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX) 476 #undef SATCVT 477 #undef SATCVTU 478 479 void helper_mtvscr(CPUPPCState *env, uint32_t vscr) 480 { 481 ppc_store_vscr(env, vscr); 482 } 483 484 uint32_t helper_mfvscr(CPUPPCState *env) 485 { 486 return ppc_get_vscr(env); 487 } 488 489 static inline void set_vscr_sat(CPUPPCState *env) 490 { 491 /* The choice of non-zero value is arbitrary. 
*/ 492 env->vscr_sat.u32[0] = 1; 493 } 494 495 /* vprtybq */ 496 void helper_VPRTYBQ(ppc_avr_t *r, ppc_avr_t *b, uint32_t v) 497 { 498 uint64_t res = b->u64[0] ^ b->u64[1]; 499 res ^= res >> 32; 500 res ^= res >> 16; 501 res ^= res >> 8; 502 r->VsrD(1) = res & 1; 503 r->VsrD(0) = 0; 504 } 505 506 #define VARITHFP(suffix, func) \ 507 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 508 ppc_avr_t *b) \ 509 { \ 510 int i; \ 511 \ 512 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 513 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 514 } \ 515 } 516 VARITHFP(addfp, float32_add) 517 VARITHFP(subfp, float32_sub) 518 VARITHFP(minfp, float32_min) 519 VARITHFP(maxfp, float32_max) 520 #undef VARITHFP 521 522 #define VARITHFPFMA(suffix, type) \ 523 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 524 ppc_avr_t *b, ppc_avr_t *c) \ 525 { \ 526 int i; \ 527 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 528 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 529 type, &env->vec_status); \ 530 } \ 531 } 532 VARITHFPFMA(maddfp, 0); 533 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 534 #undef VARITHFPFMA 535 536 #define VARITHSAT_CASE(type, op, cvt, element) \ 537 { \ 538 type result = (type)a->element[i] op (type)b->element[i]; \ 539 r->element[i] = cvt(result, &sat); \ 540 } 541 542 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 543 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 544 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 545 { \ 546 int sat = 0; \ 547 int i; \ 548 \ 549 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 550 VARITHSAT_CASE(optype, op, cvt, element); \ 551 } \ 552 if (sat) { \ 553 vscr_sat->u32[0] = 1; \ 554 } \ 555 } 556 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 557 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 558 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 559 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 
560 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 561 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 562 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 563 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 564 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 565 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 566 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 567 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 568 #undef VARITHSAT_CASE 569 #undef VARITHSAT_DO 570 #undef VARITHSAT_SIGNED 571 #undef VARITHSAT_UNSIGNED 572 573 #define VAVG(name, element, etype) \ 574 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\ 575 { \ 576 int i; \ 577 \ 578 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 579 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 580 r->element[i] = x >> 1; \ 581 } \ 582 } 583 584 VAVG(VAVGSB, s8, int16_t) 585 VAVG(VAVGUB, u8, uint16_t) 586 VAVG(VAVGSH, s16, int32_t) 587 VAVG(VAVGUH, u16, uint32_t) 588 VAVG(VAVGSW, s32, int64_t) 589 VAVG(VAVGUW, u32, uint64_t) 590 #undef VAVG 591 592 #define VABSDU(name, element) \ 593 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\ 594 { \ 595 int i; \ 596 \ 597 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 598 r->element[i] = (a->element[i] > b->element[i]) ? 
\ 599 (a->element[i] - b->element[i]) : \ 600 (b->element[i] - a->element[i]); \ 601 } \ 602 } 603 604 /* 605 * VABSDU - Vector absolute difference unsigned 606 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 607 * element - element type to access from vector 608 */ 609 VABSDU(VABSDUB, u8) 610 VABSDU(VABSDUH, u16) 611 VABSDU(VABSDUW, u32) 612 #undef VABSDU 613 614 #define VCF(suffix, cvt, element) \ 615 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 616 ppc_avr_t *b, uint32_t uim) \ 617 { \ 618 int i; \ 619 \ 620 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 621 float32 t = cvt(b->element[i], &env->vec_status); \ 622 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 623 } \ 624 } 625 VCF(ux, uint32_to_float32, u32) 626 VCF(sx, int32_to_float32, s32) 627 #undef VCF 628 629 #define VCMPNEZ(NAME, ELEM) \ 630 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \ 631 { \ 632 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \ 633 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \ 634 (a->ELEM[i] != b->ELEM[i])) ? 
-1 : 0; \ 635 } \ 636 } 637 VCMPNEZ(VCMPNEZB, u8) 638 VCMPNEZ(VCMPNEZH, u16) 639 VCMPNEZ(VCMPNEZW, u32) 640 #undef VCMPNEZ 641 642 #define VCMPFP_DO(suffix, compare, order, record) \ 643 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 644 ppc_avr_t *a, ppc_avr_t *b) \ 645 { \ 646 uint32_t ones = (uint32_t)-1; \ 647 uint32_t all = ones; \ 648 uint32_t none = 0; \ 649 int i; \ 650 \ 651 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 652 uint32_t result; \ 653 FloatRelation rel = \ 654 float32_compare_quiet(a->f32[i], b->f32[i], \ 655 &env->vec_status); \ 656 if (rel == float_relation_unordered) { \ 657 result = 0; \ 658 } else if (rel compare order) { \ 659 result = ones; \ 660 } else { \ 661 result = 0; \ 662 } \ 663 r->u32[i] = result; \ 664 all &= result; \ 665 none |= result; \ 666 } \ 667 if (record) { \ 668 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 669 } \ 670 } 671 #define VCMPFP(suffix, compare, order) \ 672 VCMPFP_DO(suffix, compare, order, 0) \ 673 VCMPFP_DO(suffix##_dot, compare, order, 1) 674 VCMPFP(eqfp, ==, float_relation_equal) 675 VCMPFP(gefp, !=, float_relation_less) 676 VCMPFP(gtfp, ==, float_relation_greater) 677 #undef VCMPFP_DO 678 #undef VCMPFP 679 680 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 681 ppc_avr_t *a, ppc_avr_t *b, int record) 682 { 683 int i; 684 int all_in = 0; 685 686 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 687 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 688 &env->vec_status); 689 if (le_rel == float_relation_unordered) { 690 r->u32[i] = 0xc0000000; 691 all_in = 1; 692 } else { 693 float32 bneg = float32_chs(b->f32[i]); 694 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, 695 &env->vec_status); 696 int le = le_rel != float_relation_greater; 697 int ge = ge_rel != float_relation_less; 698 699 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 700 all_in |= (!le | !ge); 701 } 702 } 703 if (record) { 704 env->crf[6] = (all_in == 0) << 1; 705 } 706 } 707 708 
/* vcmpbfp: bounds comparison without updating CR6 (record = 0). */
void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

/* vcmpbfp.: bounds comparison that also updates CR6 (record = 1). */
void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

/*
 * Float-to-integer conversion with saturation: each f32 element is
 * scaled by 2^uim, converted with round-toward-zero on a local copy of
 * the FP status, and saturated via 'satcvt'; NaNs convert to 0.  VSCR.SAT
 * is set if any element saturated.
 */
#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            if (float32_is_any_nan(b->f32[i])) {                        \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f32[i], &s);          \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            set_vscr_sat(env);                                          \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

/* Masked multiply-sum kernel used by the xviger() GER helpers below. */
typedef int64_t do_ger(uint32_t, uint32_t, uint32_t);

/* Rank 8: eight signed 4-bit x signed 4-bit products, gated by mask bits. */
static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask)
{
    int64_t psum = 0;
    for (int i = 0; i < 8; i++, mask >>= 1) {
        if (mask & 1) {
            psum += (int64_t)sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4);
        }
    }
    return psum;
}

/* Rank 4: four signed 8-bit (a) x unsigned 8-bit (b) products. */
static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask)
{
    int64_t psum = 0;
    for (int i = 0; i < 4; i++, mask >>= 1) {
        if (mask & 1) {
            psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8);
        }
    }
    return psum;
}

/* Rank 2: two signed 16-bit x signed 16-bit products. */
static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask)
{
    int64_t psum = 0;
    for (int i = 0; i < 2; i++, mask >>= 1) {
        if (mask & 1) {
            psum += (int64_t)sextract32(a, 16 * i, 16) *
                             sextract32(b, 16 * i, 16);
        }
    }
    return psum;
}

static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at,
785 uint32_t mask, bool sat, bool acc, do_ger ger) 786 { 787 uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK), 788 xmsk = FIELD_EX32(mask, GER_MSK, XMSK), 789 ymsk = FIELD_EX32(mask, GER_MSK, YMSK); 790 uint8_t xmsk_bit, ymsk_bit; 791 int64_t psum; 792 int i, j; 793 for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) { 794 for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) { 795 if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) { 796 psum = ger(a->VsrW(i), b->VsrW(j), pmsk); 797 if (acc) { 798 psum += at[i].VsrSW(j); 799 } 800 if (sat && psum > INT32_MAX) { 801 set_vscr_sat(env); 802 at[i].VsrSW(j) = INT32_MAX; 803 } else if (sat && psum < INT32_MIN) { 804 set_vscr_sat(env); 805 at[i].VsrSW(j) = INT32_MIN; 806 } else { 807 at[i].VsrSW(j) = (int32_t) psum; 808 } 809 } else { 810 at[i].VsrSW(j) = 0; 811 } 812 } 813 } 814 } 815 816 QEMU_FLATTEN 817 void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 818 ppc_acc_t *at, uint32_t mask) 819 { 820 xviger(env, a, b, at, mask, false, false, ger_rank8); 821 } 822 823 QEMU_FLATTEN 824 void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 825 ppc_acc_t *at, uint32_t mask) 826 { 827 xviger(env, a, b, at, mask, false, true, ger_rank8); 828 } 829 830 QEMU_FLATTEN 831 void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 832 ppc_acc_t *at, uint32_t mask) 833 { 834 xviger(env, a, b, at, mask, false, false, ger_rank4); 835 } 836 837 QEMU_FLATTEN 838 void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 839 ppc_acc_t *at, uint32_t mask) 840 { 841 xviger(env, a, b, at, mask, false, true, ger_rank4); 842 } 843 844 QEMU_FLATTEN 845 void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 846 ppc_acc_t *at, uint32_t mask) 847 { 848 xviger(env, a, b, at, mask, true, true, ger_rank4); 849 } 850 851 QEMU_FLATTEN 852 void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 853 ppc_acc_t *at, uint32_t mask) 854 { 855 xviger(env, a, b, at, mask, false, 
false, ger_rank2); 856 } 857 858 QEMU_FLATTEN 859 void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 860 ppc_acc_t *at, uint32_t mask) 861 { 862 xviger(env, a, b, at, mask, true, false, ger_rank2); 863 } 864 865 QEMU_FLATTEN 866 void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 867 ppc_acc_t *at, uint32_t mask) 868 { 869 xviger(env, a, b, at, mask, false, true, ger_rank2); 870 } 871 872 QEMU_FLATTEN 873 void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 874 ppc_acc_t *at, uint32_t mask) 875 { 876 xviger(env, a, b, at, mask, true, true, ger_rank2); 877 } 878 879 target_ulong helper_vclzlsbb(ppc_avr_t *r) 880 { 881 target_ulong count = 0; 882 int i; 883 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 884 if (r->VsrB(i) & 0x01) { 885 break; 886 } 887 count++; 888 } 889 return count; 890 } 891 892 target_ulong helper_vctzlsbb(ppc_avr_t *r) 893 { 894 target_ulong count = 0; 895 int i; 896 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 897 if (r->VsrB(i) & 0x01) { 898 break; 899 } 900 count++; 901 } 902 return count; 903 } 904 905 void helper_VMHADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 906 ppc_avr_t *b, ppc_avr_t *c) 907 { 908 int sat = 0; 909 int i; 910 911 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 912 int32_t prod = a->s16[i] * b->s16[i]; 913 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 914 915 r->s16[i] = cvtswsh(t, &sat); 916 } 917 918 if (sat) { 919 set_vscr_sat(env); 920 } 921 } 922 923 void helper_VMHRADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 924 ppc_avr_t *b, ppc_avr_t *c) 925 { 926 int sat = 0; 927 int i; 928 929 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 930 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 931 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 932 r->s16[i] = cvtswsh(t, &sat); 933 } 934 935 if (sat) { 936 set_vscr_sat(env); 937 } 938 } 939 940 void helper_VMLADDUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c, 941 uint32_t v) 942 { 943 int i; 944 945 for (i = 
0; i < ARRAY_SIZE(r->s16); i++) { 946 int32_t prod = a->s16[i] * b->s16[i]; 947 r->s16[i] = (int16_t) (prod + c->s16[i]); 948 } 949 } 950 951 #define VMRG_DO(name, element, access, ofs) \ 952 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 953 { \ 954 ppc_avr_t result; \ 955 int i, half = ARRAY_SIZE(r->element) / 2; \ 956 \ 957 for (i = 0; i < half; i++) { \ 958 result.access(i * 2 + 0) = a->access(i + ofs); \ 959 result.access(i * 2 + 1) = b->access(i + ofs); \ 960 } \ 961 *r = result; \ 962 } 963 964 #define VMRG(suffix, element, access) \ 965 VMRG_DO(mrgl##suffix, element, access, half) \ 966 VMRG_DO(mrgh##suffix, element, access, 0) 967 VMRG(b, u8, VsrB) 968 VMRG(h, u16, VsrH) 969 VMRG(w, u32, VsrW) 970 #undef VMRG_DO 971 #undef VMRG 972 973 void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 974 { 975 int32_t prod[16]; 976 int i; 977 978 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 979 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 980 } 981 982 VECTOR_FOR_INORDER_I(i, s32) { 983 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 984 prod[4 * i + 2] + prod[4 * i + 3]; 985 } 986 } 987 988 void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 989 { 990 int32_t prod[8]; 991 int i; 992 993 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 994 prod[i] = a->s16[i] * b->s16[i]; 995 } 996 997 VECTOR_FOR_INORDER_I(i, s32) { 998 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 999 } 1000 } 1001 1002 void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1003 ppc_avr_t *b, ppc_avr_t *c) 1004 { 1005 int32_t prod[8]; 1006 int i; 1007 int sat = 0; 1008 1009 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1010 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1011 } 1012 1013 VECTOR_FOR_INORDER_I(i, s32) { 1014 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1015 1016 r->u32[i] = cvtsdsw(t, &sat); 1017 } 1018 1019 if (sat) { 1020 set_vscr_sat(env); 1021 } 1022 } 1023 1024 void helper_VMSUMUBM(ppc_avr_t 
*r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1025 { 1026 uint16_t prod[16]; 1027 int i; 1028 1029 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1030 prod[i] = a->u8[i] * b->u8[i]; 1031 } 1032 1033 VECTOR_FOR_INORDER_I(i, u32) { 1034 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1035 prod[4 * i + 2] + prod[4 * i + 3]; 1036 } 1037 } 1038 1039 void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1040 { 1041 uint32_t prod[8]; 1042 int i; 1043 1044 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1045 prod[i] = a->u16[i] * b->u16[i]; 1046 } 1047 1048 VECTOR_FOR_INORDER_I(i, u32) { 1049 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1050 } 1051 } 1052 1053 void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1054 ppc_avr_t *b, ppc_avr_t *c) 1055 { 1056 uint32_t prod[8]; 1057 int i; 1058 int sat = 0; 1059 1060 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1061 prod[i] = a->u16[i] * b->u16[i]; 1062 } 1063 1064 VECTOR_FOR_INORDER_I(i, s32) { 1065 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1066 1067 r->u32[i] = cvtuduw(t, &sat); 1068 } 1069 1070 if (sat) { 1071 set_vscr_sat(env); 1072 } 1073 } 1074 1075 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 1076 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1077 { \ 1078 int i; \ 1079 \ 1080 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1081 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 1082 (cast)b->mul_access(i); \ 1083 } \ 1084 } 1085 1086 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 1087 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1088 { \ 1089 int i; \ 1090 \ 1091 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1092 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1093 (cast)b->mul_access(i + 1); \ 1094 } \ 1095 } 1096 1097 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1098 VMUL_DO_EVN(MULE##suffix, mul_element, 
mul_access, prod_access, cast) \
    VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
VMUL(SB, s8, VsrSB, VsrSH, int16_t)
VMUL(SH, s16, VsrSH, VsrSW, int32_t)
VMUL(SW, s32, VsrSW, VsrSD, int64_t)
VMUL(UB, u8, VsrB, VsrH, uint16_t)
VMUL(UH, u16, VsrH, VsrW, uint32_t)
VMUL(UW, u32, VsrW, VsrD, uint64_t)
#undef VMUL_DO_EVN
#undef VMUL_DO_ODD
#undef VMUL

/*
 * XXPERMX: permute bytes from the s0:s1 register pair under control of
 * 'pcv'.  A control byte only selects a source byte when its top three
 * bits equal 'uim'; all other destination bytes remain zero.  Indexes
 * 0..15 select from s0, 16..31 from s1.
 */
void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
                    target_ulong uim)
{
    int i, idx;
    ppc_vsr_t tmp = { .u64 = {0, 0} };

    for (i = 0; i < ARRAY_SIZE(t->u8); i++) {
        if ((pcv->VsrB(i) >> 5) == uim) {
            idx = pcv->VsrB(i) & 0x1f;
            if (idx < ARRAY_SIZE(t->u8)) {
                tmp.VsrB(i) = s0->VsrB(idx);
            } else {
                tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8));
            }
        }
    }

    *t = tmp;
}

/*
 * VDIVSQ: 128-bit signed divide.  Divide by zero and the overflowing
 * INT128_MIN / -1 case are undefined per the ISA; we return the
 * dividend unchanged.
 */
void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    Int128 neg1 = int128_makes64(-1);
    Int128 int128_min = int128_make128(0, INT64_MIN);
    if (likely(int128_nz(b->s128) &&
               (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
        t->s128 = int128_divs(a->s128, b->s128);
    } else {
        t->s128 = a->s128; /* Undefined behavior */
    }
}

/* VDIVUQ: 128-bit unsigned divide; divide by zero returns the dividend. */
void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    if (int128_nz(b->s128)) {
        t->s128 = int128_divu(a->s128, b->s128);
    } else {
        t->s128 = a->s128; /* Undefined behavior */
    }
}

/*
 * VDIVESD: for each doubleword, divide the 128-bit value (a:0) by b
 * via divs128().  Divide by zero and INT64_MIN / -1 are undefined; the
 * dividend element is returned unchanged in those cases.
 */
void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int64_t high;
    uint64_t low;
    for (i = 0; i < 2; i++) {
        high = a->s64[i];
        low = 0;
        if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) {
            t->s64[i] = a->s64[i]; /* Undefined behavior */
        } else {
            divs128(&low, &high, b->s64[i]);
            t->s64[i] = low;
        }
    }
}

/* VDIVEUD: unsigned counterpart of VDIVESD, using divu128(). */
void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t high, low;
    for (i = 0; i < 2; i++) {
        high = a->u64[i];
        low = 0;
        if (unlikely(!b->u64[i])) {
            t->u64[i] = a->u64[i]; /* Undefined behavior */
        } else {
            divu128(&low, &high, b->u64[i]);
            t->u64[i] = low;
        }
    }
}

/*
 * VDIVESQ: divide the 256-bit value (a:0) by b via divs256().  Divide
 * by zero and INT128_MIN / -1 are undefined; the dividend is returned.
 */
void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    Int128 high, low;
    Int128 int128_min = int128_make128(0, INT64_MIN);
    Int128 neg1 = int128_makes64(-1);

    high = a->s128;
    low = int128_zero();
    if (unlikely(!int128_nz(b->s128) ||
                 (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) {
        t->s128 = a->s128; /* Undefined behavior */
    } else {
        divs256(&low, &high, b->s128);
        t->s128 = low;
    }
}

/* VDIVEUQ: unsigned counterpart of VDIVESQ, using divu256(). */
void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    Int128 high, low;

    high = a->s128;
    low = int128_zero();
    if (unlikely(!int128_nz(b->s128))) {
        t->s128 = a->s128; /* Undefined behavior */
    } else {
        divu256(&low, &high, b->s128);
        t->s128 = low;
    }
}

/*
 * VMODSQ: 128-bit signed remainder; the undefined cases (divide by
 * zero, INT128_MIN / -1) return zero.
 */
void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    Int128 neg1 = int128_makes64(-1);
    Int128 int128_min = int128_make128(0, INT64_MIN);
    if (likely(int128_nz(b->s128) &&
               (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
        t->s128 = int128_rems(a->s128, b->s128);
    } else {
        t->s128 = int128_zero(); /* Undefined behavior */
    }
}

/* VMODUQ: 128-bit unsigned remainder; divide by zero returns zero. */
void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    if (likely(int128_nz(b->s128))) {
        t->s128 = int128_remu(a->s128, b->s128);
    } else {
        t->s128 = int128_zero(); /* Undefined behavior */
    }
}

/*
 * VPERM: each control byte of c selects one byte from the a:b pair
 * (bit 0x10 of the control chooses b) into the result.
 */
void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++)
    {
        int s = c->VsrB(i) & 0x1f;
        int index = s & 0xf;

        if (s & 0x10) {
            /* selector bit 0x10 picks from b, otherwise from a */
            result.VsrB(i) = b->VsrB(index);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

/*
 * Vector Permute Right-indexed: like VPERM but the byte index is
 * mirrored (15 - index) and the a/b selection is inverted.
 */
void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = 15 - (s & 0xf);

        if (s & 0x10) {
            result.VsrB(i) = a->VsrB(index);
        } else {
            result.VsrB(i) = b->VsrB(index);
        }
    }
    *r = result;
}

/*
 * XXGENPCV helpers: build a permute control vector from the MSB of each
 * SZ-byte element of b, in expanded (_exp) or compressed (_comp) form,
 * for big-endian (_be) and little-endian (_le) element order.
 */
#define XXGENPCV_BE_EXP(NAME, SZ) \
void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
{ \
    ppc_vsr_t tmp; \
 \
    /* Initialize tmp with the result of an all-zeros mask */ \
    tmp.VsrD(0) = 0x1011121314151617; \
    tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \
 \
    /* Iterate over the most significant byte of each element */ \
    for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
        if (b->VsrB(i) & 0x80) { \
            /* Update each byte of the element */ \
            for (int k = 0; k < SZ; k++) { \
                tmp.VsrB(i + k) = j + k; \
            } \
            j += SZ; \
        } \
    } \
 \
    *t = tmp; \
}

#define XXGENPCV_BE_COMP(NAME, SZ) \
void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
{ \
    ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
 \
    /* Iterate over the most significant byte of each element */ \
    for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
        if (b->VsrB(i) & 0x80) { \
            /* Update each byte of the element */ \
            for (int k = 0; k < SZ; k++) { \
                tmp.VsrB(j + k) = i + k; \
            } \
            j += SZ; \
        } \
    } \
 \
    *t = tmp; \
}

#define XXGENPCV_LE_EXP(NAME, SZ) \
void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
{ \
    ppc_vsr_t tmp; \
 \
    /* Initialize tmp with the result of an all-zeros mask */ \
    tmp.VsrD(0) = 0x1F1E1D1C1B1A1918; \
    tmp.VsrD(1) = 0x1716151413121110; \
 \
    /* Iterate over the most significant byte of each element */ \
    for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
        /* Reverse indexing of "i" */ \
        const int idx = ARRAY_SIZE(b->u8) - i - SZ; \
        if (b->VsrB(idx) & 0x80) { \
            /* Update each byte of the element */ \
            for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \
                tmp.VsrB(idx + rk) = j + k; \
            } \
            j += SZ; \
        } \
    } \
 \
    *t = tmp; \
}

#define XXGENPCV_LE_COMP(NAME, SZ) \
void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
{ \
    ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
 \
    /* Iterate over the most significant byte of each element */ \
    for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
        if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) { \
            /* Update each byte of the element */ \
            for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \
                /* Reverse indexing of "j" */ \
                const int idx = ARRAY_SIZE(b->u8) - j - SZ; \
                tmp.VsrB(idx + rk) = i + k; \
            } \
            j += SZ; \
        } \
    } \
 \
    *t = tmp; \
}

#define XXGENPCV(NAME, SZ) \
    XXGENPCV_BE_EXP(NAME, SZ) \
    XXGENPCV_BE_COMP(NAME, SZ) \
    XXGENPCV_LE_EXP(NAME, SZ) \
    XXGENPCV_LE_COMP(NAME, SZ) \

XXGENPCV(XXGENPCVBM, 1)
XXGENPCV(XXGENPCVHM, 2)
XXGENPCV(XXGENPCVWM, 4)
XXGENPCV(XXGENPCVDM, 8)

#undef XXGENPCV_BE_EXP
#undef XXGENPCV_BE_COMP
#undef XXGENPCV_LE_EXP
#undef XXGENPCV_LE_COMP
#undef XXGENPCV

/* Host-endian-dependent byte/doubleword indexing for vbpermd/vbpermq */
#if HOST_BIG_ENDIAN
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#endif
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->VsrD(i), 63 - index, 1))

/*
 * Vector Bit Permute Doubleword: for each doubleword of a, gather the
 * bits selected by the eight index bytes of b into the low byte of the
 * corresponding result doubleword.  Indices >= 64 contribute 0.
 */
void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}

/*
 * Vector Bit Permute Quadword: gather 16 bits of a, selected by the 16
 * index bytes of b, into the high doubleword of r.  Indices >= 128
 * contribute 0; the low doubleword of r is cleared.
 */
void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63 - (index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->VsrD(0) = perm;
    r->VsrD(1) = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

/*
 * Vector Polynomial Multiply-Sum: carry-less (GF(2)) multiply of each
 * pair of source elements, XOR-folding adjacent products into each
 * double-width target element.
 */
#define PMSUM(name, srcfld, trgfld, trgtyp) \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
{ \
    int i, j; \
    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
 \
    VECTOR_FOR_INORDER_I(i, srcfld) { \
        prod[i] = 0; \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
            if (a->srcfld[i] & (1ull << j)) { \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
            } \
        } \
    } \
 \
    VECTOR_FOR_INORDER_I(i, trgfld) { \
        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
    } \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

/*
 * Vector Polynomial Multiply-Sum Doubleword: 64x64 carry-less multiply
 * of each doubleword pair, with the two 128-bit products XORed together.
 */
void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    Int128 tmp, prod[2] = {int128_zero(), int128_zero()};

    for (j = 0; j < 64; j++) {
        for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
            if (a->VsrD(i) & (1ull << j)) {
                tmp = int128_make64(b->VsrD(i));
                tmp = int128_lshift(tmp, j);
                prod[i] = int128_xor(prod[i], tmp);
            }
        }
    }

    r->s128 = int128_xor(prod[0], prod[1]);
}

#if HOST_BIG_ENDIAN
#define PKBIG 1
#else
#define PKBIG 0
#endif
/*
 * Vector Pack Pixel: pack each 32-bit pixel of a and b to 16 bits,
 * keeping the low bit of the top byte plus the 5 MSBs of each of the
 * three lower bytes; a supplies the high half of r, b the low half.
 */
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if HOST_BIG_ENDIAN
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
                                     ((e >> 6) & 0x3e0) |
                                     ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

/*
 * Vector Pack: narrow each element of a (high half of r) and b (low
 * half) with the given conversion; cvt may saturate, in which case
 * VSCR[SAT] is set when dosat is nonzero.
 */
#define VPK(suffix, from, to, cvt, dosat) \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
                            ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i; \
        int sat = 0; \
        ppc_avr_t result; \
        ppc_avr_t *a0 = PKBIG ? a : b; \
        ppc_avr_t *a1 = PKBIG ? b : a; \
 \
        VECTOR_FOR_INORDER_I(i, from) { \
            result.to[i] = cvt(a0->from[i], &sat); \
            result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
        } \
        *r = result; \
        if (dosat && sat) { \
            set_vscr_sat(env); \
        } \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(sdss, s64, s32, cvtsdsw, 1)
VPK(sdus, s64, u32, cvtsduw, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(udus, u64, u32, cvtuduw, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
VPK(udum, u64, u32, I, 0)
#undef I
#undef VPK
#undef PKBIG

/* Vector Reciprocal Estimate FP: 1.0 / b[i] for each f32 element. */
void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
    }
}

/*
 * Vector Round to FP Integer: round each f32 element with the given
 * rounding mode (using a scratch float_status so the mode change does
 * not leak into env->vec_status).
 */
#define VRFI(suffix, rounding) \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
                             ppc_avr_t *b) \
    { \
        int i; \
        float_status s = env->vec_status; \
 \
        set_float_rounding_mode(rounding, &s); \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
            r->f32[i] = float32_round_to_int (b->f32[i], &s); \
        } \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI

/* Vector Reciprocal Square Root Estimate FP: 1.0 / sqrt(b[i]). */
void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        float32 t = float32_sqrt(b->f32[i], &env->vec_status);

        r->f32[i] = float32_div(float32_one, t, &env->vec_status);
    }
}

/*
 * Vector Rotate Left then Mask (Insert): rotate each element of a by
 * the shift field of b and apply the begin/end mask from b; with
 * insert, unmasked bits are taken from the previous contents of r.
 */
#define VRLMI(name, size, element, insert) \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
{ \
    int i; \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
        uint##size##_t src1 = a->element[i]; \
        uint##size##_t src2 = b->element[i]; \
        uint##size##_t src3 = r->element[i]; \
        uint##size##_t begin, end, shift, mask, rot_val; \
 \
        shift = extract##size(src2, 0, 6); \
        end = extract##size(src2, 8, 6); \
        begin = extract##size(src2, 16, 6); \
        rot_val = rol##size(src1, shift); \
        mask = mask_u##size(begin, end); \
        if (insert) { \
            r->element[i] = (rot_val & mask) | (src3 & ~mask); \
        } else { \
            r->element[i] = (rot_val & mask); \
        } \
    } \
}

VRLMI(VRLDMI, 64, u64, 1);
VRLMI(VRLWMI, 32, u32, 1);
VRLMI(VRLDNM, 64, u64, 0);
VRLMI(VRLWNM, 32, u32, 0);

/* Vector 2 Raised to the Exponent Estimate FP: 2**b[i] per element. */
void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
    }
}

/* Vector Log Base 2 Estimate FP: log2(b[i]) per element. */
void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
    }
}

/*
 * Vector Extract Unsigned (Left/Right-indexed): extract a size-bit
 * field from the 128-bit value of b at the byte offset in ra.
 */
#define VEXTU_X_DO(name, size, left) \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
{ \
    int index = (a & 0xf) * 8; \
    if (left) { \
        index = 128 - index - size; \
    } \
    return int128_getlo(int128_rshift(b->s128, index)) & \
        MAKE_64BIT_MASK(0, size); \
}
VEXTU_X_DO(vextublx, 8, 1)
VEXTU_X_DO(vextuhlx, 16, 1)
VEXTU_X_DO(vextuwlx, 32, 1)
VEXTU_X_DO(vextubrx, 8, 0)
VEXTU_X_DO(vextuhrx, 16, 0)
VEXTU_X_DO(vextuwrx, 32, 0)
#undef VEXTU_X_DO

/*
 * Vector Shift Left Variable: each result byte is the corresponding
 * byte of a shifted left by 0-7 bits, with bits shifted in from the
 * next byte of a (zero for the last byte).
 */
void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->VsrB(i) & 0x7;               /* extract shift value */
        bytes = (a->VsrB(i) <<
                 8) +                         /* extract adjacent bytes */
                     (((i + 1) < size) ? a->VsrB(i + 1) : 0);
        r->VsrB(i) = (bytes << shift) >> 8;   /* shift and store result */
    }
}

/*
 * Vector Shift Right Variable: mirror of vslv, with bits shifted in
 * from the previous byte of a (zero for the first byte).
 */
void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /*
     * Use reverse order, as destination and source register can be
     * same. Its being modified in place saving temporary, reverse
     * order will guarantee that computed result is not fed back.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->VsrB(i) & 0x7;               /* extract shift value */
        bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
                                                /* extract adjacent bytes */
        r->VsrB(i) = (bytes >> shift) & 0xFF;   /* shift and store result */
    }
}

/*
 * Vector Shift Left Double by Octet Immediate: r is bytes sh..sh+15 of
 * the 32-byte concatenation of a and b.
 */
void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.VsrB(i) = b->VsrB(index - 0x10);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

/*
 * Vector Shift Left by Octet: shift a left by the byte count held in
 * bits 121:124 of b, zero-filling on the right.
 */
void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if HOST_BIG_ENDIAN
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

/* Host address of the element at guest byte index IDX (size SIZE). */
#if HOST_BIG_ENDIAN
#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
#else
#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
#endif

/*
 * Vector Insert Element (left-indexed): store val as a TYPE at byte
 * index 'index' of t.  Out-of-range indices leave t unchanged and log
 * a guest error.
 */
#define VINSX(SUFFIX, TYPE) \
void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \
                                         uint64_t val, target_ulong index) \
{ \
    const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \
    target_long idx = index; \
 \
    if (idx < 0 || idx > maxidx) { \
        idx = idx < 0 ? sizeof(TYPE) - idx : idx; \
        qemu_log_mask(LOG_GUEST_ERROR, \
            "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \
            ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \
    } else { \
        TYPE src = val; \
        memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \
    } \
}
VINSX(B, uint8_t)
VINSX(H, uint16_t)
VINSX(W, uint32_t)
VINSX(D, uint64_t)
#undef ELEM_ADDR
#undef VINSX
/*
 * Vector Extract Double to VSR using GPR index, Left-indexed: copy
 * SIZE bytes at byte offset idx of the concatenation of a and b into
 * the middle of t, which is otherwise zeroed.  Out-of-range indices
 * leave t zeroed and log a guest error.
 */
#if HOST_BIG_ENDIAN
#define VEXTDVLX(NAME, SIZE) \
void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
                   target_ulong index) \
{ \
    const target_long idx = index; \
    ppc_avr_t tmp[2] = { *a, *b }; \
    memset(t, 0, sizeof(*t)); \
    if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
        memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
    } else { \
        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
                      TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
                      env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
    } \
}
#else
#define VEXTDVLX(NAME, SIZE) \
void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
                   target_ulong index) \
{ \
    const target_long idx = index; \
    ppc_avr_t tmp[2] = { *b, *a }; \
    memset(t, 0, sizeof(*t)); \
    if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
        memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \
               (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \
    } else { \
        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
                      TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
                      env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
    } \
}
#endif
VEXTDVLX(VEXTDUBVLX, 1)
VEXTDVLX(VEXTDUHVLX, 2)
VEXTDVLX(VEXTDUWVLX, 4)
VEXTDVLX(VEXTDDVLX, 8)
#undef VEXTDVLX
/*
 * Vector Extract Unsigned: move the element at byte offset 'index' of
 * b into the low bytes of the high doubleword of r, zeroing the rest.
 */
#if HOST_BIG_ENDIAN
#define VEXTRACT(suffix, element) \
void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
{ \
    uint32_t es = sizeof(r->element[0]); \
    memmove(&r->u8[8 - es], &b->u8[index], es); \
    memset(&r->u8[8], 0, 8); \
    memset(&r->u8[0], 0, 8 - es); \
}
#else
#define VEXTRACT(suffix, element) \
void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
{ \
    uint32_t es = sizeof(r->element[0]); \
    uint32_t s = (16 - index) - es; \
    memmove(&r->u8[8], &b->u8[s], es); \
    memset(&r->u8[0], 0, 8); \
    memset(&r->u8[8 + es], 0, 8 - es); \
}
#endif
VEXTRACT(ub, u8)
VEXTRACT(uh, u16)
VEXTRACT(uw, u32)
VEXTRACT(d, u64)
#undef VEXTRACT

/*
 * Vector String Isolate (Left/Right-justified): copy elements of b up
 * to (not including) the first zero element, zero the remainder, and
 * return CR field 0b0010 if a terminating zero element was found.
 */
#define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
{ \
    int i, idx, crf = 0; \
 \
    for (i = 0; i < NUM_ELEMS; i++) { \
        idx = LEFT ? i : NUM_ELEMS - i - 1; \
        if (b->Vsr##ELEM(idx)) { \
            t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \
        } else { \
            crf = 0b0010; \
            break; \
        } \
    } \
 \
    for (; i < NUM_ELEMS; i++) { \
        idx = LEFT ? \
i : NUM_ELEMS - i - 1; \
        t->Vsr##ELEM(idx) = 0; \
    } \
 \
    return crf; \
}
VSTRI(VSTRIBL, B, 16, true)
VSTRI(VSTRIBR, B, 16, false)
VSTRI(VSTRIHL, H, 8, true)
VSTRI(VSTRIHR, H, 8, false)
#undef VSTRI

/*
 * VSX Vector Extract Unsigned Word: copy the word at byte offset
 * 'index' of xb (wrapping modulo 16) into word element 1 of xt,
 * zeroing the other elements.
 */
void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
{
    ppc_vsr_t t = { };
    size_t es = sizeof(uint32_t);
    uint32_t ext_index;
    int i;

    ext_index = index;
    for (i = 0; i < es; i++, ext_index++) {
        t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
    }

    *xt = t;
}

/*
 * VSX Vector Insert Word: copy word element 1 of xb into xt at byte
 * offset 'index', leaving the other bytes of xt unchanged.  Bytes that
 * would fall past the end of the register are dropped.
 */
void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
{
    ppc_vsr_t t = *xt;
    size_t es = sizeof(uint32_t);
    int ins_index, i = 0;

    ins_index = index;
    for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
        t.VsrB(ins_index) = xb->VsrB(8 - es + i);
    }

    *xt = t;
}

/*
 * VSX Vector Evaluate: compute an arbitrary three-operand bitwise
 * function of a, b and c, selected by the 8-bit truth table imm.
 */
void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
                   uint32_t desc)
{
    /*
     * Instead of processing imm bit-by-bit, we'll skip the computation of
     * conjunctions whose corresponding bit is unset.
     */
    int bit, imm = simd_data(desc);
    Int128 conj, disj = int128_zero();

    /* Iterate over set bits from the least to the most significant bit */
    while (imm) {
        /*
         * Get the next bit to be processed with ctzl. Invert the result of
         * ctzl to match the indexing used by PowerISA.
         */
        bit = 7 - ctzl(imm);
        if (bit & 0x4) {
            conj = a->s128;
        } else {
            conj = int128_not(a->s128);
        }
        if (bit & 0x2) {
            conj = int128_and(conj, b->s128);
        } else {
            conj = int128_and(conj, int128_not(b->s128));
        }
        if (bit & 0x1) {
            conj = int128_and(conj, c->s128);
        } else {
            conj = int128_and(conj, int128_not(c->s128));
        }
        disj = int128_or(disj, conj);

        /* Unset the least significant bit that is set */
        imm &= imm - 1;
    }

    t->s128 = disj;
}

/*
 * VSX Vector Blend: per element, pick from b when the sign bit of the
 * matching element of c is set, otherwise from a.
 */
#define XXBLEND(name, sz) \
void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
                                 ppc_avr_t *c, uint32_t desc) \
{ \
    for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \
        t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \
            b->glue(u, sz)[i] : a->glue(u, sz)[i]; \
    } \
}
XXBLEND(B, 8)
XXBLEND(H, 16)
XXBLEND(W, 32)
XXBLEND(D, 64)
#undef XXBLEND

/*
 * Vector Shift Right by Octet: shift a right by the byte count held in
 * bits 121:124 of b, zero-filling on the left.
 */
void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if HOST_BIG_ENDIAN
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

/*
 * Vector Sum Across Signed Word Saturate: sum all words of a plus the
 * last word of b into the last word of r (saturating); other words of
 * r are zeroed.
 */
void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

    upper = ARRAY_SIZE(r->s32) - 1;
    t = (int64_t)b->VsrSW(upper);
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->VsrSW(i);
        result.VsrSW(i) = 0;
    }
    result.VsrSW(upper) = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        set_vscr_sat(env);
    }
}

/*
 * Vector Sum Across Half Signed Word Saturate: per doubleword, sum the
 * two words of a plus the odd word of b into the odd word of r
 * (saturating); the even words of r are zeroed.
 */
void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

    upper = 1;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->VsrSW(upper + i * 2);

        result.VsrD(i) = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->VsrSW(2 * i + j);
        }
        result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        set_vscr_sat(env);
    }
}

/*
 * Vector Sum Across Quarter Signed Byte Saturate: per word, sum the
 * four signed bytes of a plus the word of b, saturating into r.
 */
void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

/*
 * Vector Sum Across Half Signed Halfword Saturate: per word, sum the
 * two signed halfwords of a plus the word of b, saturating into r.
 */
void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

/*
 * Vector Sum Across Quarter Unsigned Byte Saturate: per word, sum the
 * four unsigned bytes of a plus the word of b, saturating into r.
 */
void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

/* Which half of the source register vupk{h,l}* unpacks, per host order */
#if HOST_BIG_ENDIAN
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
/*
 * Vector Unpack Pixel: expand each 16-bit 1:5:5:5 pixel of the chosen
 * half of b into a 32-bit 8:8:8:8 pixel (alpha replicated from bit 15).
 * Note the locals a/r/g/b intentionally shadow the parameters here.
 */
#define VUPKPX(suffix, hi) \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
    { \
        int i; \
        ppc_avr_t result; \
 \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
            uint16_t e = b->u16[hi ? i : i + 4]; \
            uint8_t a = (e >> 15) ? 0xff : 0; \
            uint8_t r = (e >> 10) & 0x1f; \
            uint8_t g = (e >> 5) & 0x1f; \
            uint8_t b = e & 0x1f; \
 \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
        } \
        *r = result; \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

/*
 * Vector Unpack: sign-extend each element of the high (hi) or low half
 * of b into a double-width element of r.
 */
#define VUPK(suffix, unpacked, packee, hi) \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
    { \
        int i; \
        ppc_avr_t result; \
 \
        if (hi) { \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
                result.unpacked[i] = b->packee[i]; \
            } \
        } else { \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) { \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            } \
        } \
        *r = result; \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(hsw, s64, s32, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
VUPK(lsw, s64, s32, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

/* Apply a unary per-element operation 'name' to every element of b. */
#define VGENERIC_DO(name, element) \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
    { \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            r->element[i] = name(b->element[i]); \
        } \
    }

/* Count leading zeros of a byte/halfword via the 32-bit primitive */
#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)

#undef clzb
#undef clzh

/* Count trailing zeros per element width (zero input yields the width) */
#define ctzb(v) ((v) ? ctz32(v) : 8)
#define ctzh(v) ((v) ? \
ctz32(v) : 16)
#define ctzw(v) ctz32((v))
#define ctzd(v) ctz64((v))

VGENERIC_DO(ctzb, u8)
VGENERIC_DO(ctzh, u16)
VGENERIC_DO(ctzw, u32)
VGENERIC_DO(ctzd, u64)

#undef ctzb
#undef ctzh
#undef ctzw
#undef ctzd

/* Population count per element width */
#define popcntb(v) ctpop8(v)
#define popcnth(v) ctpop16(v)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)

VGENERIC_DO(popcntb, u8)
VGENERIC_DO(popcnth, u16)
VGENERIC_DO(popcntw, u32)
VGENERIC_DO(popcntd, u64)

#undef popcntb
#undef popcnth
#undef popcntw
#undef popcntd

#undef VGENERIC_DO

/* Vector Add Unsigned Quadword Modulo: r = a + b (mod 2^128). */
void helper_VADDUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    r->s128 = int128_add(a->s128, b->s128);
}

/* Vector Add Extended: r = a + b + (carry-in from bit 0 of c). */
void helper_VADDEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    r->s128 = int128_add(int128_add(a->s128, b->s128),
                         int128_make64(int128_getlo(c->s128) & 1));
}

/*
 * Vector Add and Write Carry-Out: the carry of a + b, i.e. whether
 * b > ~a, written to the low doubleword of r.
 */
void helper_VADDCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    r->VsrD(1) = int128_ult(int128_not(a->s128), b->s128);
    r->VsrD(0) = 0;
}

/*
 * Vector Add Extended and Write Carry-Out: the carry of a + b + cin.
 * The extra case is when a + b == 2^128 - 1 exactly, where cin alone
 * generates the carry.
 */
void helper_VADDECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    bool carry_out = int128_ult(int128_not(a->s128), b->s128),
         carry_in = int128_getlo(c->s128) & 1;

    if (!carry_out && carry_in) {
        carry_out = (int128_nz(a->s128) || int128_nz(b->s128)) &&
                    int128_eq(int128_add(a->s128, b->s128), int128_makes64(-1));
    }

    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
}

/* Vector Subtract Unsigned Quadword Modulo: r = a - b (mod 2^128). */
void helper_VSUBUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    r->s128 = int128_sub(a->s128, b->s128);
}

/* Vector Subtract Extended: r = a + ~b + (carry-in from bit 0 of c). */
void helper_VSUBEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    r->s128 = int128_add(int128_add(a->s128, int128_not(b->s128)),
                         int128_make64(int128_getlo(c->s128) & 1));
}

/*
 * Vector Subtract and Write Carry-Out: carry of a + ~b + 1, i.e. the
 * "no borrow" indicator of a - b.
 */
void helper_VSUBCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    Int128 tmp = int128_not(b->s128);

    r->VsrD(1) = int128_ult(int128_not(a->s128), tmp) ||
                 int128_eq(int128_add(a->s128, tmp), int128_makes64(-1));
    r->VsrD(0) = 0;
}

/* Vector Subtract Extended and Write Carry-Out: carry of a + ~b + cin. */
void helper_VSUBECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    Int128 tmp = int128_not(b->s128);
    bool carry_out = int128_ult(int128_not(a->s128), tmp),
         carry_in = int128_getlo(c->s128) & 1;

    r->VsrD(1) = carry_out || (carry_in && int128_eq(int128_add(a->s128, tmp),
                                                     int128_makes64(-1)));
    r->VsrD(0) = 0;
}

/* BCD sign nibble encodings (signed and zoned formats) */
#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

/* Byte of the register holding BCD digit n (two digits per byte) */
#define BCD_DIG_BYTE(n) (15 - ((n) / 2))

/*
 * Decode the sign nibble of a BCD value: 1 for plus, -1 for minus,
 * 0 for an invalid sign code.
 */
static int bcd_get_sgn(ppc_avr_t *bcd)
{
    switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
    case BCD_PLUS_PREF_1:
    case BCD_PLUS_PREF_2:
    case BCD_PLUS_ALT_1:
    case BCD_PLUS_ALT_2:
    {
        return 1;
    }

    case BCD_NEG_PREF:
    case BCD_NEG_ALT:
    {
        return -1;
    }

    default:
    {
        return 0;
    }
    }
}

/* Preferred sign nibble for the given sign; ps selects the plus code. */
static int bcd_preferred_sgn(int sgn, int ps)
{
    if (sgn >= 0) {
        return (ps == 0) ?
BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
    } else {
        return BCD_NEG_PREF;
    }
}

/*
 * Extract BCD digit n (digit 0 is the sign nibble); sets *invalid when
 * the nibble is not a decimal digit.
 */
static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
{
    uint8_t result;
    if (n & 1) {
        result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
    } else {
        result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
    }

    if (unlikely(result > 9)) {
        *invalid = true;
    }
    return result;
}

/* Store BCD digit n (odd digits are the high nibble of their byte). */
static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
{
    if (n & 1) {
        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
        bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
    } else {
        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
        bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
    }
}

/* True if the sign nibble and all 31 digits of bcd are valid. */
static bool bcd_is_valid(ppc_avr_t *bcd)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(bcd) == 0) {
        return false;
    }

    for (i = 1; i < 32; i++) {
        bcd_get_digit(bcd, i, &invalid);
        if (unlikely(invalid)) {
            return false;
        }
    }
    return true;
}

/* CR field for a signed BCD value compared against zero. */
static int bcd_cmp_zero(ppc_avr_t *bcd)
{
    if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
        return CRF_EQ;
    } else {
        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
    }
}

/* Accessors for "national" format: 16-bit digits, index 0 is the sign. */
static uint16_t get_national_digit(ppc_avr_t *reg, int n)
{
    return reg->VsrH(7 - n);
}

static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
{
    reg->VsrH(7 - n) = val;
}

/* Compare BCD magnitudes digit by digit: 1, -1 or 0 (also 0 if invalid). */
static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;
    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}

/*
 * Add the magnitudes of a and b into t (digits 1..31).  Returns
 * nonzero if the result is zero; *overflow is the final carry.
 */
static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
    return is_zero;
}

/*
 * Subtract the magnitude of b from a into t; *overflow is the final
 * borrow (callers arrange a >= b so it is normally 0).
 */
static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                        int *overflow)
{
    int carry = 0;
    int i;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
                        bcd_get_digit(b, i, invalid) + carry;
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
}

/*
 * Decimal Add Modulo: signed BCD a + b, returning the CR field
 * (LT/GT/EQ, SO on overflow or invalid operands).
 */
uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{

    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    int zero = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64
= { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            /* Same signs: add magnitudes, keep the sign of a */
            result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? CRF_GT : CRF_LT;
        } else {
            /* Opposite signs: subtract the smaller magnitude */
            int magnitude = bcd_cmp_mag(a, b);
            if (magnitude > 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
                bcd_sub_mag(&result, a, b, &invalid, &overflow);
                cr = (sgna > 0) ? CRF_GT : CRF_LT;
            } else if (magnitude < 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
                bcd_sub_mag(&result, b, a, &invalid, &overflow);
                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
            } else {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
                cr = CRF_EQ;
            }
        }
    }

    if (unlikely(invalid)) {
        result.VsrD(0) = result.VsrD(1) = -1;
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    } else if (zero) {
        cr |= CRF_EQ;
    }

    *r = result;

    return cr;
}

/* Decimal Subtract Modulo: a - b, implemented as a + (-b). */
uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

/*
 * Decimal Convert From National: convert the 7-digit national-format
 * value in b to signed BCD.  Returns the CR field; SO on invalid input.
 */
uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

/*
 * Decimal Convert To National: convert signed BCD b to national
 * format.  Digits beyond the 7 that fit set SO (overflow).
 */
uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    /* Any digit above the low 7 cannot be represented: overflow */
    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

/*
 * Decimal Convert From Zoned: convert the 16-digit zoned value in b to
 * signed BCD; ps selects EBCDIC (0xF) or ASCII (0x3) zone handling.
 */
uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ?
0xF : 0x3;  /* completes the zone_lead initializer begun above */
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    /* High nibble of byte 0 carries the zoned sign code. */
    int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        /* Byte 0 holds the sign in its zone, so treat it as zone_lead. */
        zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
        digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    /* Choose the BCD sign digit from the zoned sign code. */
    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
        (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

/*
 * bcdctz: convert signed BCD in *b to zoned decimal format in *r.
 * ps selects the zone nibble (0xF0 vs 0x30) and the sign encoding.
 */
uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    /* Overflow if any source digit beyond the 16 that fit is non-zero. */
    int ox_flag = ((b->VsrD(0) >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
    }

    /* Overstrike the sign into the zone of the low-order digit. */
    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

/**
 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
 *
 * Returns:
 * > 0 if ahi|alo > bhi|blo,
 * 0 if ahi|alo == bhi|blo,
 * < 0 if ahi|alo < bhi|blo
 */
static inline int ucmp128(uint64_t alo, uint64_t ahi,
                          uint64_t blo, uint64_t bhi)
{
    return (ahi == bhi) ?
        (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
        (ahi > bhi ? 1 : -1);
}

/*
 * bcdcfsq: convert a signed 128-bit binary integer in *b to signed BCD
 * in *r.  Returns LT/EQ/GT from the sign of the source, with SO or'ed
 * in when the magnitude does not fit in 31 decimal digits (in which
 * case *r is left unchanged).
 */
uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr;
    uint64_t lo_value;
    uint64_t hi_value;
    uint64_t rem;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->VsrSD(0) < 0) {
        /* Negative: take the two's-complement magnitude. */
        lo_value = -b->VsrSD(1);
        hi_value = ~b->VsrD(0) + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);

        cr = CRF_LT;
    } else {
        lo_value = b->VsrD(1);
        hi_value = b->VsrD(0);
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);

        if (hi_value == 0 && lo_value == 0) {
            cr = CRF_EQ;
        } else {
            cr = CRF_GT;
        }
    }

    /*
     * Check src limits: abs(src) <= 10^31 - 1
     *
     * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
     */
    if (ucmp128(lo_value, hi_value,
                0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
        cr |= CRF_SO;

        /*
         * According to the ISA, if src wouldn't fit in the destination
         * register, the result is undefined.
         * In that case, we leave r unchanged.
         */
    } else {
        /* One 10^15 division splits the value into two digit groups. */
        rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);

        /* Low 15 digits come from the remainder... */
        for (i = 1; i < 16; rem /= 10, i++) {
            bcd_put_digit(&ret, rem % 10, i);
        }

        /* ...remaining digits from the (now 64-bit) quotient. */
        for (; i < 32; lo_value /= 10, i++) {
            bcd_put_digit(&ret, lo_value % 10, i);
        }

        *r = ret;
    }

    return cr;
}

/*
 * bcdctsq: convert signed BCD in *b to a signed 128-bit binary integer
 * in *r.  Returns LT/EQ/GT against zero, or SO on invalid digits.
 */
uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    /* Horner evaluation: fold digits 31..1 as value = value*10 + digit. */
    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        /* Negate the 128-bit magnitude for a negative sign digit. */
        r->VsrSD(1) = -lo_value;
        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
    } else {
        r->VsrSD(1) = lo_value;
        r->VsrSD(0) = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}

/*
 * bcdcpsgn: copy the magnitude of *a into *r with the sign digit taken
 * from *b.  SO if either sign code is invalid or any digit of either
 * source is invalid.
 */
uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);

    /* Validate every digit of both sources. */
    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}

/*
 * bcdsetsgn: rewrite the sign digit of *b with the preferred sign code
 * for its current sign; the magnitude is copied unchanged.
 */
uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}

/*
 * bcds: BCD shift.  The shift count is the signed byte 7 of *a
 * (positive = left, negative = right), clamped to +/-31 digits.
 * SO is or'ed in when a left shift drops non-zero digits.
 */
uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i = a->VsrSB(7);
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;          /* clear the sign nibble before shifting */

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

/*
 * bcdus: unsigned BCD shift.  All 32 nibbles are digits (no sign); the
 * shift count is the signed byte 7 of *a.
 */
uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSB(7);
    if (i >= 32) {
        /* Full left shift: result is zero and overflow is flagged. */
        ox_flag = true;
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i <= -32) {
        /* Full right shift: result is zero, no overflow. */
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

/*
 * bcdsr: BCD shift and round.  Like bcds, but a right shift rounds the
 * result upward when the digit shifted into the sign position is >= 5.
 */
uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;          /* clear the sign nibble before shifting */

    int i = a->VsrSB(7);
    ppc_avr_t bcd_one;

    /* BCD constant 1, used as the rounding increment. */
    bcd_one.VsrD(0) = 0;
    bcd_one.VsrD(1) = 0x10;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);

        /* Round up when the digit shifted into the sign nibble is >= 5. */
        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}

/*
 * bcdtrunc: truncate the signed BCD value in *b to the digit count in
 * halfword 3 of *a; SO is or'ed in when non-zero digits are discarded.
 */
uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
    int i = a->VsrSH(3) + 1;      /* +1 keeps the sign nibble in the mask */
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i >= 0 && i <= 16) {
        /*
         * NOTE(review): when VsrSH(3) is -1, i == 0 and the shift count
         * below becomes 64, which is undefined behavior in C — confirm
         * whether that operand value is reachable/architecturally valid.
         */
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}

/*
 * bcdutrunc: truncate the unsigned BCD value in *b to the digit count
 * in halfword 3 of *a.  Returns EQ/GT against zero with SO or'ed in
 * when non-zero digits are discarded, or SO alone on invalid input.
 */
uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    /* All 32 nibbles are digits in the unsigned form; validate them. */
    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSH(3);
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    } else if (i == 0) {
        /* Truncation to zero digits: result is zero. */
        if (ret.VsrD(0) || ret.VsrD(1)) {
            ox_flag = CRF_SO;
        }
        ret.VsrD(0) = ret.VsrD(1) = 0;
    }

    *r = ret;
    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}

/* vsbox: apply the AES S-box to each byte of *a. */
void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

/*
 * vcipher: one AES encryption round — ShiftRows (AES_shifts), SubBytes
 * and MixColumns via the combined AES_Te tables — xor'ed with the round
 * key in *b.
 */
void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.VsrW(i) = b->VsrW(i) ^
            (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}

/* vcipherlast: final AES encryption round (no MixColumns step). */
void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
    }
    *r = result;
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07. The RTL is */
    /* incorrect and will be fixed in V2.07B.
     */
    int i;
    ppc_avr_t tmp;

    /* Inverse ShiftRows (AES_ishifts) + inverse SubBytes (AES_isbox),
     * xor'ed with the round key in *b... */
    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
    }

    /* ...then InvMixColumns via the AES_imc lookup tables. */
    VECTOR_FOR_INORDER_I(i, u32) {
        r->VsrW(i) =
            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
            AES_imc[tmp.VsrB(4 * i + 3)][3];
    }
}

/* vncipherlast: final AES decryption round (no InvMixColumns step). */
void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
    }
    *r = result;
}

/*
 * vshasigmaw: SHA-256 sigma functions on each 32-bit word of *a.
 * Bit 4 of st_six selects lower-case sigma (st == 0) vs upper-case
 * Sigma (st == 1); bit i of the low nibble selects sigma0 vs sigma1
 * for word i.  Rotation/shift amounts match the SHA-256 definitions.
 */
void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                /* sigma0: ROTR7 ^ ROTR18 ^ SHR3 */
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                /* sigma1: ROTR17 ^ ROTR19 ^ SHR10 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                /* Sigma0: ROTR2 ^ ROTR13 ^ ROTR22 */
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                /* Sigma1: ROTR6 ^ ROTR11 ^ ROTR25 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}

/*
 * vshasigmad: SHA-512 sigma functions on each 64-bit doubleword of *a,
 * selected like vshasigmaw but using bit 2*i of 'six' per doubleword.
 */
void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                /* sigma0: ROTR1 ^ ROTR8 ^ SHR7 */
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                /* sigma1: ROTR19 ^ ROTR61 ^ SHR6 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                /* Sigma0: ROTR28 ^ ROTR34 ^ ROTR39 */
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                /* Sigma1: ROTR14 ^ ROTR18 ^ ROTR41 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}

/*
 * vpermxor: for each byte of the selector *c, xor the *a byte indexed
 * by the high nibble with the *b byte indexed by the low nibble.
 */
void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
/* hbrev[n] is the 4-bit bit-reversal of nibble n. */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Reverse the bit order of a byte using the nibble table. */
static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

/* Reverse the bit order of a 32-bit word. */
static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
/*
 * brinc: bit-reversed increment of the low MASKBITS bits of arg1 under
 * the mask bits of arg2; bits outside the mask are passed through.
 */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    /* Add 1 in bit-reversed order: reverse, increment, reverse back. */
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

/* Count leading bits equal to bit 31 (count of leading sign bits). */
uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

/* Count leading zero bits of a 32-bit value. */
uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
3114 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 3115 target_ulong low, uint32_t update_Rc) 3116 { 3117 target_ulong mask; 3118 int i; 3119 3120 i = 1; 3121 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3122 if ((high & mask) == 0) { 3123 if (update_Rc) { 3124 env->crf[0] = 0x4; 3125 } 3126 goto done; 3127 } 3128 i++; 3129 } 3130 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3131 if ((low & mask) == 0) { 3132 if (update_Rc) { 3133 env->crf[0] = 0x8; 3134 } 3135 goto done; 3136 } 3137 i++; 3138 } 3139 i = 8; 3140 if (update_Rc) { 3141 env->crf[0] = 0x2; 3142 } 3143 done: 3144 env->xer = (env->xer & ~0x7F) | i; 3145 if (update_Rc) { 3146 env->crf[0] |= xer_so; 3147 } 3148 return i; 3149 } 3150