1 /* 2 * PowerPC integer and vector emulation helpers for QEMU. 3 * 4 * Copyright (c) 2003-2007 Jocelyn Mayer 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "cpu.h" 22 #include "internal.h" 23 #include "qemu/host-utils.h" 24 #include "qemu/main-loop.h" 25 #include "qemu/log.h" 26 #include "exec/helper-proto.h" 27 #include "crypto/aes.h" 28 #include "fpu/softfloat.h" 29 #include "qapi/error.h" 30 #include "qemu/guest-random.h" 31 #include "tcg/tcg-gvec-desc.h" 32 33 #include "helper_regs.h" 34 /*****************************************************************************/ 35 /* Fixed point operations helpers */ 36 37 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov) 38 { 39 if (unlikely(ov)) { 40 env->so = env->ov = env->ov32 = 1; 41 } else { 42 env->ov = env->ov32 = 0; 43 } 44 } 45 46 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, 47 uint32_t oe) 48 { 49 uint64_t rt = 0; 50 int overflow = 0; 51 52 uint64_t dividend = (uint64_t)ra << 32; 53 uint64_t divisor = (uint32_t)rb; 54 55 if (unlikely(divisor == 0)) { 56 overflow = 1; 57 } else { 58 rt = dividend / divisor; 59 overflow = rt > UINT32_MAX; 60 } 61 62 if (unlikely(overflow)) { 63 rt = 0; /* Undefined */ 64 } 65 66 if (oe) { 67 helper_update_ov_legacy(env, overflow); 68 } 69 70 return (target_ulong)rt; 71 } 72 73 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, 74 uint32_t oe) 75 { 76 int64_t rt = 0; 77 int overflow = 0; 78 79 int64_t dividend = (int64_t)ra << 32; 80 int64_t divisor = (int64_t)((int32_t)rb); 81 82 if (unlikely((divisor == 0) || 83 ((divisor == -1ull) && (dividend == INT64_MIN)))) { 84 overflow = 1; 85 } else { 86 rt = dividend / divisor; 87 overflow = rt != (int32_t)rt; 88 } 89 90 if (unlikely(overflow)) { 91 rt = 0; /* Undefined */ 92 } 93 94 if (oe) { 95 helper_update_ov_legacy(env, overflow); 96 } 97 98 return (target_ulong)rt; 99 } 100 101 #if defined(TARGET_PPC64) 102 103 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) 104 { 105 uint64_t rt = 0; 106 int overflow = 0; 107 108 if (unlikely(rb == 0 || ra >= rb)) { 109 overflow = 1; 110 rt = 0; /* Undefined */ 111 } else { 112 divu128(&rt, &ra, rb); 113 } 114 115 if (oe) { 116 helper_update_ov_legacy(env, overflow); 117 } 118 119 return rt; 120 } 121 122 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) 123 { 124 uint64_t rt = 0; 125 int64_t ra = (int64_t)rau; 126 int64_t rb = (int64_t)rbu; 127 int overflow = 0; 128 129 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) { 130 overflow = 1; 131 rt = 0; /* Undefined */ 132 } else { 133 divs128(&rt, &ra, rb); 134 } 135 136 if (oe) { 137 helper_update_ov_legacy(env, overflow); 138 } 139 140 return rt; 141 } 142 143 #endif 144 145 146 #if defined(TARGET_PPC64) 147 /* if x = 0xab, returns 0xababababababababa */ 148 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff)) 149 150 /* 151 * subtract 1 from each byte, and with inverse, check if MSB is set at each 152 * byte. 153 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80 154 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 155 */ 156 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 157 158 /* When you XOR the pattern and there is a match, that byte will be zero */ 159 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 160 161 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 162 { 163 return hasvalue(rb, ra) ? CRF_GT : 0; 164 } 165 166 #undef pattern 167 #undef haszero 168 #undef hasvalue 169 170 /* 171 * Return a random number. 172 */ 173 uint64_t helper_darn32(void) 174 { 175 Error *err = NULL; 176 uint32_t ret; 177 178 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 179 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 180 error_get_pretty(err)); 181 error_free(err); 182 return -1; 183 } 184 185 return ret; 186 } 187 188 uint64_t helper_darn64(void) 189 { 190 Error *err = NULL; 191 uint64_t ret; 192 193 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 194 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 195 error_get_pretty(err)); 196 error_free(err); 197 return -1; 198 } 199 200 return ret; 201 } 202 203 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 204 { 205 int i; 206 uint64_t ra = 0; 207 208 for (i = 0; i < 8; i++) { 209 int index = (rs >> (i * 8)) & 0xFF; 210 if (index < 64) { 211 if (rb & PPC_BIT(index)) { 212 ra |= 1 << i; 213 } 214 } 215 } 216 return ra; 217 } 218 219 #endif 220 221 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 222 { 223 target_ulong mask = 0xff; 224 target_ulong ra = 0; 225 int i; 226 227 for (i = 0; i < sizeof(target_ulong); i++) { 228 if ((rs & mask) == (rb & mask)) { 229 ra |= mask; 230 } 231 mask <<= 8; 232 } 233 return ra; 234 } 235 236 /* shift right arithmetic helper */ 237 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 238 target_ulong shift) 239 { 240 int32_t ret; 241 242 if (likely(!(shift & 0x20))) { 243 if (likely((uint32_t)shift != 0)) { 244 shift &= 0x1f; 245 ret = (int32_t)value >> shift; 246 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 247 env->ca32 = env->ca = 0; 248 } else { 249 env->ca32 = env->ca = 1; 250 } 251 } else { 252 ret = (int32_t)value; 253 env->ca32 = env->ca = 0; 254 } 255 } else { 256 ret = (int32_t)value >> 31; 257 env->ca32 = env->ca = (ret != 0); 258 } 259 return (target_long)ret; 260 } 261 262 #if defined(TARGET_PPC64) 263 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 264 target_ulong shift) 265 { 266 int64_t ret; 267 268 if (likely(!(shift & 0x40))) { 269 if (likely((uint64_t)shift != 0)) { 270 shift &= 0x3f; 271 ret = (int64_t)value >> shift; 272 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 273 env->ca32 = env->ca = 0; 274 } else { 275 env->ca32 = env->ca = 1; 276 } 277 } else { 278 ret = (int64_t)value; 279 env->ca32 = env->ca = 0; 280 } 281 } else { 282 ret = (int64_t)value >> 63; 283 env->ca32 = env->ca = (ret != 0); 284 } 285 return ret; 286 } 287 #endif 288 289 #if defined(TARGET_PPC64) 290 target_ulong helper_popcntb(target_ulong val) 291 { 292 /* Note that we don't fold past bytes */ 293 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 294 0x5555555555555555ULL); 295 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 296 0x3333333333333333ULL); 297 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 298 0x0f0f0f0f0f0f0f0fULL); 299 return val; 300 } 301 302 target_ulong helper_popcntw(target_ulong val) 303 { 304 /* Note that we don't fold past words. */ 305 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 306 0x5555555555555555ULL); 307 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 308 0x3333333333333333ULL); 309 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 310 0x0f0f0f0f0f0f0f0fULL); 311 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 312 0x00ff00ff00ff00ffULL); 313 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 314 0x0000ffff0000ffffULL); 315 return val; 316 } 317 #else 318 target_ulong helper_popcntb(target_ulong val) 319 { 320 /* Note that we don't fold past bytes */ 321 val = (val & 0x55555555) + ((val >> 1) & 0x55555555); 322 val = (val & 0x33333333) + ((val >> 2) & 0x33333333); 323 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); 324 return val; 325 } 326 #endif 327 328 uint64_t helper_CFUGED(uint64_t src, uint64_t mask) 329 { 330 /* 331 * Instead of processing the mask bit-by-bit from the most significant to 332 * the least significant bit, as described in PowerISA, we'll handle it in 333 * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use 334 * ctz or cto, we negate the mask at the end of the loop. 335 */ 336 target_ulong m, left = 0, right = 0; 337 unsigned int n, i = 64; 338 bool bit = false; /* tracks if we are processing zeros or ones */ 339 340 if (mask == 0 || mask == -1) { 341 return src; 342 } 343 344 /* Processes the mask in blocks, from LSB to MSB */ 345 while (i) { 346 /* Find how many bits we should take */ 347 n = ctz64(mask); 348 if (n > i) { 349 n = i; 350 } 351 352 /* 353 * Extracts 'n' trailing bits of src and put them on the leading 'n' 354 * bits of 'right' or 'left', pushing down the previously extracted 355 * values. 356 */ 357 m = (1ll << n) - 1; 358 if (bit) { 359 right = ror64(right | (src & m), n); 360 } else { 361 left = ror64(left | (src & m), n); 362 } 363 364 /* 365 * Discards the processed bits from 'src' and 'mask'. Note that we are 366 * removing 'n' trailing zeros from 'mask', but the logical shift will 367 * add 'n' leading zeros back, so the population count of 'mask' is kept 368 * the same. 369 */ 370 src >>= n; 371 mask >>= n; 372 i -= n; 373 bit = !bit; 374 mask = ~mask; 375 } 376 377 /* 378 * At the end, right was ror'ed ctpop(mask) times. To put it back in place, 379 * we'll shift it more 64-ctpop(mask) times. 380 */ 381 if (bit) { 382 n = ctpop64(mask); 383 } else { 384 n = 64 - ctpop64(mask); 385 } 386 387 return left | (right >> n); 388 } 389 390 uint64_t helper_PDEPD(uint64_t src, uint64_t mask) 391 { 392 int i, o; 393 uint64_t result = 0; 394 395 if (mask == -1) { 396 return src; 397 } 398 399 for (i = 0; mask != 0; i++) { 400 o = ctz64(mask); 401 mask &= mask - 1; 402 result |= ((src >> i) & 1) << o; 403 } 404 405 return result; 406 } 407 408 uint64_t helper_PEXTD(uint64_t src, uint64_t mask) 409 { 410 int i, o; 411 uint64_t result = 0; 412 413 if (mask == -1) { 414 return src; 415 } 416 417 for (o = 0; mask != 0; o++) { 418 i = ctz64(mask); 419 mask &= mask - 1; 420 result |= ((src >> i) & 1) << o; 421 } 422 423 return result; 424 } 425 426 /*****************************************************************************/ 427 /* Altivec extension helpers */ 428 #if HOST_BIG_ENDIAN 429 #define VECTOR_FOR_INORDER_I(index, element) \ 430 for (index = 0; index < ARRAY_SIZE(r->element); index++) 431 #else 432 #define VECTOR_FOR_INORDER_I(index, element) \ 433 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--) 434 #endif 435 436 /* Saturating arithmetic helpers. */ 437 #define SATCVT(from, to, from_type, to_type, min, max) \ 438 static inline to_type cvt##from##to(from_type x, int *sat) \ 439 { \ 440 to_type r; \ 441 \ 442 if (x < (from_type)min) { \ 443 r = min; \ 444 *sat = 1; \ 445 } else if (x > (from_type)max) { \ 446 r = max; \ 447 *sat = 1; \ 448 } else { \ 449 r = x; \ 450 } \ 451 return r; \ 452 } 453 #define SATCVTU(from, to, from_type, to_type, min, max) \ 454 static inline to_type cvt##from##to(from_type x, int *sat) \ 455 { \ 456 to_type r; \ 457 \ 458 if (x > (from_type)max) { \ 459 r = max; \ 460 *sat = 1; \ 461 } else { \ 462 r = x; \ 463 } \ 464 return r; \ 465 } 466 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX) 467 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX) 468 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX) 469 470 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX) 471 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX) 472 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX) 473 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX) 474 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX) 475 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX) 476 #undef SATCVT 477 #undef SATCVTU 478 479 void helper_mtvscr(CPUPPCState *env, uint32_t vscr) 480 { 481 ppc_store_vscr(env, vscr); 482 } 483 484 uint32_t helper_mfvscr(CPUPPCState *env) 485 { 486 return ppc_get_vscr(env); 487 } 488 489 static inline void set_vscr_sat(CPUPPCState *env) 490 { 491 /* The choice of non-zero value is arbitrary. */ 492 env->vscr_sat.u32[0] = 1; 493 } 494 495 /* vprtybq */ 496 void helper_VPRTYBQ(ppc_avr_t *r, ppc_avr_t *b, uint32_t v) 497 { 498 uint64_t res = b->u64[0] ^ b->u64[1]; 499 res ^= res >> 32; 500 res ^= res >> 16; 501 res ^= res >> 8; 502 r->VsrD(1) = res & 1; 503 r->VsrD(0) = 0; 504 } 505 506 #define VARITHFP(suffix, func) \ 507 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 508 ppc_avr_t *b) \ 509 { \ 510 int i; \ 511 \ 512 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 513 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 514 } \ 515 } 516 VARITHFP(addfp, float32_add) 517 VARITHFP(subfp, float32_sub) 518 VARITHFP(minfp, float32_min) 519 VARITHFP(maxfp, float32_max) 520 #undef VARITHFP 521 522 #define VARITHFPFMA(suffix, type) \ 523 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 524 ppc_avr_t *b, ppc_avr_t *c) \ 525 { \ 526 int i; \ 527 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 528 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 529 type, &env->vec_status); \ 530 } \ 531 } 532 VARITHFPFMA(maddfp, 0); 533 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 534 #undef VARITHFPFMA 535 536 #define VARITHSAT_CASE(type, op, cvt, element) \ 537 { \ 538 type result = (type)a->element[i] op (type)b->element[i]; \ 539 r->element[i] = cvt(result, &sat); \ 540 } 541 542 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 543 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 544 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 545 { \ 546 int sat = 0; \ 547 int i; \ 548 \ 549 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 550 VARITHSAT_CASE(optype, op, cvt, element); \ 551 } \ 552 if (sat) { \ 553 vscr_sat->u32[0] = 1; \ 554 } \ 555 } 556 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 557 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 558 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 559 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 560 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 561 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 562 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 563 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 564 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 565 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 566 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 567 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 568 #undef VARITHSAT_CASE 569 #undef VARITHSAT_DO 570 #undef VARITHSAT_SIGNED 571 #undef VARITHSAT_UNSIGNED 572 573 #define VAVG(name, element, etype) \ 574 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\ 575 { \ 576 int i; \ 577 \ 578 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 579 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 580 r->element[i] = x >> 1; \ 581 } \ 582 } 583 584 VAVG(VAVGSB, s8, int16_t) 585 VAVG(VAVGUB, u8, uint16_t) 586 VAVG(VAVGSH, s16, int32_t) 587 VAVG(VAVGUH, u16, uint32_t) 588 VAVG(VAVGSW, s32, int64_t) 589 VAVG(VAVGUW, u32, uint64_t) 590 #undef VAVG 591 592 #define VABSDU_DO(name, element) \ 593 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 594 { \ 595 int i; \ 596 \ 597 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 598 r->element[i] = (a->element[i] > b->element[i]) ? \ 599 (a->element[i] - b->element[i]) : \ 600 (b->element[i] - a->element[i]); \ 601 } \ 602 } 603 604 /* 605 * VABSDU - Vector absolute difference unsigned 606 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 607 * element - element type to access from vector 608 */ 609 #define VABSDU(type, element) \ 610 VABSDU_DO(absdu##type, element) 611 VABSDU(b, u8) 612 VABSDU(h, u16) 613 VABSDU(w, u32) 614 #undef VABSDU_DO 615 #undef VABSDU 616 617 #define VCF(suffix, cvt, element) \ 618 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 619 ppc_avr_t *b, uint32_t uim) \ 620 { \ 621 int i; \ 622 \ 623 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 624 float32 t = cvt(b->element[i], &env->vec_status); \ 625 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 626 } \ 627 } 628 VCF(ux, uint32_to_float32, u32) 629 VCF(sx, int32_to_float32, s32) 630 #undef VCF 631 632 #define VCMPNEZ(NAME, ELEM) \ 633 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \ 634 { \ 635 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \ 636 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \ 637 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \ 638 } \ 639 } 640 VCMPNEZ(VCMPNEZB, u8) 641 VCMPNEZ(VCMPNEZH, u16) 642 VCMPNEZ(VCMPNEZW, u32) 643 #undef VCMPNEZ 644 645 #define VCMPFP_DO(suffix, compare, order, record) \ 646 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 647 ppc_avr_t *a, ppc_avr_t *b) \ 648 { \ 649 uint32_t ones = (uint32_t)-1; \ 650 uint32_t all = ones; \ 651 uint32_t none = 0; \ 652 int i; \ 653 \ 654 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 655 uint32_t result; \ 656 FloatRelation rel = \ 657 float32_compare_quiet(a->f32[i], b->f32[i], \ 658 &env->vec_status); \ 659 if (rel == float_relation_unordered) { \ 660 result = 0; \ 661 } else if (rel compare order) { \ 662 result = ones; \ 663 } else { \ 664 result = 0; \ 665 } \ 666 r->u32[i] = result; \ 667 all &= result; \ 668 none |= result; \ 669 } \ 670 if (record) { \ 671 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 672 } \ 673 } 674 #define VCMPFP(suffix, compare, order) \ 675 VCMPFP_DO(suffix, compare, order, 0) \ 676 VCMPFP_DO(suffix##_dot, compare, order, 1) 677 VCMPFP(eqfp, ==, float_relation_equal) 678 VCMPFP(gefp, !=, float_relation_less) 679 VCMPFP(gtfp, ==, float_relation_greater) 680 #undef VCMPFP_DO 681 #undef VCMPFP 682 683 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 684 ppc_avr_t *a, ppc_avr_t *b, int record) 685 { 686 int i; 687 int all_in = 0; 688 689 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 690 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 691 &env->vec_status); 692 if (le_rel == float_relation_unordered) { 693 r->u32[i] = 0xc0000000; 694 all_in = 1; 695 } else { 696 float32 bneg = float32_chs(b->f32[i]); 697 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, 698 &env->vec_status); 699 int le = le_rel != float_relation_greater; 700 int ge = ge_rel != float_relation_less; 701 702 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 703 all_in |= (!le | !ge); 704 } 705 } 706 if (record) { 707 env->crf[6] = (all_in == 0) << 1; 708 } 709 } 710 711 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 712 { 713 vcmpbfp_internal(env, r, a, b, 0); 714 } 715 716 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 717 ppc_avr_t *b) 718 { 719 vcmpbfp_internal(env, r, a, b, 1); 720 } 721 722 #define VCT(suffix, satcvt, element) \ 723 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 724 ppc_avr_t *b, uint32_t uim) \ 725 { \ 726 int i; \ 727 int sat = 0; \ 728 float_status s = env->vec_status; \ 729 \ 730 set_float_rounding_mode(float_round_to_zero, &s); \ 731 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 732 if (float32_is_any_nan(b->f32[i])) { \ 733 r->element[i] = 0; \ 734 } else { \ 735 float64 t = float32_to_float64(b->f32[i], &s); \ 736 int64_t j; \ 737 \ 738 t = float64_scalbn(t, uim, &s); \ 739 j = float64_to_int64(t, &s); \ 740 r->element[i] = satcvt(j, &sat); \ 741 } \ 742 } \ 743 if (sat) { \ 744 set_vscr_sat(env); \ 745 } \ 746 } 747 VCT(uxs, cvtsduw, u32) 748 VCT(sxs, cvtsdsw, s32) 749 #undef VCT 750 751 typedef int64_t do_ger(uint32_t, uint32_t, uint32_t); 752 753 static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask) 754 { 755 int64_t psum = 0; 756 for (int i = 0; i < 8; i++, mask >>= 1) { 757 if (mask & 1) { 758 psum += (int64_t)sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4); 759 } 760 } 761 return psum; 762 } 763 764 static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask) 765 { 766 int64_t psum = 0; 767 for (int i = 0; i < 4; i++, mask >>= 1) { 768 if (mask & 1) { 769 psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8); 770 } 771 } 772 return psum; 773 } 774 775 static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask) 776 { 777 int64_t psum = 0; 778 for (int i = 0; i < 2; i++, mask >>= 1) { 779 if (mask & 1) { 780 psum += (int64_t)sextract32(a, 16 * i, 16) * 781 sextract32(b, 16 * i, 16); 782 } 783 } 784 return psum; 785 } 786 787 static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at, 788 uint32_t mask, bool sat, bool acc, do_ger ger) 789 { 790 uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK), 791 xmsk = FIELD_EX32(mask, GER_MSK, XMSK), 792 ymsk = FIELD_EX32(mask, GER_MSK, YMSK); 793 uint8_t xmsk_bit, ymsk_bit; 794 int64_t psum; 795 int i, j; 796 for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) { 797 for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) { 798 if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) { 799 psum = ger(a->VsrW(i), b->VsrW(j), pmsk); 800 if (acc) { 801 psum += at[i].VsrSW(j); 802 } 803 if (sat && psum > INT32_MAX) { 804 set_vscr_sat(env); 805 at[i].VsrSW(j) = INT32_MAX; 806 } else if (sat && psum < INT32_MIN) { 807 set_vscr_sat(env); 808 at[i].VsrSW(j) = INT32_MIN; 809 } else { 810 at[i].VsrSW(j) = (int32_t) psum; 811 } 812 } else { 813 at[i].VsrSW(j) = 0; 814 } 815 } 816 } 817 } 818 819 QEMU_FLATTEN 820 void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 821 ppc_acc_t *at, uint32_t mask) 822 { 823 xviger(env, a, b, at, mask, false, false, ger_rank8); 824 } 825 826 QEMU_FLATTEN 827 void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 828 ppc_acc_t *at, uint32_t mask) 829 { 830 xviger(env, a, b, at, mask, false, true, ger_rank8); 831 } 832 833 QEMU_FLATTEN 834 void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 835 ppc_acc_t *at, uint32_t mask) 836 { 837 xviger(env, a, b, at, mask, false, false, ger_rank4); 838 } 839 840 QEMU_FLATTEN 841 void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 842 ppc_acc_t *at, uint32_t mask) 843 { 844 xviger(env, a, b, at, mask, false, true, ger_rank4); 845 } 846 847 QEMU_FLATTEN 848 void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 849 ppc_acc_t *at, uint32_t mask) 850 { 851 xviger(env, a, b, at, mask, true, true, ger_rank4); 852 } 853 854 QEMU_FLATTEN 855 void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 856 ppc_acc_t *at, uint32_t mask) 857 { 858 xviger(env, a, b, at, mask, false, false, ger_rank2); 859 } 860 861 QEMU_FLATTEN 862 void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 863 ppc_acc_t *at, uint32_t mask) 864 { 865 xviger(env, a, b, at, mask, true, false, ger_rank2); 866 } 867 868 QEMU_FLATTEN 869 void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 870 ppc_acc_t *at, uint32_t mask) 871 { 872 xviger(env, a, b, at, mask, false, true, ger_rank2); 873 } 874 875 QEMU_FLATTEN 876 void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 877 ppc_acc_t *at, uint32_t mask) 878 { 879 xviger(env, a, b, at, mask, true, true, ger_rank2); 880 } 881 882 target_ulong helper_vclzlsbb(ppc_avr_t *r) 883 { 884 target_ulong count = 0; 885 int i; 886 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 887 if (r->VsrB(i) & 0x01) { 888 break; 889 } 890 count++; 891 } 892 return count; 893 } 894 895 target_ulong helper_vctzlsbb(ppc_avr_t *r) 896 { 897 target_ulong count = 0; 898 int i; 899 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 900 if (r->VsrB(i) & 0x01) { 901 break; 902 } 903 count++; 904 } 905 return count; 906 } 907 908 void helper_VMHADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 909 ppc_avr_t *b, ppc_avr_t *c) 910 { 911 int sat = 0; 912 int i; 913 914 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 915 int32_t prod = a->s16[i] * b->s16[i]; 916 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 917 918 r->s16[i] = cvtswsh(t, &sat); 919 } 920 921 if (sat) { 922 set_vscr_sat(env); 923 } 924 } 925 926 void helper_VMHRADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 927 ppc_avr_t *b, ppc_avr_t *c) 928 { 929 int sat = 0; 930 int i; 931 932 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 933 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 934 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 935 r->s16[i] = cvtswsh(t, &sat); 936 } 937 938 if (sat) { 939 set_vscr_sat(env); 940 } 941 } 942 943 void helper_VMLADDUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c, 944 uint32_t v) 945 { 946 int i; 947 948 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 949 int32_t prod = a->s16[i] * b->s16[i]; 950 r->s16[i] = (int16_t) (prod + c->s16[i]); 951 } 952 } 953 954 #define VMRG_DO(name, element, access, ofs) \ 955 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 956 { \ 957 ppc_avr_t result; \ 958 int i, half = ARRAY_SIZE(r->element) / 2; \ 959 \ 960 for (i = 0; i < half; i++) { \ 961 result.access(i * 2 + 0) = a->access(i + ofs); \ 962 result.access(i * 2 + 1) = b->access(i + ofs); \ 963 } \ 964 *r = result; \ 965 } 966 967 #define VMRG(suffix, element, access) \ 968 VMRG_DO(mrgl##suffix, element, access, half) \ 969 VMRG_DO(mrgh##suffix, element, access, 0) 970 VMRG(b, u8, VsrB) 971 VMRG(h, u16, VsrH) 972 VMRG(w, u32, VsrW) 973 #undef VMRG_DO 974 #undef VMRG 975 976 void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 977 { 978 int32_t prod[16]; 979 int i; 980 981 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 982 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 983 } 984 985 VECTOR_FOR_INORDER_I(i, s32) { 986 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 987 prod[4 * i + 2] + prod[4 * i + 3]; 988 } 989 } 990 991 void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 992 { 993 int32_t prod[8]; 994 int i; 995 996 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 997 prod[i] = a->s16[i] * b->s16[i]; 998 } 999 1000 VECTOR_FOR_INORDER_I(i, s32) { 1001 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1002 } 1003 } 1004 1005 void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1006 ppc_avr_t *b, ppc_avr_t *c) 1007 { 1008 int32_t prod[8]; 1009 int i; 1010 int sat = 0; 1011 1012 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1013 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1014 } 1015 1016 VECTOR_FOR_INORDER_I(i, s32) { 1017 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1018 1019 r->u32[i] = cvtsdsw(t, &sat); 1020 } 1021 1022 if (sat) { 1023 set_vscr_sat(env); 1024 } 1025 } 1026 1027 void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1028 { 1029 uint16_t prod[16]; 1030 int i; 1031 1032 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1033 prod[i] = a->u8[i] * b->u8[i]; 1034 } 1035 1036 VECTOR_FOR_INORDER_I(i, u32) { 1037 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1038 prod[4 * i + 2] + prod[4 * i + 3]; 1039 } 1040 } 1041 1042 void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1043 { 1044 uint32_t prod[8]; 1045 int i; 1046 1047 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1048 prod[i] = a->u16[i] * b->u16[i]; 1049 } 1050 1051 VECTOR_FOR_INORDER_I(i, u32) { 1052 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1053 } 1054 } 1055 1056 void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1057 ppc_avr_t *b, ppc_avr_t *c) 1058 { 1059 uint32_t prod[8]; 1060 int i; 1061 int sat = 0; 1062 1063 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1064 prod[i] = a->u16[i] * b->u16[i]; 1065 } 1066 1067 VECTOR_FOR_INORDER_I(i, s32) { 1068 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1069 1070 r->u32[i] = cvtuduw(t, &sat); 1071 } 1072 1073 if (sat) { 1074 set_vscr_sat(env); 1075 } 1076 } 1077 1078 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 1079 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1080 { \ 1081 int i; \ 1082 \ 1083 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1084 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 1085 (cast)b->mul_access(i); \ 1086 } \ 1087 } 1088 1089 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 1090 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1091 { \ 1092 int i; \ 1093 \ 1094 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1095 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1096 (cast)b->mul_access(i + 1); \ 1097 } \ 1098 } 1099 1100 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1101 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \ 1102 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast) 1103 VMUL(SB, s8, VsrSB, VsrSH, int16_t) 1104 VMUL(SH, s16, VsrSH, VsrSW, int32_t) 1105 VMUL(SW, s32, VsrSW, VsrSD, int64_t) 1106 VMUL(UB, u8, VsrB, VsrH, uint16_t) 1107 VMUL(UH, u16, VsrH, VsrW, uint32_t) 1108 VMUL(UW, u32, VsrW, VsrD, uint64_t) 1109 #undef VMUL_DO_EVN 1110 #undef VMUL_DO_ODD 1111 #undef VMUL 1112 1113 void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv, 1114 target_ulong uim) 1115 { 1116 int i, idx; 1117 ppc_vsr_t tmp = { .u64 = {0, 0} }; 1118 1119 for (i = 0; i < ARRAY_SIZE(t->u8); i++) { 1120 if ((pcv->VsrB(i) >> 5) == uim) { 1121 idx = pcv->VsrB(i) & 0x1f; 1122 if (idx < ARRAY_SIZE(t->u8)) { 1123 tmp.VsrB(i) = s0->VsrB(idx); 1124 } else { 1125 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8)); 1126 } 1127 } 1128 } 1129 1130 *t = tmp; 1131 } 1132 1133 void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1134 { 1135 Int128 neg1 = int128_makes64(-1); 1136 Int128 int128_min = int128_make128(0, INT64_MIN); 1137 if (likely(int128_nz(b->s128) && 1138 (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) { 1139 t->s128 = int128_divs(a->s128, b->s128); 1140 } else { 1141 t->s128 = a->s128; /* Undefined behavior */ 1142 } 1143 } 1144 1145 void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1146 { 1147 if (int128_nz(b->s128)) { 1148 t->s128 = int128_divu(a->s128, b->s128); 1149 } else { 1150 t->s128 = a->s128; /* Undefined behavior */ 1151 } 1152 } 1153 1154 void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1155 { 1156 int i; 1157 int64_t high; 1158 uint64_t low; 1159 for (i = 0; i < 2; i++) { 1160 high = a->s64[i]; 1161 low = 0; 1162 if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) { 1163 t->s64[i] = a->s64[i]; /* Undefined behavior */ 1164 } else { 1165 divs128(&low, &high, b->s64[i]); 1166 t->s64[i] = low; 1167 } 1168 } 1169 } 1170 1171 void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1172 { 1173 int i; 1174 uint64_t high, low; 1175 for (i = 0; i < 2; i++) { 1176 high = a->u64[i]; 1177 low = 0; 1178 if (unlikely(!b->u64[i])) { 1179 t->u64[i] = a->u64[i]; /* Undefined behavior */ 1180 } else { 1181 divu128(&low, &high, b->u64[i]); 1182 t->u64[i] = low; 1183 } 1184 } 1185 } 1186 1187 void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1188 { 1189 Int128 high, low; 1190 Int128 int128_min = int128_make128(0, INT64_MIN); 1191 Int128 neg1 = int128_makes64(-1); 1192 1193 high = a->s128; 1194 low = int128_zero(); 1195 if (unlikely(!int128_nz(b->s128) || 1196 (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) { 1197 t->s128 = a->s128; /* Undefined behavior */ 1198 } else { 1199 divs256(&low, &high, b->s128); 1200 t->s128 = low; 1201 } 1202 } 1203 1204 void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1205 { 1206 Int128 high, low; 1207 1208 high = a->s128; 1209 low = int128_zero(); 1210 if (unlikely(!int128_nz(b->s128))) { 1211 t->s128 = a->s128; /* Undefined behavior */ 1212 } else { 1213 divu256(&low, &high, b->s128); 1214 t->s128 = low; 1215 } 1216 } 1217 1218 void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1219 { 1220 Int128 neg1 = int128_makes64(-1); 1221 Int128 int128_min = int128_make128(0, INT64_MIN); 1222 if (likely(int128_nz(b->s128) && 1223 (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) { 1224 t->s128 = int128_rems(a->s128, b->s128); 1225 } else { 1226 t->s128 = int128_zero(); /* Undefined behavior */ 1227 } 1228 } 1229 1230 void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1231 { 1232 if (likely(int128_nz(b->s128))) { 1233 t->s128 = int128_remu(a->s128, b->s128); 1234 } else { 1235 t->s128 = int128_zero(); /* Undefined behavior */ 1236 } 1237 } 1238 1239 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1240 { 1241 ppc_avr_t result; 1242 int i; 1243 1244 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1245 int s = c->VsrB(i) & 0x1f; 1246 int index = s & 0xf; 1247 1248 if (s & 0x10) { 1249 result.VsrB(i) = b->VsrB(index); 1250 } else { 1251 result.VsrB(i) = a->VsrB(index); 1252 } 1253 } 1254 *r = result; 1255 } 1256 1257 void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1258 { 1259 ppc_avr_t result; 1260 int i; 1261 1262 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1263 int s = c->VsrB(i) & 0x1f; 1264 int index = 15 - (s & 0xf); 1265 1266 if (s & 0x10) { 1267 result.VsrB(i) = a->VsrB(index); 1268 } else { 1269 result.VsrB(i) = b->VsrB(index); 1270 } 1271 } 1272 *r = result; 1273 } 1274 1275 #define XXGENPCV_BE_EXP(NAME, SZ) \ 1276 void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ 1277 { \ 1278 ppc_vsr_t tmp; \ 1279 \ 1280 /* Initialize tmp with the result of an all-zeros mask */ \ 1281 tmp.VsrD(0) = 0x1011121314151617; \ 1282 tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \ 1283 \ 1284 /* Iterate over the most significant byte of each element */ \ 1285 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1286 if (b->VsrB(i) & 0x80) { \ 1287 /* Update each byte of the element */ \ 1288 for (int k = 0; k < SZ; k++) { \ 1289 tmp.VsrB(i + k) = j + k; \ 1290 } \ 1291 j += SZ; \ 1292 } \ 1293 } \ 1294 \ 1295 *t = tmp; \ 1296 } 1297 1298 #define XXGENPCV_BE_COMP(NAME, SZ) \ 1299 void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ 1300 { \ 1301 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ 1302 \ 1303 /* Iterate over the most significant byte of each element */ \ 1304 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1305 if (b->VsrB(i) & 0x80) { \ 1306 /* Update each byte of the element */ \ 1307 for (int k = 0; k < SZ; k++) { \ 1308 tmp.VsrB(j + k) = i + k; \ 1309 } \ 1310 j += SZ; \ 1311 } \ 1312 } \ 1313 \ 1314 *t = tmp; \ 1315 } 1316 1317 #define XXGENPCV_LE_EXP(NAME, SZ) \ 1318 void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ 1319 { \ 1320 ppc_vsr_t tmp; \ 1321 \ 1322 /* Initialize tmp with the result of an all-zeros mask */ \ 1323 tmp.VsrD(0) = 0x1F1E1D1C1B1A1918; \ 1324 tmp.VsrD(1) = 0x1716151413121110; \ 1325 \ 1326 /* Iterate over the most significant byte of each element */ \ 1327 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1328 /* Reverse indexing of "i" */ \ 1329 const int idx = ARRAY_SIZE(b->u8) - i - SZ; \ 1330 if (b->VsrB(idx) & 0x80) { \ 1331 /* Update each byte of the element */ \ 1332 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \ 1333 tmp.VsrB(idx + rk) = j + k; \ 1334 } \ 1335 j += SZ; \ 1336 } \ 1337 } \ 1338 \ 1339 *t = tmp; \ 1340 } 1341 1342 #define XXGENPCV_LE_COMP(NAME, SZ) \ 1343 void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ 1344 { \ 1345 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ 1346 \ 1347 /* Iterate over the most significant byte of each element */ \ 1348 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1349 if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) { \ 1350 /* Update each byte of the element */ \ 1351 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \ 1352 /* Reverse indexing of "j" */ \ 1353 const int idx = ARRAY_SIZE(b->u8) - j - SZ; \ 1354 tmp.VsrB(idx + rk) = i + k; \ 1355 } \ 1356 j += SZ; \ 1357 } \ 1358 } \ 1359 \ 1360 *t = tmp; \ 1361 } 1362 1363 #define XXGENPCV(NAME, SZ) \ 1364 XXGENPCV_BE_EXP(NAME, SZ) \ 1365 XXGENPCV_BE_COMP(NAME, SZ) \ 1366 XXGENPCV_LE_EXP(NAME, SZ) \ 1367 XXGENPCV_LE_COMP(NAME, SZ) \ 1368 1369 XXGENPCV(XXGENPCVBM, 1) 1370 XXGENPCV(XXGENPCVHM, 2) 1371 XXGENPCV(XXGENPCVWM, 4) 1372 XXGENPCV(XXGENPCVDM, 8) 1373 1374 #undef XXGENPCV_BE_EXP 1375 #undef XXGENPCV_BE_COMP 1376 #undef XXGENPCV_LE_EXP 1377 #undef XXGENPCV_LE_COMP 1378 #undef XXGENPCV 1379 1380 #if HOST_BIG_ENDIAN 1381 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1382 #define VBPERMD_INDEX(i) (i) 1383 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1384 #else 1385 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1386 #define VBPERMD_INDEX(i) (1 - i) 1387 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1388 #endif 1389 #define EXTRACT_BIT(avr, i, index) \ 1390 (extract64((avr)->VsrD(i), 63 - index, 1)) 1391 1392 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1393 { 1394 int i, j; 1395 ppc_avr_t result = { .u64 = { 0, 0 } }; 1396 VECTOR_FOR_INORDER_I(i, u64) { 1397 for (j = 0; j < 8; j++) { 1398 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1399 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1400 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1401 } 1402 } 1403 } 1404 *r = result; 1405 } 1406 1407 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1408 { 1409 int i; 1410 uint64_t perm = 0; 1411 1412 VECTOR_FOR_INORDER_I(i, u8) { 1413 int index = VBPERMQ_INDEX(b, i); 1414 1415 if (index < 128) { 1416 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1417 if (a->u64[VBPERMQ_DW(index)] & mask) { 1418 perm |= (0x8000 >> i); 1419 } 1420 } 1421 } 1422 1423 r->VsrD(0) = perm; 1424 r->VsrD(1) = 0; 1425 } 1426 1427 #undef VBPERMQ_INDEX 1428 #undef VBPERMQ_DW 1429 1430 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1431 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1432 { \ 1433 int i, j; \ 1434 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ 1435 \ 1436 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1437 prod[i] = 0; \ 1438 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1439 if (a->srcfld[i] & (1ull << j)) { \ 1440 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1441 } \ 1442 } \ 1443 } \ 1444 \ 1445 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1446 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ 1447 } \ 1448 } 1449 1450 PMSUM(vpmsumb, u8, u16, uint16_t) 1451 PMSUM(vpmsumh, u16, u32, uint32_t) 1452 PMSUM(vpmsumw, u32, u64, uint64_t) 1453 1454 void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1455 { 1456 int i, j; 1457 Int128 tmp, prod[2] = {int128_zero(), int128_zero()}; 1458 1459 for (j = 0; j < 64; j++) { 1460 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1461 if (a->VsrD(i) & (1ull << j)) { 1462 tmp = int128_make64(b->VsrD(i)); 1463 tmp = int128_lshift(tmp, j); 1464 prod[i] = int128_xor(prod[i], tmp); 1465 } 1466 } 1467 } 1468 1469 r->s128 = int128_xor(prod[0], prod[1]); 1470 } 1471 1472 #if HOST_BIG_ENDIAN 1473 #define PKBIG 1 1474 #else 1475 #define PKBIG 0 1476 #endif 1477 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1478 { 1479 int i, j; 1480 ppc_avr_t result; 1481 #if HOST_BIG_ENDIAN 1482 const ppc_avr_t *x[2] = { a, b }; 1483 #else 1484 const ppc_avr_t *x[2] = { b, a }; 1485 #endif 1486 1487 VECTOR_FOR_INORDER_I(i, u64) { 1488 VECTOR_FOR_INORDER_I(j, u32) { 1489 uint32_t e = x[i]->u32[j]; 1490 1491 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1492 ((e >> 6) & 0x3e0) | 1493 ((e >> 3) & 0x1f)); 1494 } 1495 } 1496 *r = result; 1497 } 1498 1499 #define VPK(suffix, from, to, cvt, dosat) \ 1500 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1501 ppc_avr_t *a, ppc_avr_t *b) \ 1502 { \ 1503 int i; \ 1504 int sat = 0; \ 1505 ppc_avr_t result; \ 1506 ppc_avr_t *a0 = PKBIG ? a : b; \ 1507 ppc_avr_t *a1 = PKBIG ? b : a; \ 1508 \ 1509 VECTOR_FOR_INORDER_I(i, from) { \ 1510 result.to[i] = cvt(a0->from[i], &sat); \ 1511 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1512 } \ 1513 *r = result; \ 1514 if (dosat && sat) { \ 1515 set_vscr_sat(env); \ 1516 } \ 1517 } 1518 #define I(x, y) (x) 1519 VPK(shss, s16, s8, cvtshsb, 1) 1520 VPK(shus, s16, u8, cvtshub, 1) 1521 VPK(swss, s32, s16, cvtswsh, 1) 1522 VPK(swus, s32, u16, cvtswuh, 1) 1523 VPK(sdss, s64, s32, cvtsdsw, 1) 1524 VPK(sdus, s64, u32, cvtsduw, 1) 1525 VPK(uhus, u16, u8, cvtuhub, 1) 1526 VPK(uwus, u32, u16, cvtuwuh, 1) 1527 VPK(udus, u64, u32, cvtuduw, 1) 1528 VPK(uhum, u16, u8, I, 0) 1529 VPK(uwum, u32, u16, I, 0) 1530 VPK(udum, u64, u32, I, 0) 1531 #undef I 1532 #undef VPK 1533 #undef PKBIG 1534 1535 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1536 { 1537 int i; 1538 1539 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1540 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1541 } 1542 } 1543 1544 #define VRFI(suffix, rounding) \ 1545 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1546 ppc_avr_t *b) \ 1547 { \ 1548 int i; \ 1549 float_status s = env->vec_status; \ 1550 \ 1551 set_float_rounding_mode(rounding, &s); \ 1552 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1553 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1554 } \ 1555 } 1556 VRFI(n, float_round_nearest_even) 1557 VRFI(m, float_round_down) 1558 VRFI(p, float_round_up) 1559 VRFI(z, float_round_to_zero) 1560 #undef VRFI 1561 1562 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1563 { 1564 int i; 1565 1566 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1567 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1568 1569 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1570 } 1571 } 1572 1573 #define VRLMI(name, size, element, insert) \ 1574 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 1575 { \ 1576 int i; \ 1577 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1578 uint##size##_t src1 = a->element[i]; \ 1579 uint##size##_t src2 = b->element[i]; \ 1580 uint##size##_t src3 = r->element[i]; \ 1581 uint##size##_t begin, end, shift, mask, rot_val; \ 1582 \ 1583 shift = extract##size(src2, 0, 6); \ 1584 end = extract##size(src2, 8, 6); \ 1585 begin = extract##size(src2, 16, 6); \ 1586 rot_val = rol##size(src1, shift); \ 1587 mask = mask_u##size(begin, end); \ 1588 if (insert) { \ 1589 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1590 } else { \ 1591 r->element[i] = (rot_val & mask); \ 1592 } \ 1593 } \ 1594 } 1595 1596 VRLMI(VRLDMI, 64, u64, 1); 1597 VRLMI(VRLWMI, 32, u32, 1); 1598 VRLMI(VRLDNM, 64, u64, 0); 1599 VRLMI(VRLWNM, 32, u32, 0); 1600 1601 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1602 { 1603 int i; 1604 1605 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1606 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status); 1607 } 1608 } 1609 1610 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1611 { 1612 int i; 1613 1614 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1615 r->f32[i] = float32_log2(b->f32[i], &env->vec_status); 1616 } 1617 } 1618 1619 #define VEXTU_X_DO(name, size, left) \ 1620 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1621 { \ 1622 int index = (a & 0xf) * 8; \ 1623 if (left) { \ 1624 index = 128 - index - size; \ 1625 } \ 1626 return int128_getlo(int128_rshift(b->s128, index)) & \ 1627 MAKE_64BIT_MASK(0, size); \ 1628 } 1629 VEXTU_X_DO(vextublx, 8, 1) 1630 VEXTU_X_DO(vextuhlx, 16, 1) 1631 VEXTU_X_DO(vextuwlx, 32, 1) 1632 VEXTU_X_DO(vextubrx, 8, 0) 1633 VEXTU_X_DO(vextuhrx, 16, 0) 1634 VEXTU_X_DO(vextuwrx, 32, 0) 1635 #undef VEXTU_X_DO 1636 1637 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1638 { 1639 int i; 1640 unsigned int shift, bytes, size; 1641 1642 size = ARRAY_SIZE(r->u8); 1643 for (i = 0; i < size; i++) { 1644 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1645 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */ 1646 (((i + 1) < size) ? a->VsrB(i + 1) : 0); 1647 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */ 1648 } 1649 } 1650 1651 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1652 { 1653 int i; 1654 unsigned int shift, bytes; 1655 1656 /* 1657 * Use reverse order, as destination and source register can be 1658 * same. Its being modified in place saving temporary, reverse 1659 * order will guarantee that computed result is not fed back. 1660 */ 1661 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1662 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1663 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i); 1664 /* extract adjacent bytes */ 1665 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */ 1666 } 1667 } 1668 1669 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1670 { 1671 int sh = shift & 0xf; 1672 int i; 1673 ppc_avr_t result; 1674 1675 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1676 int index = sh + i; 1677 if (index > 0xf) { 1678 result.VsrB(i) = b->VsrB(index - 0x10); 1679 } else { 1680 result.VsrB(i) = a->VsrB(index); 1681 } 1682 } 1683 *r = result; 1684 } 1685 1686 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1687 { 1688 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1689 1690 #if HOST_BIG_ENDIAN 1691 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1692 memset(&r->u8[16 - sh], 0, sh); 1693 #else 1694 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1695 memset(&r->u8[0], 0, sh); 1696 #endif 1697 } 1698 1699 #if HOST_BIG_ENDIAN 1700 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX]) 1701 #else 1702 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1) 1703 #endif 1704 1705 #define VINSX(SUFFIX, TYPE) \ 1706 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \ 1707 uint64_t val, target_ulong index) \ 1708 { \ 1709 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \ 1710 target_long idx = index; \ 1711 \ 1712 if (idx < 0 || idx > maxidx) { \ 1713 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \ 1714 qemu_log_mask(LOG_GUEST_ERROR, \ 1715 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \ 1716 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \ 1717 } else { \ 1718 TYPE src = val; \ 1719 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \ 1720 } \ 1721 } 1722 VINSX(B, uint8_t) 1723 VINSX(H, uint16_t) 1724 VINSX(W, uint32_t) 1725 VINSX(D, uint64_t) 1726 #undef ELEM_ADDR 1727 #undef VINSX 1728 #if HOST_BIG_ENDIAN 1729 #define VEXTDVLX(NAME, SIZE) \ 1730 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1731 target_ulong index) \ 1732 { \ 1733 const target_long idx = index; \ 1734 ppc_avr_t tmp[2] = { *a, *b }; \ 1735 memset(t, 0, sizeof(*t)); \ 1736 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1737 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \ 1738 } else { \ 1739 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1740 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1741 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \ 1742 } \ 1743 } 1744 #else 1745 #define VEXTDVLX(NAME, SIZE) \ 1746 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1747 target_ulong index) \ 1748 { \ 1749 const target_long idx = index; \ 1750 ppc_avr_t tmp[2] = { *b, *a }; \ 1751 memset(t, 0, sizeof(*t)); \ 1752 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1753 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \ 1754 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \ 1755 } else { \ 1756 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1757 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1758 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \ 1759 } \ 1760 } 1761 #endif 1762 VEXTDVLX(VEXTDUBVLX, 1) 1763 VEXTDVLX(VEXTDUHVLX, 2) 1764 VEXTDVLX(VEXTDUWVLX, 4) 1765 VEXTDVLX(VEXTDDVLX, 8) 1766 #undef VEXTDVLX 1767 #if HOST_BIG_ENDIAN 1768 #define VEXTRACT(suffix, element) \ 1769 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1770 { \ 1771 uint32_t es = sizeof(r->element[0]); \ 1772 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1773 memset(&r->u8[8], 0, 8); \ 1774 memset(&r->u8[0], 0, 8 - es); \ 1775 } 1776 #else 1777 #define VEXTRACT(suffix, element) \ 1778 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1779 { \ 1780 uint32_t es = sizeof(r->element[0]); \ 1781 uint32_t s = (16 - index) - es; \ 1782 memmove(&r->u8[8], &b->u8[s], es); \ 1783 memset(&r->u8[0], 0, 8); \ 1784 memset(&r->u8[8 + es], 0, 8 - es); \ 1785 } 1786 #endif 1787 VEXTRACT(ub, u8) 1788 VEXTRACT(uh, u16) 1789 VEXTRACT(uw, u32) 1790 VEXTRACT(d, u64) 1791 #undef VEXTRACT 1792 1793 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \ 1794 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \ 1795 { \ 1796 int i, idx, crf = 0; \ 1797 \ 1798 for (i = 0; i < NUM_ELEMS; i++) { \ 1799 idx = LEFT ? i : NUM_ELEMS - i - 1; \ 1800 if (b->Vsr##ELEM(idx)) { \ 1801 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \ 1802 } else { \ 1803 crf = 0b0010; \ 1804 break; \ 1805 } \ 1806 } \ 1807 \ 1808 for (; i < NUM_ELEMS; i++) { \ 1809 idx = LEFT ? i : NUM_ELEMS - i - 1; \ 1810 t->Vsr##ELEM(idx) = 0; \ 1811 } \ 1812 \ 1813 return crf; \ 1814 } 1815 VSTRI(VSTRIBL, B, 16, true) 1816 VSTRI(VSTRIBR, B, 16, false) 1817 VSTRI(VSTRIHL, H, 8, true) 1818 VSTRI(VSTRIHR, H, 8, false) 1819 #undef VSTRI 1820 1821 void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index) 1822 { 1823 ppc_vsr_t t = { }; 1824 size_t es = sizeof(uint32_t); 1825 uint32_t ext_index; 1826 int i; 1827 1828 ext_index = index; 1829 for (i = 0; i < es; i++, ext_index++) { 1830 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16); 1831 } 1832 1833 *xt = t; 1834 } 1835 1836 void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index) 1837 { 1838 ppc_vsr_t t = *xt; 1839 size_t es = sizeof(uint32_t); 1840 int ins_index, i = 0; 1841 1842 ins_index = index; 1843 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1844 t.VsrB(ins_index) = xb->VsrB(8 - es + i); 1845 } 1846 1847 *xt = t; 1848 } 1849 1850 void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c, 1851 uint32_t desc) 1852 { 1853 /* 1854 * Instead of processing imm bit-by-bit, we'll skip the computation of 1855 * conjunctions whose corresponding bit is unset. 1856 */ 1857 int bit, imm = simd_data(desc); 1858 Int128 conj, disj = int128_zero(); 1859 1860 /* Iterate over set bits from the least to the most significant bit */ 1861 while (imm) { 1862 /* 1863 * Get the next bit to be processed with ctz64. Invert the result of 1864 * ctz64 to match the indexing used by PowerISA. 1865 */ 1866 bit = 7 - ctzl(imm); 1867 if (bit & 0x4) { 1868 conj = a->s128; 1869 } else { 1870 conj = int128_not(a->s128); 1871 } 1872 if (bit & 0x2) { 1873 conj = int128_and(conj, b->s128); 1874 } else { 1875 conj = int128_and(conj, int128_not(b->s128)); 1876 } 1877 if (bit & 0x1) { 1878 conj = int128_and(conj, c->s128); 1879 } else { 1880 conj = int128_and(conj, int128_not(c->s128)); 1881 } 1882 disj = int128_or(disj, conj); 1883 1884 /* Unset the least significant bit that is set */ 1885 imm &= imm - 1; 1886 } 1887 1888 t->s128 = disj; 1889 } 1890 1891 #define XXBLEND(name, sz) \ 1892 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1893 ppc_avr_t *c, uint32_t desc) \ 1894 { \ 1895 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \ 1896 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \ 1897 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \ 1898 } \ 1899 } 1900 XXBLEND(B, 8) 1901 XXBLEND(H, 16) 1902 XXBLEND(W, 32) 1903 XXBLEND(D, 64) 1904 #undef XXBLEND 1905 1906 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1907 { 1908 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1909 1910 #if HOST_BIG_ENDIAN 1911 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1912 memset(&r->u8[0], 0, sh); 1913 #else 1914 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1915 memset(&r->u8[16 - sh], 0, sh); 1916 #endif 1917 } 1918 1919 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1920 { 1921 int64_t t; 1922 int i, upper; 1923 ppc_avr_t result; 1924 int sat = 0; 1925 1926 upper = ARRAY_SIZE(r->s32) - 1; 1927 t = (int64_t)b->VsrSW(upper); 1928 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1929 t += a->VsrSW(i); 1930 result.VsrSW(i) = 0; 1931 } 1932 result.VsrSW(upper) = cvtsdsw(t, &sat); 1933 *r = result; 1934 1935 if (sat) { 1936 set_vscr_sat(env); 1937 } 1938 } 1939 1940 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1941 { 1942 int i, j, upper; 1943 ppc_avr_t result; 1944 int sat = 0; 1945 1946 upper = 1; 1947 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1948 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 1949 1950 result.VsrD(i) = 0; 1951 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 1952 t += a->VsrSW(2 * i + j); 1953 } 1954 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 1955 } 1956 1957 *r = result; 1958 if (sat) { 1959 set_vscr_sat(env); 1960 } 1961 } 1962 1963 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1964 { 1965 int i, j; 1966 int sat = 0; 1967 1968 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1969 int64_t t = (int64_t)b->s32[i]; 1970 1971 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 1972 t += a->s8[4 * i + j]; 1973 } 1974 r->s32[i] = cvtsdsw(t, &sat); 1975 } 1976 1977 if (sat) { 1978 set_vscr_sat(env); 1979 } 1980 } 1981 1982 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1983 { 1984 int sat = 0; 1985 int i; 1986 1987 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1988 int64_t t = (int64_t)b->s32[i]; 1989 1990 t += a->s16[2 * i] + a->s16[2 * i + 1]; 1991 r->s32[i] = cvtsdsw(t, &sat); 1992 } 1993 1994 if (sat) { 1995 set_vscr_sat(env); 1996 } 1997 } 1998 1999 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2000 { 2001 int i, j; 2002 int sat = 0; 2003 2004 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2005 uint64_t t = (uint64_t)b->u32[i]; 2006 2007 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 2008 t += a->u8[4 * i + j]; 2009 } 2010 r->u32[i] = cvtuduw(t, &sat); 2011 } 2012 2013 if (sat) { 2014 set_vscr_sat(env); 2015 } 2016 } 2017 2018 #if HOST_BIG_ENDIAN 2019 #define UPKHI 1 2020 #define UPKLO 0 2021 #else 2022 #define UPKHI 0 2023 #define UPKLO 1 2024 #endif 2025 #define VUPKPX(suffix, hi) \ 2026 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2027 { \ 2028 int i; \ 2029 ppc_avr_t result; \ 2030 \ 2031 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 2032 uint16_t e = b->u16[hi ? i : i + 4]; \ 2033 uint8_t a = (e >> 15) ? 0xff : 0; \ 2034 uint8_t r = (e >> 10) & 0x1f; \ 2035 uint8_t g = (e >> 5) & 0x1f; \ 2036 uint8_t b = e & 0x1f; \ 2037 \ 2038 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 2039 } \ 2040 *r = result; \ 2041 } 2042 VUPKPX(lpx, UPKLO) 2043 VUPKPX(hpx, UPKHI) 2044 #undef VUPKPX 2045 2046 #define VUPK(suffix, unpacked, packee, hi) \ 2047 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2048 { \ 2049 int i; \ 2050 ppc_avr_t result; \ 2051 \ 2052 if (hi) { \ 2053 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 2054 result.unpacked[i] = b->packee[i]; \ 2055 } \ 2056 } else { \ 2057 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 2058 i++) { \ 2059 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 2060 } \ 2061 } \ 2062 *r = result; \ 2063 } 2064 VUPK(hsb, s16, s8, UPKHI) 2065 VUPK(hsh, s32, s16, UPKHI) 2066 VUPK(hsw, s64, s32, UPKHI) 2067 VUPK(lsb, s16, s8, UPKLO) 2068 VUPK(lsh, s32, s16, UPKLO) 2069 VUPK(lsw, s64, s32, UPKLO) 2070 #undef VUPK 2071 #undef UPKHI 2072 #undef UPKLO 2073 2074 #define VGENERIC_DO(name, element) \ 2075 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 2076 { \ 2077 int i; \ 2078 \ 2079 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 2080 r->element[i] = name(b->element[i]); \ 2081 } \ 2082 } 2083 2084 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 2085 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 2086 2087 VGENERIC_DO(clzb, u8) 2088 VGENERIC_DO(clzh, u16) 2089 2090 #undef clzb 2091 #undef clzh 2092 2093 #define ctzb(v) ((v) ? ctz32(v) : 8) 2094 #define ctzh(v) ((v) ? ctz32(v) : 16) 2095 #define ctzw(v) ctz32((v)) 2096 #define ctzd(v) ctz64((v)) 2097 2098 VGENERIC_DO(ctzb, u8) 2099 VGENERIC_DO(ctzh, u16) 2100 VGENERIC_DO(ctzw, u32) 2101 VGENERIC_DO(ctzd, u64) 2102 2103 #undef ctzb 2104 #undef ctzh 2105 #undef ctzw 2106 #undef ctzd 2107 2108 #define popcntb(v) ctpop8(v) 2109 #define popcnth(v) ctpop16(v) 2110 #define popcntw(v) ctpop32(v) 2111 #define popcntd(v) ctpop64(v) 2112 2113 VGENERIC_DO(popcntb, u8) 2114 VGENERIC_DO(popcnth, u16) 2115 VGENERIC_DO(popcntw, u32) 2116 VGENERIC_DO(popcntd, u64) 2117 2118 #undef popcntb 2119 #undef popcnth 2120 #undef popcntw 2121 #undef popcntd 2122 2123 #undef VGENERIC_DO 2124 2125 void helper_VADDUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2126 { 2127 r->s128 = int128_add(a->s128, b->s128); 2128 } 2129 2130 void helper_VADDEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2131 { 2132 r->s128 = int128_add(int128_add(a->s128, b->s128), 2133 int128_make64(int128_getlo(c->s128) & 1)); 2134 } 2135 2136 void helper_VADDCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2137 { 2138 r->VsrD(1) = int128_ult(int128_not(a->s128), b->s128); 2139 r->VsrD(0) = 0; 2140 } 2141 2142 void helper_VADDECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2143 { 2144 bool carry_out = int128_ult(int128_not(a->s128), b->s128), 2145 carry_in = int128_getlo(c->s128) & 1; 2146 2147 if (!carry_out && carry_in) { 2148 carry_out = (int128_nz(a->s128) || int128_nz(b->s128)) && 2149 int128_eq(int128_add(a->s128, b->s128), int128_makes64(-1)); 2150 } 2151 2152 r->VsrD(0) = 0; 2153 r->VsrD(1) = carry_out; 2154 } 2155 2156 void helper_VSUBUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2157 { 2158 r->s128 = int128_sub(a->s128, b->s128); 2159 } 2160 2161 void helper_VSUBEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2162 { 2163 r->s128 = int128_add(int128_add(a->s128, int128_not(b->s128)), 2164 int128_make64(int128_getlo(c->s128) & 1)); 2165 } 2166 2167 void helper_VSUBCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2168 { 2169 Int128 tmp = int128_not(b->s128); 2170 2171 r->VsrD(1) = int128_ult(int128_not(a->s128), tmp) || 2172 int128_eq(int128_add(a->s128, tmp), int128_makes64(-1)); 2173 r->VsrD(0) = 0; 2174 } 2175 2176 void helper_VSUBECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2177 { 2178 Int128 tmp = int128_not(b->s128); 2179 bool carry_out = int128_ult(int128_not(a->s128), tmp), 2180 carry_in = int128_getlo(c->s128) & 1; 2181 2182 r->VsrD(1) = carry_out || (carry_in && int128_eq(int128_add(a->s128, tmp), 2183 int128_makes64(-1))); 2184 r->VsrD(0) = 0; 2185 } 2186 2187 #define BCD_PLUS_PREF_1 0xC 2188 #define BCD_PLUS_PREF_2 0xF 2189 #define BCD_PLUS_ALT_1 0xA 2190 #define BCD_NEG_PREF 0xD 2191 #define BCD_NEG_ALT 0xB 2192 #define BCD_PLUS_ALT_2 0xE 2193 #define NATIONAL_PLUS 0x2B 2194 #define NATIONAL_NEG 0x2D 2195 2196 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2197 2198 static int bcd_get_sgn(ppc_avr_t *bcd) 2199 { 2200 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) { 2201 case BCD_PLUS_PREF_1: 2202 case BCD_PLUS_PREF_2: 2203 case BCD_PLUS_ALT_1: 2204 case BCD_PLUS_ALT_2: 2205 { 2206 return 1; 2207 } 2208 2209 case BCD_NEG_PREF: 2210 case BCD_NEG_ALT: 2211 { 2212 return -1; 2213 } 2214 2215 default: 2216 { 2217 return 0; 2218 } 2219 } 2220 } 2221 2222 static int bcd_preferred_sgn(int sgn, int ps) 2223 { 2224 if (sgn >= 0) { 2225 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2226 } else { 2227 return BCD_NEG_PREF; 2228 } 2229 } 2230 2231 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2232 { 2233 uint8_t result; 2234 if (n & 1) { 2235 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4; 2236 } else { 2237 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF; 2238 } 2239 2240 if (unlikely(result > 9)) { 2241 *invalid = true; 2242 } 2243 return result; 2244 } 2245 2246 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2247 { 2248 if (n & 1) { 2249 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F; 2250 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4); 2251 } else { 2252 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0; 2253 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit; 2254 } 2255 } 2256 2257 static bool bcd_is_valid(ppc_avr_t *bcd) 2258 { 2259 int i; 2260 int invalid = 0; 2261 2262 if (bcd_get_sgn(bcd) == 0) { 2263 return false; 2264 } 2265 2266 for (i = 1; i < 32; i++) { 2267 bcd_get_digit(bcd, i, &invalid); 2268 if (unlikely(invalid)) { 2269 return false; 2270 } 2271 } 2272 return true; 2273 } 2274 2275 static int bcd_cmp_zero(ppc_avr_t *bcd) 2276 { 2277 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) { 2278 return CRF_EQ; 2279 } else { 2280 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT; 2281 } 2282 } 2283 2284 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2285 { 2286 return reg->VsrH(7 - n); 2287 } 2288 2289 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2290 { 2291 reg->VsrH(7 - n) = val; 2292 } 2293 2294 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2295 { 2296 int i; 2297 int invalid = 0; 2298 for (i = 31; i > 0; i--) { 2299 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2300 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2301 if (unlikely(invalid)) { 2302 return 0; /* doesn't matter */ 2303 } else if (dig_a > dig_b) { 2304 return 1; 2305 } else if (dig_a < dig_b) { 2306 return -1; 2307 } 2308 } 2309 2310 return 0; 2311 } 2312 2313 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2314 int *overflow) 2315 { 2316 int carry = 0; 2317 int i; 2318 int is_zero = 1; 2319 2320 for (i = 1; i <= 31; i++) { 2321 uint8_t digit = bcd_get_digit(a, i, invalid) + 2322 bcd_get_digit(b, i, invalid) + carry; 2323 is_zero &= (digit == 0); 2324 if (digit > 9) { 2325 carry = 1; 2326 digit -= 10; 2327 } else { 2328 carry = 0; 2329 } 2330 2331 bcd_put_digit(t, digit, i); 2332 } 2333 2334 *overflow = carry; 2335 return is_zero; 2336 } 2337 2338 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2339 int *overflow) 2340 { 2341 int carry = 0; 2342 int i; 2343 2344 for (i = 1; i <= 31; i++) { 2345 uint8_t digit = bcd_get_digit(a, i, invalid) - 2346 bcd_get_digit(b, i, invalid) + carry; 2347 if (digit & 0x80) { 2348 carry = -1; 2349 digit += 10; 2350 } else { 2351 carry = 0; 2352 } 2353 2354 bcd_put_digit(t, digit, i); 2355 } 2356 2357 *overflow = carry; 2358 } 2359 2360 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2361 { 2362 2363 int sgna = bcd_get_sgn(a); 2364 int sgnb = bcd_get_sgn(b); 2365 int invalid = (sgna == 0) || (sgnb == 0); 2366 int overflow = 0; 2367 int zero = 0; 2368 uint32_t cr = 0; 2369 ppc_avr_t result = { .u64 = { 0, 0 } }; 2370 2371 if (!invalid) { 2372 if (sgna == sgnb) { 2373 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2374 zero = bcd_add_mag(&result, a, b, &invalid, &overflow); 2375 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2376 } else { 2377 int magnitude = bcd_cmp_mag(a, b); 2378 if (magnitude > 0) { 2379 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2380 bcd_sub_mag(&result, a, b, &invalid, &overflow); 2381 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2382 } else if (magnitude < 0) { 2383 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps); 2384 bcd_sub_mag(&result, b, a, &invalid, &overflow); 2385 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2386 } else { 2387 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps); 2388 cr = CRF_EQ; 2389 } 2390 } 2391 } 2392 2393 if (unlikely(invalid)) { 2394 result.VsrD(0) = result.VsrD(1) = -1; 2395 cr = CRF_SO; 2396 } else if (overflow) { 2397 cr |= CRF_SO; 2398 } else if (zero) { 2399 cr |= CRF_EQ; 2400 } 2401 2402 *r = result; 2403 2404 return cr; 2405 } 2406 2407 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2408 { 2409 ppc_avr_t bcopy = *b; 2410 int sgnb = bcd_get_sgn(b); 2411 if (sgnb < 0) { 2412 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2413 } else if (sgnb > 0) { 2414 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2415 } 2416 /* else invalid ... defer to bcdadd code for proper handling */ 2417 2418 return helper_bcdadd(r, a, &bcopy, ps); 2419 } 2420 2421 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2422 { 2423 int i; 2424 int cr = 0; 2425 uint16_t national = 0; 2426 uint16_t sgnb = get_national_digit(b, 0); 2427 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2428 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2429 2430 for (i = 1; i < 8; i++) { 2431 national = get_national_digit(b, i); 2432 if (unlikely(national < 0x30 || national > 0x39)) { 2433 invalid = 1; 2434 break; 2435 } 2436 2437 bcd_put_digit(&ret, national & 0xf, i); 2438 } 2439 2440 if (sgnb == NATIONAL_PLUS) { 2441 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2442 } else { 2443 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2444 } 2445 2446 cr = bcd_cmp_zero(&ret); 2447 2448 if (unlikely(invalid)) { 2449 cr = CRF_SO; 2450 } 2451 2452 *r = ret; 2453 2454 return cr; 2455 } 2456 2457 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2458 { 2459 int i; 2460 int cr = 0; 2461 int sgnb = bcd_get_sgn(b); 2462 int invalid = (sgnb == 0); 2463 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2464 2465 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0); 2466 2467 for (i = 1; i < 8; i++) { 2468 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2469 2470 if (unlikely(invalid)) { 2471 break; 2472 } 2473 } 2474 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2475 2476 cr = bcd_cmp_zero(b); 2477 2478 if (ox_flag) { 2479 cr |= CRF_SO; 2480 } 2481 2482 if (unlikely(invalid)) { 2483 cr = CRF_SO; 2484 } 2485 2486 *r = ret; 2487 2488 return cr; 2489 } 2490 2491 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2492 { 2493 int i; 2494 int cr = 0; 2495 int invalid = 0; 2496 int zone_digit = 0; 2497 int zone_lead = ps ? 0xF : 0x3; 2498 int digit = 0; 2499 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2500 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4; 2501 2502 if (unlikely((sgnb < 0xA) && ps)) { 2503 invalid = 1; 2504 } 2505 2506 for (i = 0; i < 16; i++) { 2507 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead; 2508 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF; 2509 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2510 invalid = 1; 2511 break; 2512 } 2513 2514 bcd_put_digit(&ret, digit, i + 1); 2515 } 2516 2517 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2518 (!ps && (sgnb & 0x4))) { 2519 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2520 } else { 2521 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2522 } 2523 2524 cr = bcd_cmp_zero(&ret); 2525 2526 if (unlikely(invalid)) { 2527 cr = CRF_SO; 2528 } 2529 2530 *r = ret; 2531 2532 return cr; 2533 } 2534 2535 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2536 { 2537 int i; 2538 int cr = 0; 2539 uint8_t digit = 0; 2540 int sgnb = bcd_get_sgn(b); 2541 int zone_lead = (ps) ? 0xF0 : 0x30; 2542 int invalid = (sgnb == 0); 2543 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2544 2545 int ox_flag = ((b->VsrD(0) >> 4) != 0); 2546 2547 for (i = 0; i < 16; i++) { 2548 digit = bcd_get_digit(b, i + 1, &invalid); 2549 2550 if (unlikely(invalid)) { 2551 break; 2552 } 2553 2554 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit; 2555 } 2556 2557 if (ps) { 2558 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2559 } else { 2560 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1); 2561 } 2562 2563 cr = bcd_cmp_zero(b); 2564 2565 if (ox_flag) { 2566 cr |= CRF_SO; 2567 } 2568 2569 if (unlikely(invalid)) { 2570 cr = CRF_SO; 2571 } 2572 2573 *r = ret; 2574 2575 return cr; 2576 } 2577 2578 /** 2579 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs 2580 * 2581 * Returns: 2582 * > 0 if ahi|alo > bhi|blo, 2583 * 0 if ahi|alo == bhi|blo, 2584 * < 0 if ahi|alo < bhi|blo 2585 */ 2586 static inline int ucmp128(uint64_t alo, uint64_t ahi, 2587 uint64_t blo, uint64_t bhi) 2588 { 2589 return (ahi == bhi) ? 2590 (alo > blo ? 1 : (alo == blo ? 0 : -1)) : 2591 (ahi > bhi ? 1 : -1); 2592 } 2593 2594 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2595 { 2596 int i; 2597 int cr; 2598 uint64_t lo_value; 2599 uint64_t hi_value; 2600 uint64_t rem; 2601 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2602 2603 if (b->VsrSD(0) < 0) { 2604 lo_value = -b->VsrSD(1); 2605 hi_value = ~b->VsrD(0) + !lo_value; 2606 bcd_put_digit(&ret, 0xD, 0); 2607 2608 cr = CRF_LT; 2609 } else { 2610 lo_value = b->VsrD(1); 2611 hi_value = b->VsrD(0); 2612 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); 2613 2614 if (hi_value == 0 && lo_value == 0) { 2615 cr = CRF_EQ; 2616 } else { 2617 cr = CRF_GT; 2618 } 2619 } 2620 2621 /* 2622 * Check src limits: abs(src) <= 10^31 - 1 2623 * 2624 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff 2625 */ 2626 if (ucmp128(lo_value, hi_value, 2627 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) { 2628 cr |= CRF_SO; 2629 2630 /* 2631 * According to the ISA, if src wouldn't fit in the destination 2632 * register, the result is undefined. 2633 * In that case, we leave r unchanged. 2634 */ 2635 } else { 2636 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL); 2637 2638 for (i = 1; i < 16; rem /= 10, i++) { 2639 bcd_put_digit(&ret, rem % 10, i); 2640 } 2641 2642 for (; i < 32; lo_value /= 10, i++) { 2643 bcd_put_digit(&ret, lo_value % 10, i); 2644 } 2645 2646 *r = ret; 2647 } 2648 2649 return cr; 2650 } 2651 2652 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2653 { 2654 uint8_t i; 2655 int cr; 2656 uint64_t carry; 2657 uint64_t unused; 2658 uint64_t lo_value; 2659 uint64_t hi_value = 0; 2660 int sgnb = bcd_get_sgn(b); 2661 int invalid = (sgnb == 0); 2662 2663 lo_value = bcd_get_digit(b, 31, &invalid); 2664 for (i = 30; i > 0; i--) { 2665 mulu64(&lo_value, &carry, lo_value, 10ULL); 2666 mulu64(&hi_value, &unused, hi_value, 10ULL); 2667 lo_value += bcd_get_digit(b, i, &invalid); 2668 hi_value += carry; 2669 2670 if (unlikely(invalid)) { 2671 break; 2672 } 2673 } 2674 2675 if (sgnb == -1) { 2676 r->VsrSD(1) = -lo_value; 2677 r->VsrSD(0) = ~hi_value + !r->VsrSD(1); 2678 } else { 2679 r->VsrSD(1) = lo_value; 2680 r->VsrSD(0) = hi_value; 2681 } 2682 2683 cr = bcd_cmp_zero(b); 2684 2685 if (unlikely(invalid)) { 2686 cr = CRF_SO; 2687 } 2688 2689 return cr; 2690 } 2691 2692 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2693 { 2694 int i; 2695 int invalid = 0; 2696 2697 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 2698 return CRF_SO; 2699 } 2700 2701 *r = *a; 2702 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0); 2703 2704 for (i = 1; i < 32; i++) { 2705 bcd_get_digit(a, i, &invalid); 2706 bcd_get_digit(b, i, &invalid); 2707 if (unlikely(invalid)) { 2708 return CRF_SO; 2709 } 2710 } 2711 2712 return bcd_cmp_zero(r); 2713 } 2714 2715 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2716 { 2717 int sgnb = bcd_get_sgn(b); 2718 2719 *r = *b; 2720 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 2721 2722 if (bcd_is_valid(b) == false) { 2723 return CRF_SO; 2724 } 2725 2726 return bcd_cmp_zero(r); 2727 } 2728 2729 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2730 { 2731 int cr; 2732 int i = a->VsrSB(7); 2733 bool ox_flag = false; 2734 int sgnb = bcd_get_sgn(b); 2735 ppc_avr_t ret = *b; 2736 ret.VsrD(1) &= ~0xf; 2737 2738 if (bcd_is_valid(b) == false) { 2739 return CRF_SO; 2740 } 2741 2742 if (unlikely(i > 31)) { 2743 i = 31; 2744 } else if (unlikely(i < -31)) { 2745 i = -31; 2746 } 2747 2748 if (i > 0) { 2749 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2750 } else { 2751 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2752 } 2753 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2754 2755 *r = ret; 2756 2757 cr = bcd_cmp_zero(r); 2758 if (ox_flag) { 2759 cr |= CRF_SO; 2760 } 2761 2762 return cr; 2763 } 2764 2765 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2766 { 2767 int cr; 2768 int i; 2769 int invalid = 0; 2770 bool ox_flag = false; 2771 ppc_avr_t ret = *b; 2772 2773 for (i = 0; i < 32; i++) { 2774 bcd_get_digit(b, i, &invalid); 2775 2776 if (unlikely(invalid)) { 2777 return CRF_SO; 2778 } 2779 } 2780 2781 i = a->VsrSB(7); 2782 if (i >= 32) { 2783 ox_flag = true; 2784 ret.VsrD(1) = ret.VsrD(0) = 0; 2785 } else if (i <= -32) { 2786 ret.VsrD(1) = ret.VsrD(0) = 0; 2787 } else if (i > 0) { 2788 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2789 } else { 2790 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2791 } 2792 *r = ret; 2793 2794 cr = bcd_cmp_zero(r); 2795 if (ox_flag) { 2796 cr |= CRF_SO; 2797 } 2798 2799 return cr; 2800 } 2801 2802 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2803 { 2804 int cr; 2805 int unused = 0; 2806 int invalid = 0; 2807 bool ox_flag = false; 2808 int sgnb = bcd_get_sgn(b); 2809 ppc_avr_t ret = *b; 2810 ret.VsrD(1) &= ~0xf; 2811 2812 int i = a->VsrSB(7); 2813 ppc_avr_t bcd_one; 2814 2815 bcd_one.VsrD(0) = 0; 2816 bcd_one.VsrD(1) = 0x10; 2817 2818 if (bcd_is_valid(b) == false) { 2819 return CRF_SO; 2820 } 2821 2822 if (unlikely(i > 31)) { 2823 i = 31; 2824 } else if (unlikely(i < -31)) { 2825 i = -31; 2826 } 2827 2828 if (i > 0) { 2829 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2830 } else { 2831 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2832 2833 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 2834 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 2835 } 2836 } 2837 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2838 2839 cr = bcd_cmp_zero(&ret); 2840 if (ox_flag) { 2841 cr |= CRF_SO; 2842 } 2843 *r = ret; 2844 2845 return cr; 2846 } 2847 2848 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2849 { 2850 uint64_t mask; 2851 uint32_t ox_flag = 0; 2852 int i = a->VsrSH(3) + 1; 2853 ppc_avr_t ret = *b; 2854 2855 if (bcd_is_valid(b) == false) { 2856 return CRF_SO; 2857 } 2858 2859 if (i > 16 && i < 32) { 2860 mask = (uint64_t)-1 >> (128 - i * 4); 2861 if (ret.VsrD(0) & ~mask) { 2862 ox_flag = CRF_SO; 2863 } 2864 2865 ret.VsrD(0) &= mask; 2866 } else if (i >= 0 && i <= 16) { 2867 mask = (uint64_t)-1 >> (64 - i * 4); 2868 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 2869 ox_flag = CRF_SO; 2870 } 2871 2872 ret.VsrD(1) &= mask; 2873 ret.VsrD(0) = 0; 2874 } 2875 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 2876 *r = ret; 2877 2878 return bcd_cmp_zero(&ret) | ox_flag; 2879 } 2880 2881 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2882 { 2883 int i; 2884 uint64_t mask; 2885 uint32_t ox_flag = 0; 2886 int invalid = 0; 2887 ppc_avr_t ret = *b; 2888 2889 for (i = 0; i < 32; i++) { 2890 bcd_get_digit(b, i, &invalid); 2891 2892 if (unlikely(invalid)) { 2893 return CRF_SO; 2894 } 2895 } 2896 2897 i = a->VsrSH(3); 2898 if (i > 16 && i < 33) { 2899 mask = (uint64_t)-1 >> (128 - i * 4); 2900 if (ret.VsrD(0) & ~mask) { 2901 ox_flag = CRF_SO; 2902 } 2903 2904 ret.VsrD(0) &= mask; 2905 } else if (i > 0 && i <= 16) { 2906 mask = (uint64_t)-1 >> (64 - i * 4); 2907 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 2908 ox_flag = CRF_SO; 2909 } 2910 2911 ret.VsrD(1) &= mask; 2912 ret.VsrD(0) = 0; 2913 } else if (i == 0) { 2914 if (ret.VsrD(0) || ret.VsrD(1)) { 2915 ox_flag = CRF_SO; 2916 } 2917 ret.VsrD(0) = ret.VsrD(1) = 0; 2918 } 2919 2920 *r = ret; 2921 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) { 2922 return ox_flag | CRF_EQ; 2923 } 2924 2925 return ox_flag | CRF_GT; 2926 } 2927 2928 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 2929 { 2930 int i; 2931 VECTOR_FOR_INORDER_I(i, u8) { 2932 r->u8[i] = AES_sbox[a->u8[i]]; 2933 } 2934 } 2935 2936 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2937 { 2938 ppc_avr_t result; 2939 int i; 2940 2941 VECTOR_FOR_INORDER_I(i, u32) { 2942 result.VsrW(i) = b->VsrW(i) ^ 2943 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^ 2944 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^ 2945 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^ 2946 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]); 2947 } 2948 *r = result; 2949 } 2950 2951 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2952 { 2953 ppc_avr_t result; 2954 int i; 2955 2956 VECTOR_FOR_INORDER_I(i, u8) { 2957 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]); 2958 } 2959 *r = result; 2960 } 2961 2962 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2963 { 2964 /* This differs from what is written in ISA V2.07. The RTL is */ 2965 /* incorrect and will be fixed in V2.07B. */ 2966 int i; 2967 ppc_avr_t tmp; 2968 2969 VECTOR_FOR_INORDER_I(i, u8) { 2970 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])]; 2971 } 2972 2973 VECTOR_FOR_INORDER_I(i, u32) { 2974 r->VsrW(i) = 2975 AES_imc[tmp.VsrB(4 * i + 0)][0] ^ 2976 AES_imc[tmp.VsrB(4 * i + 1)][1] ^ 2977 AES_imc[tmp.VsrB(4 * i + 2)][2] ^ 2978 AES_imc[tmp.VsrB(4 * i + 3)][3]; 2979 } 2980 } 2981 2982 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2983 { 2984 ppc_avr_t result; 2985 int i; 2986 2987 VECTOR_FOR_INORDER_I(i, u8) { 2988 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]); 2989 } 2990 *r = result; 2991 } 2992 2993 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 2994 { 2995 int st = (st_six & 0x10) != 0; 2996 int six = st_six & 0xF; 2997 int i; 2998 2999 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 3000 if (st == 0) { 3001 if ((six & (0x8 >> i)) == 0) { 3002 r->VsrW(i) = ror32(a->VsrW(i), 7) ^ 3003 ror32(a->VsrW(i), 18) ^ 3004 (a->VsrW(i) >> 3); 3005 } else { /* six.bit[i] == 1 */ 3006 r->VsrW(i) = ror32(a->VsrW(i), 17) ^ 3007 ror32(a->VsrW(i), 19) ^ 3008 (a->VsrW(i) >> 10); 3009 } 3010 } else { /* st == 1 */ 3011 if ((six & (0x8 >> i)) == 0) { 3012 r->VsrW(i) = ror32(a->VsrW(i), 2) ^ 3013 ror32(a->VsrW(i), 13) ^ 3014 ror32(a->VsrW(i), 22); 3015 } else { /* six.bit[i] == 1 */ 3016 r->VsrW(i) = ror32(a->VsrW(i), 6) ^ 3017 ror32(a->VsrW(i), 11) ^ 3018 ror32(a->VsrW(i), 25); 3019 } 3020 } 3021 } 3022 } 3023 3024 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3025 { 3026 int st = (st_six & 0x10) != 0; 3027 int six = st_six & 0xF; 3028 int i; 3029 3030 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 3031 if (st == 0) { 3032 if ((six & (0x8 >> (2 * i))) == 0) { 3033 r->VsrD(i) = ror64(a->VsrD(i), 1) ^ 3034 ror64(a->VsrD(i), 8) ^ 3035 (a->VsrD(i) >> 7); 3036 } else { /* six.bit[2*i] == 1 */ 3037 r->VsrD(i) = ror64(a->VsrD(i), 19) ^ 3038 ror64(a->VsrD(i), 61) ^ 3039 (a->VsrD(i) >> 6); 3040 } 3041 } else { /* st == 1 */ 3042 if ((six & (0x8 >> (2 * i))) == 0) { 3043 r->VsrD(i) = ror64(a->VsrD(i), 28) ^ 3044 ror64(a->VsrD(i), 34) ^ 3045 ror64(a->VsrD(i), 39); 3046 } else { /* six.bit[2*i] == 1 */ 3047 r->VsrD(i) = ror64(a->VsrD(i), 14) ^ 3048 ror64(a->VsrD(i), 18) ^ 3049 ror64(a->VsrD(i), 41); 3050 } 3051 } 3052 } 3053 } 3054 3055 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 3056 { 3057 ppc_avr_t result; 3058 int i; 3059 3060 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 3061 int indexA = c->VsrB(i) >> 4; 3062 int indexB = c->VsrB(i) & 0xF; 3063 3064 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB); 3065 } 3066 *r = result; 3067 } 3068 3069 #undef VECTOR_FOR_INORDER_I 3070 3071 /*****************************************************************************/ 3072 /* SPE extension helpers */ 3073 /* Use a table to make this quicker */ 3074 static const uint8_t hbrev[16] = { 3075 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE, 3076 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF, 3077 }; 3078 3079 static inline uint8_t byte_reverse(uint8_t val) 3080 { 3081 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4); 3082 } 3083 3084 static inline uint32_t word_reverse(uint32_t val) 3085 { 3086 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) | 3087 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24); 3088 } 3089 3090 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */ 3091 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2) 3092 { 3093 uint32_t a, b, d, mask; 3094 3095 mask = UINT32_MAX >> (32 - MASKBITS); 3096 a = arg1 & mask; 3097 b = arg2 & mask; 3098 d = word_reverse(1 + word_reverse(a | ~b)); 3099 return (arg1 & ~mask) | (d & b); 3100 } 3101 3102 uint32_t helper_cntlsw32(uint32_t val) 3103 { 3104 if (val & 0x80000000) { 3105 return clz32(~val); 3106 } else { 3107 return clz32(val); 3108 } 3109 } 3110 3111 uint32_t helper_cntlzw32(uint32_t val) 3112 { 3113 return clz32(val); 3114 } 3115 3116 /* 440 specific */ 3117 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 3118 target_ulong low, uint32_t update_Rc) 3119 { 3120 target_ulong mask; 3121 int i; 3122 3123 i = 1; 3124 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3125 if ((high & mask) == 0) { 3126 if (update_Rc) { 3127 env->crf[0] = 0x4; 3128 } 3129 goto done; 3130 } 3131 i++; 3132 } 3133 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3134 if ((low & mask) == 0) { 3135 if (update_Rc) { 3136 env->crf[0] = 0x8; 3137 } 3138 goto done; 3139 } 3140 i++; 3141 } 3142 i = 8; 3143 if (update_Rc) { 3144 env->crf[0] = 0x2; 3145 } 3146 done: 3147 env->xer = (env->xer & ~0x7F) | i; 3148 if (update_Rc) { 3149 env->crf[0] |= xer_so; 3150 } 3151 return i; 3152 } 3153