1 /* 2 * PowerPC integer and vector emulation helpers for QEMU. 3 * 4 * Copyright (c) 2003-2007 Jocelyn Mayer 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "cpu.h" 22 #include "internal.h" 23 #include "qemu/host-utils.h" 24 #include "qemu/log.h" 25 #include "exec/helper-proto.h" 26 #include "crypto/aes.h" 27 #include "crypto/aes-round.h" 28 #include "crypto/clmul.h" 29 #include "fpu/softfloat.h" 30 #include "qapi/error.h" 31 #include "qemu/guest-random.h" 32 #include "tcg/tcg-gvec-desc.h" 33 34 #include "helper_regs.h" 35 /*****************************************************************************/ 36 /* Fixed point operations helpers */ 37 38 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov) 39 { 40 if (unlikely(ov)) { 41 env->so = env->ov = env->ov32 = 1; 42 } else { 43 env->ov = env->ov32 = 0; 44 } 45 } 46 47 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, 48 uint32_t oe) 49 { 50 uint64_t rt = 0; 51 int overflow = 0; 52 53 uint64_t dividend = (uint64_t)ra << 32; 54 uint64_t divisor = (uint32_t)rb; 55 56 if (unlikely(divisor == 0)) { 57 overflow = 1; 58 } else { 59 rt = dividend / divisor; 60 overflow = rt > UINT32_MAX; 61 } 62 63 if (unlikely(overflow)) { 64 rt = 0; /* Undefined */ 65 } 66 67 if (oe) { 68 helper_update_ov_legacy(env, overflow); 69 } 70 71 return (target_ulong)rt; 72 } 73 74 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, 75 uint32_t oe) 76 { 77 int64_t rt = 0; 78 int overflow = 0; 79 80 int64_t dividend = (int64_t)ra << 32; 81 int64_t divisor = (int64_t)((int32_t)rb); 82 83 if (unlikely((divisor == 0) || 84 ((divisor == -1ull) && (dividend == INT64_MIN)))) { 85 overflow = 1; 86 } else { 87 rt = dividend / divisor; 88 overflow = rt != (int32_t)rt; 89 } 90 91 if (unlikely(overflow)) { 92 rt = 0; /* Undefined */ 93 } 94 95 if (oe) { 96 helper_update_ov_legacy(env, overflow); 97 } 98 99 return (target_ulong)rt; 100 } 101 102 #if defined(TARGET_PPC64) 103 104 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) 105 { 106 uint64_t rt = 0; 107 int overflow = 0; 108 109 if (unlikely(rb == 0 || ra >= rb)) { 110 overflow = 1; 111 rt = 0; /* Undefined */ 112 } else { 113 divu128(&rt, &ra, rb); 114 } 115 116 if (oe) { 117 helper_update_ov_legacy(env, overflow); 118 } 119 120 return rt; 121 } 122 123 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) 124 { 125 uint64_t rt = 0; 126 int64_t ra = (int64_t)rau; 127 int64_t rb = (int64_t)rbu; 128 int overflow = 0; 129 130 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) { 131 overflow = 1; 132 rt = 0; /* Undefined */ 133 } else { 134 divs128(&rt, &ra, rb); 135 } 136 137 if (oe) { 138 helper_update_ov_legacy(env, overflow); 139 } 140 141 return rt; 142 } 143 144 #endif 145 146 147 #if defined(TARGET_PPC64) 148 /* if x = 
0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/*
 * Subtract 1 from each byte, AND with the inverse of the original value, and
 * check whether the MSB is set in each byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR with the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n) (haszero((x) ^ pattern(n)))

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

/*
 * Return a random number.
 */
uint64_t helper_darn32(void)
{
    Error *err = NULL;
    uint32_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_darn64(void)
{
    Error *err = NULL;
    uint64_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & PPC_BIT(index)) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca32 = env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca32 = env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words. */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}
#endif

uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
{
    /*
     * Instead of processing the mask bit-by-bit from the most significant to
     * the least significant bit, as described in PowerISA, we handle it in
     * blocks of 'n' zeros/ones from LSB to MSB. To avoid having to choose
     * between ctz and cto, we negate the mask at the end of each iteration.
     */
    target_ulong m, left = 0, right = 0;
    unsigned int n, i = 64;
    bool bit = false; /* tracks if we are processing zeros or ones */

    if (mask == 0 || mask == -1) {
        return src;
    }

    /* Process the mask in blocks, from LSB to MSB */
    while (i) {
        /* Find how many bits we should take */
        n = ctz64(mask);
        if (n > i) {
            n = i;
        }

        /*
         * Extract the 'n' trailing bits of src and put them in the leading
         * 'n' bits of 'right' or 'left', pushing down the previously
         * extracted values.
         */
        m = (1ll << n) - 1;
        if (bit) {
            right = ror64(right | (src & m), n);
        } else {
            left = ror64(left | (src & m), n);
        }

        /*
         * Discard the processed bits from 'src' and 'mask'. Note that we are
         * removing 'n' trailing zeros from 'mask', but the logical shift will
         * add 'n' leading zeros back, so the population count of 'mask' is
         * kept the same.
         */
        src >>= n;
        mask >>= n;
        i -= n;
        bit = !bit;
        mask = ~mask;
    }

    /*
     * At the end, right was ror'ed ctpop(mask) times. To put it back in place,
     * we shift it 64 - ctpop(mask) more bits.
381 */ 382 if (bit) { 383 n = ctpop64(mask); 384 } else { 385 n = 64 - ctpop64(mask); 386 } 387 388 return left | (right >> n); 389 } 390 391 uint64_t helper_PDEPD(uint64_t src, uint64_t mask) 392 { 393 int i, o; 394 uint64_t result = 0; 395 396 if (mask == -1) { 397 return src; 398 } 399 400 for (i = 0; mask != 0; i++) { 401 o = ctz64(mask); 402 mask &= mask - 1; 403 result |= ((src >> i) & 1) << o; 404 } 405 406 return result; 407 } 408 409 uint64_t helper_PEXTD(uint64_t src, uint64_t mask) 410 { 411 int i, o; 412 uint64_t result = 0; 413 414 if (mask == -1) { 415 return src; 416 } 417 418 for (o = 0; mask != 0; o++) { 419 i = ctz64(mask); 420 mask &= mask - 1; 421 result |= ((src >> i) & 1) << o; 422 } 423 424 return result; 425 } 426 427 /*****************************************************************************/ 428 /* Altivec extension helpers */ 429 #if HOST_BIG_ENDIAN 430 #define VECTOR_FOR_INORDER_I(index, element) \ 431 for (index = 0; index < ARRAY_SIZE(r->element); index++) 432 #else 433 #define VECTOR_FOR_INORDER_I(index, element) \ 434 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--) 435 #endif 436 437 /* Saturating arithmetic helpers. */ 438 #define SATCVT(from, to, from_type, to_type, min, max) \ 439 static inline to_type cvt##from##to(from_type x, int *sat) \ 440 { \ 441 to_type r; \ 442 \ 443 if (x < (from_type)min) { \ 444 r = min; \ 445 *sat = 1; \ 446 } else if (x > (from_type)max) { \ 447 r = max; \ 448 *sat = 1; \ 449 } else { \ 450 r = x; \ 451 } \ 452 return r; \ 453 } 454 #define SATCVTU(from, to, from_type, to_type, min, max) \ 455 static inline to_type cvt##from##to(from_type x, int *sat) \ 456 { \ 457 to_type r; \ 458 \ 459 if (x > (from_type)max) { \ 460 r = max; \ 461 *sat = 1; \ 462 } else { \ 463 r = x; \ 464 } \ 465 return r; \ 466 } 467 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX) 468 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX) 469 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX) 470 471 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX) 472 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX) 473 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX) 474 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX) 475 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX) 476 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX) 477 #undef SATCVT 478 #undef SATCVTU 479 480 void helper_mtvscr(CPUPPCState *env, uint32_t vscr) 481 { 482 ppc_store_vscr(env, vscr); 483 } 484 485 uint32_t helper_mfvscr(CPUPPCState *env) 486 { 487 return ppc_get_vscr(env); 488 } 489 490 static inline void set_vscr_sat(CPUPPCState *env) 491 { 492 /* The choice of non-zero value is arbitrary. 
*/ 493 env->vscr_sat.u32[0] = 1; 494 } 495 496 /* vprtybq */ 497 void helper_VPRTYBQ(ppc_avr_t *r, ppc_avr_t *b, uint32_t v) 498 { 499 uint64_t res = b->u64[0] ^ b->u64[1]; 500 res ^= res >> 32; 501 res ^= res >> 16; 502 res ^= res >> 8; 503 r->VsrD(1) = res & 1; 504 r->VsrD(0) = 0; 505 } 506 507 #define VARITHFP(suffix, func) \ 508 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 509 ppc_avr_t *b) \ 510 { \ 511 int i; \ 512 \ 513 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 514 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 515 } \ 516 } 517 VARITHFP(addfp, float32_add) 518 VARITHFP(subfp, float32_sub) 519 VARITHFP(minfp, float32_min) 520 VARITHFP(maxfp, float32_max) 521 #undef VARITHFP 522 523 #define VARITHFPFMA(suffix, type) \ 524 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 525 ppc_avr_t *b, ppc_avr_t *c) \ 526 { \ 527 int i; \ 528 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 529 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 530 type, &env->vec_status); \ 531 } \ 532 } 533 VARITHFPFMA(maddfp, 0); 534 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 535 #undef VARITHFPFMA 536 537 #define VARITHSAT_CASE(type, op, cvt, element) \ 538 { \ 539 type result = (type)a->element[i] op (type)b->element[i]; \ 540 r->element[i] = cvt(result, &sat); \ 541 } 542 543 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 544 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 545 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 546 { \ 547 int sat = 0; \ 548 int i; \ 549 \ 550 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 551 VARITHSAT_CASE(optype, op, cvt, element); \ 552 } \ 553 if (sat) { \ 554 vscr_sat->u32[0] = 1; \ 555 } \ 556 } 557 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 558 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 559 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 560 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 561 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 562 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 563 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 564 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 565 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 566 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 567 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 568 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 569 #undef VARITHSAT_CASE 570 #undef VARITHSAT_DO 571 #undef VARITHSAT_SIGNED 572 #undef VARITHSAT_UNSIGNED 573 574 #define VAVG(name, element, etype) \ 575 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\ 576 { \ 577 int i; \ 578 \ 579 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 580 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 581 r->element[i] = x >> 1; \ 582 } \ 583 } 584 585 VAVG(VAVGSB, s8, int16_t) 586 VAVG(VAVGUB, u8, uint16_t) 587 VAVG(VAVGSH, s16, int32_t) 588 VAVG(VAVGUH, u16, uint32_t) 589 VAVG(VAVGSW, s32, int64_t) 590 VAVG(VAVGUW, u32, uint64_t) 591 #undef VAVG 592 593 #define VABSDU(name, element) \ 594 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\ 595 { \ 596 int i; \ 597 \ 598 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 599 r->element[i] = (a->element[i] > b->element[i]) ? 
\ 600 (a->element[i] - b->element[i]) : \ 601 (b->element[i] - a->element[i]); \ 602 } \ 603 } 604 605 /* 606 * VABSDU - Vector absolute difference unsigned 607 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 608 * element - element type to access from vector 609 */ 610 VABSDU(VABSDUB, u8) 611 VABSDU(VABSDUH, u16) 612 VABSDU(VABSDUW, u32) 613 #undef VABSDU 614 615 #define VCF(suffix, cvt, element) \ 616 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 617 ppc_avr_t *b, uint32_t uim) \ 618 { \ 619 int i; \ 620 \ 621 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 622 float32 t = cvt(b->element[i], &env->vec_status); \ 623 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 624 } \ 625 } 626 VCF(ux, uint32_to_float32, u32) 627 VCF(sx, int32_to_float32, s32) 628 #undef VCF 629 630 #define VCMPNEZ(NAME, ELEM) \ 631 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \ 632 { \ 633 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \ 634 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \ 635 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \ 636 } \ 637 } 638 VCMPNEZ(VCMPNEZB, u8) 639 VCMPNEZ(VCMPNEZH, u16) 640 VCMPNEZ(VCMPNEZW, u32) 641 #undef VCMPNEZ 642 643 #define VCMPFP_DO(suffix, compare, order, record) \ 644 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 645 ppc_avr_t *a, ppc_avr_t *b) \ 646 { \ 647 uint32_t ones = (uint32_t)-1; \ 648 uint32_t all = ones; \ 649 uint32_t none = 0; \ 650 int i; \ 651 \ 652 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 653 uint32_t result; \ 654 FloatRelation rel = \ 655 float32_compare_quiet(a->f32[i], b->f32[i], \ 656 &env->vec_status); \ 657 if (rel == float_relation_unordered) { \ 658 result = 0; \ 659 } else if (rel compare order) { \ 660 result = ones; \ 661 } else { \ 662 result = 0; \ 663 } \ 664 r->u32[i] = result; \ 665 all &= result; \ 666 none |= result; \ 667 } \ 668 if (record) { \ 669 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 670 } \ 671 } 672 #define VCMPFP(suffix, compare, order) \ 673 VCMPFP_DO(suffix, compare, order, 0) \ 674 VCMPFP_DO(suffix##_dot, compare, order, 1) 675 VCMPFP(eqfp, ==, float_relation_equal) 676 VCMPFP(gefp, !=, float_relation_less) 677 VCMPFP(gtfp, ==, float_relation_greater) 678 #undef VCMPFP_DO 679 #undef VCMPFP 680 681 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 682 ppc_avr_t *a, ppc_avr_t *b, int record) 683 { 684 int i; 685 int all_in = 0; 686 687 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 688 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 689 &env->vec_status); 690 if (le_rel == float_relation_unordered) { 691 r->u32[i] = 0xc0000000; 692 all_in = 1; 693 } else { 694 float32 bneg = float32_chs(b->f32[i]); 695 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, 696 &env->vec_status); 697 int le = le_rel != float_relation_greater; 698 int ge = ge_rel != float_relation_less; 699 700 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 701 all_in |= (!le | !ge); 702 } 703 } 704 if (record) { 705 env->crf[6] = (all_in == 0) << 1; 706 } 707 } 708 709 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 710 { 711 vcmpbfp_internal(env, r, a, b, 0); 712 } 713 714 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 715 ppc_avr_t *b) 716 { 717 vcmpbfp_internal(env, r, a, b, 1); 718 } 719 720 #define VCT(suffix, satcvt, element) \ 721 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 722 ppc_avr_t *b, uint32_t uim) \ 723 { \ 724 int i; \ 725 int sat = 0; \ 
726 float_status s = env->vec_status; \ 727 \ 728 set_float_rounding_mode(float_round_to_zero, &s); \ 729 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 730 if (float32_is_any_nan(b->f32[i])) { \ 731 r->element[i] = 0; \ 732 } else { \ 733 float64 t = float32_to_float64(b->f32[i], &s); \ 734 int64_t j; \ 735 \ 736 t = float64_scalbn(t, uim, &s); \ 737 j = float64_to_int64(t, &s); \ 738 r->element[i] = satcvt(j, &sat); \ 739 } \ 740 } \ 741 if (sat) { \ 742 set_vscr_sat(env); \ 743 } \ 744 } 745 VCT(uxs, cvtsduw, u32) 746 VCT(sxs, cvtsdsw, s32) 747 #undef VCT 748 749 typedef int64_t do_ger(uint32_t, uint32_t, uint32_t); 750 751 static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask) 752 { 753 int64_t psum = 0; 754 for (int i = 0; i < 8; i++, mask >>= 1) { 755 if (mask & 1) { 756 psum += (int64_t)sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4); 757 } 758 } 759 return psum; 760 } 761 762 static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask) 763 { 764 int64_t psum = 0; 765 for (int i = 0; i < 4; i++, mask >>= 1) { 766 if (mask & 1) { 767 psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8); 768 } 769 } 770 return psum; 771 } 772 773 static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask) 774 { 775 int64_t psum = 0; 776 for (int i = 0; i < 2; i++, mask >>= 1) { 777 if (mask & 1) { 778 psum += (int64_t)sextract32(a, 16 * i, 16) * 779 sextract32(b, 16 * i, 16); 780 } 781 } 782 return psum; 783 } 784 785 static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at, 786 uint32_t mask, bool sat, bool acc, do_ger ger) 787 { 788 uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK), 789 xmsk = FIELD_EX32(mask, GER_MSK, XMSK), 790 ymsk = FIELD_EX32(mask, GER_MSK, YMSK); 791 uint8_t xmsk_bit, ymsk_bit; 792 int64_t psum; 793 int i, j; 794 for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) { 795 for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) { 796 if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) { 797 psum = ger(a->VsrW(i), b->VsrW(j), pmsk); 798 if (acc) { 799 psum += at[i].VsrSW(j); 800 } 801 if (sat && psum > INT32_MAX) { 802 set_vscr_sat(env); 803 at[i].VsrSW(j) = INT32_MAX; 804 } else if (sat && psum < INT32_MIN) { 805 set_vscr_sat(env); 806 at[i].VsrSW(j) = INT32_MIN; 807 } else { 808 at[i].VsrSW(j) = (int32_t) psum; 809 } 810 } else { 811 at[i].VsrSW(j) = 0; 812 } 813 } 814 } 815 } 816 817 QEMU_FLATTEN 818 void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 819 ppc_acc_t *at, uint32_t mask) 820 { 821 xviger(env, a, b, at, mask, false, false, ger_rank8); 822 } 823 824 QEMU_FLATTEN 825 void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 826 ppc_acc_t *at, uint32_t mask) 827 { 828 xviger(env, a, b, at, mask, false, true, ger_rank8); 829 } 830 831 QEMU_FLATTEN 832 void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 833 ppc_acc_t *at, uint32_t mask) 834 { 835 xviger(env, a, b, at, mask, false, false, ger_rank4); 836 } 837 838 QEMU_FLATTEN 839 void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 840 ppc_acc_t *at, uint32_t mask) 841 { 842 xviger(env, a, b, at, mask, false, true, ger_rank4); 843 } 844 845 QEMU_FLATTEN 846 void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 847 ppc_acc_t *at, uint32_t mask) 848 { 849 xviger(env, a, b, at, mask, true, true, ger_rank4); 850 } 851 852 QEMU_FLATTEN 853 void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 854 ppc_acc_t *at, uint32_t mask) 855 { 856 xviger(env, a, b, at, mask, false, false, ger_rank2); 
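    /*
     * Illustrative note (not from the ISA text): ger_rank2(), used by the
     * call above, multiplies the two signed halfwords of each 32-bit element
     * of 'a' and 'b' and sums the products, e.g. a lane pairing 0x0002fffe
     * with 0x00030004 contributes 2 * 3 + (-2) * 4 = -2 when both PMSK bits
     * are set.
     */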
857 } 858 859 QEMU_FLATTEN 860 void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 861 ppc_acc_t *at, uint32_t mask) 862 { 863 xviger(env, a, b, at, mask, true, false, ger_rank2); 864 } 865 866 QEMU_FLATTEN 867 void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 868 ppc_acc_t *at, uint32_t mask) 869 { 870 xviger(env, a, b, at, mask, false, true, ger_rank2); 871 } 872 873 QEMU_FLATTEN 874 void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 875 ppc_acc_t *at, uint32_t mask) 876 { 877 xviger(env, a, b, at, mask, true, true, ger_rank2); 878 } 879 880 target_ulong helper_vclzlsbb(ppc_avr_t *r) 881 { 882 target_ulong count = 0; 883 int i; 884 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 885 if (r->VsrB(i) & 0x01) { 886 break; 887 } 888 count++; 889 } 890 return count; 891 } 892 893 target_ulong helper_vctzlsbb(ppc_avr_t *r) 894 { 895 target_ulong count = 0; 896 int i; 897 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 898 if (r->VsrB(i) & 0x01) { 899 break; 900 } 901 count++; 902 } 903 return count; 904 } 905 906 void helper_VMHADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 907 ppc_avr_t *b, ppc_avr_t *c) 908 { 909 int sat = 0; 910 int i; 911 912 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 913 int32_t prod = a->s16[i] * b->s16[i]; 914 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 915 916 r->s16[i] = cvtswsh(t, &sat); 917 } 918 919 if (sat) { 920 set_vscr_sat(env); 921 } 922 } 923 924 void helper_VMHRADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 925 ppc_avr_t *b, ppc_avr_t *c) 926 { 927 int sat = 0; 928 int i; 929 930 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 931 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 932 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 933 r->s16[i] = cvtswsh(t, &sat); 934 } 935 936 if (sat) { 937 set_vscr_sat(env); 938 } 939 } 940 941 void helper_VMLADDUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c, 942 uint32_t v) 943 { 944 int i; 945 946 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 947 int32_t prod = a->s16[i] * b->s16[i]; 948 r->s16[i] = (int16_t) (prod + c->s16[i]); 949 } 950 } 951 952 #define VMRG_DO(name, element, access, ofs) \ 953 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 954 { \ 955 ppc_avr_t result; \ 956 int i, half = ARRAY_SIZE(r->element) / 2; \ 957 \ 958 for (i = 0; i < half; i++) { \ 959 result.access(i * 2 + 0) = a->access(i + ofs); \ 960 result.access(i * 2 + 1) = b->access(i + ofs); \ 961 } \ 962 *r = result; \ 963 } 964 965 #define VMRG(suffix, element, access) \ 966 VMRG_DO(mrgl##suffix, element, access, half) \ 967 VMRG_DO(mrgh##suffix, element, access, 0) 968 VMRG(b, u8, VsrB) 969 VMRG(h, u16, VsrH) 970 VMRG(w, u32, VsrW) 971 #undef VMRG_DO 972 #undef VMRG 973 974 void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 975 { 976 int32_t prod[16]; 977 int i; 978 979 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 980 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 981 } 982 983 VECTOR_FOR_INORDER_I(i, s32) { 984 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 985 prod[4 * i + 2] + prod[4 * i + 3]; 986 } 987 } 988 989 void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 990 { 991 int32_t prod[8]; 992 int i; 993 994 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 995 prod[i] = a->s16[i] * b->s16[i]; 996 } 997 998 VECTOR_FOR_INORDER_I(i, s32) { 999 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1000 } 1001 } 1002 1003 void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1004 ppc_avr_t *b, ppc_avr_t 
*c) 1005 { 1006 int32_t prod[8]; 1007 int i; 1008 int sat = 0; 1009 1010 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1011 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1012 } 1013 1014 VECTOR_FOR_INORDER_I(i, s32) { 1015 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1016 1017 r->u32[i] = cvtsdsw(t, &sat); 1018 } 1019 1020 if (sat) { 1021 set_vscr_sat(env); 1022 } 1023 } 1024 1025 void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1026 { 1027 uint16_t prod[16]; 1028 int i; 1029 1030 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1031 prod[i] = a->u8[i] * b->u8[i]; 1032 } 1033 1034 VECTOR_FOR_INORDER_I(i, u32) { 1035 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1036 prod[4 * i + 2] + prod[4 * i + 3]; 1037 } 1038 } 1039 1040 void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1041 { 1042 uint32_t prod[8]; 1043 int i; 1044 1045 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1046 prod[i] = a->u16[i] * b->u16[i]; 1047 } 1048 1049 VECTOR_FOR_INORDER_I(i, u32) { 1050 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1051 } 1052 } 1053 1054 void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1055 ppc_avr_t *b, ppc_avr_t *c) 1056 { 1057 uint32_t prod[8]; 1058 int i; 1059 int sat = 0; 1060 1061 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1062 prod[i] = a->u16[i] * b->u16[i]; 1063 } 1064 1065 VECTOR_FOR_INORDER_I(i, s32) { 1066 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1067 1068 r->u32[i] = cvtuduw(t, &sat); 1069 } 1070 1071 if (sat) { 1072 set_vscr_sat(env); 1073 } 1074 } 1075 1076 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 1077 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1078 { \ 1079 int i; \ 1080 \ 1081 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1082 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 1083 (cast)b->mul_access(i); \ 1084 } \ 1085 } 1086 1087 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 1088 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1089 { \ 1090 int i; \ 1091 \ 1092 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1093 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1094 (cast)b->mul_access(i + 1); \ 1095 } \ 1096 } 1097 1098 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1099 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \ 1100 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast) 1101 VMUL(SB, s8, VsrSB, VsrSH, int16_t) 1102 VMUL(SH, s16, VsrSH, VsrSW, int32_t) 1103 VMUL(SW, s32, VsrSW, VsrSD, int64_t) 1104 VMUL(UB, u8, VsrB, VsrH, uint16_t) 1105 VMUL(UH, u16, VsrH, VsrW, uint32_t) 1106 VMUL(UW, u32, VsrW, VsrD, uint64_t) 1107 #undef VMUL_DO_EVN 1108 #undef VMUL_DO_ODD 1109 #undef VMUL 1110 1111 void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv, 1112 target_ulong uim) 1113 { 1114 int i, idx; 1115 ppc_vsr_t tmp = { .u64 = {0, 0} }; 1116 1117 for (i = 0; i < ARRAY_SIZE(t->u8); i++) { 1118 if ((pcv->VsrB(i) >> 5) == uim) { 1119 idx = pcv->VsrB(i) & 0x1f; 1120 if (idx < ARRAY_SIZE(t->u8)) { 1121 tmp.VsrB(i) = s0->VsrB(idx); 1122 } else { 1123 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8)); 1124 } 1125 } 1126 } 1127 1128 *t = tmp; 1129 } 1130 1131 void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1132 { 1133 Int128 neg1 = int128_makes64(-1); 1134 Int128 int128_min = int128_make128(0, INT64_MIN); 1135 if (likely(int128_nz(b->s128) && 1136 
(int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) { 1137 t->s128 = int128_divs(a->s128, b->s128); 1138 } else { 1139 t->s128 = a->s128; /* Undefined behavior */ 1140 } 1141 } 1142 1143 void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1144 { 1145 if (int128_nz(b->s128)) { 1146 t->s128 = int128_divu(a->s128, b->s128); 1147 } else { 1148 t->s128 = a->s128; /* Undefined behavior */ 1149 } 1150 } 1151 1152 void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1153 { 1154 int i; 1155 int64_t high; 1156 uint64_t low; 1157 for (i = 0; i < 2; i++) { 1158 high = a->s64[i]; 1159 low = 0; 1160 if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) { 1161 t->s64[i] = a->s64[i]; /* Undefined behavior */ 1162 } else { 1163 divs128(&low, &high, b->s64[i]); 1164 t->s64[i] = low; 1165 } 1166 } 1167 } 1168 1169 void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1170 { 1171 int i; 1172 uint64_t high, low; 1173 for (i = 0; i < 2; i++) { 1174 high = a->u64[i]; 1175 low = 0; 1176 if (unlikely(!b->u64[i])) { 1177 t->u64[i] = a->u64[i]; /* Undefined behavior */ 1178 } else { 1179 divu128(&low, &high, b->u64[i]); 1180 t->u64[i] = low; 1181 } 1182 } 1183 } 1184 1185 void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1186 { 1187 Int128 high, low; 1188 Int128 int128_min = int128_make128(0, INT64_MIN); 1189 Int128 neg1 = int128_makes64(-1); 1190 1191 high = a->s128; 1192 low = int128_zero(); 1193 if (unlikely(!int128_nz(b->s128) || 1194 (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) { 1195 t->s128 = a->s128; /* Undefined behavior */ 1196 } else { 1197 divs256(&low, &high, b->s128); 1198 t->s128 = low; 1199 } 1200 } 1201 1202 void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1203 { 1204 Int128 high, low; 1205 1206 high = a->s128; 1207 low = int128_zero(); 1208 if (unlikely(!int128_nz(b->s128))) { 1209 t->s128 = a->s128; /* Undefined behavior */ 1210 } else { 1211 divu256(&low, &high, b->s128); 1212 t->s128 = low; 1213 } 1214 } 1215 1216 void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1217 { 1218 Int128 neg1 = int128_makes64(-1); 1219 Int128 int128_min = int128_make128(0, INT64_MIN); 1220 if (likely(int128_nz(b->s128) && 1221 (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) { 1222 t->s128 = int128_rems(a->s128, b->s128); 1223 } else { 1224 t->s128 = int128_zero(); /* Undefined behavior */ 1225 } 1226 } 1227 1228 void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1229 { 1230 if (likely(int128_nz(b->s128))) { 1231 t->s128 = int128_remu(a->s128, b->s128); 1232 } else { 1233 t->s128 = int128_zero(); /* Undefined behavior */ 1234 } 1235 } 1236 1237 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1238 { 1239 ppc_avr_t result; 1240 int i; 1241 1242 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1243 int s = c->VsrB(i) & 0x1f; 1244 int index = s & 0xf; 1245 1246 if (s & 0x10) { 1247 result.VsrB(i) = b->VsrB(index); 1248 } else { 1249 result.VsrB(i) = a->VsrB(index); 1250 } 1251 } 1252 *r = result; 1253 } 1254 1255 void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1256 { 1257 ppc_avr_t result; 1258 int i; 1259 1260 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1261 int s = c->VsrB(i) & 0x1f; 1262 int index = 15 - (s & 0xf); 1263 1264 if (s & 0x10) { 1265 result.VsrB(i) = a->VsrB(index); 1266 } else { 1267 result.VsrB(i) = b->VsrB(index); 1268 } 1269 } 1270 *r = result; 1271 } 1272 1273 #define XXGENPCV_BE_EXP(NAME, SZ) \ 1274 void glue(helper_, glue(NAME, 
_be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ 1275 { \ 1276 ppc_vsr_t tmp; \ 1277 \ 1278 /* Initialize tmp with the result of an all-zeros mask */ \ 1279 tmp.VsrD(0) = 0x1011121314151617; \ 1280 tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \ 1281 \ 1282 /* Iterate over the most significant byte of each element */ \ 1283 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1284 if (b->VsrB(i) & 0x80) { \ 1285 /* Update each byte of the element */ \ 1286 for (int k = 0; k < SZ; k++) { \ 1287 tmp.VsrB(i + k) = j + k; \ 1288 } \ 1289 j += SZ; \ 1290 } \ 1291 } \ 1292 \ 1293 *t = tmp; \ 1294 } 1295 1296 #define XXGENPCV_BE_COMP(NAME, SZ) \ 1297 void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ 1298 { \ 1299 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ 1300 \ 1301 /* Iterate over the most significant byte of each element */ \ 1302 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1303 if (b->VsrB(i) & 0x80) { \ 1304 /* Update each byte of the element */ \ 1305 for (int k = 0; k < SZ; k++) { \ 1306 tmp.VsrB(j + k) = i + k; \ 1307 } \ 1308 j += SZ; \ 1309 } \ 1310 } \ 1311 \ 1312 *t = tmp; \ 1313 } 1314 1315 #define XXGENPCV_LE_EXP(NAME, SZ) \ 1316 void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ 1317 { \ 1318 ppc_vsr_t tmp; \ 1319 \ 1320 /* Initialize tmp with the result of an all-zeros mask */ \ 1321 tmp.VsrD(0) = 0x1F1E1D1C1B1A1918; \ 1322 tmp.VsrD(1) = 0x1716151413121110; \ 1323 \ 1324 /* Iterate over the most significant byte of each element */ \ 1325 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1326 /* Reverse indexing of "i" */ \ 1327 const int idx = ARRAY_SIZE(b->u8) - i - SZ; \ 1328 if (b->VsrB(idx) & 0x80) { \ 1329 /* Update each byte of the element */ \ 1330 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \ 1331 tmp.VsrB(idx + rk) = j + k; \ 1332 } \ 1333 j += SZ; \ 1334 } \ 1335 } \ 1336 \ 1337 *t = tmp; \ 1338 } 1339 1340 #define XXGENPCV_LE_COMP(NAME, SZ) \ 1341 void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ 1342 { \ 1343 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ 1344 \ 1345 /* Iterate over the most significant byte of each element */ \ 1346 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1347 if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) { \ 1348 /* Update each byte of the element */ \ 1349 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \ 1350 /* Reverse indexing of "j" */ \ 1351 const int idx = ARRAY_SIZE(b->u8) - j - SZ; \ 1352 tmp.VsrB(idx + rk) = i + k; \ 1353 } \ 1354 j += SZ; \ 1355 } \ 1356 } \ 1357 \ 1358 *t = tmp; \ 1359 } 1360 1361 #define XXGENPCV(NAME, SZ) \ 1362 XXGENPCV_BE_EXP(NAME, SZ) \ 1363 XXGENPCV_BE_COMP(NAME, SZ) \ 1364 XXGENPCV_LE_EXP(NAME, SZ) \ 1365 XXGENPCV_LE_COMP(NAME, SZ) \ 1366 1367 XXGENPCV(XXGENPCVBM, 1) 1368 XXGENPCV(XXGENPCVHM, 2) 1369 XXGENPCV(XXGENPCVWM, 4) 1370 XXGENPCV(XXGENPCVDM, 8) 1371 1372 #undef XXGENPCV_BE_EXP 1373 #undef XXGENPCV_BE_COMP 1374 #undef XXGENPCV_LE_EXP 1375 #undef XXGENPCV_LE_COMP 1376 #undef XXGENPCV 1377 1378 #if HOST_BIG_ENDIAN 1379 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1380 #define VBPERMD_INDEX(i) (i) 1381 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1382 #else 1383 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1384 #define VBPERMD_INDEX(i) (1 - i) 1385 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1386 #endif 1387 #define EXTRACT_BIT(avr, i, index) \ 1388 (extract64((avr)->VsrD(i), 63 - index, 1)) 1389 1390 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1391 { 1392 int i, j; 1393 ppc_avr_t result = 
{ .u64 = { 0, 0 } }; 1394 VECTOR_FOR_INORDER_I(i, u64) { 1395 for (j = 0; j < 8; j++) { 1396 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1397 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1398 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1399 } 1400 } 1401 } 1402 *r = result; 1403 } 1404 1405 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1406 { 1407 int i; 1408 uint64_t perm = 0; 1409 1410 VECTOR_FOR_INORDER_I(i, u8) { 1411 int index = VBPERMQ_INDEX(b, i); 1412 1413 if (index < 128) { 1414 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1415 if (a->u64[VBPERMQ_DW(index)] & mask) { 1416 perm |= (0x8000 >> i); 1417 } 1418 } 1419 } 1420 1421 r->VsrD(0) = perm; 1422 r->VsrD(1) = 0; 1423 } 1424 1425 #undef VBPERMQ_INDEX 1426 #undef VBPERMQ_DW 1427 1428 /* 1429 * There is no carry across the two doublewords, so their order does 1430 * not matter. Nor is there partial overlap between registers. 1431 */ 1432 void helper_vpmsumb(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1433 { 1434 for (int i = 0; i < 2; ++i) { 1435 uint64_t aa = a->u64[i], bb = b->u64[i]; 1436 r->u64[i] = clmul_8x4_even(aa, bb) ^ clmul_8x4_odd(aa, bb); 1437 } 1438 } 1439 1440 void helper_vpmsumh(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1441 { 1442 for (int i = 0; i < 2; ++i) { 1443 uint64_t aa = a->u64[i], bb = b->u64[i]; 1444 r->u64[i] = clmul_16x2_even(aa, bb) ^ clmul_16x2_odd(aa, bb); 1445 } 1446 } 1447 1448 void helper_vpmsumw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1449 { 1450 for (int i = 0; i < 2; ++i) { 1451 uint64_t aa = a->u64[i], bb = b->u64[i]; 1452 r->u64[i] = clmul_32(aa, bb) ^ clmul_32(aa >> 32, bb >> 32); 1453 } 1454 } 1455 1456 void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1457 { 1458 int i, j; 1459 Int128 tmp, prod[2] = {int128_zero(), int128_zero()}; 1460 1461 for (j = 0; j < 64; j++) { 1462 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1463 if (a->VsrD(i) & (1ull << j)) { 1464 tmp = int128_make64(b->VsrD(i)); 1465 tmp = int128_lshift(tmp, j); 1466 prod[i] = int128_xor(prod[i], tmp); 1467 } 1468 } 1469 } 1470 1471 r->s128 = int128_xor(prod[0], prod[1]); 1472 } 1473 1474 #if HOST_BIG_ENDIAN 1475 #define PKBIG 1 1476 #else 1477 #define PKBIG 0 1478 #endif 1479 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1480 { 1481 int i, j; 1482 ppc_avr_t result; 1483 #if HOST_BIG_ENDIAN 1484 const ppc_avr_t *x[2] = { a, b }; 1485 #else 1486 const ppc_avr_t *x[2] = { b, a }; 1487 #endif 1488 1489 VECTOR_FOR_INORDER_I(i, u64) { 1490 VECTOR_FOR_INORDER_I(j, u32) { 1491 uint32_t e = x[i]->u32[j]; 1492 1493 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1494 ((e >> 6) & 0x3e0) | 1495 ((e >> 3) & 0x1f)); 1496 } 1497 } 1498 *r = result; 1499 } 1500 1501 #define VPK(suffix, from, to, cvt, dosat) \ 1502 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1503 ppc_avr_t *a, ppc_avr_t *b) \ 1504 { \ 1505 int i; \ 1506 int sat = 0; \ 1507 ppc_avr_t result; \ 1508 ppc_avr_t *a0 = PKBIG ? a : b; \ 1509 ppc_avr_t *a1 = PKBIG ? 
b : a; \ 1510 \ 1511 VECTOR_FOR_INORDER_I(i, from) { \ 1512 result.to[i] = cvt(a0->from[i], &sat); \ 1513 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1514 } \ 1515 *r = result; \ 1516 if (dosat && sat) { \ 1517 set_vscr_sat(env); \ 1518 } \ 1519 } 1520 #define I(x, y) (x) 1521 VPK(shss, s16, s8, cvtshsb, 1) 1522 VPK(shus, s16, u8, cvtshub, 1) 1523 VPK(swss, s32, s16, cvtswsh, 1) 1524 VPK(swus, s32, u16, cvtswuh, 1) 1525 VPK(sdss, s64, s32, cvtsdsw, 1) 1526 VPK(sdus, s64, u32, cvtsduw, 1) 1527 VPK(uhus, u16, u8, cvtuhub, 1) 1528 VPK(uwus, u32, u16, cvtuwuh, 1) 1529 VPK(udus, u64, u32, cvtuduw, 1) 1530 VPK(uhum, u16, u8, I, 0) 1531 VPK(uwum, u32, u16, I, 0) 1532 VPK(udum, u64, u32, I, 0) 1533 #undef I 1534 #undef VPK 1535 #undef PKBIG 1536 1537 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1538 { 1539 int i; 1540 1541 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1542 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1543 } 1544 } 1545 1546 #define VRFI(suffix, rounding) \ 1547 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1548 ppc_avr_t *b) \ 1549 { \ 1550 int i; \ 1551 float_status s = env->vec_status; \ 1552 \ 1553 set_float_rounding_mode(rounding, &s); \ 1554 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1555 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1556 } \ 1557 } 1558 VRFI(n, float_round_nearest_even) 1559 VRFI(m, float_round_down) 1560 VRFI(p, float_round_up) 1561 VRFI(z, float_round_to_zero) 1562 #undef VRFI 1563 1564 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1565 { 1566 int i; 1567 1568 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1569 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1570 1571 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1572 } 1573 } 1574 1575 #define VRLMI(name, size, element, insert) \ 1576 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 1577 { \ 1578 int i; \ 1579 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1580 uint##size##_t src1 = a->element[i]; \ 1581 uint##size##_t src2 = b->element[i]; \ 1582 uint##size##_t src3 = r->element[i]; \ 1583 uint##size##_t begin, end, shift, mask, rot_val; \ 1584 \ 1585 shift = extract##size(src2, 0, 6); \ 1586 end = extract##size(src2, 8, 6); \ 1587 begin = extract##size(src2, 16, 6); \ 1588 rot_val = rol##size(src1, shift); \ 1589 mask = mask_u##size(begin, end); \ 1590 if (insert) { \ 1591 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1592 } else { \ 1593 r->element[i] = (rot_val & mask); \ 1594 } \ 1595 } \ 1596 } 1597 1598 VRLMI(VRLDMI, 64, u64, 1); 1599 VRLMI(VRLWMI, 32, u32, 1); 1600 VRLMI(VRLDNM, 64, u64, 0); 1601 VRLMI(VRLWNM, 32, u32, 0); 1602 1603 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1604 { 1605 int i; 1606 1607 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1608 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status); 1609 } 1610 } 1611 1612 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1613 { 1614 int i; 1615 1616 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1617 r->f32[i] = float32_log2(b->f32[i], &env->vec_status); 1618 } 1619 } 1620 1621 #define VEXTU_X_DO(name, size, left) \ 1622 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1623 { \ 1624 int index = (a & 0xf) * 8; \ 1625 if (left) { \ 1626 index = 128 - index - size; \ 1627 } \ 1628 return int128_getlo(int128_rshift(b->s128, index)) & \ 1629 MAKE_64BIT_MASK(0, size); \ 1630 } 1631 VEXTU_X_DO(vextublx, 8, 1) 1632 VEXTU_X_DO(vextuhlx, 16, 1) 1633 
VEXTU_X_DO(vextuwlx, 32, 1) 1634 VEXTU_X_DO(vextubrx, 8, 0) 1635 VEXTU_X_DO(vextuhrx, 16, 0) 1636 VEXTU_X_DO(vextuwrx, 32, 0) 1637 #undef VEXTU_X_DO 1638 1639 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1640 { 1641 int i; 1642 unsigned int shift, bytes, size; 1643 1644 size = ARRAY_SIZE(r->u8); 1645 for (i = 0; i < size; i++) { 1646 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1647 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */ 1648 (((i + 1) < size) ? a->VsrB(i + 1) : 0); 1649 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */ 1650 } 1651 } 1652 1653 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1654 { 1655 int i; 1656 unsigned int shift, bytes; 1657 1658 /* 1659 * Use reverse order, as destination and source register can be 1660 * same. Its being modified in place saving temporary, reverse 1661 * order will guarantee that computed result is not fed back. 1662 */ 1663 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1664 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1665 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i); 1666 /* extract adjacent bytes */ 1667 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */ 1668 } 1669 } 1670 1671 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1672 { 1673 int sh = shift & 0xf; 1674 int i; 1675 ppc_avr_t result; 1676 1677 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1678 int index = sh + i; 1679 if (index > 0xf) { 1680 result.VsrB(i) = b->VsrB(index - 0x10); 1681 } else { 1682 result.VsrB(i) = a->VsrB(index); 1683 } 1684 } 1685 *r = result; 1686 } 1687 1688 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1689 { 1690 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1691 1692 #if HOST_BIG_ENDIAN 1693 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1694 memset(&r->u8[16 - sh], 0, sh); 1695 #else 1696 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1697 memset(&r->u8[0], 0, sh); 1698 #endif 1699 } 1700 1701 #if HOST_BIG_ENDIAN 1702 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX]) 1703 #else 1704 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1) 1705 #endif 1706 1707 #define VINSX(SUFFIX, TYPE) \ 1708 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \ 1709 uint64_t val, target_ulong index) \ 1710 { \ 1711 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \ 1712 target_long idx = index; \ 1713 \ 1714 if (idx < 0 || idx > maxidx) { \ 1715 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \ 1716 qemu_log_mask(LOG_GUEST_ERROR, \ 1717 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \ 1718 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \ 1719 } else { \ 1720 TYPE src = val; \ 1721 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \ 1722 } \ 1723 } 1724 VINSX(B, uint8_t) 1725 VINSX(H, uint16_t) 1726 VINSX(W, uint32_t) 1727 VINSX(D, uint64_t) 1728 #undef ELEM_ADDR 1729 #undef VINSX 1730 #if HOST_BIG_ENDIAN 1731 #define VEXTDVLX(NAME, SIZE) \ 1732 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1733 target_ulong index) \ 1734 { \ 1735 const target_long idx = index; \ 1736 ppc_avr_t tmp[2] = { *a, *b }; \ 1737 memset(t, 0, sizeof(*t)); \ 1738 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1739 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \ 1740 } else { \ 1741 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1742 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1743 env->nip, idx < 0 ? 
SIZE - idx : idx, 32 - SIZE); \ 1744 } \ 1745 } 1746 #else 1747 #define VEXTDVLX(NAME, SIZE) \ 1748 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1749 target_ulong index) \ 1750 { \ 1751 const target_long idx = index; \ 1752 ppc_avr_t tmp[2] = { *b, *a }; \ 1753 memset(t, 0, sizeof(*t)); \ 1754 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1755 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \ 1756 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \ 1757 } else { \ 1758 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1759 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1760 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \ 1761 } \ 1762 } 1763 #endif 1764 VEXTDVLX(VEXTDUBVLX, 1) 1765 VEXTDVLX(VEXTDUHVLX, 2) 1766 VEXTDVLX(VEXTDUWVLX, 4) 1767 VEXTDVLX(VEXTDDVLX, 8) 1768 #undef VEXTDVLX 1769 #if HOST_BIG_ENDIAN 1770 #define VEXTRACT(suffix, element) \ 1771 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1772 { \ 1773 uint32_t es = sizeof(r->element[0]); \ 1774 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1775 memset(&r->u8[8], 0, 8); \ 1776 memset(&r->u8[0], 0, 8 - es); \ 1777 } 1778 #else 1779 #define VEXTRACT(suffix, element) \ 1780 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1781 { \ 1782 uint32_t es = sizeof(r->element[0]); \ 1783 uint32_t s = (16 - index) - es; \ 1784 memmove(&r->u8[8], &b->u8[s], es); \ 1785 memset(&r->u8[0], 0, 8); \ 1786 memset(&r->u8[8 + es], 0, 8 - es); \ 1787 } 1788 #endif 1789 VEXTRACT(ub, u8) 1790 VEXTRACT(uh, u16) 1791 VEXTRACT(uw, u32) 1792 VEXTRACT(d, u64) 1793 #undef VEXTRACT 1794 1795 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \ 1796 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \ 1797 { \ 1798 int i, idx, crf = 0; \ 1799 \ 1800 for (i = 0; i < NUM_ELEMS; i++) { \ 1801 idx = LEFT ? i : NUM_ELEMS - i - 1; \ 1802 if (b->Vsr##ELEM(idx)) { \ 1803 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \ 1804 } else { \ 1805 crf = 0b0010; \ 1806 break; \ 1807 } \ 1808 } \ 1809 \ 1810 for (; i < NUM_ELEMS; i++) { \ 1811 idx = LEFT ? i : NUM_ELEMS - i - 1; \ 1812 t->Vsr##ELEM(idx) = 0; \ 1813 } \ 1814 \ 1815 return crf; \ 1816 } 1817 VSTRI(VSTRIBL, B, 16, true) 1818 VSTRI(VSTRIBR, B, 16, false) 1819 VSTRI(VSTRIHL, H, 8, true) 1820 VSTRI(VSTRIHR, H, 8, false) 1821 #undef VSTRI 1822 1823 void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index) 1824 { 1825 ppc_vsr_t t = { }; 1826 size_t es = sizeof(uint32_t); 1827 uint32_t ext_index; 1828 int i; 1829 1830 ext_index = index; 1831 for (i = 0; i < es; i++, ext_index++) { 1832 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16); 1833 } 1834 1835 *xt = t; 1836 } 1837 1838 void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index) 1839 { 1840 ppc_vsr_t t = *xt; 1841 size_t es = sizeof(uint32_t); 1842 int ins_index, i = 0; 1843 1844 ins_index = index; 1845 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1846 t.VsrB(ins_index) = xb->VsrB(8 - es + i); 1847 } 1848 1849 *xt = t; 1850 } 1851 1852 void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c, 1853 uint32_t desc) 1854 { 1855 /* 1856 * Instead of processing imm bit-by-bit, we'll skip the computation of 1857 * conjunctions whose corresponding bit is unset. 1858 */ 1859 int bit, imm = simd_data(desc); 1860 Int128 conj, disj = int128_zero(); 1861 1862 /* Iterate over set bits from the least to the most significant bit */ 1863 while (imm) { 1864 /* 1865 * Get the next bit to be processed with ctz64. 
Invert the result of 1866 * ctz64 to match the indexing used by PowerISA. 1867 */ 1868 bit = 7 - ctzl(imm); 1869 if (bit & 0x4) { 1870 conj = a->s128; 1871 } else { 1872 conj = int128_not(a->s128); 1873 } 1874 if (bit & 0x2) { 1875 conj = int128_and(conj, b->s128); 1876 } else { 1877 conj = int128_and(conj, int128_not(b->s128)); 1878 } 1879 if (bit & 0x1) { 1880 conj = int128_and(conj, c->s128); 1881 } else { 1882 conj = int128_and(conj, int128_not(c->s128)); 1883 } 1884 disj = int128_or(disj, conj); 1885 1886 /* Unset the least significant bit that is set */ 1887 imm &= imm - 1; 1888 } 1889 1890 t->s128 = disj; 1891 } 1892 1893 #define XXBLEND(name, sz) \ 1894 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1895 ppc_avr_t *c, uint32_t desc) \ 1896 { \ 1897 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \ 1898 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \ 1899 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \ 1900 } \ 1901 } 1902 XXBLEND(B, 8) 1903 XXBLEND(H, 16) 1904 XXBLEND(W, 32) 1905 XXBLEND(D, 64) 1906 #undef XXBLEND 1907 1908 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1909 { 1910 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1911 1912 #if HOST_BIG_ENDIAN 1913 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1914 memset(&r->u8[0], 0, sh); 1915 #else 1916 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1917 memset(&r->u8[16 - sh], 0, sh); 1918 #endif 1919 } 1920 1921 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1922 { 1923 int64_t t; 1924 int i, upper; 1925 ppc_avr_t result; 1926 int sat = 0; 1927 1928 upper = ARRAY_SIZE(r->s32) - 1; 1929 t = (int64_t)b->VsrSW(upper); 1930 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1931 t += a->VsrSW(i); 1932 result.VsrSW(i) = 0; 1933 } 1934 result.VsrSW(upper) = cvtsdsw(t, &sat); 1935 *r = result; 1936 1937 if (sat) { 1938 set_vscr_sat(env); 1939 } 1940 } 1941 1942 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1943 { 1944 int i, j, upper; 1945 ppc_avr_t result; 1946 int sat = 0; 1947 1948 upper = 1; 1949 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1950 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 1951 1952 result.VsrD(i) = 0; 1953 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 1954 t += a->VsrSW(2 * i + j); 1955 } 1956 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 1957 } 1958 1959 *r = result; 1960 if (sat) { 1961 set_vscr_sat(env); 1962 } 1963 } 1964 1965 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1966 { 1967 int i, j; 1968 int sat = 0; 1969 1970 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1971 int64_t t = (int64_t)b->s32[i]; 1972 1973 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 1974 t += a->s8[4 * i + j]; 1975 } 1976 r->s32[i] = cvtsdsw(t, &sat); 1977 } 1978 1979 if (sat) { 1980 set_vscr_sat(env); 1981 } 1982 } 1983 1984 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1985 { 1986 int sat = 0; 1987 int i; 1988 1989 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1990 int64_t t = (int64_t)b->s32[i]; 1991 1992 t += a->s16[2 * i] + a->s16[2 * i + 1]; 1993 r->s32[i] = cvtsdsw(t, &sat); 1994 } 1995 1996 if (sat) { 1997 set_vscr_sat(env); 1998 } 1999 } 2000 2001 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2002 { 2003 int i, j; 2004 int sat = 0; 2005 2006 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2007 uint64_t t = (uint64_t)b->u32[i]; 2008 2009 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 2010 t += a->u8[4 * i + j]; 2011 } 2012 r->u32[i] = cvtuduw(t, &sat); 2013 } 
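    /*
     * Illustrative example (not from the ISA text): with b->u32[i] =
     * 0xffffffff and all four a->u8 bytes of that word equal to 0xff,
     * t = 0xffffffff + 4 * 0xff = 0x1000003fb, which cvtuduw() clamps to
     * UINT32_MAX while recording the overflow in 'sat' for VSCR[SAT] below.
     */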
2014 2015 if (sat) { 2016 set_vscr_sat(env); 2017 } 2018 } 2019 2020 #if HOST_BIG_ENDIAN 2021 #define UPKHI 1 2022 #define UPKLO 0 2023 #else 2024 #define UPKHI 0 2025 #define UPKLO 1 2026 #endif 2027 #define VUPKPX(suffix, hi) \ 2028 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2029 { \ 2030 int i; \ 2031 ppc_avr_t result; \ 2032 \ 2033 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 2034 uint16_t e = b->u16[hi ? i : i + 4]; \ 2035 uint8_t a = (e >> 15) ? 0xff : 0; \ 2036 uint8_t r = (e >> 10) & 0x1f; \ 2037 uint8_t g = (e >> 5) & 0x1f; \ 2038 uint8_t b = e & 0x1f; \ 2039 \ 2040 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 2041 } \ 2042 *r = result; \ 2043 } 2044 VUPKPX(lpx, UPKLO) 2045 VUPKPX(hpx, UPKHI) 2046 #undef VUPKPX 2047 2048 #define VUPK(suffix, unpacked, packee, hi) \ 2049 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2050 { \ 2051 int i; \ 2052 ppc_avr_t result; \ 2053 \ 2054 if (hi) { \ 2055 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 2056 result.unpacked[i] = b->packee[i]; \ 2057 } \ 2058 } else { \ 2059 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 2060 i++) { \ 2061 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 2062 } \ 2063 } \ 2064 *r = result; \ 2065 } 2066 VUPK(hsb, s16, s8, UPKHI) 2067 VUPK(hsh, s32, s16, UPKHI) 2068 VUPK(hsw, s64, s32, UPKHI) 2069 VUPK(lsb, s16, s8, UPKLO) 2070 VUPK(lsh, s32, s16, UPKLO) 2071 VUPK(lsw, s64, s32, UPKLO) 2072 #undef VUPK 2073 #undef UPKHI 2074 #undef UPKLO 2075 2076 #define VGENERIC_DO(name, element) \ 2077 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 2078 { \ 2079 int i; \ 2080 \ 2081 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 2082 r->element[i] = name(b->element[i]); \ 2083 } \ 2084 } 2085 2086 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 2087 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 2088 2089 VGENERIC_DO(clzb, u8) 2090 VGENERIC_DO(clzh, u16) 2091 2092 #undef clzb 2093 #undef clzh 2094 2095 #define ctzb(v) ((v) ? ctz32(v) : 8) 2096 #define ctzh(v) ((v) ? 
ctz32(v) : 16) 2097 #define ctzw(v) ctz32((v)) 2098 #define ctzd(v) ctz64((v)) 2099 2100 VGENERIC_DO(ctzb, u8) 2101 VGENERIC_DO(ctzh, u16) 2102 VGENERIC_DO(ctzw, u32) 2103 VGENERIC_DO(ctzd, u64) 2104 2105 #undef ctzb 2106 #undef ctzh 2107 #undef ctzw 2108 #undef ctzd 2109 2110 #define popcntb(v) ctpop8(v) 2111 #define popcnth(v) ctpop16(v) 2112 #define popcntw(v) ctpop32(v) 2113 #define popcntd(v) ctpop64(v) 2114 2115 VGENERIC_DO(popcntb, u8) 2116 VGENERIC_DO(popcnth, u16) 2117 VGENERIC_DO(popcntw, u32) 2118 VGENERIC_DO(popcntd, u64) 2119 2120 #undef popcntb 2121 #undef popcnth 2122 #undef popcntw 2123 #undef popcntd 2124 2125 #undef VGENERIC_DO 2126 2127 void helper_VADDUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2128 { 2129 r->s128 = int128_add(a->s128, b->s128); 2130 } 2131 2132 void helper_VADDEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2133 { 2134 r->s128 = int128_add(int128_add(a->s128, b->s128), 2135 int128_make64(int128_getlo(c->s128) & 1)); 2136 } 2137 2138 void helper_VADDCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2139 { 2140 r->VsrD(1) = int128_ult(int128_not(a->s128), b->s128); 2141 r->VsrD(0) = 0; 2142 } 2143 2144 void helper_VADDECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2145 { 2146 bool carry_out = int128_ult(int128_not(a->s128), b->s128), 2147 carry_in = int128_getlo(c->s128) & 1; 2148 2149 if (!carry_out && carry_in) { 2150 carry_out = (int128_nz(a->s128) || int128_nz(b->s128)) && 2151 int128_eq(int128_add(a->s128, b->s128), int128_makes64(-1)); 2152 } 2153 2154 r->VsrD(0) = 0; 2155 r->VsrD(1) = carry_out; 2156 } 2157 2158 void helper_VSUBUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2159 { 2160 r->s128 = int128_sub(a->s128, b->s128); 2161 } 2162 2163 void helper_VSUBEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2164 { 2165 r->s128 = int128_add(int128_add(a->s128, int128_not(b->s128)), 2166 int128_make64(int128_getlo(c->s128) & 1)); 2167 } 2168 2169 void helper_VSUBCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2170 { 2171 Int128 tmp = int128_not(b->s128); 2172 2173 r->VsrD(1) = int128_ult(int128_not(a->s128), tmp) || 2174 int128_eq(int128_add(a->s128, tmp), int128_makes64(-1)); 2175 r->VsrD(0) = 0; 2176 } 2177 2178 void helper_VSUBECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2179 { 2180 Int128 tmp = int128_not(b->s128); 2181 bool carry_out = int128_ult(int128_not(a->s128), tmp), 2182 carry_in = int128_getlo(c->s128) & 1; 2183 2184 r->VsrD(1) = carry_out || (carry_in && int128_eq(int128_add(a->s128, tmp), 2185 int128_makes64(-1))); 2186 r->VsrD(0) = 0; 2187 } 2188 2189 #define BCD_PLUS_PREF_1 0xC 2190 #define BCD_PLUS_PREF_2 0xF 2191 #define BCD_PLUS_ALT_1 0xA 2192 #define BCD_NEG_PREF 0xD 2193 #define BCD_NEG_ALT 0xB 2194 #define BCD_PLUS_ALT_2 0xE 2195 #define NATIONAL_PLUS 0x2B 2196 #define NATIONAL_NEG 0x2D 2197 2198 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2199 2200 static int bcd_get_sgn(ppc_avr_t *bcd) 2201 { 2202 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) { 2203 case BCD_PLUS_PREF_1: 2204 case BCD_PLUS_PREF_2: 2205 case BCD_PLUS_ALT_1: 2206 case BCD_PLUS_ALT_2: 2207 { 2208 return 1; 2209 } 2210 2211 case BCD_NEG_PREF: 2212 case BCD_NEG_ALT: 2213 { 2214 return -1; 2215 } 2216 2217 default: 2218 { 2219 return 0; 2220 } 2221 } 2222 } 2223 2224 static int bcd_preferred_sgn(int sgn, int ps) 2225 { 2226 if (sgn >= 0) { 2227 return (ps == 0) ? 
BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2228 } else { 2229 return BCD_NEG_PREF; 2230 } 2231 } 2232 2233 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2234 { 2235 uint8_t result; 2236 if (n & 1) { 2237 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4; 2238 } else { 2239 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF; 2240 } 2241 2242 if (unlikely(result > 9)) { 2243 *invalid = true; 2244 } 2245 return result; 2246 } 2247 2248 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2249 { 2250 if (n & 1) { 2251 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F; 2252 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4); 2253 } else { 2254 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0; 2255 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit; 2256 } 2257 } 2258 2259 static bool bcd_is_valid(ppc_avr_t *bcd) 2260 { 2261 int i; 2262 int invalid = 0; 2263 2264 if (bcd_get_sgn(bcd) == 0) { 2265 return false; 2266 } 2267 2268 for (i = 1; i < 32; i++) { 2269 bcd_get_digit(bcd, i, &invalid); 2270 if (unlikely(invalid)) { 2271 return false; 2272 } 2273 } 2274 return true; 2275 } 2276 2277 static int bcd_cmp_zero(ppc_avr_t *bcd) 2278 { 2279 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) { 2280 return CRF_EQ; 2281 } else { 2282 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT; 2283 } 2284 } 2285 2286 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2287 { 2288 return reg->VsrH(7 - n); 2289 } 2290 2291 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2292 { 2293 reg->VsrH(7 - n) = val; 2294 } 2295 2296 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2297 { 2298 int i; 2299 int invalid = 0; 2300 for (i = 31; i > 0; i--) { 2301 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2302 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2303 if (unlikely(invalid)) { 2304 return 0; /* doesn't matter */ 2305 } else if (dig_a > dig_b) { 2306 return 1; 2307 } else if (dig_a < dig_b) { 2308 return -1; 2309 } 2310 } 2311 2312 return 0; 2313 } 2314 2315 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2316 int *overflow) 2317 { 2318 int carry = 0; 2319 int i; 2320 int is_zero = 1; 2321 2322 for (i = 1; i <= 31; i++) { 2323 uint8_t digit = bcd_get_digit(a, i, invalid) + 2324 bcd_get_digit(b, i, invalid) + carry; 2325 is_zero &= (digit == 0); 2326 if (digit > 9) { 2327 carry = 1; 2328 digit -= 10; 2329 } else { 2330 carry = 0; 2331 } 2332 2333 bcd_put_digit(t, digit, i); 2334 } 2335 2336 *overflow = carry; 2337 return is_zero; 2338 } 2339 2340 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2341 int *overflow) 2342 { 2343 int carry = 0; 2344 int i; 2345 2346 for (i = 1; i <= 31; i++) { 2347 uint8_t digit = bcd_get_digit(a, i, invalid) - 2348 bcd_get_digit(b, i, invalid) + carry; 2349 if (digit & 0x80) { 2350 carry = -1; 2351 digit += 10; 2352 } else { 2353 carry = 0; 2354 } 2355 2356 bcd_put_digit(t, digit, i); 2357 } 2358 2359 *overflow = carry; 2360 } 2361 2362 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2363 { 2364 2365 int sgna = bcd_get_sgn(a); 2366 int sgnb = bcd_get_sgn(b); 2367 int invalid = (sgna == 0) || (sgnb == 0); 2368 int overflow = 0; 2369 int zero = 0; 2370 uint32_t cr = 0; 2371 ppc_avr_t result = { .u64 = { 0, 0 } }; 2372 2373 if (!invalid) { 2374 if (sgna == sgnb) { 2375 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2376 zero = bcd_add_mag(&result, a, b, &invalid, &overflow); 2377 cr = (sgna > 0) ? 
CRF_GT : CRF_LT; 2378 } else { 2379 int magnitude = bcd_cmp_mag(a, b); 2380 if (magnitude > 0) { 2381 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2382 bcd_sub_mag(&result, a, b, &invalid, &overflow); 2383 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2384 } else if (magnitude < 0) { 2385 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps); 2386 bcd_sub_mag(&result, b, a, &invalid, &overflow); 2387 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2388 } else { 2389 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps); 2390 cr = CRF_EQ; 2391 } 2392 } 2393 } 2394 2395 if (unlikely(invalid)) { 2396 result.VsrD(0) = result.VsrD(1) = -1; 2397 cr = CRF_SO; 2398 } else if (overflow) { 2399 cr |= CRF_SO; 2400 } else if (zero) { 2401 cr |= CRF_EQ; 2402 } 2403 2404 *r = result; 2405 2406 return cr; 2407 } 2408 2409 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2410 { 2411 ppc_avr_t bcopy = *b; 2412 int sgnb = bcd_get_sgn(b); 2413 if (sgnb < 0) { 2414 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2415 } else if (sgnb > 0) { 2416 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2417 } 2418 /* else invalid ... defer to bcdadd code for proper handling */ 2419 2420 return helper_bcdadd(r, a, &bcopy, ps); 2421 } 2422 2423 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2424 { 2425 int i; 2426 int cr = 0; 2427 uint16_t national = 0; 2428 uint16_t sgnb = get_national_digit(b, 0); 2429 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2430 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2431 2432 for (i = 1; i < 8; i++) { 2433 national = get_national_digit(b, i); 2434 if (unlikely(national < 0x30 || national > 0x39)) { 2435 invalid = 1; 2436 break; 2437 } 2438 2439 bcd_put_digit(&ret, national & 0xf, i); 2440 } 2441 2442 if (sgnb == NATIONAL_PLUS) { 2443 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2444 } else { 2445 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2446 } 2447 2448 cr = bcd_cmp_zero(&ret); 2449 2450 if (unlikely(invalid)) { 2451 cr = CRF_SO; 2452 } 2453 2454 *r = ret; 2455 2456 return cr; 2457 } 2458 2459 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2460 { 2461 int i; 2462 int cr = 0; 2463 int sgnb = bcd_get_sgn(b); 2464 int invalid = (sgnb == 0); 2465 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2466 2467 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0); 2468 2469 for (i = 1; i < 8; i++) { 2470 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2471 2472 if (unlikely(invalid)) { 2473 break; 2474 } 2475 } 2476 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2477 2478 cr = bcd_cmp_zero(b); 2479 2480 if (ox_flag) { 2481 cr |= CRF_SO; 2482 } 2483 2484 if (unlikely(invalid)) { 2485 cr = CRF_SO; 2486 } 2487 2488 *r = ret; 2489 2490 return cr; 2491 } 2492 2493 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2494 { 2495 int i; 2496 int cr = 0; 2497 int invalid = 0; 2498 int zone_digit = 0; 2499 int zone_lead = ps ? 0xF : 0x3; 2500 int digit = 0; 2501 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2502 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4; 2503 2504 if (unlikely((sgnb < 0xA) && ps)) { 2505 invalid = 1; 2506 } 2507 2508 for (i = 0; i < 16; i++) { 2509 zone_digit = i ? 
b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead; 2510 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF; 2511 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2512 invalid = 1; 2513 break; 2514 } 2515 2516 bcd_put_digit(&ret, digit, i + 1); 2517 } 2518 2519 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2520 (!ps && (sgnb & 0x4))) { 2521 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2522 } else { 2523 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2524 } 2525 2526 cr = bcd_cmp_zero(&ret); 2527 2528 if (unlikely(invalid)) { 2529 cr = CRF_SO; 2530 } 2531 2532 *r = ret; 2533 2534 return cr; 2535 } 2536 2537 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2538 { 2539 int i; 2540 int cr = 0; 2541 uint8_t digit = 0; 2542 int sgnb = bcd_get_sgn(b); 2543 int zone_lead = (ps) ? 0xF0 : 0x30; 2544 int invalid = (sgnb == 0); 2545 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2546 2547 int ox_flag = ((b->VsrD(0) >> 4) != 0); 2548 2549 for (i = 0; i < 16; i++) { 2550 digit = bcd_get_digit(b, i + 1, &invalid); 2551 2552 if (unlikely(invalid)) { 2553 break; 2554 } 2555 2556 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit; 2557 } 2558 2559 if (ps) { 2560 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2561 } else { 2562 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1); 2563 } 2564 2565 cr = bcd_cmp_zero(b); 2566 2567 if (ox_flag) { 2568 cr |= CRF_SO; 2569 } 2570 2571 if (unlikely(invalid)) { 2572 cr = CRF_SO; 2573 } 2574 2575 *r = ret; 2576 2577 return cr; 2578 } 2579 2580 /** 2581 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs 2582 * 2583 * Returns: 2584 * > 0 if ahi|alo > bhi|blo, 2585 * 0 if ahi|alo == bhi|blo, 2586 * < 0 if ahi|alo < bhi|blo 2587 */ 2588 static inline int ucmp128(uint64_t alo, uint64_t ahi, 2589 uint64_t blo, uint64_t bhi) 2590 { 2591 return (ahi == bhi) ? 2592 (alo > blo ? 1 : (alo == blo ? 0 : -1)) : 2593 (ahi > bhi ? 1 : -1); 2594 } 2595 2596 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2597 { 2598 int i; 2599 int cr; 2600 uint64_t lo_value; 2601 uint64_t hi_value; 2602 uint64_t rem; 2603 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2604 2605 if (b->VsrSD(0) < 0) { 2606 lo_value = -b->VsrSD(1); 2607 hi_value = ~b->VsrD(0) + !lo_value; 2608 bcd_put_digit(&ret, 0xD, 0); 2609 2610 cr = CRF_LT; 2611 } else { 2612 lo_value = b->VsrD(1); 2613 hi_value = b->VsrD(0); 2614 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); 2615 2616 if (hi_value == 0 && lo_value == 0) { 2617 cr = CRF_EQ; 2618 } else { 2619 cr = CRF_GT; 2620 } 2621 } 2622 2623 /* 2624 * Check src limits: abs(src) <= 10^31 - 1 2625 * 2626 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff 2627 */ 2628 if (ucmp128(lo_value, hi_value, 2629 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) { 2630 cr |= CRF_SO; 2631 2632 /* 2633 * According to the ISA, if src wouldn't fit in the destination 2634 * register, the result is undefined. 2635 * In that case, we leave r unchanged. 
2636 */ 2637 } else { 2638 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL); 2639 2640 for (i = 1; i < 16; rem /= 10, i++) { 2641 bcd_put_digit(&ret, rem % 10, i); 2642 } 2643 2644 for (; i < 32; lo_value /= 10, i++) { 2645 bcd_put_digit(&ret, lo_value % 10, i); 2646 } 2647 2648 *r = ret; 2649 } 2650 2651 return cr; 2652 } 2653 2654 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2655 { 2656 uint8_t i; 2657 int cr; 2658 uint64_t carry; 2659 uint64_t unused; 2660 uint64_t lo_value; 2661 uint64_t hi_value = 0; 2662 int sgnb = bcd_get_sgn(b); 2663 int invalid = (sgnb == 0); 2664 2665 lo_value = bcd_get_digit(b, 31, &invalid); 2666 for (i = 30; i > 0; i--) { 2667 mulu64(&lo_value, &carry, lo_value, 10ULL); 2668 mulu64(&hi_value, &unused, hi_value, 10ULL); 2669 lo_value += bcd_get_digit(b, i, &invalid); 2670 hi_value += carry; 2671 2672 if (unlikely(invalid)) { 2673 break; 2674 } 2675 } 2676 2677 if (sgnb == -1) { 2678 r->VsrSD(1) = -lo_value; 2679 r->VsrSD(0) = ~hi_value + !r->VsrSD(1); 2680 } else { 2681 r->VsrSD(1) = lo_value; 2682 r->VsrSD(0) = hi_value; 2683 } 2684 2685 cr = bcd_cmp_zero(b); 2686 2687 if (unlikely(invalid)) { 2688 cr = CRF_SO; 2689 } 2690 2691 return cr; 2692 } 2693 2694 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2695 { 2696 int i; 2697 int invalid = 0; 2698 2699 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 2700 return CRF_SO; 2701 } 2702 2703 *r = *a; 2704 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0); 2705 2706 for (i = 1; i < 32; i++) { 2707 bcd_get_digit(a, i, &invalid); 2708 bcd_get_digit(b, i, &invalid); 2709 if (unlikely(invalid)) { 2710 return CRF_SO; 2711 } 2712 } 2713 2714 return bcd_cmp_zero(r); 2715 } 2716 2717 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2718 { 2719 int sgnb = bcd_get_sgn(b); 2720 2721 *r = *b; 2722 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 2723 2724 if (bcd_is_valid(b) == false) { 2725 return CRF_SO; 2726 } 2727 2728 return bcd_cmp_zero(r); 2729 } 2730 2731 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2732 { 2733 int cr; 2734 int i = a->VsrSB(7); 2735 bool ox_flag = false; 2736 int sgnb = bcd_get_sgn(b); 2737 ppc_avr_t ret = *b; 2738 ret.VsrD(1) &= ~0xf; 2739 2740 if (bcd_is_valid(b) == false) { 2741 return CRF_SO; 2742 } 2743 2744 if (unlikely(i > 31)) { 2745 i = 31; 2746 } else if (unlikely(i < -31)) { 2747 i = -31; 2748 } 2749 2750 if (i > 0) { 2751 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2752 } else { 2753 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2754 } 2755 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2756 2757 *r = ret; 2758 2759 cr = bcd_cmp_zero(r); 2760 if (ox_flag) { 2761 cr |= CRF_SO; 2762 } 2763 2764 return cr; 2765 } 2766 2767 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2768 { 2769 int cr; 2770 int i; 2771 int invalid = 0; 2772 bool ox_flag = false; 2773 ppc_avr_t ret = *b; 2774 2775 for (i = 0; i < 32; i++) { 2776 bcd_get_digit(b, i, &invalid); 2777 2778 if (unlikely(invalid)) { 2779 return CRF_SO; 2780 } 2781 } 2782 2783 i = a->VsrSB(7); 2784 if (i >= 32) { 2785 ox_flag = true; 2786 ret.VsrD(1) = ret.VsrD(0) = 0; 2787 } else if (i <= -32) { 2788 ret.VsrD(1) = ret.VsrD(0) = 0; 2789 } else if (i > 0) { 2790 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2791 } else { 2792 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2793 } 2794 *r = ret; 2795 2796 cr = bcd_cmp_zero(r); 2797 if (ox_flag) { 2798 cr |= CRF_SO; 2799 } 2800 2801 return 
cr; 2802 } 2803 2804 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2805 { 2806 int cr; 2807 int unused = 0; 2808 int invalid = 0; 2809 bool ox_flag = false; 2810 int sgnb = bcd_get_sgn(b); 2811 ppc_avr_t ret = *b; 2812 ret.VsrD(1) &= ~0xf; 2813 2814 int i = a->VsrSB(7); 2815 ppc_avr_t bcd_one; 2816 2817 bcd_one.VsrD(0) = 0; 2818 bcd_one.VsrD(1) = 0x10; 2819 2820 if (bcd_is_valid(b) == false) { 2821 return CRF_SO; 2822 } 2823 2824 if (unlikely(i > 31)) { 2825 i = 31; 2826 } else if (unlikely(i < -31)) { 2827 i = -31; 2828 } 2829 2830 if (i > 0) { 2831 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2832 } else { 2833 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2834 2835 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 2836 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 2837 } 2838 } 2839 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2840 2841 cr = bcd_cmp_zero(&ret); 2842 if (ox_flag) { 2843 cr |= CRF_SO; 2844 } 2845 *r = ret; 2846 2847 return cr; 2848 } 2849 2850 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2851 { 2852 uint64_t mask; 2853 uint32_t ox_flag = 0; 2854 int i = a->VsrSH(3) + 1; 2855 ppc_avr_t ret = *b; 2856 2857 if (bcd_is_valid(b) == false) { 2858 return CRF_SO; 2859 } 2860 2861 if (i > 16 && i < 32) { 2862 mask = (uint64_t)-1 >> (128 - i * 4); 2863 if (ret.VsrD(0) & ~mask) { 2864 ox_flag = CRF_SO; 2865 } 2866 2867 ret.VsrD(0) &= mask; 2868 } else if (i >= 0 && i <= 16) { 2869 mask = (uint64_t)-1 >> (64 - i * 4); 2870 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 2871 ox_flag = CRF_SO; 2872 } 2873 2874 ret.VsrD(1) &= mask; 2875 ret.VsrD(0) = 0; 2876 } 2877 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 2878 *r = ret; 2879 2880 return bcd_cmp_zero(&ret) | ox_flag; 2881 } 2882 2883 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2884 { 2885 int i; 2886 uint64_t mask; 2887 uint32_t ox_flag = 0; 2888 int invalid = 0; 2889 ppc_avr_t ret = *b; 2890 2891 for (i = 0; i < 32; i++) { 2892 bcd_get_digit(b, i, &invalid); 2893 2894 if (unlikely(invalid)) { 2895 return CRF_SO; 2896 } 2897 } 2898 2899 i = a->VsrSH(3); 2900 if (i > 16 && i < 33) { 2901 mask = (uint64_t)-1 >> (128 - i * 4); 2902 if (ret.VsrD(0) & ~mask) { 2903 ox_flag = CRF_SO; 2904 } 2905 2906 ret.VsrD(0) &= mask; 2907 } else if (i > 0 && i <= 16) { 2908 mask = (uint64_t)-1 >> (64 - i * 4); 2909 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 2910 ox_flag = CRF_SO; 2911 } 2912 2913 ret.VsrD(1) &= mask; 2914 ret.VsrD(0) = 0; 2915 } else if (i == 0) { 2916 if (ret.VsrD(0) || ret.VsrD(1)) { 2917 ox_flag = CRF_SO; 2918 } 2919 ret.VsrD(0) = ret.VsrD(1) = 0; 2920 } 2921 2922 *r = ret; 2923 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) { 2924 return ox_flag | CRF_EQ; 2925 } 2926 2927 return ox_flag | CRF_GT; 2928 } 2929 2930 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 2931 { 2932 int i; 2933 VECTOR_FOR_INORDER_I(i, u8) { 2934 r->u8[i] = AES_sbox[a->u8[i]]; 2935 } 2936 } 2937 2938 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2939 { 2940 AESState *ad = (AESState *)r; 2941 AESState *st = (AESState *)a; 2942 AESState *rk = (AESState *)b; 2943 2944 aesenc_SB_SR_MC_AK(ad, st, rk, true); 2945 } 2946 2947 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2948 { 2949 aesenc_SB_SR_AK((AESState *)r, (AESState *)a, (AESState *)b, true); 2950 } 2951 2952 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2953 { 2954 AESState *ad = (AESState *)r; 2955 AESState *st = (AESState *)a; 
    AESState *rk = (AESState *)b;

    aesdec_ISB_ISR_AK_IMC(ad, st, rk, true);
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    aesdec_ISB_ISR_AK((AESState *)r, (AESState *)a, (AESState *)b, true);
}

void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}
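/*
 * Illustrative sketch, not part of the helper set: helper_vshasigmaw and
 * helper_vshasigmad above select one of the SHA-256/SHA-512 sigma
 * functions per element, driven by the st bit and the per-element six
 * bits.  For reference, the st == 0, six-bit-clear case of the 32-bit
 * helper corresponds to the FIPS 180-4 small sigma0 function,
 * ROTR^7(x) ^ ROTR^18(x) ^ SHR^3(x), restated here with an explicit
 * rotate so the constants can be compared against the helper.  The
 * sketch_* names are invented for this example and are not QEMU
 * interfaces.
 */
static inline uint32_t sketch_ror32(uint32_t x, unsigned n)
{
    return (x >> n) | (x << (32 - n)); /* assumes 1 <= n <= 31 */
}

static inline uint32_t sketch_sha256_sigma0(uint32_t x)
{
    /* Same rotation/shift constants as the st == 0, bit-clear case above. */
    return sketch_ror32(x, 7) ^ sketch_ror32(x, 18) ^ (x >> 3);
}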
void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}
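/*
 * Illustrative sketches, not part of the helper set above; the sketch_*
 * names are invented for this purpose and are not QEMU interfaces.
 *
 * helper_VADDCUQ and helper_VADDECUQ derive the carry out of a 128-bit
 * addition without needing a wider type, using the identity
 * carry_out(a + b) == (~a < b): the sum overflows exactly when b exceeds
 * the remaining headroom ~a.  The same test is shown here for plain
 * 64-bit operands so it can be checked in isolation.
 */
static inline bool sketch_add_carry_out_u64(uint64_t a, uint64_t b)
{
    /* Scaled-down form of int128_ult(int128_not(a), b). */
    return ~a < b;
}

/*
 * The bcd_* helpers store signed BCD as 31 digits plus a sign nibble,
 * with digit n kept in byte BCD_DIG_BYTE(n) = 15 - n / 2: the low nibble
 * for even n and the high nibble for odd n.  This sketch assumes a plain
 * 16-byte array presented most-significant byte first, i.e. in the same
 * element order as the VsrB() accessor; that ordering is an assumption
 * of the example, not a statement about ppc_avr_t's storage layout.
 */
static inline uint8_t sketch_bcd_digit(const uint8_t *bcd16, int n)
{
    uint8_t raw = bcd16[15 - (n / 2)];

    return (n & 1) ? (raw >> 4) : (raw & 0xF);
}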