/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"
#include "tcg/tcg-gvec-desc.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    if (unlikely(rb == 0 || ra >= rb)) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divu128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    uint64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = 0;

    if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divs128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/*
 * subtract 1 from each byte, and with inverse, check if MSB is set at each
 * byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n) (haszero((x) ^ pattern(n)))

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

/*
 * Return a random number.
 */
uint64_t helper_darn32(void)
{
    Error *err = NULL;
    uint32_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_darn64(void)
{
    Error *err = NULL;
    uint64_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & PPC_BIT(index)) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca32 = env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca32 = env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words. */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}
#endif

uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
{
    /*
     * Instead of processing the mask bit-by-bit from the most significant to
     * the least significant bit, as described in PowerISA, we'll handle it in
     * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
     * ctz or cto, we negate the mask at the end of the loop.
     */
    target_ulong m, left = 0, right = 0;
    unsigned int n, i = 64;
    bool bit = false; /* tracks if we are processing zeros or ones */

    if (mask == 0 || mask == -1) {
        return src;
    }

    /* Processes the mask in blocks, from LSB to MSB */
    while (i) {
        /* Find how many bits we should take */
        n = ctz64(mask);
        if (n > i) {
            n = i;
        }

        /*
         * Extracts 'n' trailing bits of src and put them on the leading 'n'
         * bits of 'right' or 'left', pushing down the previously extracted
         * values.
         */
        m = (1ll << n) - 1;
        if (bit) {
            right = ror64(right | (src & m), n);
        } else {
            left = ror64(left | (src & m), n);
        }

        /*
         * Discards the processed bits from 'src' and 'mask'. Note that we are
         * removing 'n' trailing zeros from 'mask', but the logical shift will
         * add 'n' leading zeros back, so the population count of 'mask' is
         * kept the same.
         */
        src >>= n;
        mask >>= n;
        i -= n;
        bit = !bit;
        mask = ~mask;
    }

    /*
     * At the end, right was ror'ed ctpop(mask) times. To put it back in place,
     * we'll shift it 64-ctpop(mask) more times.
     */
    if (bit) {
        n = ctpop64(mask);
    } else {
        n = 64 - ctpop64(mask);
    }

    return left | (right >> n);
}

uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
{
    int i, o;
    uint64_t result = 0;

    if (mask == -1) {
        return src;
    }

    for (i = 0; mask != 0; i++) {
        o = ctz64(mask);
        mask &= mask - 1;
        result |= ((src >> i) & 1) << o;
    }

    return result;
}

uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
{
    int i, o;
    uint64_t result = 0;

    if (mask == -1) {
        return src;
    }

    for (o = 0; mask != 0; o++) {
        i = ctz64(mask);
        mask &= mask - 1;
        result |= ((src >> i) & 1) << o;
    }

    return result;
}

/*****************************************************************************/
/* Altivec extension helpers */
#if HOST_BIG_ENDIAN
#define VECTOR_FOR_INORDER_I(index, element) \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element) \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers. */
#define SATCVT(from, to, from_type, to_type, min, max) \
    static inline to_type cvt##from##to(from_type x, int *sat) \
    { \
        to_type r; \
 \
        if (x < (from_type)min) { \
            r = min; \
            *sat = 1; \
        } else if (x > (from_type)max) { \
            r = max; \
            *sat = 1; \
        } else { \
            r = x; \
        } \
        return r; \
    }
#define SATCVTU(from, to, from_type, to_type, min, max) \
    static inline to_type cvt##from##to(from_type x, int *sat) \
    { \
        to_type r; \
 \
        if (x > (from_type)max) { \
            r = max; \
            *sat = 1; \
        } else { \
            r = x; \
        } \
        return r; \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
{
    ppc_store_vscr(env, vscr);
}

uint32_t helper_mfvscr(CPUPPCState *env)
{
    return ppc_get_vscr(env);
}

static inline void set_vscr_sat(CPUPPCState *env)
{
    /* The choice of non-zero value is arbitrary. */
    env->vscr_sat.u32[0] = 1;
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->VsrD(1) = res & 1;
    r->VsrD(0) = 0;
}

#define VARITHFP(suffix, func) \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b) \
    { \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
            r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
        } \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type) \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c) \
    { \
        int i; \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
            r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
                                       type, &env->vec_status); \
        } \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element) \
    { \
        type result = (type)a->element[i] op (type)b->element[i]; \
        r->element[i] = cvt(result, &sat); \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element) \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
                        ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
    { \
        int sat = 0; \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            VARITHSAT_CASE(optype, op, cvt, element); \
        } \
        if (sat) { \
            vscr_sat->u32[0] = 1; \
        } \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

#define VAVG_DO(name, element, etype) \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
            r->element[i] = x >> 1; \
        } \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element, \
             unsigned_type) \
    VAVG_DO(avgs##type, signed_element, signed_type) \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG

#define VABSDU_DO(name, element) \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            r->element[i] = (a->element[i] > b->element[i]) ? \
                            (a->element[i] - b->element[i]) : \
                            (b->element[i] - a->element[i]); \
        } \
    }

/*
 * VABSDU - Vector absolute difference unsigned
 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 * element - element type to access from vector
 */
#define VABSDU(type, element) \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU

#define VCF(suffix, cvt, element) \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
                            ppc_avr_t *b, uint32_t uim) \
    { \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
            float32 t = cvt(b->element[i], &env->vec_status); \
            r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
        } \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMPNEZ(NAME, ELEM) \
void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
{ \
    for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \
        t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \
                      (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \
    } \
}
VCMPNEZ(VCMPNEZB, u8)
VCMPNEZ(VCMPNEZH, u16)
VCMPNEZ(VCMPNEZW, u32)
#undef VCMPNEZ

#define VCMPFP_DO(suffix, compare, order, record) \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
                             ppc_avr_t *a, ppc_avr_t *b) \
    { \
        uint32_t ones = (uint32_t)-1; \
        uint32_t all = ones; \
        uint32_t none = 0; \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
            uint32_t result; \
            FloatRelation rel = \
                float32_compare_quiet(a->f32[i], b->f32[i], \
                                      &env->vec_status); \
            if (rel == float_relation_unordered) { \
                result = 0; \
            } else if (rel compare order) { \
                result = ones; \
            } else { \
                result = 0; \
            } \
            r->u32[i] = result; \
            all &= result; \
            none |= result; \
        } \
        if (record) { \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
        } \
    }
#define VCMPFP(suffix, compare, order) \
    VCMPFP_DO(suffix, compare, order, 0) \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
                                                     &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f32[i]);
            FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
                                                         &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

#define VCT(suffix, satcvt, element) \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
                            ppc_avr_t *b, uint32_t uim) \
    { \
        int i; \
        int sat = 0; \
        float_status s = env->vec_status; \
 \
        set_float_rounding_mode(float_round_to_zero, &s); \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
            if (float32_is_any_nan(b->f32[i])) { \
                r->element[i] = 0; \
            } else { \
                float64 t = float32_to_float64(b->f32[i], &s); \
                int64_t j; \
 \
                t = float64_scalbn(t, uim, &s); \
                j = float64_to_int64(t, &s); \
                r->element[i] = satcvt(j, &sat); \
            } \
        } \
        if (sat) { \
            set_vscr_sat(env); \
        } \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

typedef int64_t do_ger(uint32_t, uint32_t, uint32_t);

static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask)
{
    int64_t psum = 0;
    for (int i = 0; i < 8; i++, mask >>= 1) {
        if (mask & 1) {
            psum += (int64_t)sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4);
        }
    }
    return psum;
}

static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask)
{
    int64_t psum = 0;
    for (int i = 0; i < 4; i++, mask >>= 1) {
        if (mask & 1) {
            psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8);
        }
    }
    return psum;
}

static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask)
{
    int64_t psum = 0;
    for (int i = 0; i < 2; i++, mask >>= 1) {
        if (mask & 1) {
            psum += (int64_t)sextract32(a, 16 * i, 16) *
                    sextract32(b, 16 * i, 16);
        }
    }
    return psum;
}

static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at,
                   uint32_t mask, bool sat, bool acc, do_ger ger)
{
    uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK),
            xmsk = FIELD_EX32(mask, GER_MSK, XMSK),
            ymsk = FIELD_EX32(mask, GER_MSK, YMSK);
    uint8_t xmsk_bit, ymsk_bit;
    int64_t psum;
    int i, j;
    for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {
        for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {
            if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {
                psum = ger(a->VsrW(i), b->VsrW(j), pmsk);
                if (acc) {
                    psum += at[i].VsrSW(j);
                }
                if (sat && psum > INT32_MAX) {
                    set_vscr_sat(env);
                    at[i].VsrSW(j) = INT32_MAX;
                } else if (sat && psum < INT32_MIN) {
                    set_vscr_sat(env);
                    at[i].VsrSW(j) = INT32_MIN;
                } else {
                    at[i].VsrSW(j) = (int32_t) psum;
                }
            } else {
                at[i].VsrSW(j) = 0;
            }
        }
    }
}

QEMU_FLATTEN
void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                     ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, false, false, ger_rank8);
}

QEMU_FLATTEN
void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                       ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, false, true, ger_rank8);
}

QEMU_FLATTEN
void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                     ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, false, false, ger_rank4);
}

QEMU_FLATTEN
void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                       ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, false, true, ger_rank4);
}

QEMU_FLATTEN
void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                        ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, true, true, ger_rank4);
}

QEMU_FLATTEN
void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                      ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, false, false, ger_rank2);
}

QEMU_FLATTEN
void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                       ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, true, false, ger_rank2);
}

QEMU_FLATTEN
void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                        ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, false, true, ger_rank2);
}

QEMU_FLATTEN
void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                         ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, true, true, ger_rank2);
}

target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, access, ofs) \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        ppc_avr_t result; \
        int i, half = ARRAY_SIZE(r->element) / 2; \
 \
        for (i = 0; i < half; i++) { \
            result.access(i * 2 + 0) = a->access(i + ofs); \
            result.access(i * 2 + 1) = b->access(i + ofs); \
        } \
        *r = result; \
    }

#define VMRG(suffix, element, access) \
    VMRG_DO(mrgl##suffix, element, access, half) \
    VMRG_DO(mrgh##suffix, element, access, 0)
VMRG(b, u8, VsrB)
VMRG(h, u16, VsrH)
VMRG(w, u32, VsrW)
#undef VMRG_DO
#undef VMRG

void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
    void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
            r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
                                     (cast)b->mul_access(i); \
        } \
    }

#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
    void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
            r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
                                     (cast)b->mul_access(i + 1); \
        } \
    }

#define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
    VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \
    VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
VMUL(SB, s8, VsrSB, VsrSH, int16_t)
VMUL(SH, s16, VsrSH, VsrSW, int32_t)
VMUL(SW, s32, VsrSW, VsrSD, int64_t)
VMUL(UB, u8, VsrB, VsrH, uint16_t)
VMUL(UH, u16, VsrH, VsrW, uint32_t)
VMUL(UW, u32, VsrW, VsrD, uint64_t)
#undef VMUL_DO_EVN
#undef VMUL_DO_ODD
#undef VMUL

void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
                    target_ulong uim)
{
    int i, idx;
    ppc_vsr_t tmp = { .u64 = {0, 0} };

    for (i = 0; i < ARRAY_SIZE(t->u8); i++) {
        if ((pcv->VsrB(i) >> 5) == uim) {
            idx = pcv->VsrB(i) & 0x1f;
            if (idx < ARRAY_SIZE(t->u8)) {
                tmp.VsrB(i) = s0->VsrB(idx);
            } else {
                tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8));
            }
        }
    }

    *t = tmp;
}

void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    Int128 neg1 = int128_makes64(-1);
    Int128 int128_min = int128_make128(0, INT64_MIN);
    if (likely(int128_nz(b->s128) &&
              (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
        t->s128 = int128_divs(a->s128, b->s128);
    } else {
        t->s128 = a->s128; /* Undefined behavior */
    }
}

void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    if (int128_nz(b->s128)) {
        t->s128 = int128_divu(a->s128, b->s128);
    } else {
        t->s128 = a->s128; /* Undefined behavior */
    }
}

void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int64_t high;
    uint64_t low;
    for (i = 0; i < 2; i++) {
        high = a->s64[i];
        low = 0;
        if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) {
            t->s64[i] = a->s64[i]; /* Undefined behavior */
        } else {
            divs128(&low, &high, b->s64[i]);
            t->s64[i] = low;
        }
    }
}

void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t high, low;
    for (i = 0; i < 2; i++) {
        high = a->u64[i];
        low = 0;
        if (unlikely(!b->u64[i])) {
            t->u64[i] = a->u64[i]; /* Undefined behavior */
        } else {
            divu128(&low, &high, b->u64[i]);
            t->u64[i] = low;
        }
    }
}

void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    Int128 high, low;
    Int128 int128_min = int128_make128(0, INT64_MIN);
    Int128 neg1 = int128_makes64(-1);

    high = a->s128;
    low = int128_zero();
    if (unlikely(!int128_nz(b->s128) ||
                 (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) {
        t->s128 = a->s128; /* Undefined behavior */
    } else {
        divs256(&low, &high, b->s128);
        t->s128 = low;
    }
}

void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    Int128 high, low;

    high = a->s128;
    low = int128_zero();
    if (unlikely(!int128_nz(b->s128))) {
        t->s128 = a->s128; /* Undefined behavior */
    } else {
        divu256(&low, &high, b->s128);
        t->s128 = low;
    }
}

void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    Int128 neg1 = int128_makes64(-1);
    Int128 int128_min = int128_make128(0, INT64_MIN);
    if (likely(int128_nz(b->s128) &&
              (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
        t->s128 = int128_rems(a->s128, b->s128);
    } else {
        t->s128 = int128_zero(); /* Undefined behavior */
    }
}

void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    if (likely(int128_nz(b->s128))) {
        t->s128 = int128_remu(a->s128, b->s128);
    } else {
        t->s128 = int128_zero(); /* Undefined behavior */
    }
}

void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = s & 0xf;

        if (s & 0x10) {
            result.VsrB(i) = b->VsrB(index);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = 15 - (s & 0xf);

        if (s & 0x10) {
            result.VsrB(i) = a->VsrB(index);
        } else {
            result.VsrB(i) = b->VsrB(index);
        }
    }
    *r = result;
}

#define XXGENPCV_BE_EXP(NAME, SZ) \
void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
{ \
    ppc_vsr_t tmp; \
 \
    /* Initialize tmp with the result of an all-zeros mask */ \
    tmp.VsrD(0) = 0x1011121314151617; \
    tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \
 \
    /* Iterate over the most significant byte of each element */ \
    for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
        if (b->VsrB(i) & 0x80) { \
            /* Update each byte of the element */ \
            for (int k = 0; k < SZ; k++) { \
                tmp.VsrB(i + k) = j + k; \
            } \
            j += SZ; \
        } \
    } \
 \
    *t = tmp; \
}

#define XXGENPCV_BE_COMP(NAME, SZ) \
void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
{ \
    ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
 \
    /* Iterate over the most significant byte of each element */ \
    for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
        if (b->VsrB(i) & 0x80) { \
            /* Update each byte of the element */ \
            for (int k = 0; k < SZ; k++) { \
                tmp.VsrB(j + k) = i + k; \
            } \
            j += SZ; \
        } \
    } \
 \
    *t = tmp; \
}

#define XXGENPCV_LE_EXP(NAME, SZ) \
void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
{ \
    ppc_vsr_t tmp; \
 \
    /* Initialize tmp with the result of an all-zeros mask */ \
    tmp.VsrD(0) = 0x1F1E1D1C1B1A1918; \
    tmp.VsrD(1) = 0x1716151413121110; \
 \
    /* Iterate over the most significant byte of each element */ \
    for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
        /* Reverse indexing of "i" */ \
        const int idx = ARRAY_SIZE(b->u8) - i - SZ; \
        if (b->VsrB(idx) & 0x80) { \
            /* Update each byte of the element */ \
            for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \
                tmp.VsrB(idx + rk) = j + k; \
            } \
            j += SZ; \
        } \
    } \
 \
    *t = tmp; \
}

#define XXGENPCV_LE_COMP(NAME, SZ) \
void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
{ \
    ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
 \
    /* Iterate over the most significant byte of each element */ \
    for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
        if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) { \
            /* Update each byte of the element */ \
            for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \
                /* Reverse indexing of "j" */ \
                const int idx = ARRAY_SIZE(b->u8) - j - SZ; \
                tmp.VsrB(idx + rk) = i + k; \
            } \
            j += SZ; \
        } \
    } \
 \
    *t = tmp; \
}

#define XXGENPCV(NAME, SZ) \
    XXGENPCV_BE_EXP(NAME, SZ) \
    XXGENPCV_BE_COMP(NAME, SZ) \
    XXGENPCV_LE_EXP(NAME, SZ) \
    XXGENPCV_LE_COMP(NAME, SZ) \

XXGENPCV(XXGENPCVBM, 1)
XXGENPCV(XXGENPCVHM, 2)
XXGENPCV(XXGENPCVWM, 4)
XXGENPCV(XXGENPCVDM, 8)

#undef XXGENPCV_BE_EXP
#undef XXGENPCV_BE_COMP
#undef XXGENPCV_LE_EXP
#undef XXGENPCV_LE_COMP
#undef XXGENPCV

#if HOST_BIG_ENDIAN
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#endif
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->VsrD(i), 63 - index, 1))

void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}

void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63 - (index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->VsrD(0) = perm;
    r->VsrD(1) = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

#define PMSUM(name, srcfld, trgfld, trgtyp) \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
{ \
    int i, j; \
    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
 \
    VECTOR_FOR_INORDER_I(i, srcfld) { \
        prod[i] = 0; \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
            if (a->srcfld[i] & (1ull << j)) { \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
            } \
        } \
    } \
 \
    VECTOR_FOR_INORDER_I(i, trgfld) { \
        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
    } \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    Int128 tmp, prod[2] = {int128_zero(), int128_zero()};

    for (j = 0; j < 64; j++) {
        for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
            if (a->VsrD(i) & (1ull << j)) {
                tmp = int128_make64(b->VsrD(i));
                tmp = int128_lshift(tmp, j);
                prod[i] = int128_xor(prod[i], tmp);
            }
        }
    }

    r->s128 = int128_xor(prod[0], prod[1]);
}

#if HOST_BIG_ENDIAN
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if HOST_BIG_ENDIAN
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
                                     ((e >> 6) & 0x3e0) |
                                     ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

#define VPK(suffix, from, to, cvt, dosat) \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
                            ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i; \
        int sat = 0; \
        ppc_avr_t result; \
        ppc_avr_t *a0 = PKBIG ? a : b; \
        ppc_avr_t *a1 = PKBIG ? b : a; \
 \
        VECTOR_FOR_INORDER_I(i, from) { \
            result.to[i] = cvt(a0->from[i], &sat); \
            result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
        } \
        *r = result; \
        if (dosat && sat) { \
            set_vscr_sat(env); \
        } \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(sdss, s64, s32, cvtsdsw, 1)
VPK(sdus, s64, u32, cvtsduw, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(udus, u64, u32, cvtuduw, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
VPK(udum, u64, u32, I, 0)
#undef I
#undef VPK
#undef PKBIG

void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
    }
}

#define VRFI(suffix, rounding) \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
                             ppc_avr_t *b) \
    { \
        int i; \
        float_status s = env->vec_status; \
 \
        set_float_rounding_mode(rounding, &s); \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
            r->f32[i] = float32_round_to_int (b->f32[i], &s); \
        } \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI

void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        float32 t = float32_sqrt(b->f32[i], &env->vec_status);

        r->f32[i] = float32_div(float32_one, t, &env->vec_status);
    }
}

#define VRLMI(name, size, element, insert) \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
{ \
    int i; \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
        uint##size##_t src1 = a->element[i]; \
        uint##size##_t src2 = b->element[i]; \
        uint##size##_t src3 = r->element[i]; \
        uint##size##_t begin, end, shift, mask, rot_val; \
 \
        shift = extract##size(src2, 0, 6); \
        end = extract##size(src2, 8, 6); \
        begin = extract##size(src2, 16, 6); \
        rot_val = rol##size(src1, shift); \
        mask = mask_u##size(begin, end); \
        if (insert) { \
            r->element[i] = (rot_val & mask) | (src3 & ~mask); \
        } else { \
            r->element[i] = (rot_val & mask); \
        } \
    } \
}

VRLMI(VRLDMI, 64, u64, 1);
VRLMI(VRLWMI, 32, u32, 1);
VRLMI(VRLDNM, 64, u64, 0);
VRLMI(VRLWNM, 32, u32, 0);

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
    }
}

#define VEXTU_X_DO(name, size, left) \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
{ \
    int index = (a & 0xf) * 8; \
    if (left) { \
        index = 128 - index - size; \
    } \
    return int128_getlo(int128_rshift(b->s128, index)) & \
        MAKE_64BIT_MASK(0, size); \
}
VEXTU_X_DO(vextublx, 8, 1)
VEXTU_X_DO(vextuhlx, 16, 1)
VEXTU_X_DO(vextuwlx, 32, 1)
VEXTU_X_DO(vextubrx, 8, 0)
VEXTU_X_DO(vextuhrx, 16, 0)
VEXTU_X_DO(vextuwrx, 32, 0)
#undef VEXTU_X_DO

void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->VsrB(i) & 0x7;             /* extract shift value */
        bytes = (a->VsrB(i) << 8) +           /* extract adjacent bytes */
            (((i + 1) < size) ? a->VsrB(i + 1) : 0);
        r->VsrB(i) = (bytes << shift) >> 8;   /* shift and store result */
    }
}

void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /*
     * Use reverse order, as destination and source register can be
     * same. Its being modified in place saving temporary, reverse
     * order will guarantee that computed result is not fed back.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->VsrB(i) & 0x7;               /* extract shift value */
        bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
                                                /* extract adjacent bytes */
        r->VsrB(i) = (bytes >> shift) & 0xFF;   /* shift and store result */
    }
}

void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.VsrB(i) = b->VsrB(index - 0x10);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if HOST_BIG_ENDIAN
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

#if HOST_BIG_ENDIAN
#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
#else
#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
#endif

#define VINSX(SUFFIX, TYPE) \
void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \
                                         uint64_t val, target_ulong index) \
{ \
    const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \
    target_long idx = index; \
 \
    if (idx < 0 || idx > maxidx) { \
        idx = idx < 0 ? sizeof(TYPE) - idx : idx; \
        qemu_log_mask(LOG_GUEST_ERROR, \
            "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \
            ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \
    } else { \
        TYPE src = val; \
        memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \
    } \
}
VINSX(B, uint8_t)
VINSX(H, uint16_t)
VINSX(W, uint32_t)
VINSX(D, uint64_t)
#undef ELEM_ADDR
#undef VINSX
#if HOST_BIG_ENDIAN
#define VEXTDVLX(NAME, SIZE) \
void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
                   target_ulong index) \
{ \
    const target_long idx = index; \
    ppc_avr_t tmp[2] = { *a, *b }; \
    memset(t, 0, sizeof(*t)); \
    if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
        memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
    } else { \
        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
                      TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
                      env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
    } \
}
#else
#define VEXTDVLX(NAME, SIZE) \
void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
                   target_ulong index) \
{ \
    const target_long idx = index; \
    ppc_avr_t tmp[2] = { *b, *a }; \
    memset(t, 0, sizeof(*t)); \
    if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
        memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \
               (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \
    } else { \
        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
                      TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
                      env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
    } \
}
#endif
VEXTDVLX(VEXTDUBVLX, 1)
VEXTDVLX(VEXTDUHVLX, 2)
VEXTDVLX(VEXTDUWVLX, 4)
VEXTDVLX(VEXTDDVLX, 8)
#undef VEXTDVLX
#if HOST_BIG_ENDIAN
#define VEXTRACT(suffix, element) \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    { \
        uint32_t es = sizeof(r->element[0]); \
        memmove(&r->u8[8 - es], &b->u8[index], es); \
        memset(&r->u8[8], 0, 8); \
        memset(&r->u8[0], 0, 8 - es); \
    }
#else
#define VEXTRACT(suffix, element) \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    { \
        uint32_t es = sizeof(r->element[0]); \
        uint32_t s = (16 - index) - es; \
        memmove(&r->u8[8], &b->u8[s], es); \
        memset(&r->u8[0], 0, 8); \
        memset(&r->u8[8 + es], 0, 8 - es); \
    }
#endif
VEXTRACT(ub, u8)
VEXTRACT(uh, u16)
VEXTRACT(uw, u32)
VEXTRACT(d, u64)
#undef VEXTRACT

#define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
{ \
    int i, idx, crf = 0; \
 \
    for (i = 0; i < NUM_ELEMS; i++) { \
        idx = LEFT ? i : NUM_ELEMS - i - 1; \
        if (b->Vsr##ELEM(idx)) { \
            t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \
        } else { \
            crf = 0b0010; \
            break; \
        } \
    } \
 \
    for (; i < NUM_ELEMS; i++) { \
        idx = LEFT ? i : NUM_ELEMS - i - 1; \
        t->Vsr##ELEM(idx) = 0; \
    } \
 \
    return crf; \
}
VSTRI(VSTRIBL, B, 16, true)
VSTRI(VSTRIBR, B, 16, false)
VSTRI(VSTRIHL, H, 8, true)
VSTRI(VSTRIHR, H, 8, false)
#undef VSTRI

void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
{
    ppc_vsr_t t = { };
    size_t es = sizeof(uint32_t);
    uint32_t ext_index;
    int i;

    ext_index = index;
    for (i = 0; i < es; i++, ext_index++) {
        t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
    }

    *xt = t;
}

void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
{
    ppc_vsr_t t = *xt;
    size_t es = sizeof(uint32_t);
    int ins_index, i = 0;

    ins_index = index;
    for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
        t.VsrB(ins_index) = xb->VsrB(8 - es + i);
    }

    *xt = t;
}

void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
                   uint32_t desc)
{
    /*
     * Instead of processing imm bit-by-bit, we'll skip the computation of
     * conjunctions whose corresponding bit is unset.
     */
    int bit, imm = simd_data(desc);
    Int128 conj, disj = int128_zero();

    /* Iterate over set bits from the least to the most significant bit */
    while (imm) {
        /*
         * Get the next bit to be processed with ctz64. Invert the result of
         * ctz64 to match the indexing used by PowerISA.
         */
        bit = 7 - ctzl(imm);
        if (bit & 0x4) {
            conj = a->s128;
        } else {
            conj = int128_not(a->s128);
        }
        if (bit & 0x2) {
            conj = int128_and(conj, b->s128);
        } else {
            conj = int128_and(conj, int128_not(b->s128));
        }
        if (bit & 0x1) {
            conj = int128_and(conj, c->s128);
        } else {
            conj = int128_and(conj, int128_not(c->s128));
        }
        disj = int128_or(disj, conj);

        /* Unset the least significant bit that is set */
        imm &= imm - 1;
    }

    t->s128 = disj;
}

#define XXBLEND(name, sz) \
void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
                                 ppc_avr_t *c, uint32_t desc) \
{ \
    for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \
        t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \
            b->glue(u, sz)[i] : a->glue(u, sz)[i]; \
    } \
}
XXBLEND(B, 8)
XXBLEND(H, 16)
XXBLEND(W, 32)
XXBLEND(D, 64)
#undef XXBLEND

#define VNEG(name, element) \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
{ \
    int i; \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
        r->element[i] = -b->element[i]; \
    } \
}
VNEG(vnegw, s32)
VNEG(vnegd, s64)
#undef VNEG

void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if HOST_BIG_ENDIAN
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = a->u32[i] >= b->u32[i];
    }
}

void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

    upper = ARRAY_SIZE(r->s32) - 1;
    t = (int64_t)b->VsrSW(upper);
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->VsrSW(i);
        result.VsrSW(i) = 0;
    }
    result.VsrSW(upper) = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

    upper = 1;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->VsrSW(upper + i * 2);

        result.VsrD(i) = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->VsrSW(2 * i + j);
        }
        result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

#if HOST_BIG_ENDIAN
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
#define VUPKPX(suffix, hi) \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
    { \
        int i; \
        ppc_avr_t result; \
 \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
            uint16_t e = b->u16[hi ? i : i + 4]; \
            uint8_t a = (e >> 15) ? 0xff : 0; \
            uint8_t r = (e >> 10) & 0x1f; \
            uint8_t g = (e >> 5) & 0x1f; \
            uint8_t b = e & 0x1f; \
 \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
        } \
        *r = result; \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

#define VUPK(suffix, unpacked, packee, hi) \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
    { \
        int i; \
        ppc_avr_t result; \
 \
        if (hi) { \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
                result.unpacked[i] = b->packee[i]; \
            } \
        } else { \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) { \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            } \
        } \
        *r = result; \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(hsw, s64, s32, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
VUPK(lsw, s64, s32, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

#define VGENERIC_DO(name, element) \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
    { \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            r->element[i] = name(b->element[i]); \
        } \
    }

#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)

#undef clzb
#undef clzh

#define ctzb(v) ((v) ? ctz32(v) : 8)
#define ctzh(v) ((v) ? ctz32(v) : 16)
ctz32(v) : 16)
2149 #define ctzw(v) ctz32((v))
2150 #define ctzd(v) ctz64((v))
2151
2152 VGENERIC_DO(ctzb, u8)
2153 VGENERIC_DO(ctzh, u16)
2154 VGENERIC_DO(ctzw, u32)
2155 VGENERIC_DO(ctzd, u64)
2156
2157 #undef ctzb
2158 #undef ctzh
2159 #undef ctzw
2160 #undef ctzd
2161
2162 #define popcntb(v) ctpop8(v)
2163 #define popcnth(v) ctpop16(v)
2164 #define popcntw(v) ctpop32(v)
2165 #define popcntd(v) ctpop64(v)
2166
2167 VGENERIC_DO(popcntb, u8)
2168 VGENERIC_DO(popcnth, u16)
2169 VGENERIC_DO(popcntw, u32)
2170 VGENERIC_DO(popcntd, u64)
2171
2172 #undef popcntb
2173 #undef popcnth
2174 #undef popcntw
2175 #undef popcntd
2176
2177 #undef VGENERIC_DO
2178
2179 #if HOST_BIG_ENDIAN
2180 #define QW_ONE { .u64 = { 0, 1 } }
2181 #else
2182 #define QW_ONE { .u64 = { 1, 0 } }
2183 #endif
2184
2185 #ifndef CONFIG_INT128
2186
2187 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2188 {
2189 t->u64[0] = ~a.u64[0];
2190 t->u64[1] = ~a.u64[1];
2191 }
2192
2193 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2194 {
2195 if (a.VsrD(0) < b.VsrD(0)) {
2196 return -1;
2197 } else if (a.VsrD(0) > b.VsrD(0)) {
2198 return 1;
2199 } else if (a.VsrD(1) < b.VsrD(1)) {
2200 return -1;
2201 } else if (a.VsrD(1) > b.VsrD(1)) {
2202 return 1;
2203 } else {
2204 return 0;
2205 }
2206 }
2207
2208 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2209 {
2210 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2211 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2212 (~a.VsrD(1) < b.VsrD(1));
2213 }
2214
2215 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2216 {
2217 ppc_avr_t not_a;
2218 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2219 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2220 (~a.VsrD(1) < b.VsrD(1));
2221 avr_qw_not(&not_a, a);
2222 return avr_qw_cmpu(not_a, b) < 0;
2223 }
2224
2225 #endif
2226
2227 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2228 {
2229 #ifdef CONFIG_INT128
2230 r->u128 = a->u128 + b->u128;
2231 #else
2232 avr_qw_add(r, *a, *b);
2233 #endif
2234 }
2235
2236 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2237 {
2238 #ifdef CONFIG_INT128
2239 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2240 #else
2241
2242 if (c->VsrD(1) & 1) {
2243 ppc_avr_t tmp;
2244
2245 tmp.VsrD(0) = 0;
2246 tmp.VsrD(1) = c->VsrD(1) & 1;
2247 avr_qw_add(&tmp, *a, tmp);
2248 avr_qw_add(r, tmp, *b);
2249 } else {
2250 avr_qw_add(r, *a, *b);
2251 }
2252 #endif
2253 }
2254
2255 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2256 {
2257 #ifdef CONFIG_INT128
2258 r->u128 = (~a->u128 < b->u128);
2259 #else
2260 ppc_avr_t not_a;
2261
2262 avr_qw_not(&not_a, *a);
2263
2264 r->VsrD(0) = 0;
2265 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
2266 #endif
2267 }
2268
2269 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2270 {
2271 #ifdef CONFIG_INT128
2272 int carry_out = (~a->u128 < b->u128);
2273 if (!carry_out && (c->u128 & 1)) {
2274 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2275 ((a->u128 != 0) || (b->u128 != 0));
2276 }
2277 r->u128 = carry_out;
2278 #else
2279
2280 int carry_in = c->VsrD(1) & 1;
2281 int carry_out = 0;
2282 ppc_avr_t tmp;
2283
2284 carry_out = avr_qw_addc(&tmp, *a, *b);
2285
2286 if (!carry_out && carry_in) {
2287 ppc_avr_t one = QW_ONE;
2288 carry_out = avr_qw_addc(&tmp, tmp, one);
2289 }
2290 r->VsrD(0) = 0;
2291 r->VsrD(1) = carry_out;
2292 #endif
2293 }
2294
2295 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2296 {
2297 #ifdef CONFIG_INT128
2298 r->u128 = a->u128 - b->u128;
2299 #else
2300 ppc_avr_t tmp;
2301
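/*
 * Without CONFIG_INT128, the 128-bit difference below is formed with the
 * two's complement identity a - b = a + ~b + 1: invert b, add it to a,
 * then add QW_ONE (a 128-bit constant 1).
 */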
ppc_avr_t one = QW_ONE; 2302 2303 avr_qw_not(&tmp, *b); 2304 avr_qw_add(&tmp, *a, tmp); 2305 avr_qw_add(r, tmp, one); 2306 #endif 2307 } 2308 2309 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2310 { 2311 #ifdef CONFIG_INT128 2312 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); 2313 #else 2314 ppc_avr_t tmp, sum; 2315 2316 avr_qw_not(&tmp, *b); 2317 avr_qw_add(&sum, *a, tmp); 2318 2319 tmp.VsrD(0) = 0; 2320 tmp.VsrD(1) = c->VsrD(1) & 1; 2321 avr_qw_add(r, sum, tmp); 2322 #endif 2323 } 2324 2325 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2326 { 2327 #ifdef CONFIG_INT128 2328 r->u128 = (~a->u128 < ~b->u128) || 2329 (a->u128 + ~b->u128 == (__uint128_t)-1); 2330 #else 2331 int carry = (avr_qw_cmpu(*a, *b) > 0); 2332 if (!carry) { 2333 ppc_avr_t tmp; 2334 avr_qw_not(&tmp, *b); 2335 avr_qw_add(&tmp, *a, tmp); 2336 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull)); 2337 } 2338 r->VsrD(0) = 0; 2339 r->VsrD(1) = carry; 2340 #endif 2341 } 2342 2343 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2344 { 2345 #ifdef CONFIG_INT128 2346 r->u128 = 2347 (~a->u128 < ~b->u128) || 2348 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); 2349 #else 2350 int carry_in = c->VsrD(1) & 1; 2351 int carry_out = (avr_qw_cmpu(*a, *b) > 0); 2352 if (!carry_out && carry_in) { 2353 ppc_avr_t tmp; 2354 avr_qw_not(&tmp, *b); 2355 avr_qw_add(&tmp, *a, tmp); 2356 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull)); 2357 } 2358 2359 r->VsrD(0) = 0; 2360 r->VsrD(1) = carry_out; 2361 #endif 2362 } 2363 2364 #define BCD_PLUS_PREF_1 0xC 2365 #define BCD_PLUS_PREF_2 0xF 2366 #define BCD_PLUS_ALT_1 0xA 2367 #define BCD_NEG_PREF 0xD 2368 #define BCD_NEG_ALT 0xB 2369 #define BCD_PLUS_ALT_2 0xE 2370 #define NATIONAL_PLUS 0x2B 2371 #define NATIONAL_NEG 0x2D 2372 2373 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2374 2375 static int bcd_get_sgn(ppc_avr_t *bcd) 2376 { 2377 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) { 2378 case BCD_PLUS_PREF_1: 2379 case BCD_PLUS_PREF_2: 2380 case BCD_PLUS_ALT_1: 2381 case BCD_PLUS_ALT_2: 2382 { 2383 return 1; 2384 } 2385 2386 case BCD_NEG_PREF: 2387 case BCD_NEG_ALT: 2388 { 2389 return -1; 2390 } 2391 2392 default: 2393 { 2394 return 0; 2395 } 2396 } 2397 } 2398 2399 static int bcd_preferred_sgn(int sgn, int ps) 2400 { 2401 if (sgn >= 0) { 2402 return (ps == 0) ? 
BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2403 } else { 2404 return BCD_NEG_PREF; 2405 } 2406 } 2407 2408 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2409 { 2410 uint8_t result; 2411 if (n & 1) { 2412 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4; 2413 } else { 2414 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF; 2415 } 2416 2417 if (unlikely(result > 9)) { 2418 *invalid = true; 2419 } 2420 return result; 2421 } 2422 2423 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2424 { 2425 if (n & 1) { 2426 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F; 2427 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4); 2428 } else { 2429 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0; 2430 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit; 2431 } 2432 } 2433 2434 static bool bcd_is_valid(ppc_avr_t *bcd) 2435 { 2436 int i; 2437 int invalid = 0; 2438 2439 if (bcd_get_sgn(bcd) == 0) { 2440 return false; 2441 } 2442 2443 for (i = 1; i < 32; i++) { 2444 bcd_get_digit(bcd, i, &invalid); 2445 if (unlikely(invalid)) { 2446 return false; 2447 } 2448 } 2449 return true; 2450 } 2451 2452 static int bcd_cmp_zero(ppc_avr_t *bcd) 2453 { 2454 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) { 2455 return CRF_EQ; 2456 } else { 2457 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT; 2458 } 2459 } 2460 2461 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2462 { 2463 return reg->VsrH(7 - n); 2464 } 2465 2466 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2467 { 2468 reg->VsrH(7 - n) = val; 2469 } 2470 2471 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2472 { 2473 int i; 2474 int invalid = 0; 2475 for (i = 31; i > 0; i--) { 2476 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2477 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2478 if (unlikely(invalid)) { 2479 return 0; /* doesn't matter */ 2480 } else if (dig_a > dig_b) { 2481 return 1; 2482 } else if (dig_a < dig_b) { 2483 return -1; 2484 } 2485 } 2486 2487 return 0; 2488 } 2489 2490 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2491 int *overflow) 2492 { 2493 int carry = 0; 2494 int i; 2495 int is_zero = 1; 2496 2497 for (i = 1; i <= 31; i++) { 2498 uint8_t digit = bcd_get_digit(a, i, invalid) + 2499 bcd_get_digit(b, i, invalid) + carry; 2500 is_zero &= (digit == 0); 2501 if (digit > 9) { 2502 carry = 1; 2503 digit -= 10; 2504 } else { 2505 carry = 0; 2506 } 2507 2508 bcd_put_digit(t, digit, i); 2509 } 2510 2511 *overflow = carry; 2512 return is_zero; 2513 } 2514 2515 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2516 int *overflow) 2517 { 2518 int carry = 0; 2519 int i; 2520 2521 for (i = 1; i <= 31; i++) { 2522 uint8_t digit = bcd_get_digit(a, i, invalid) - 2523 bcd_get_digit(b, i, invalid) + carry; 2524 if (digit & 0x80) { 2525 carry = -1; 2526 digit += 10; 2527 } else { 2528 carry = 0; 2529 } 2530 2531 bcd_put_digit(t, digit, i); 2532 } 2533 2534 *overflow = carry; 2535 } 2536 2537 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2538 { 2539 2540 int sgna = bcd_get_sgn(a); 2541 int sgnb = bcd_get_sgn(b); 2542 int invalid = (sgna == 0) || (sgnb == 0); 2543 int overflow = 0; 2544 int zero = 0; 2545 uint32_t cr = 0; 2546 ppc_avr_t result = { .u64 = { 0, 0 } }; 2547 2548 if (!invalid) { 2549 if (sgna == sgnb) { 2550 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2551 zero = bcd_add_mag(&result, a, b, &invalid, &overflow); 2552 cr = (sgna > 0) ? 
CRF_GT : CRF_LT; 2553 } else { 2554 int magnitude = bcd_cmp_mag(a, b); 2555 if (magnitude > 0) { 2556 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2557 bcd_sub_mag(&result, a, b, &invalid, &overflow); 2558 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2559 } else if (magnitude < 0) { 2560 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps); 2561 bcd_sub_mag(&result, b, a, &invalid, &overflow); 2562 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2563 } else { 2564 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps); 2565 cr = CRF_EQ; 2566 } 2567 } 2568 } 2569 2570 if (unlikely(invalid)) { 2571 result.VsrD(0) = result.VsrD(1) = -1; 2572 cr = CRF_SO; 2573 } else if (overflow) { 2574 cr |= CRF_SO; 2575 } else if (zero) { 2576 cr |= CRF_EQ; 2577 } 2578 2579 *r = result; 2580 2581 return cr; 2582 } 2583 2584 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2585 { 2586 ppc_avr_t bcopy = *b; 2587 int sgnb = bcd_get_sgn(b); 2588 if (sgnb < 0) { 2589 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2590 } else if (sgnb > 0) { 2591 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2592 } 2593 /* else invalid ... defer to bcdadd code for proper handling */ 2594 2595 return helper_bcdadd(r, a, &bcopy, ps); 2596 } 2597 2598 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2599 { 2600 int i; 2601 int cr = 0; 2602 uint16_t national = 0; 2603 uint16_t sgnb = get_national_digit(b, 0); 2604 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2605 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2606 2607 for (i = 1; i < 8; i++) { 2608 national = get_national_digit(b, i); 2609 if (unlikely(national < 0x30 || national > 0x39)) { 2610 invalid = 1; 2611 break; 2612 } 2613 2614 bcd_put_digit(&ret, national & 0xf, i); 2615 } 2616 2617 if (sgnb == NATIONAL_PLUS) { 2618 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2619 } else { 2620 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2621 } 2622 2623 cr = bcd_cmp_zero(&ret); 2624 2625 if (unlikely(invalid)) { 2626 cr = CRF_SO; 2627 } 2628 2629 *r = ret; 2630 2631 return cr; 2632 } 2633 2634 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2635 { 2636 int i; 2637 int cr = 0; 2638 int sgnb = bcd_get_sgn(b); 2639 int invalid = (sgnb == 0); 2640 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2641 2642 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0); 2643 2644 for (i = 1; i < 8; i++) { 2645 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2646 2647 if (unlikely(invalid)) { 2648 break; 2649 } 2650 } 2651 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2652 2653 cr = bcd_cmp_zero(b); 2654 2655 if (ox_flag) { 2656 cr |= CRF_SO; 2657 } 2658 2659 if (unlikely(invalid)) { 2660 cr = CRF_SO; 2661 } 2662 2663 *r = ret; 2664 2665 return cr; 2666 } 2667 2668 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2669 { 2670 int i; 2671 int cr = 0; 2672 int invalid = 0; 2673 int zone_digit = 0; 2674 int zone_lead = ps ? 0xF : 0x3; 2675 int digit = 0; 2676 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2677 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4; 2678 2679 if (unlikely((sgnb < 0xA) && ps)) { 2680 invalid = 1; 2681 } 2682 2683 for (i = 0; i < 16; i++) { 2684 zone_digit = i ? 
b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead; 2685 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF; 2686 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2687 invalid = 1; 2688 break; 2689 } 2690 2691 bcd_put_digit(&ret, digit, i + 1); 2692 } 2693 2694 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2695 (!ps && (sgnb & 0x4))) { 2696 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2697 } else { 2698 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2699 } 2700 2701 cr = bcd_cmp_zero(&ret); 2702 2703 if (unlikely(invalid)) { 2704 cr = CRF_SO; 2705 } 2706 2707 *r = ret; 2708 2709 return cr; 2710 } 2711 2712 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2713 { 2714 int i; 2715 int cr = 0; 2716 uint8_t digit = 0; 2717 int sgnb = bcd_get_sgn(b); 2718 int zone_lead = (ps) ? 0xF0 : 0x30; 2719 int invalid = (sgnb == 0); 2720 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2721 2722 int ox_flag = ((b->VsrD(0) >> 4) != 0); 2723 2724 for (i = 0; i < 16; i++) { 2725 digit = bcd_get_digit(b, i + 1, &invalid); 2726 2727 if (unlikely(invalid)) { 2728 break; 2729 } 2730 2731 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit; 2732 } 2733 2734 if (ps) { 2735 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2736 } else { 2737 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1); 2738 } 2739 2740 cr = bcd_cmp_zero(b); 2741 2742 if (ox_flag) { 2743 cr |= CRF_SO; 2744 } 2745 2746 if (unlikely(invalid)) { 2747 cr = CRF_SO; 2748 } 2749 2750 *r = ret; 2751 2752 return cr; 2753 } 2754 2755 /** 2756 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs 2757 * 2758 * Returns: 2759 * > 0 if ahi|alo > bhi|blo, 2760 * 0 if ahi|alo == bhi|blo, 2761 * < 0 if ahi|alo < bhi|blo 2762 */ 2763 static inline int ucmp128(uint64_t alo, uint64_t ahi, 2764 uint64_t blo, uint64_t bhi) 2765 { 2766 return (ahi == bhi) ? 2767 (alo > blo ? 1 : (alo == blo ? 0 : -1)) : 2768 (ahi > bhi ? 1 : -1); 2769 } 2770 2771 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2772 { 2773 int i; 2774 int cr; 2775 uint64_t lo_value; 2776 uint64_t hi_value; 2777 uint64_t rem; 2778 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2779 2780 if (b->VsrSD(0) < 0) { 2781 lo_value = -b->VsrSD(1); 2782 hi_value = ~b->VsrD(0) + !lo_value; 2783 bcd_put_digit(&ret, 0xD, 0); 2784 2785 cr = CRF_LT; 2786 } else { 2787 lo_value = b->VsrD(1); 2788 hi_value = b->VsrD(0); 2789 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); 2790 2791 if (hi_value == 0 && lo_value == 0) { 2792 cr = CRF_EQ; 2793 } else { 2794 cr = CRF_GT; 2795 } 2796 } 2797 2798 /* 2799 * Check src limits: abs(src) <= 10^31 - 1 2800 * 2801 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff 2802 */ 2803 if (ucmp128(lo_value, hi_value, 2804 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) { 2805 cr |= CRF_SO; 2806 2807 /* 2808 * According to the ISA, if src wouldn't fit in the destination 2809 * register, the result is undefined. 2810 * In that case, we leave r unchanged. 
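* Otherwise the conversion below first divides the magnitude by 10^15:
* the remainder supplies the 15 least significant BCD digits
* (positions 1-15), and the quotient, which then fits in 64 bits,
* supplies digits 16-31.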
2811 */ 2812 } else { 2813 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL); 2814 2815 for (i = 1; i < 16; rem /= 10, i++) { 2816 bcd_put_digit(&ret, rem % 10, i); 2817 } 2818 2819 for (; i < 32; lo_value /= 10, i++) { 2820 bcd_put_digit(&ret, lo_value % 10, i); 2821 } 2822 2823 *r = ret; 2824 } 2825 2826 return cr; 2827 } 2828 2829 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2830 { 2831 uint8_t i; 2832 int cr; 2833 uint64_t carry; 2834 uint64_t unused; 2835 uint64_t lo_value; 2836 uint64_t hi_value = 0; 2837 int sgnb = bcd_get_sgn(b); 2838 int invalid = (sgnb == 0); 2839 2840 lo_value = bcd_get_digit(b, 31, &invalid); 2841 for (i = 30; i > 0; i--) { 2842 mulu64(&lo_value, &carry, lo_value, 10ULL); 2843 mulu64(&hi_value, &unused, hi_value, 10ULL); 2844 lo_value += bcd_get_digit(b, i, &invalid); 2845 hi_value += carry; 2846 2847 if (unlikely(invalid)) { 2848 break; 2849 } 2850 } 2851 2852 if (sgnb == -1) { 2853 r->VsrSD(1) = -lo_value; 2854 r->VsrSD(0) = ~hi_value + !r->VsrSD(1); 2855 } else { 2856 r->VsrSD(1) = lo_value; 2857 r->VsrSD(0) = hi_value; 2858 } 2859 2860 cr = bcd_cmp_zero(b); 2861 2862 if (unlikely(invalid)) { 2863 cr = CRF_SO; 2864 } 2865 2866 return cr; 2867 } 2868 2869 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2870 { 2871 int i; 2872 int invalid = 0; 2873 2874 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 2875 return CRF_SO; 2876 } 2877 2878 *r = *a; 2879 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0); 2880 2881 for (i = 1; i < 32; i++) { 2882 bcd_get_digit(a, i, &invalid); 2883 bcd_get_digit(b, i, &invalid); 2884 if (unlikely(invalid)) { 2885 return CRF_SO; 2886 } 2887 } 2888 2889 return bcd_cmp_zero(r); 2890 } 2891 2892 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2893 { 2894 int sgnb = bcd_get_sgn(b); 2895 2896 *r = *b; 2897 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 2898 2899 if (bcd_is_valid(b) == false) { 2900 return CRF_SO; 2901 } 2902 2903 return bcd_cmp_zero(r); 2904 } 2905 2906 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2907 { 2908 int cr; 2909 int i = a->VsrSB(7); 2910 bool ox_flag = false; 2911 int sgnb = bcd_get_sgn(b); 2912 ppc_avr_t ret = *b; 2913 ret.VsrD(1) &= ~0xf; 2914 2915 if (bcd_is_valid(b) == false) { 2916 return CRF_SO; 2917 } 2918 2919 if (unlikely(i > 31)) { 2920 i = 31; 2921 } else if (unlikely(i < -31)) { 2922 i = -31; 2923 } 2924 2925 if (i > 0) { 2926 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2927 } else { 2928 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2929 } 2930 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2931 2932 *r = ret; 2933 2934 cr = bcd_cmp_zero(r); 2935 if (ox_flag) { 2936 cr |= CRF_SO; 2937 } 2938 2939 return cr; 2940 } 2941 2942 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2943 { 2944 int cr; 2945 int i; 2946 int invalid = 0; 2947 bool ox_flag = false; 2948 ppc_avr_t ret = *b; 2949 2950 for (i = 0; i < 32; i++) { 2951 bcd_get_digit(b, i, &invalid); 2952 2953 if (unlikely(invalid)) { 2954 return CRF_SO; 2955 } 2956 } 2957 2958 i = a->VsrSB(7); 2959 if (i >= 32) { 2960 ox_flag = true; 2961 ret.VsrD(1) = ret.VsrD(0) = 0; 2962 } else if (i <= -32) { 2963 ret.VsrD(1) = ret.VsrD(0) = 0; 2964 } else if (i > 0) { 2965 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2966 } else { 2967 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2968 } 2969 *r = ret; 2970 2971 cr = bcd_cmp_zero(r); 2972 if (ox_flag) { 2973 cr |= CRF_SO; 2974 } 2975 2976 return 
cr; 2977 } 2978 2979 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2980 { 2981 int cr; 2982 int unused = 0; 2983 int invalid = 0; 2984 bool ox_flag = false; 2985 int sgnb = bcd_get_sgn(b); 2986 ppc_avr_t ret = *b; 2987 ret.VsrD(1) &= ~0xf; 2988 2989 int i = a->VsrSB(7); 2990 ppc_avr_t bcd_one; 2991 2992 bcd_one.VsrD(0) = 0; 2993 bcd_one.VsrD(1) = 0x10; 2994 2995 if (bcd_is_valid(b) == false) { 2996 return CRF_SO; 2997 } 2998 2999 if (unlikely(i > 31)) { 3000 i = 31; 3001 } else if (unlikely(i < -31)) { 3002 i = -31; 3003 } 3004 3005 if (i > 0) { 3006 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 3007 } else { 3008 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 3009 3010 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 3011 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 3012 } 3013 } 3014 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 3015 3016 cr = bcd_cmp_zero(&ret); 3017 if (ox_flag) { 3018 cr |= CRF_SO; 3019 } 3020 *r = ret; 3021 3022 return cr; 3023 } 3024 3025 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3026 { 3027 uint64_t mask; 3028 uint32_t ox_flag = 0; 3029 int i = a->VsrSH(3) + 1; 3030 ppc_avr_t ret = *b; 3031 3032 if (bcd_is_valid(b) == false) { 3033 return CRF_SO; 3034 } 3035 3036 if (i > 16 && i < 32) { 3037 mask = (uint64_t)-1 >> (128 - i * 4); 3038 if (ret.VsrD(0) & ~mask) { 3039 ox_flag = CRF_SO; 3040 } 3041 3042 ret.VsrD(0) &= mask; 3043 } else if (i >= 0 && i <= 16) { 3044 mask = (uint64_t)-1 >> (64 - i * 4); 3045 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 3046 ox_flag = CRF_SO; 3047 } 3048 3049 ret.VsrD(1) &= mask; 3050 ret.VsrD(0) = 0; 3051 } 3052 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 3053 *r = ret; 3054 3055 return bcd_cmp_zero(&ret) | ox_flag; 3056 } 3057 3058 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3059 { 3060 int i; 3061 uint64_t mask; 3062 uint32_t ox_flag = 0; 3063 int invalid = 0; 3064 ppc_avr_t ret = *b; 3065 3066 for (i = 0; i < 32; i++) { 3067 bcd_get_digit(b, i, &invalid); 3068 3069 if (unlikely(invalid)) { 3070 return CRF_SO; 3071 } 3072 } 3073 3074 i = a->VsrSH(3); 3075 if (i > 16 && i < 33) { 3076 mask = (uint64_t)-1 >> (128 - i * 4); 3077 if (ret.VsrD(0) & ~mask) { 3078 ox_flag = CRF_SO; 3079 } 3080 3081 ret.VsrD(0) &= mask; 3082 } else if (i > 0 && i <= 16) { 3083 mask = (uint64_t)-1 >> (64 - i * 4); 3084 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 3085 ox_flag = CRF_SO; 3086 } 3087 3088 ret.VsrD(1) &= mask; 3089 ret.VsrD(0) = 0; 3090 } else if (i == 0) { 3091 if (ret.VsrD(0) || ret.VsrD(1)) { 3092 ox_flag = CRF_SO; 3093 } 3094 ret.VsrD(0) = ret.VsrD(1) = 0; 3095 } 3096 3097 *r = ret; 3098 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) { 3099 return ox_flag | CRF_EQ; 3100 } 3101 3102 return ox_flag | CRF_GT; 3103 } 3104 3105 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 3106 { 3107 int i; 3108 VECTOR_FOR_INORDER_I(i, u8) { 3109 r->u8[i] = AES_sbox[a->u8[i]]; 3110 } 3111 } 3112 3113 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3114 { 3115 ppc_avr_t result; 3116 int i; 3117 3118 VECTOR_FOR_INORDER_I(i, u32) { 3119 result.VsrW(i) = b->VsrW(i) ^ 3120 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^ 3121 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^ 3122 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^ 3123 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]); 3124 } 3125 *r = result; 3126 } 3127 3128 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3129 { 3130 ppc_avr_t result; 3131 int i; 3132 3133 
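/*
 * This is the final AES encryption round: SubBytes and ShiftRows followed
 * by the round-key XOR, with no MixColumns step, which is why the plain
 * AES_sbox/AES_shifts tables are used here rather than the combined Te
 * tables used by vcipher above.
 */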
VECTOR_FOR_INORDER_I(i, u8) { 3134 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]); 3135 } 3136 *r = result; 3137 } 3138 3139 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3140 { 3141 /* This differs from what is written in ISA V2.07. The RTL is */ 3142 /* incorrect and will be fixed in V2.07B. */ 3143 int i; 3144 ppc_avr_t tmp; 3145 3146 VECTOR_FOR_INORDER_I(i, u8) { 3147 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])]; 3148 } 3149 3150 VECTOR_FOR_INORDER_I(i, u32) { 3151 r->VsrW(i) = 3152 AES_imc[tmp.VsrB(4 * i + 0)][0] ^ 3153 AES_imc[tmp.VsrB(4 * i + 1)][1] ^ 3154 AES_imc[tmp.VsrB(4 * i + 2)][2] ^ 3155 AES_imc[tmp.VsrB(4 * i + 3)][3]; 3156 } 3157 } 3158 3159 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3160 { 3161 ppc_avr_t result; 3162 int i; 3163 3164 VECTOR_FOR_INORDER_I(i, u8) { 3165 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]); 3166 } 3167 *r = result; 3168 } 3169 3170 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3171 { 3172 int st = (st_six & 0x10) != 0; 3173 int six = st_six & 0xF; 3174 int i; 3175 3176 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 3177 if (st == 0) { 3178 if ((six & (0x8 >> i)) == 0) { 3179 r->VsrW(i) = ror32(a->VsrW(i), 7) ^ 3180 ror32(a->VsrW(i), 18) ^ 3181 (a->VsrW(i) >> 3); 3182 } else { /* six.bit[i] == 1 */ 3183 r->VsrW(i) = ror32(a->VsrW(i), 17) ^ 3184 ror32(a->VsrW(i), 19) ^ 3185 (a->VsrW(i) >> 10); 3186 } 3187 } else { /* st == 1 */ 3188 if ((six & (0x8 >> i)) == 0) { 3189 r->VsrW(i) = ror32(a->VsrW(i), 2) ^ 3190 ror32(a->VsrW(i), 13) ^ 3191 ror32(a->VsrW(i), 22); 3192 } else { /* six.bit[i] == 1 */ 3193 r->VsrW(i) = ror32(a->VsrW(i), 6) ^ 3194 ror32(a->VsrW(i), 11) ^ 3195 ror32(a->VsrW(i), 25); 3196 } 3197 } 3198 } 3199 } 3200 3201 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3202 { 3203 int st = (st_six & 0x10) != 0; 3204 int six = st_six & 0xF; 3205 int i; 3206 3207 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 3208 if (st == 0) { 3209 if ((six & (0x8 >> (2 * i))) == 0) { 3210 r->VsrD(i) = ror64(a->VsrD(i), 1) ^ 3211 ror64(a->VsrD(i), 8) ^ 3212 (a->VsrD(i) >> 7); 3213 } else { /* six.bit[2*i] == 1 */ 3214 r->VsrD(i) = ror64(a->VsrD(i), 19) ^ 3215 ror64(a->VsrD(i), 61) ^ 3216 (a->VsrD(i) >> 6); 3217 } 3218 } else { /* st == 1 */ 3219 if ((six & (0x8 >> (2 * i))) == 0) { 3220 r->VsrD(i) = ror64(a->VsrD(i), 28) ^ 3221 ror64(a->VsrD(i), 34) ^ 3222 ror64(a->VsrD(i), 39); 3223 } else { /* six.bit[2*i] == 1 */ 3224 r->VsrD(i) = ror64(a->VsrD(i), 14) ^ 3225 ror64(a->VsrD(i), 18) ^ 3226 ror64(a->VsrD(i), 41); 3227 } 3228 } 3229 } 3230 } 3231 3232 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 3233 { 3234 ppc_avr_t result; 3235 int i; 3236 3237 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 3238 int indexA = c->VsrB(i) >> 4; 3239 int indexB = c->VsrB(i) & 0xF; 3240 3241 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB); 3242 } 3243 *r = result; 3244 } 3245 3246 #undef VECTOR_FOR_INORDER_I 3247 3248 /*****************************************************************************/ 3249 /* SPE extension helpers */ 3250 /* Use a table to make this quicker */ 3251 static const uint8_t hbrev[16] = { 3252 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE, 3253 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF, 3254 }; 3255 3256 static inline uint8_t byte_reverse(uint8_t val) 3257 { 3258 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4); 3259 } 3260 3261 static inline uint32_t word_reverse(uint32_t val) 3262 { 3263 return 
byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) | 3264 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24); 3265 } 3266 3267 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */ 3268 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2) 3269 { 3270 uint32_t a, b, d, mask; 3271 3272 mask = UINT32_MAX >> (32 - MASKBITS); 3273 a = arg1 & mask; 3274 b = arg2 & mask; 3275 d = word_reverse(1 + word_reverse(a | ~b)); 3276 return (arg1 & ~mask) | (d & b); 3277 } 3278 3279 uint32_t helper_cntlsw32(uint32_t val) 3280 { 3281 if (val & 0x80000000) { 3282 return clz32(~val); 3283 } else { 3284 return clz32(val); 3285 } 3286 } 3287 3288 uint32_t helper_cntlzw32(uint32_t val) 3289 { 3290 return clz32(val); 3291 } 3292 3293 /* 440 specific */ 3294 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 3295 target_ulong low, uint32_t update_Rc) 3296 { 3297 target_ulong mask; 3298 int i; 3299 3300 i = 1; 3301 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3302 if ((high & mask) == 0) { 3303 if (update_Rc) { 3304 env->crf[0] = 0x4; 3305 } 3306 goto done; 3307 } 3308 i++; 3309 } 3310 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3311 if ((low & mask) == 0) { 3312 if (update_Rc) { 3313 env->crf[0] = 0x8; 3314 } 3315 goto done; 3316 } 3317 i++; 3318 } 3319 i = 8; 3320 if (update_Rc) { 3321 env->crf[0] = 0x2; 3322 } 3323 done: 3324 env->xer = (env->xer & ~0x7F) | i; 3325 if (update_Rc) { 3326 env->crf[0] |= xer_so; 3327 } 3328 return i; 3329 } 3330