/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"
#include "tcg/tcg-gvec-desc.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    if (unlikely(rb == 0 || ra >= rb)) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divu128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    uint64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = 0;

    if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divs128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
/*
 * Subtract 1 from each byte, AND with the inverse of the original value,
 * and check whether the MSB is set in each byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n)  (haszero((x) ^ pattern(n)))

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

/*
 * Return a random number.
 */
uint64_t helper_darn32(void)
{
    Error *err = NULL;
    uint32_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_darn64(void)
{
    Error *err = NULL;
    uint64_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & PPC_BIT(index)) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca32 = env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca32 = env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}
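
/*
 * A worked example of the byte-wise SWAR folding used above (illustration
 * only, not from the ISA text): popcntb is the classic partial-sums
 * population count, stopped after the 4-bit step so that each byte ends up
 * holding the count of bits set in that byte. Tracing one byte, 0xB5
 * (1011 0101, five bits set):
 *
 *   (0xB5 & 0x55) + ((0xB5 >> 1) & 0x55) = 0x65    2-bit partial sums
 *   (0x65 & 0x33) + ((0x65 >> 2) & 0x33) = 0x32    4-bit partial sums
 *   (0x32 & 0x0f) + ((0x32 >> 4) & 0x0f) = 0x05    per-byte count = 5
 *
 * helper_popcntw below continues the same folding for two more steps
 * (8-bit and 16-bit) so that each 32-bit word holds its own count.
 */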
target_ulong helper_popcntw(target_ulong val) 303 { 304 /* Note that we don't fold past words. */ 305 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 306 0x5555555555555555ULL); 307 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 308 0x3333333333333333ULL); 309 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 310 0x0f0f0f0f0f0f0f0fULL); 311 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 312 0x00ff00ff00ff00ffULL); 313 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 314 0x0000ffff0000ffffULL); 315 return val; 316 } 317 #else 318 target_ulong helper_popcntb(target_ulong val) 319 { 320 /* Note that we don't fold past bytes */ 321 val = (val & 0x55555555) + ((val >> 1) & 0x55555555); 322 val = (val & 0x33333333) + ((val >> 2) & 0x33333333); 323 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); 324 return val; 325 } 326 #endif 327 328 uint64_t helper_CFUGED(uint64_t src, uint64_t mask) 329 { 330 /* 331 * Instead of processing the mask bit-by-bit from the most significant to 332 * the least significant bit, as described in PowerISA, we'll handle it in 333 * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use 334 * ctz or cto, we negate the mask at the end of the loop. 335 */ 336 target_ulong m, left = 0, right = 0; 337 unsigned int n, i = 64; 338 bool bit = false; /* tracks if we are processing zeros or ones */ 339 340 if (mask == 0 || mask == -1) { 341 return src; 342 } 343 344 /* Processes the mask in blocks, from LSB to MSB */ 345 while (i) { 346 /* Find how many bits we should take */ 347 n = ctz64(mask); 348 if (n > i) { 349 n = i; 350 } 351 352 /* 353 * Extracts 'n' trailing bits of src and put them on the leading 'n' 354 * bits of 'right' or 'left', pushing down the previously extracted 355 * values. 356 */ 357 m = (1ll << n) - 1; 358 if (bit) { 359 right = ror64(right | (src & m), n); 360 } else { 361 left = ror64(left | (src & m), n); 362 } 363 364 /* 365 * Discards the processed bits from 'src' and 'mask'. Note that we are 366 * removing 'n' trailing zeros from 'mask', but the logical shift will 367 * add 'n' leading zeros back, so the population count of 'mask' is kept 368 * the same. 369 */ 370 src >>= n; 371 mask >>= n; 372 i -= n; 373 bit = !bit; 374 mask = ~mask; 375 } 376 377 /* 378 * At the end, right was ror'ed ctpop(mask) times. To put it back in place, 379 * we'll shift it more 64-ctpop(mask) times. 
380 */ 381 if (bit) { 382 n = ctpop64(mask); 383 } else { 384 n = 64 - ctpop64(mask); 385 } 386 387 return left | (right >> n); 388 } 389 390 uint64_t helper_PDEPD(uint64_t src, uint64_t mask) 391 { 392 int i, o; 393 uint64_t result = 0; 394 395 if (mask == -1) { 396 return src; 397 } 398 399 for (i = 0; mask != 0; i++) { 400 o = ctz64(mask); 401 mask &= mask - 1; 402 result |= ((src >> i) & 1) << o; 403 } 404 405 return result; 406 } 407 408 uint64_t helper_PEXTD(uint64_t src, uint64_t mask) 409 { 410 int i, o; 411 uint64_t result = 0; 412 413 if (mask == -1) { 414 return src; 415 } 416 417 for (o = 0; mask != 0; o++) { 418 i = ctz64(mask); 419 mask &= mask - 1; 420 result |= ((src >> i) & 1) << o; 421 } 422 423 return result; 424 } 425 426 /*****************************************************************************/ 427 /* Altivec extension helpers */ 428 #if HOST_BIG_ENDIAN 429 #define VECTOR_FOR_INORDER_I(index, element) \ 430 for (index = 0; index < ARRAY_SIZE(r->element); index++) 431 #else 432 #define VECTOR_FOR_INORDER_I(index, element) \ 433 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--) 434 #endif 435 436 /* Saturating arithmetic helpers. */ 437 #define SATCVT(from, to, from_type, to_type, min, max) \ 438 static inline to_type cvt##from##to(from_type x, int *sat) \ 439 { \ 440 to_type r; \ 441 \ 442 if (x < (from_type)min) { \ 443 r = min; \ 444 *sat = 1; \ 445 } else if (x > (from_type)max) { \ 446 r = max; \ 447 *sat = 1; \ 448 } else { \ 449 r = x; \ 450 } \ 451 return r; \ 452 } 453 #define SATCVTU(from, to, from_type, to_type, min, max) \ 454 static inline to_type cvt##from##to(from_type x, int *sat) \ 455 { \ 456 to_type r; \ 457 \ 458 if (x > (from_type)max) { \ 459 r = max; \ 460 *sat = 1; \ 461 } else { \ 462 r = x; \ 463 } \ 464 return r; \ 465 } 466 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX) 467 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX) 468 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX) 469 470 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX) 471 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX) 472 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX) 473 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX) 474 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX) 475 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX) 476 #undef SATCVT 477 #undef SATCVTU 478 479 void helper_mtvscr(CPUPPCState *env, uint32_t vscr) 480 { 481 ppc_store_vscr(env, vscr); 482 } 483 484 uint32_t helper_mfvscr(CPUPPCState *env) 485 { 486 return ppc_get_vscr(env); 487 } 488 489 static inline void set_vscr_sat(CPUPPCState *env) 490 { 491 /* The choice of non-zero value is arbitrary. 
*/ 492 env->vscr_sat.u32[0] = 1; 493 } 494 495 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 496 { 497 int i; 498 499 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 500 r->u32[i] = ~a->u32[i] < b->u32[i]; 501 } 502 } 503 504 /* vprtybw */ 505 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b) 506 { 507 int i; 508 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 509 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16); 510 res ^= res >> 8; 511 r->u32[i] = res & 1; 512 } 513 } 514 515 /* vprtybd */ 516 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b) 517 { 518 int i; 519 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 520 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32); 521 res ^= res >> 16; 522 res ^= res >> 8; 523 r->u64[i] = res & 1; 524 } 525 } 526 527 /* vprtybq */ 528 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b) 529 { 530 uint64_t res = b->u64[0] ^ b->u64[1]; 531 res ^= res >> 32; 532 res ^= res >> 16; 533 res ^= res >> 8; 534 r->VsrD(1) = res & 1; 535 r->VsrD(0) = 0; 536 } 537 538 #define VARITHFP(suffix, func) \ 539 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 540 ppc_avr_t *b) \ 541 { \ 542 int i; \ 543 \ 544 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 545 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 546 } \ 547 } 548 VARITHFP(addfp, float32_add) 549 VARITHFP(subfp, float32_sub) 550 VARITHFP(minfp, float32_min) 551 VARITHFP(maxfp, float32_max) 552 #undef VARITHFP 553 554 #define VARITHFPFMA(suffix, type) \ 555 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 556 ppc_avr_t *b, ppc_avr_t *c) \ 557 { \ 558 int i; \ 559 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 560 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 561 type, &env->vec_status); \ 562 } \ 563 } 564 VARITHFPFMA(maddfp, 0); 565 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 566 #undef VARITHFPFMA 567 568 #define VARITHSAT_CASE(type, op, cvt, element) \ 569 { \ 570 type result = (type)a->element[i] op (type)b->element[i]; \ 571 r->element[i] = cvt(result, &sat); \ 572 } 573 574 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 575 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 576 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 577 { \ 578 int sat = 0; \ 579 int i; \ 580 \ 581 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 582 VARITHSAT_CASE(optype, op, cvt, element); \ 583 } \ 584 if (sat) { \ 585 vscr_sat->u32[0] = 1; \ 586 } \ 587 } 588 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 589 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 590 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 591 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 592 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 593 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 594 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 595 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 596 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 597 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 598 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 599 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 600 #undef VARITHSAT_CASE 601 #undef VARITHSAT_DO 602 #undef VARITHSAT_SIGNED 603 #undef VARITHSAT_UNSIGNED 604 605 #define VAVG_DO(name, element, etype) \ 606 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 607 { \ 608 int i; \ 609 \ 610 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 611 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 612 r->element[i] = x >> 1; \ 613 } \ 614 } 615 616 #define VAVG(type, 
signed_element, signed_type, unsigned_element, \ 617 unsigned_type) \ 618 VAVG_DO(avgs##type, signed_element, signed_type) \ 619 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 620 VAVG(b, s8, int16_t, u8, uint16_t) 621 VAVG(h, s16, int32_t, u16, uint32_t) 622 VAVG(w, s32, int64_t, u32, uint64_t) 623 #undef VAVG_DO 624 #undef VAVG 625 626 #define VABSDU_DO(name, element) \ 627 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 628 { \ 629 int i; \ 630 \ 631 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 632 r->element[i] = (a->element[i] > b->element[i]) ? \ 633 (a->element[i] - b->element[i]) : \ 634 (b->element[i] - a->element[i]); \ 635 } \ 636 } 637 638 /* 639 * VABSDU - Vector absolute difference unsigned 640 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 641 * element - element type to access from vector 642 */ 643 #define VABSDU(type, element) \ 644 VABSDU_DO(absdu##type, element) 645 VABSDU(b, u8) 646 VABSDU(h, u16) 647 VABSDU(w, u32) 648 #undef VABSDU_DO 649 #undef VABSDU 650 651 #define VCF(suffix, cvt, element) \ 652 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 653 ppc_avr_t *b, uint32_t uim) \ 654 { \ 655 int i; \ 656 \ 657 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 658 float32 t = cvt(b->element[i], &env->vec_status); \ 659 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 660 } \ 661 } 662 VCF(ux, uint32_to_float32, u32) 663 VCF(sx, int32_to_float32, s32) 664 #undef VCF 665 666 #define VCMPNEZ(NAME, ELEM) \ 667 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \ 668 { \ 669 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \ 670 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \ 671 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \ 672 } \ 673 } 674 VCMPNEZ(VCMPNEZB, u8) 675 VCMPNEZ(VCMPNEZH, u16) 676 VCMPNEZ(VCMPNEZW, u32) 677 #undef VCMPNEZ 678 679 #define VCMPFP_DO(suffix, compare, order, record) \ 680 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 681 ppc_avr_t *a, ppc_avr_t *b) \ 682 { \ 683 uint32_t ones = (uint32_t)-1; \ 684 uint32_t all = ones; \ 685 uint32_t none = 0; \ 686 int i; \ 687 \ 688 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 689 uint32_t result; \ 690 FloatRelation rel = \ 691 float32_compare_quiet(a->f32[i], b->f32[i], \ 692 &env->vec_status); \ 693 if (rel == float_relation_unordered) { \ 694 result = 0; \ 695 } else if (rel compare order) { \ 696 result = ones; \ 697 } else { \ 698 result = 0; \ 699 } \ 700 r->u32[i] = result; \ 701 all &= result; \ 702 none |= result; \ 703 } \ 704 if (record) { \ 705 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 706 } \ 707 } 708 #define VCMPFP(suffix, compare, order) \ 709 VCMPFP_DO(suffix, compare, order, 0) \ 710 VCMPFP_DO(suffix##_dot, compare, order, 1) 711 VCMPFP(eqfp, ==, float_relation_equal) 712 VCMPFP(gefp, !=, float_relation_less) 713 VCMPFP(gtfp, ==, float_relation_greater) 714 #undef VCMPFP_DO 715 #undef VCMPFP 716 717 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 718 ppc_avr_t *a, ppc_avr_t *b, int record) 719 { 720 int i; 721 int all_in = 0; 722 723 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 724 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 725 &env->vec_status); 726 if (le_rel == float_relation_unordered) { 727 r->u32[i] = 0xc0000000; 728 all_in = 1; 729 } else { 730 float32 bneg = float32_chs(b->f32[i]); 731 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, 732 &env->vec_status); 733 int le = le_rel != float_relation_greater; 734 int ge = 
ge_rel != float_relation_less; 735 736 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 737 all_in |= (!le | !ge); 738 } 739 } 740 if (record) { 741 env->crf[6] = (all_in == 0) << 1; 742 } 743 } 744 745 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 746 { 747 vcmpbfp_internal(env, r, a, b, 0); 748 } 749 750 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 751 ppc_avr_t *b) 752 { 753 vcmpbfp_internal(env, r, a, b, 1); 754 } 755 756 #define VCT(suffix, satcvt, element) \ 757 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 758 ppc_avr_t *b, uint32_t uim) \ 759 { \ 760 int i; \ 761 int sat = 0; \ 762 float_status s = env->vec_status; \ 763 \ 764 set_float_rounding_mode(float_round_to_zero, &s); \ 765 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 766 if (float32_is_any_nan(b->f32[i])) { \ 767 r->element[i] = 0; \ 768 } else { \ 769 float64 t = float32_to_float64(b->f32[i], &s); \ 770 int64_t j; \ 771 \ 772 t = float64_scalbn(t, uim, &s); \ 773 j = float64_to_int64(t, &s); \ 774 r->element[i] = satcvt(j, &sat); \ 775 } \ 776 } \ 777 if (sat) { \ 778 set_vscr_sat(env); \ 779 } \ 780 } 781 VCT(uxs, cvtsduw, u32) 782 VCT(sxs, cvtsdsw, s32) 783 #undef VCT 784 785 typedef int64_t do_ger(uint32_t, uint32_t, uint32_t); 786 787 static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask) 788 { 789 int64_t psum = 0; 790 for (int i = 0; i < 8; i++, mask >>= 1) { 791 if (mask & 1) { 792 psum += sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4); 793 } 794 } 795 return psum; 796 } 797 798 static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask) 799 { 800 int64_t psum = 0; 801 for (int i = 0; i < 4; i++, mask >>= 1) { 802 if (mask & 1) { 803 psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8); 804 } 805 } 806 return psum; 807 } 808 809 static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask) 810 { 811 int64_t psum = 0; 812 for (int i = 0; i < 2; i++, mask >>= 1) { 813 if (mask & 1) { 814 psum += sextract32(a, 16 * i, 16) * sextract32(b, 16 * i, 16); 815 } 816 } 817 return psum; 818 } 819 820 static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at, 821 uint32_t mask, bool sat, bool acc, do_ger ger) 822 { 823 uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK), 824 xmsk = FIELD_EX32(mask, GER_MSK, XMSK), 825 ymsk = FIELD_EX32(mask, GER_MSK, YMSK); 826 uint8_t xmsk_bit, ymsk_bit; 827 int64_t psum; 828 int i, j; 829 for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) { 830 for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) { 831 if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) { 832 psum = ger(a->VsrW(i), b->VsrW(j), pmsk); 833 if (acc) { 834 psum += at[i].VsrSW(j); 835 } 836 if (sat && psum > INT32_MAX) { 837 set_vscr_sat(env); 838 at[i].VsrSW(j) = INT32_MAX; 839 } else if (sat && psum < INT32_MIN) { 840 set_vscr_sat(env); 841 at[i].VsrSW(j) = INT32_MIN; 842 } else { 843 at[i].VsrSW(j) = (int32_t) psum; 844 } 845 } else { 846 at[i].VsrSW(j) = 0; 847 } 848 } 849 } 850 } 851 852 QEMU_FLATTEN 853 void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 854 ppc_acc_t *at, uint32_t mask) 855 { 856 xviger(env, a, b, at, mask, false, false, ger_rank8); 857 } 858 859 QEMU_FLATTEN 860 void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 861 ppc_acc_t *at, uint32_t mask) 862 { 863 xviger(env, a, b, at, mask, false, true, ger_rank8); 864 } 865 866 QEMU_FLATTEN 867 void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 868 ppc_acc_t *at, uint32_t mask) 869 { 870 
xviger(env, a, b, at, mask, false, false, ger_rank4); 871 } 872 873 QEMU_FLATTEN 874 void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 875 ppc_acc_t *at, uint32_t mask) 876 { 877 xviger(env, a, b, at, mask, false, true, ger_rank4); 878 } 879 880 QEMU_FLATTEN 881 void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 882 ppc_acc_t *at, uint32_t mask) 883 { 884 xviger(env, a, b, at, mask, true, true, ger_rank4); 885 } 886 887 QEMU_FLATTEN 888 void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 889 ppc_acc_t *at, uint32_t mask) 890 { 891 xviger(env, a, b, at, mask, false, false, ger_rank2); 892 } 893 894 QEMU_FLATTEN 895 void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 896 ppc_acc_t *at, uint32_t mask) 897 { 898 xviger(env, a, b, at, mask, true, false, ger_rank2); 899 } 900 901 QEMU_FLATTEN 902 void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 903 ppc_acc_t *at, uint32_t mask) 904 { 905 xviger(env, a, b, at, mask, false, true, ger_rank2); 906 } 907 908 QEMU_FLATTEN 909 void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 910 ppc_acc_t *at, uint32_t mask) 911 { 912 xviger(env, a, b, at, mask, true, true, ger_rank2); 913 } 914 915 target_ulong helper_vclzlsbb(ppc_avr_t *r) 916 { 917 target_ulong count = 0; 918 int i; 919 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 920 if (r->VsrB(i) & 0x01) { 921 break; 922 } 923 count++; 924 } 925 return count; 926 } 927 928 target_ulong helper_vctzlsbb(ppc_avr_t *r) 929 { 930 target_ulong count = 0; 931 int i; 932 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 933 if (r->VsrB(i) & 0x01) { 934 break; 935 } 936 count++; 937 } 938 return count; 939 } 940 941 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 942 ppc_avr_t *b, ppc_avr_t *c) 943 { 944 int sat = 0; 945 int i; 946 947 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 948 int32_t prod = a->s16[i] * b->s16[i]; 949 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 950 951 r->s16[i] = cvtswsh(t, &sat); 952 } 953 954 if (sat) { 955 set_vscr_sat(env); 956 } 957 } 958 959 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 960 ppc_avr_t *b, ppc_avr_t *c) 961 { 962 int sat = 0; 963 int i; 964 965 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 966 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 967 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 968 r->s16[i] = cvtswsh(t, &sat); 969 } 970 971 if (sat) { 972 set_vscr_sat(env); 973 } 974 } 975 976 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 977 { 978 int i; 979 980 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 981 int32_t prod = a->s16[i] * b->s16[i]; 982 r->s16[i] = (int16_t) (prod + c->s16[i]); 983 } 984 } 985 986 #define VMRG_DO(name, element, access, ofs) \ 987 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 988 { \ 989 ppc_avr_t result; \ 990 int i, half = ARRAY_SIZE(r->element) / 2; \ 991 \ 992 for (i = 0; i < half; i++) { \ 993 result.access(i * 2 + 0) = a->access(i + ofs); \ 994 result.access(i * 2 + 1) = b->access(i + ofs); \ 995 } \ 996 *r = result; \ 997 } 998 999 #define VMRG(suffix, element, access) \ 1000 VMRG_DO(mrgl##suffix, element, access, half) \ 1001 VMRG_DO(mrgh##suffix, element, access, 0) 1002 VMRG(b, u8, VsrB) 1003 VMRG(h, u16, VsrH) 1004 VMRG(w, u32, VsrW) 1005 #undef VMRG_DO 1006 #undef VMRG 1007 1008 void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1009 { 1010 int32_t prod[16]; 1011 int i; 1012 1013 for (i = 0; i < ARRAY_SIZE(r->s8); 
i++) { 1014 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 1015 } 1016 1017 VECTOR_FOR_INORDER_I(i, s32) { 1018 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 1019 prod[4 * i + 2] + prod[4 * i + 3]; 1020 } 1021 } 1022 1023 void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1024 { 1025 int32_t prod[8]; 1026 int i; 1027 1028 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1029 prod[i] = a->s16[i] * b->s16[i]; 1030 } 1031 1032 VECTOR_FOR_INORDER_I(i, s32) { 1033 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1034 } 1035 } 1036 1037 void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1038 ppc_avr_t *b, ppc_avr_t *c) 1039 { 1040 int32_t prod[8]; 1041 int i; 1042 int sat = 0; 1043 1044 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1045 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1046 } 1047 1048 VECTOR_FOR_INORDER_I(i, s32) { 1049 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1050 1051 r->u32[i] = cvtsdsw(t, &sat); 1052 } 1053 1054 if (sat) { 1055 set_vscr_sat(env); 1056 } 1057 } 1058 1059 void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1060 { 1061 uint16_t prod[16]; 1062 int i; 1063 1064 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1065 prod[i] = a->u8[i] * b->u8[i]; 1066 } 1067 1068 VECTOR_FOR_INORDER_I(i, u32) { 1069 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1070 prod[4 * i + 2] + prod[4 * i + 3]; 1071 } 1072 } 1073 1074 void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1075 { 1076 uint32_t prod[8]; 1077 int i; 1078 1079 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1080 prod[i] = a->u16[i] * b->u16[i]; 1081 } 1082 1083 VECTOR_FOR_INORDER_I(i, u32) { 1084 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1085 } 1086 } 1087 1088 void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1089 ppc_avr_t *b, ppc_avr_t *c) 1090 { 1091 uint32_t prod[8]; 1092 int i; 1093 int sat = 0; 1094 1095 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1096 prod[i] = a->u16[i] * b->u16[i]; 1097 } 1098 1099 VECTOR_FOR_INORDER_I(i, s32) { 1100 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1101 1102 r->u32[i] = cvtuduw(t, &sat); 1103 } 1104 1105 if (sat) { 1106 set_vscr_sat(env); 1107 } 1108 } 1109 1110 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 1111 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1112 { \ 1113 int i; \ 1114 \ 1115 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1116 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 1117 (cast)b->mul_access(i); \ 1118 } \ 1119 } 1120 1121 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 1122 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1123 { \ 1124 int i; \ 1125 \ 1126 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1127 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1128 (cast)b->mul_access(i + 1); \ 1129 } \ 1130 } 1131 1132 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1133 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \ 1134 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast) 1135 VMUL(SB, s8, VsrSB, VsrSH, int16_t) 1136 VMUL(SH, s16, VsrSH, VsrSW, int32_t) 1137 VMUL(SW, s32, VsrSW, VsrSD, int64_t) 1138 VMUL(UB, u8, VsrB, VsrH, uint16_t) 1139 VMUL(UH, u16, VsrH, VsrW, uint32_t) 1140 VMUL(UW, u32, VsrW, VsrD, uint64_t) 1141 #undef VMUL_DO_EVN 1142 #undef VMUL_DO_ODD 1143 #undef VMUL 1144 1145 void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t 
*s0, ppc_vsr_t *s1, ppc_vsr_t *pcv, 1146 target_ulong uim) 1147 { 1148 int i, idx; 1149 ppc_vsr_t tmp = { .u64 = {0, 0} }; 1150 1151 for (i = 0; i < ARRAY_SIZE(t->u8); i++) { 1152 if ((pcv->VsrB(i) >> 5) == uim) { 1153 idx = pcv->VsrB(i) & 0x1f; 1154 if (idx < ARRAY_SIZE(t->u8)) { 1155 tmp.VsrB(i) = s0->VsrB(idx); 1156 } else { 1157 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8)); 1158 } 1159 } 1160 } 1161 1162 *t = tmp; 1163 } 1164 1165 void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1166 { 1167 Int128 neg1 = int128_makes64(-1); 1168 Int128 int128_min = int128_make128(0, INT64_MIN); 1169 if (likely(int128_nz(b->s128) && 1170 (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) { 1171 t->s128 = int128_divs(a->s128, b->s128); 1172 } else { 1173 t->s128 = a->s128; /* Undefined behavior */ 1174 } 1175 } 1176 1177 void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1178 { 1179 if (int128_nz(b->s128)) { 1180 t->s128 = int128_divu(a->s128, b->s128); 1181 } else { 1182 t->s128 = a->s128; /* Undefined behavior */ 1183 } 1184 } 1185 1186 void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1187 { 1188 int i; 1189 int64_t high; 1190 uint64_t low; 1191 for (i = 0; i < 2; i++) { 1192 high = a->s64[i]; 1193 low = 0; 1194 if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) { 1195 t->s64[i] = a->s64[i]; /* Undefined behavior */ 1196 } else { 1197 divs128(&low, &high, b->s64[i]); 1198 t->s64[i] = low; 1199 } 1200 } 1201 } 1202 1203 void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1204 { 1205 int i; 1206 uint64_t high, low; 1207 for (i = 0; i < 2; i++) { 1208 high = a->u64[i]; 1209 low = 0; 1210 if (unlikely(!b->u64[i])) { 1211 t->u64[i] = a->u64[i]; /* Undefined behavior */ 1212 } else { 1213 divu128(&low, &high, b->u64[i]); 1214 t->u64[i] = low; 1215 } 1216 } 1217 } 1218 1219 void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1220 { 1221 Int128 high, low; 1222 Int128 int128_min = int128_make128(0, INT64_MIN); 1223 Int128 neg1 = int128_makes64(-1); 1224 1225 high = a->s128; 1226 low = int128_zero(); 1227 if (unlikely(!int128_nz(b->s128) || 1228 (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) { 1229 t->s128 = a->s128; /* Undefined behavior */ 1230 } else { 1231 divs256(&low, &high, b->s128); 1232 t->s128 = low; 1233 } 1234 } 1235 1236 void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1237 { 1238 Int128 high, low; 1239 1240 high = a->s128; 1241 low = int128_zero(); 1242 if (unlikely(!int128_nz(b->s128))) { 1243 t->s128 = a->s128; /* Undefined behavior */ 1244 } else { 1245 divu256(&low, &high, b->s128); 1246 t->s128 = low; 1247 } 1248 } 1249 1250 void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1251 { 1252 Int128 neg1 = int128_makes64(-1); 1253 Int128 int128_min = int128_make128(0, INT64_MIN); 1254 if (likely(int128_nz(b->s128) && 1255 (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) { 1256 t->s128 = int128_rems(a->s128, b->s128); 1257 } else { 1258 t->s128 = int128_zero(); /* Undefined behavior */ 1259 } 1260 } 1261 1262 void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1263 { 1264 if (likely(int128_nz(b->s128))) { 1265 t->s128 = int128_remu(a->s128, b->s128); 1266 } else { 1267 t->s128 = int128_zero(); /* Undefined behavior */ 1268 } 1269 } 1270 1271 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1272 { 1273 ppc_avr_t result; 1274 int i; 1275 1276 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1277 int s = c->VsrB(i) & 0x1f; 1278 int index = s 
& 0xf; 1279 1280 if (s & 0x10) { 1281 result.VsrB(i) = b->VsrB(index); 1282 } else { 1283 result.VsrB(i) = a->VsrB(index); 1284 } 1285 } 1286 *r = result; 1287 } 1288 1289 void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1290 { 1291 ppc_avr_t result; 1292 int i; 1293 1294 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1295 int s = c->VsrB(i) & 0x1f; 1296 int index = 15 - (s & 0xf); 1297 1298 if (s & 0x10) { 1299 result.VsrB(i) = a->VsrB(index); 1300 } else { 1301 result.VsrB(i) = b->VsrB(index); 1302 } 1303 } 1304 *r = result; 1305 } 1306 1307 #define XXGENPCV_BE_EXP(NAME, SZ) \ 1308 void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ 1309 { \ 1310 ppc_vsr_t tmp; \ 1311 \ 1312 /* Initialize tmp with the result of an all-zeros mask */ \ 1313 tmp.VsrD(0) = 0x1011121314151617; \ 1314 tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \ 1315 \ 1316 /* Iterate over the most significant byte of each element */ \ 1317 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1318 if (b->VsrB(i) & 0x80) { \ 1319 /* Update each byte of the element */ \ 1320 for (int k = 0; k < SZ; k++) { \ 1321 tmp.VsrB(i + k) = j + k; \ 1322 } \ 1323 j += SZ; \ 1324 } \ 1325 } \ 1326 \ 1327 *t = tmp; \ 1328 } 1329 1330 #define XXGENPCV_BE_COMP(NAME, SZ) \ 1331 void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ 1332 { \ 1333 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ 1334 \ 1335 /* Iterate over the most significant byte of each element */ \ 1336 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1337 if (b->VsrB(i) & 0x80) { \ 1338 /* Update each byte of the element */ \ 1339 for (int k = 0; k < SZ; k++) { \ 1340 tmp.VsrB(j + k) = i + k; \ 1341 } \ 1342 j += SZ; \ 1343 } \ 1344 } \ 1345 \ 1346 *t = tmp; \ 1347 } 1348 1349 #define XXGENPCV_LE_EXP(NAME, SZ) \ 1350 void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ 1351 { \ 1352 ppc_vsr_t tmp; \ 1353 \ 1354 /* Initialize tmp with the result of an all-zeros mask */ \ 1355 tmp.VsrD(0) = 0x1F1E1D1C1B1A1918; \ 1356 tmp.VsrD(1) = 0x1716151413121110; \ 1357 \ 1358 /* Iterate over the most significant byte of each element */ \ 1359 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1360 /* Reverse indexing of "i" */ \ 1361 const int idx = ARRAY_SIZE(b->u8) - i - SZ; \ 1362 if (b->VsrB(idx) & 0x80) { \ 1363 /* Update each byte of the element */ \ 1364 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \ 1365 tmp.VsrB(idx + rk) = j + k; \ 1366 } \ 1367 j += SZ; \ 1368 } \ 1369 } \ 1370 \ 1371 *t = tmp; \ 1372 } 1373 1374 #define XXGENPCV_LE_COMP(NAME, SZ) \ 1375 void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ 1376 { \ 1377 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ 1378 \ 1379 /* Iterate over the most significant byte of each element */ \ 1380 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1381 if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) { \ 1382 /* Update each byte of the element */ \ 1383 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \ 1384 /* Reverse indexing of "j" */ \ 1385 const int idx = ARRAY_SIZE(b->u8) - j - SZ; \ 1386 tmp.VsrB(idx + rk) = i + k; \ 1387 } \ 1388 j += SZ; \ 1389 } \ 1390 } \ 1391 \ 1392 *t = tmp; \ 1393 } 1394 1395 #define XXGENPCV(NAME, SZ) \ 1396 XXGENPCV_BE_EXP(NAME, SZ) \ 1397 XXGENPCV_BE_COMP(NAME, SZ) \ 1398 XXGENPCV_LE_EXP(NAME, SZ) \ 1399 XXGENPCV_LE_COMP(NAME, SZ) \ 1400 1401 XXGENPCV(XXGENPCVBM, 1) 1402 XXGENPCV(XXGENPCVHM, 2) 1403 XXGENPCV(XXGENPCVWM, 4) 1404 XXGENPCV(XXGENPCVDM, 8) 1405 1406 #undef XXGENPCV_BE_EXP 1407 #undef 
XXGENPCV_BE_COMP 1408 #undef XXGENPCV_LE_EXP 1409 #undef XXGENPCV_LE_COMP 1410 #undef XXGENPCV 1411 1412 #if HOST_BIG_ENDIAN 1413 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1414 #define VBPERMD_INDEX(i) (i) 1415 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1416 #else 1417 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1418 #define VBPERMD_INDEX(i) (1 - i) 1419 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1420 #endif 1421 #define EXTRACT_BIT(avr, i, index) \ 1422 (extract64((avr)->VsrD(i), 63 - index, 1)) 1423 1424 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1425 { 1426 int i, j; 1427 ppc_avr_t result = { .u64 = { 0, 0 } }; 1428 VECTOR_FOR_INORDER_I(i, u64) { 1429 for (j = 0; j < 8; j++) { 1430 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1431 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1432 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1433 } 1434 } 1435 } 1436 *r = result; 1437 } 1438 1439 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1440 { 1441 int i; 1442 uint64_t perm = 0; 1443 1444 VECTOR_FOR_INORDER_I(i, u8) { 1445 int index = VBPERMQ_INDEX(b, i); 1446 1447 if (index < 128) { 1448 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1449 if (a->u64[VBPERMQ_DW(index)] & mask) { 1450 perm |= (0x8000 >> i); 1451 } 1452 } 1453 } 1454 1455 r->VsrD(0) = perm; 1456 r->VsrD(1) = 0; 1457 } 1458 1459 #undef VBPERMQ_INDEX 1460 #undef VBPERMQ_DW 1461 1462 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1463 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1464 { \ 1465 int i, j; \ 1466 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ 1467 \ 1468 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1469 prod[i] = 0; \ 1470 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1471 if (a->srcfld[i] & (1ull << j)) { \ 1472 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1473 } \ 1474 } \ 1475 } \ 1476 \ 1477 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1478 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ 1479 } \ 1480 } 1481 1482 PMSUM(vpmsumb, u8, u16, uint16_t) 1483 PMSUM(vpmsumh, u16, u32, uint32_t) 1484 PMSUM(vpmsumw, u32, u64, uint64_t) 1485 1486 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1487 { 1488 1489 #ifdef CONFIG_INT128 1490 int i, j; 1491 __uint128_t prod[2]; 1492 1493 VECTOR_FOR_INORDER_I(i, u64) { 1494 prod[i] = 0; 1495 for (j = 0; j < 64; j++) { 1496 if (a->u64[i] & (1ull << j)) { 1497 prod[i] ^= (((__uint128_t)b->u64[i]) << j); 1498 } 1499 } 1500 } 1501 1502 r->u128 = prod[0] ^ prod[1]; 1503 1504 #else 1505 int i, j; 1506 ppc_avr_t prod[2]; 1507 1508 VECTOR_FOR_INORDER_I(i, u64) { 1509 prod[i].VsrD(1) = prod[i].VsrD(0) = 0; 1510 for (j = 0; j < 64; j++) { 1511 if (a->u64[i] & (1ull << j)) { 1512 ppc_avr_t bshift; 1513 if (j == 0) { 1514 bshift.VsrD(0) = 0; 1515 bshift.VsrD(1) = b->u64[i]; 1516 } else { 1517 bshift.VsrD(0) = b->u64[i] >> (64 - j); 1518 bshift.VsrD(1) = b->u64[i] << j; 1519 } 1520 prod[i].VsrD(1) ^= bshift.VsrD(1); 1521 prod[i].VsrD(0) ^= bshift.VsrD(0); 1522 } 1523 } 1524 } 1525 1526 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1); 1527 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0); 1528 #endif 1529 } 1530 1531 1532 #if HOST_BIG_ENDIAN 1533 #define PKBIG 1 1534 #else 1535 #define PKBIG 0 1536 #endif 1537 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1538 { 1539 int i, j; 1540 ppc_avr_t result; 1541 #if HOST_BIG_ENDIAN 1542 const ppc_avr_t *x[2] = { a, b }; 1543 #else 1544 const ppc_avr_t *x[2] = { b, a }; 1545 #endif 1546 1547 VECTOR_FOR_INORDER_I(i, u64) { 1548 VECTOR_FOR_INORDER_I(j, u32) { 1549 
uint32_t e = x[i]->u32[j]; 1550 1551 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1552 ((e >> 6) & 0x3e0) | 1553 ((e >> 3) & 0x1f)); 1554 } 1555 } 1556 *r = result; 1557 } 1558 1559 #define VPK(suffix, from, to, cvt, dosat) \ 1560 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1561 ppc_avr_t *a, ppc_avr_t *b) \ 1562 { \ 1563 int i; \ 1564 int sat = 0; \ 1565 ppc_avr_t result; \ 1566 ppc_avr_t *a0 = PKBIG ? a : b; \ 1567 ppc_avr_t *a1 = PKBIG ? b : a; \ 1568 \ 1569 VECTOR_FOR_INORDER_I(i, from) { \ 1570 result.to[i] = cvt(a0->from[i], &sat); \ 1571 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1572 } \ 1573 *r = result; \ 1574 if (dosat && sat) { \ 1575 set_vscr_sat(env); \ 1576 } \ 1577 } 1578 #define I(x, y) (x) 1579 VPK(shss, s16, s8, cvtshsb, 1) 1580 VPK(shus, s16, u8, cvtshub, 1) 1581 VPK(swss, s32, s16, cvtswsh, 1) 1582 VPK(swus, s32, u16, cvtswuh, 1) 1583 VPK(sdss, s64, s32, cvtsdsw, 1) 1584 VPK(sdus, s64, u32, cvtsduw, 1) 1585 VPK(uhus, u16, u8, cvtuhub, 1) 1586 VPK(uwus, u32, u16, cvtuwuh, 1) 1587 VPK(udus, u64, u32, cvtuduw, 1) 1588 VPK(uhum, u16, u8, I, 0) 1589 VPK(uwum, u32, u16, I, 0) 1590 VPK(udum, u64, u32, I, 0) 1591 #undef I 1592 #undef VPK 1593 #undef PKBIG 1594 1595 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1596 { 1597 int i; 1598 1599 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1600 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1601 } 1602 } 1603 1604 #define VRFI(suffix, rounding) \ 1605 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1606 ppc_avr_t *b) \ 1607 { \ 1608 int i; \ 1609 float_status s = env->vec_status; \ 1610 \ 1611 set_float_rounding_mode(rounding, &s); \ 1612 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1613 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1614 } \ 1615 } 1616 VRFI(n, float_round_nearest_even) 1617 VRFI(m, float_round_down) 1618 VRFI(p, float_round_up) 1619 VRFI(z, float_round_to_zero) 1620 #undef VRFI 1621 1622 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1623 { 1624 int i; 1625 1626 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1627 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1628 1629 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1630 } 1631 } 1632 1633 #define VRLMI(name, size, element, insert) \ 1634 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 1635 { \ 1636 int i; \ 1637 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1638 uint##size##_t src1 = a->element[i]; \ 1639 uint##size##_t src2 = b->element[i]; \ 1640 uint##size##_t src3 = r->element[i]; \ 1641 uint##size##_t begin, end, shift, mask, rot_val; \ 1642 \ 1643 shift = extract##size(src2, 0, 6); \ 1644 end = extract##size(src2, 8, 6); \ 1645 begin = extract##size(src2, 16, 6); \ 1646 rot_val = rol##size(src1, shift); \ 1647 mask = mask_u##size(begin, end); \ 1648 if (insert) { \ 1649 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1650 } else { \ 1651 r->element[i] = (rot_val & mask); \ 1652 } \ 1653 } \ 1654 } 1655 1656 VRLMI(VRLDMI, 64, u64, 1); 1657 VRLMI(VRLWMI, 32, u32, 1); 1658 VRLMI(VRLDNM, 64, u64, 0); 1659 VRLMI(VRLWNM, 32, u32, 0); 1660 1661 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1662 { 1663 int i; 1664 1665 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1666 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status); 1667 } 1668 } 1669 1670 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1671 { 1672 int i; 1673 1674 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1675 
r->f32[i] = float32_log2(b->f32[i], &env->vec_status); 1676 } 1677 } 1678 1679 #define VEXTU_X_DO(name, size, left) \ 1680 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1681 { \ 1682 int index = (a & 0xf) * 8; \ 1683 if (left) { \ 1684 index = 128 - index - size; \ 1685 } \ 1686 return int128_getlo(int128_rshift(b->s128, index)) & \ 1687 MAKE_64BIT_MASK(0, size); \ 1688 } 1689 VEXTU_X_DO(vextublx, 8, 1) 1690 VEXTU_X_DO(vextuhlx, 16, 1) 1691 VEXTU_X_DO(vextuwlx, 32, 1) 1692 VEXTU_X_DO(vextubrx, 8, 0) 1693 VEXTU_X_DO(vextuhrx, 16, 0) 1694 VEXTU_X_DO(vextuwrx, 32, 0) 1695 #undef VEXTU_X_DO 1696 1697 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1698 { 1699 int i; 1700 unsigned int shift, bytes, size; 1701 1702 size = ARRAY_SIZE(r->u8); 1703 for (i = 0; i < size; i++) { 1704 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1705 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */ 1706 (((i + 1) < size) ? a->VsrB(i + 1) : 0); 1707 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */ 1708 } 1709 } 1710 1711 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1712 { 1713 int i; 1714 unsigned int shift, bytes; 1715 1716 /* 1717 * Use reverse order, as destination and source register can be 1718 * same. Its being modified in place saving temporary, reverse 1719 * order will guarantee that computed result is not fed back. 1720 */ 1721 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1722 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1723 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i); 1724 /* extract adjacent bytes */ 1725 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */ 1726 } 1727 } 1728 1729 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1730 { 1731 int sh = shift & 0xf; 1732 int i; 1733 ppc_avr_t result; 1734 1735 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1736 int index = sh + i; 1737 if (index > 0xf) { 1738 result.VsrB(i) = b->VsrB(index - 0x10); 1739 } else { 1740 result.VsrB(i) = a->VsrB(index); 1741 } 1742 } 1743 *r = result; 1744 } 1745 1746 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1747 { 1748 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1749 1750 #if HOST_BIG_ENDIAN 1751 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1752 memset(&r->u8[16 - sh], 0, sh); 1753 #else 1754 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1755 memset(&r->u8[0], 0, sh); 1756 #endif 1757 } 1758 1759 #if HOST_BIG_ENDIAN 1760 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX]) 1761 #else 1762 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1) 1763 #endif 1764 1765 #define VINSX(SUFFIX, TYPE) \ 1766 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \ 1767 uint64_t val, target_ulong index) \ 1768 { \ 1769 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \ 1770 target_long idx = index; \ 1771 \ 1772 if (idx < 0 || idx > maxidx) { \ 1773 idx = idx < 0 ? 
sizeof(TYPE) - idx : idx; \ 1774 qemu_log_mask(LOG_GUEST_ERROR, \ 1775 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \ 1776 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \ 1777 } else { \ 1778 TYPE src = val; \ 1779 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \ 1780 } \ 1781 } 1782 VINSX(B, uint8_t) 1783 VINSX(H, uint16_t) 1784 VINSX(W, uint32_t) 1785 VINSX(D, uint64_t) 1786 #undef ELEM_ADDR 1787 #undef VINSX 1788 #if HOST_BIG_ENDIAN 1789 #define VEXTDVLX(NAME, SIZE) \ 1790 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1791 target_ulong index) \ 1792 { \ 1793 const target_long idx = index; \ 1794 ppc_avr_t tmp[2] = { *a, *b }; \ 1795 memset(t, 0, sizeof(*t)); \ 1796 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1797 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \ 1798 } else { \ 1799 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1800 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1801 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \ 1802 } \ 1803 } 1804 #else 1805 #define VEXTDVLX(NAME, SIZE) \ 1806 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1807 target_ulong index) \ 1808 { \ 1809 const target_long idx = index; \ 1810 ppc_avr_t tmp[2] = { *b, *a }; \ 1811 memset(t, 0, sizeof(*t)); \ 1812 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1813 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \ 1814 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \ 1815 } else { \ 1816 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1817 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1818 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \ 1819 } \ 1820 } 1821 #endif 1822 VEXTDVLX(VEXTDUBVLX, 1) 1823 VEXTDVLX(VEXTDUHVLX, 2) 1824 VEXTDVLX(VEXTDUWVLX, 4) 1825 VEXTDVLX(VEXTDDVLX, 8) 1826 #undef VEXTDVLX 1827 #if HOST_BIG_ENDIAN 1828 #define VEXTRACT(suffix, element) \ 1829 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1830 { \ 1831 uint32_t es = sizeof(r->element[0]); \ 1832 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1833 memset(&r->u8[8], 0, 8); \ 1834 memset(&r->u8[0], 0, 8 - es); \ 1835 } 1836 #else 1837 #define VEXTRACT(suffix, element) \ 1838 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1839 { \ 1840 uint32_t es = sizeof(r->element[0]); \ 1841 uint32_t s = (16 - index) - es; \ 1842 memmove(&r->u8[8], &b->u8[s], es); \ 1843 memset(&r->u8[0], 0, 8); \ 1844 memset(&r->u8[8 + es], 0, 8 - es); \ 1845 } 1846 #endif 1847 VEXTRACT(ub, u8) 1848 VEXTRACT(uh, u16) 1849 VEXTRACT(uw, u32) 1850 VEXTRACT(d, u64) 1851 #undef VEXTRACT 1852 1853 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \ 1854 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \ 1855 { \ 1856 int i, idx, crf = 0; \ 1857 \ 1858 for (i = 0; i < NUM_ELEMS; i++) { \ 1859 idx = LEFT ? i : NUM_ELEMS - i - 1; \ 1860 if (b->Vsr##ELEM(idx)) { \ 1861 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \ 1862 } else { \ 1863 crf = 0b0010; \ 1864 break; \ 1865 } \ 1866 } \ 1867 \ 1868 for (; i < NUM_ELEMS; i++) { \ 1869 idx = LEFT ? 
i : NUM_ELEMS - i - 1; \ 1870 t->Vsr##ELEM(idx) = 0; \ 1871 } \ 1872 \ 1873 return crf; \ 1874 } 1875 VSTRI(VSTRIBL, B, 16, true) 1876 VSTRI(VSTRIBR, B, 16, false) 1877 VSTRI(VSTRIHL, H, 8, true) 1878 VSTRI(VSTRIHR, H, 8, false) 1879 #undef VSTRI 1880 1881 void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index) 1882 { 1883 ppc_vsr_t t = { }; 1884 size_t es = sizeof(uint32_t); 1885 uint32_t ext_index; 1886 int i; 1887 1888 ext_index = index; 1889 for (i = 0; i < es; i++, ext_index++) { 1890 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16); 1891 } 1892 1893 *xt = t; 1894 } 1895 1896 void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index) 1897 { 1898 ppc_vsr_t t = *xt; 1899 size_t es = sizeof(uint32_t); 1900 int ins_index, i = 0; 1901 1902 ins_index = index; 1903 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1904 t.VsrB(ins_index) = xb->VsrB(8 - es + i); 1905 } 1906 1907 *xt = t; 1908 } 1909 1910 void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c, 1911 uint32_t desc) 1912 { 1913 /* 1914 * Instead of processing imm bit-by-bit, we'll skip the computation of 1915 * conjunctions whose corresponding bit is unset. 1916 */ 1917 int bit, imm = simd_data(desc); 1918 Int128 conj, disj = int128_zero(); 1919 1920 /* Iterate over set bits from the least to the most significant bit */ 1921 while (imm) { 1922 /* 1923 * Get the next bit to be processed with ctz64. Invert the result of 1924 * ctz64 to match the indexing used by PowerISA. 1925 */ 1926 bit = 7 - ctzl(imm); 1927 if (bit & 0x4) { 1928 conj = a->s128; 1929 } else { 1930 conj = int128_not(a->s128); 1931 } 1932 if (bit & 0x2) { 1933 conj = int128_and(conj, b->s128); 1934 } else { 1935 conj = int128_and(conj, int128_not(b->s128)); 1936 } 1937 if (bit & 0x1) { 1938 conj = int128_and(conj, c->s128); 1939 } else { 1940 conj = int128_and(conj, int128_not(c->s128)); 1941 } 1942 disj = int128_or(disj, conj); 1943 1944 /* Unset the least significant bit that is set */ 1945 imm &= imm - 1; 1946 } 1947 1948 t->s128 = disj; 1949 } 1950 1951 #define XXBLEND(name, sz) \ 1952 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1953 ppc_avr_t *c, uint32_t desc) \ 1954 { \ 1955 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \ 1956 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? 
\ 1957 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \ 1958 } \ 1959 } 1960 XXBLEND(B, 8) 1961 XXBLEND(H, 16) 1962 XXBLEND(W, 32) 1963 XXBLEND(D, 64) 1964 #undef XXBLEND 1965 1966 #define VNEG(name, element) \ 1967 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1968 { \ 1969 int i; \ 1970 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1971 r->element[i] = -b->element[i]; \ 1972 } \ 1973 } 1974 VNEG(vnegw, s32) 1975 VNEG(vnegd, s64) 1976 #undef VNEG 1977 1978 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1979 { 1980 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1981 1982 #if HOST_BIG_ENDIAN 1983 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1984 memset(&r->u8[0], 0, sh); 1985 #else 1986 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1987 memset(&r->u8[16 - sh], 0, sh); 1988 #endif 1989 } 1990 1991 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1992 { 1993 int i; 1994 1995 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1996 r->u32[i] = a->u32[i] >= b->u32[i]; 1997 } 1998 } 1999 2000 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2001 { 2002 int64_t t; 2003 int i, upper; 2004 ppc_avr_t result; 2005 int sat = 0; 2006 2007 upper = ARRAY_SIZE(r->s32) - 1; 2008 t = (int64_t)b->VsrSW(upper); 2009 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2010 t += a->VsrSW(i); 2011 result.VsrSW(i) = 0; 2012 } 2013 result.VsrSW(upper) = cvtsdsw(t, &sat); 2014 *r = result; 2015 2016 if (sat) { 2017 set_vscr_sat(env); 2018 } 2019 } 2020 2021 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2022 { 2023 int i, j, upper; 2024 ppc_avr_t result; 2025 int sat = 0; 2026 2027 upper = 1; 2028 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 2029 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 2030 2031 result.VsrD(i) = 0; 2032 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 2033 t += a->VsrSW(2 * i + j); 2034 } 2035 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 2036 } 2037 2038 *r = result; 2039 if (sat) { 2040 set_vscr_sat(env); 2041 } 2042 } 2043 2044 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2045 { 2046 int i, j; 2047 int sat = 0; 2048 2049 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2050 int64_t t = (int64_t)b->s32[i]; 2051 2052 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 2053 t += a->s8[4 * i + j]; 2054 } 2055 r->s32[i] = cvtsdsw(t, &sat); 2056 } 2057 2058 if (sat) { 2059 set_vscr_sat(env); 2060 } 2061 } 2062 2063 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2064 { 2065 int sat = 0; 2066 int i; 2067 2068 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2069 int64_t t = (int64_t)b->s32[i]; 2070 2071 t += a->s16[2 * i] + a->s16[2 * i + 1]; 2072 r->s32[i] = cvtsdsw(t, &sat); 2073 } 2074 2075 if (sat) { 2076 set_vscr_sat(env); 2077 } 2078 } 2079 2080 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2081 { 2082 int i, j; 2083 int sat = 0; 2084 2085 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2086 uint64_t t = (uint64_t)b->u32[i]; 2087 2088 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 2089 t += a->u8[4 * i + j]; 2090 } 2091 r->u32[i] = cvtuduw(t, &sat); 2092 } 2093 2094 if (sat) { 2095 set_vscr_sat(env); 2096 } 2097 } 2098 2099 #if HOST_BIG_ENDIAN 2100 #define UPKHI 1 2101 #define UPKLO 0 2102 #else 2103 #define UPKHI 0 2104 #define UPKLO 1 2105 #endif 2106 #define VUPKPX(suffix, hi) \ 2107 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2108 { \ 2109 int i; \ 2110 ppc_avr_t result; \ 2111 \ 2112 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 2113 uint16_t e = b->u16[hi ? 
i : i + 4]; \ 2114 uint8_t a = (e >> 15) ? 0xff : 0; \ 2115 uint8_t r = (e >> 10) & 0x1f; \ 2116 uint8_t g = (e >> 5) & 0x1f; \ 2117 uint8_t b = e & 0x1f; \ 2118 \ 2119 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 2120 } \ 2121 *r = result; \ 2122 } 2123 VUPKPX(lpx, UPKLO) 2124 VUPKPX(hpx, UPKHI) 2125 #undef VUPKPX 2126 2127 #define VUPK(suffix, unpacked, packee, hi) \ 2128 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2129 { \ 2130 int i; \ 2131 ppc_avr_t result; \ 2132 \ 2133 if (hi) { \ 2134 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 2135 result.unpacked[i] = b->packee[i]; \ 2136 } \ 2137 } else { \ 2138 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 2139 i++) { \ 2140 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 2141 } \ 2142 } \ 2143 *r = result; \ 2144 } 2145 VUPK(hsb, s16, s8, UPKHI) 2146 VUPK(hsh, s32, s16, UPKHI) 2147 VUPK(hsw, s64, s32, UPKHI) 2148 VUPK(lsb, s16, s8, UPKLO) 2149 VUPK(lsh, s32, s16, UPKLO) 2150 VUPK(lsw, s64, s32, UPKLO) 2151 #undef VUPK 2152 #undef UPKHI 2153 #undef UPKLO 2154 2155 #define VGENERIC_DO(name, element) \ 2156 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 2157 { \ 2158 int i; \ 2159 \ 2160 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 2161 r->element[i] = name(b->element[i]); \ 2162 } \ 2163 } 2164 2165 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 2166 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 2167 2168 VGENERIC_DO(clzb, u8) 2169 VGENERIC_DO(clzh, u16) 2170 2171 #undef clzb 2172 #undef clzh 2173 2174 #define ctzb(v) ((v) ? ctz32(v) : 8) 2175 #define ctzh(v) ((v) ? ctz32(v) : 16) 2176 #define ctzw(v) ctz32((v)) 2177 #define ctzd(v) ctz64((v)) 2178 2179 VGENERIC_DO(ctzb, u8) 2180 VGENERIC_DO(ctzh, u16) 2181 VGENERIC_DO(ctzw, u32) 2182 VGENERIC_DO(ctzd, u64) 2183 2184 #undef ctzb 2185 #undef ctzh 2186 #undef ctzw 2187 #undef ctzd 2188 2189 #define popcntb(v) ctpop8(v) 2190 #define popcnth(v) ctpop16(v) 2191 #define popcntw(v) ctpop32(v) 2192 #define popcntd(v) ctpop64(v) 2193 2194 VGENERIC_DO(popcntb, u8) 2195 VGENERIC_DO(popcnth, u16) 2196 VGENERIC_DO(popcntw, u32) 2197 VGENERIC_DO(popcntd, u64) 2198 2199 #undef popcntb 2200 #undef popcnth 2201 #undef popcntw 2202 #undef popcntd 2203 2204 #undef VGENERIC_DO 2205 2206 #if HOST_BIG_ENDIAN 2207 #define QW_ONE { .u64 = { 0, 1 } } 2208 #else 2209 #define QW_ONE { .u64 = { 1, 0 } } 2210 #endif 2211 2212 #ifndef CONFIG_INT128 2213 2214 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) 2215 { 2216 t->u64[0] = ~a.u64[0]; 2217 t->u64[1] = ~a.u64[1]; 2218 } 2219 2220 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) 2221 { 2222 if (a.VsrD(0) < b.VsrD(0)) { 2223 return -1; 2224 } else if (a.VsrD(0) > b.VsrD(0)) { 2225 return 1; 2226 } else if (a.VsrD(1) < b.VsrD(1)) { 2227 return -1; 2228 } else if (a.VsrD(1) > b.VsrD(1)) { 2229 return 1; 2230 } else { 2231 return 0; 2232 } 2233 } 2234 2235 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2236 { 2237 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 2238 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 2239 (~a.VsrD(1) < b.VsrD(1)); 2240 } 2241 2242 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2243 { 2244 ppc_avr_t not_a; 2245 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 2246 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 2247 (~a.VsrD(1) < b.VsrD(1)); 2248 avr_qw_not(¬_a, a); 2249 return avr_qw_cmpu(not_a, b) < 0; 2250 } 2251 2252 #endif 2253 2254 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2255 { 2256 #ifdef CONFIG_INT128 2257 r->u128 = a->u128 + b->u128; 2258 

void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128;
#else
    avr_qw_add(r, *a, *b);
#endif
}

void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else

    if (c->VsrD(1) & 1) {
        ppc_avr_t tmp;

        tmp.VsrD(0) = 0;
        tmp.VsrD(1) = c->VsrD(1) & 1;
        avr_qw_add(&tmp, *a, tmp);
        avr_qw_add(r, tmp, *b);
    } else {
        avr_qw_add(r, *a, *b);
    }
#endif
}

void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < b->u128);
#else
    ppc_avr_t not_a;

    avr_qw_not(&not_a, *a);

    r->VsrD(0) = 0;
    r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
#endif
}

void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    int carry_out = (~a->u128 < b->u128);
    if (!carry_out && (c->u128 & 1)) {
        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
                    ((a->u128 != 0) || (b->u128 != 0));
    }
    r->u128 = carry_out;
#else

    int carry_in = c->VsrD(1) & 1;
    int carry_out = 0;
    ppc_avr_t tmp;

    carry_out = avr_qw_addc(&tmp, *a, *b);

    if (!carry_out && carry_in) {
        ppc_avr_t one = QW_ONE;
        carry_out = avr_qw_addc(&tmp, tmp, one);
    }
    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
#endif
}

void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 - b->u128;
#else
    ppc_avr_t tmp;
    ppc_avr_t one = QW_ONE;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&tmp, *a, tmp);
    avr_qw_add(r, tmp, one);
#endif
}

void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
#else
    ppc_avr_t tmp, sum;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&sum, *a, tmp);

    tmp.VsrD(0) = 0;
    tmp.VsrD(1) = c->VsrD(1) & 1;
    avr_qw_add(r, sum, tmp);
#endif
}

void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < ~b->u128) ||
              (a->u128 + ~b->u128 == (__uint128_t)-1);
#else
    int carry = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
    }
    r->VsrD(0) = 0;
    r->VsrD(1) = carry;
#endif
}

void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 =
        (~a->u128 < ~b->u128) ||
        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
#else
    int carry_in = c->VsrD(1) & 1;
    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry_out && carry_in) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
    }

    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
#endif
}

#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

#define BCD_DIG_BYTE(n) (15 - ((n) / 2))

static int
bcd_get_sgn(ppc_avr_t *bcd) 2403 { 2404 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) { 2405 case BCD_PLUS_PREF_1: 2406 case BCD_PLUS_PREF_2: 2407 case BCD_PLUS_ALT_1: 2408 case BCD_PLUS_ALT_2: 2409 { 2410 return 1; 2411 } 2412 2413 case BCD_NEG_PREF: 2414 case BCD_NEG_ALT: 2415 { 2416 return -1; 2417 } 2418 2419 default: 2420 { 2421 return 0; 2422 } 2423 } 2424 } 2425 2426 static int bcd_preferred_sgn(int sgn, int ps) 2427 { 2428 if (sgn >= 0) { 2429 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2430 } else { 2431 return BCD_NEG_PREF; 2432 } 2433 } 2434 2435 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2436 { 2437 uint8_t result; 2438 if (n & 1) { 2439 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4; 2440 } else { 2441 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF; 2442 } 2443 2444 if (unlikely(result > 9)) { 2445 *invalid = true; 2446 } 2447 return result; 2448 } 2449 2450 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2451 { 2452 if (n & 1) { 2453 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F; 2454 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4); 2455 } else { 2456 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0; 2457 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit; 2458 } 2459 } 2460 2461 static bool bcd_is_valid(ppc_avr_t *bcd) 2462 { 2463 int i; 2464 int invalid = 0; 2465 2466 if (bcd_get_sgn(bcd) == 0) { 2467 return false; 2468 } 2469 2470 for (i = 1; i < 32; i++) { 2471 bcd_get_digit(bcd, i, &invalid); 2472 if (unlikely(invalid)) { 2473 return false; 2474 } 2475 } 2476 return true; 2477 } 2478 2479 static int bcd_cmp_zero(ppc_avr_t *bcd) 2480 { 2481 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) { 2482 return CRF_EQ; 2483 } else { 2484 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT; 2485 } 2486 } 2487 2488 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2489 { 2490 return reg->VsrH(7 - n); 2491 } 2492 2493 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2494 { 2495 reg->VsrH(7 - n) = val; 2496 } 2497 2498 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2499 { 2500 int i; 2501 int invalid = 0; 2502 for (i = 31; i > 0; i--) { 2503 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2504 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2505 if (unlikely(invalid)) { 2506 return 0; /* doesn't matter */ 2507 } else if (dig_a > dig_b) { 2508 return 1; 2509 } else if (dig_a < dig_b) { 2510 return -1; 2511 } 2512 } 2513 2514 return 0; 2515 } 2516 2517 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2518 int *overflow) 2519 { 2520 int carry = 0; 2521 int i; 2522 int is_zero = 1; 2523 2524 for (i = 1; i <= 31; i++) { 2525 uint8_t digit = bcd_get_digit(a, i, invalid) + 2526 bcd_get_digit(b, i, invalid) + carry; 2527 is_zero &= (digit == 0); 2528 if (digit > 9) { 2529 carry = 1; 2530 digit -= 10; 2531 } else { 2532 carry = 0; 2533 } 2534 2535 bcd_put_digit(t, digit, i); 2536 } 2537 2538 *overflow = carry; 2539 return is_zero; 2540 } 2541 2542 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2543 int *overflow) 2544 { 2545 int carry = 0; 2546 int i; 2547 2548 for (i = 1; i <= 31; i++) { 2549 uint8_t digit = bcd_get_digit(a, i, invalid) - 2550 bcd_get_digit(b, i, invalid) + carry; 2551 if (digit & 0x80) { 2552 carry = -1; 2553 digit += 10; 2554 } else { 2555 carry = 0; 2556 } 2557 2558 bcd_put_digit(t, digit, i); 2559 } 2560 2561 *overflow = carry; 2562 } 2563 2564 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2565 { 2566 2567 int sgna = bcd_get_sgn(a); 2568 int sgnb = 
bcd_get_sgn(b); 2569 int invalid = (sgna == 0) || (sgnb == 0); 2570 int overflow = 0; 2571 int zero = 0; 2572 uint32_t cr = 0; 2573 ppc_avr_t result = { .u64 = { 0, 0 } }; 2574 2575 if (!invalid) { 2576 if (sgna == sgnb) { 2577 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2578 zero = bcd_add_mag(&result, a, b, &invalid, &overflow); 2579 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2580 } else { 2581 int magnitude = bcd_cmp_mag(a, b); 2582 if (magnitude > 0) { 2583 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2584 bcd_sub_mag(&result, a, b, &invalid, &overflow); 2585 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2586 } else if (magnitude < 0) { 2587 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps); 2588 bcd_sub_mag(&result, b, a, &invalid, &overflow); 2589 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2590 } else { 2591 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps); 2592 cr = CRF_EQ; 2593 } 2594 } 2595 } 2596 2597 if (unlikely(invalid)) { 2598 result.VsrD(0) = result.VsrD(1) = -1; 2599 cr = CRF_SO; 2600 } else if (overflow) { 2601 cr |= CRF_SO; 2602 } else if (zero) { 2603 cr |= CRF_EQ; 2604 } 2605 2606 *r = result; 2607 2608 return cr; 2609 } 2610 2611 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2612 { 2613 ppc_avr_t bcopy = *b; 2614 int sgnb = bcd_get_sgn(b); 2615 if (sgnb < 0) { 2616 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2617 } else if (sgnb > 0) { 2618 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2619 } 2620 /* else invalid ... defer to bcdadd code for proper handling */ 2621 2622 return helper_bcdadd(r, a, &bcopy, ps); 2623 } 2624 2625 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2626 { 2627 int i; 2628 int cr = 0; 2629 uint16_t national = 0; 2630 uint16_t sgnb = get_national_digit(b, 0); 2631 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2632 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2633 2634 for (i = 1; i < 8; i++) { 2635 national = get_national_digit(b, i); 2636 if (unlikely(national < 0x30 || national > 0x39)) { 2637 invalid = 1; 2638 break; 2639 } 2640 2641 bcd_put_digit(&ret, national & 0xf, i); 2642 } 2643 2644 if (sgnb == NATIONAL_PLUS) { 2645 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2646 } else { 2647 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2648 } 2649 2650 cr = bcd_cmp_zero(&ret); 2651 2652 if (unlikely(invalid)) { 2653 cr = CRF_SO; 2654 } 2655 2656 *r = ret; 2657 2658 return cr; 2659 } 2660 2661 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2662 { 2663 int i; 2664 int cr = 0; 2665 int sgnb = bcd_get_sgn(b); 2666 int invalid = (sgnb == 0); 2667 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2668 2669 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0); 2670 2671 for (i = 1; i < 8; i++) { 2672 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2673 2674 if (unlikely(invalid)) { 2675 break; 2676 } 2677 } 2678 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2679 2680 cr = bcd_cmp_zero(b); 2681 2682 if (ox_flag) { 2683 cr |= CRF_SO; 2684 } 2685 2686 if (unlikely(invalid)) { 2687 cr = CRF_SO; 2688 } 2689 2690 *r = ret; 2691 2692 return cr; 2693 } 2694 2695 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2696 { 2697 int i; 2698 int cr = 0; 2699 int invalid = 0; 2700 int zone_digit = 0; 2701 int zone_lead = ps ? 
0xF : 0x3; 2702 int digit = 0; 2703 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2704 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4; 2705 2706 if (unlikely((sgnb < 0xA) && ps)) { 2707 invalid = 1; 2708 } 2709 2710 for (i = 0; i < 16; i++) { 2711 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead; 2712 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF; 2713 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2714 invalid = 1; 2715 break; 2716 } 2717 2718 bcd_put_digit(&ret, digit, i + 1); 2719 } 2720 2721 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2722 (!ps && (sgnb & 0x4))) { 2723 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2724 } else { 2725 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2726 } 2727 2728 cr = bcd_cmp_zero(&ret); 2729 2730 if (unlikely(invalid)) { 2731 cr = CRF_SO; 2732 } 2733 2734 *r = ret; 2735 2736 return cr; 2737 } 2738 2739 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2740 { 2741 int i; 2742 int cr = 0; 2743 uint8_t digit = 0; 2744 int sgnb = bcd_get_sgn(b); 2745 int zone_lead = (ps) ? 0xF0 : 0x30; 2746 int invalid = (sgnb == 0); 2747 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2748 2749 int ox_flag = ((b->VsrD(0) >> 4) != 0); 2750 2751 for (i = 0; i < 16; i++) { 2752 digit = bcd_get_digit(b, i + 1, &invalid); 2753 2754 if (unlikely(invalid)) { 2755 break; 2756 } 2757 2758 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit; 2759 } 2760 2761 if (ps) { 2762 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2763 } else { 2764 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1); 2765 } 2766 2767 cr = bcd_cmp_zero(b); 2768 2769 if (ox_flag) { 2770 cr |= CRF_SO; 2771 } 2772 2773 if (unlikely(invalid)) { 2774 cr = CRF_SO; 2775 } 2776 2777 *r = ret; 2778 2779 return cr; 2780 } 2781 2782 /** 2783 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs 2784 * 2785 * Returns: 2786 * > 0 if ahi|alo > bhi|blo, 2787 * 0 if ahi|alo == bhi|blo, 2788 * < 0 if ahi|alo < bhi|blo 2789 */ 2790 static inline int ucmp128(uint64_t alo, uint64_t ahi, 2791 uint64_t blo, uint64_t bhi) 2792 { 2793 return (ahi == bhi) ? 2794 (alo > blo ? 1 : (alo == blo ? 0 : -1)) : 2795 (ahi > bhi ? 1 : -1); 2796 } 2797 2798 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2799 { 2800 int i; 2801 int cr; 2802 uint64_t lo_value; 2803 uint64_t hi_value; 2804 uint64_t rem; 2805 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2806 2807 if (b->VsrSD(0) < 0) { 2808 lo_value = -b->VsrSD(1); 2809 hi_value = ~b->VsrD(0) + !lo_value; 2810 bcd_put_digit(&ret, 0xD, 0); 2811 2812 cr = CRF_LT; 2813 } else { 2814 lo_value = b->VsrD(1); 2815 hi_value = b->VsrD(0); 2816 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); 2817 2818 if (hi_value == 0 && lo_value == 0) { 2819 cr = CRF_EQ; 2820 } else { 2821 cr = CRF_GT; 2822 } 2823 } 2824 2825 /* 2826 * Check src limits: abs(src) <= 10^31 - 1 2827 * 2828 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff 2829 */ 2830 if (ucmp128(lo_value, hi_value, 2831 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) { 2832 cr |= CRF_SO; 2833 2834 /* 2835 * According to the ISA, if src wouldn't fit in the destination 2836 * register, the result is undefined. 2837 * In that case, we leave r unchanged. 
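         * (The bound checked above, 10^31 - 1, is the largest magnitude
         * that the 31 BCD digits of the result can hold, digit 0 being
         * the sign nibble; and since *r is only assigned in the "else"
         * branch below, leaving it unchanged needs no extra code here.)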
2838 */ 2839 } else { 2840 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL); 2841 2842 for (i = 1; i < 16; rem /= 10, i++) { 2843 bcd_put_digit(&ret, rem % 10, i); 2844 } 2845 2846 for (; i < 32; lo_value /= 10, i++) { 2847 bcd_put_digit(&ret, lo_value % 10, i); 2848 } 2849 2850 *r = ret; 2851 } 2852 2853 return cr; 2854 } 2855 2856 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2857 { 2858 uint8_t i; 2859 int cr; 2860 uint64_t carry; 2861 uint64_t unused; 2862 uint64_t lo_value; 2863 uint64_t hi_value = 0; 2864 int sgnb = bcd_get_sgn(b); 2865 int invalid = (sgnb == 0); 2866 2867 lo_value = bcd_get_digit(b, 31, &invalid); 2868 for (i = 30; i > 0; i--) { 2869 mulu64(&lo_value, &carry, lo_value, 10ULL); 2870 mulu64(&hi_value, &unused, hi_value, 10ULL); 2871 lo_value += bcd_get_digit(b, i, &invalid); 2872 hi_value += carry; 2873 2874 if (unlikely(invalid)) { 2875 break; 2876 } 2877 } 2878 2879 if (sgnb == -1) { 2880 r->VsrSD(1) = -lo_value; 2881 r->VsrSD(0) = ~hi_value + !r->VsrSD(1); 2882 } else { 2883 r->VsrSD(1) = lo_value; 2884 r->VsrSD(0) = hi_value; 2885 } 2886 2887 cr = bcd_cmp_zero(b); 2888 2889 if (unlikely(invalid)) { 2890 cr = CRF_SO; 2891 } 2892 2893 return cr; 2894 } 2895 2896 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2897 { 2898 int i; 2899 int invalid = 0; 2900 2901 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 2902 return CRF_SO; 2903 } 2904 2905 *r = *a; 2906 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0); 2907 2908 for (i = 1; i < 32; i++) { 2909 bcd_get_digit(a, i, &invalid); 2910 bcd_get_digit(b, i, &invalid); 2911 if (unlikely(invalid)) { 2912 return CRF_SO; 2913 } 2914 } 2915 2916 return bcd_cmp_zero(r); 2917 } 2918 2919 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2920 { 2921 int sgnb = bcd_get_sgn(b); 2922 2923 *r = *b; 2924 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 2925 2926 if (bcd_is_valid(b) == false) { 2927 return CRF_SO; 2928 } 2929 2930 return bcd_cmp_zero(r); 2931 } 2932 2933 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2934 { 2935 int cr; 2936 int i = a->VsrSB(7); 2937 bool ox_flag = false; 2938 int sgnb = bcd_get_sgn(b); 2939 ppc_avr_t ret = *b; 2940 ret.VsrD(1) &= ~0xf; 2941 2942 if (bcd_is_valid(b) == false) { 2943 return CRF_SO; 2944 } 2945 2946 if (unlikely(i > 31)) { 2947 i = 31; 2948 } else if (unlikely(i < -31)) { 2949 i = -31; 2950 } 2951 2952 if (i > 0) { 2953 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2954 } else { 2955 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2956 } 2957 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2958 2959 *r = ret; 2960 2961 cr = bcd_cmp_zero(r); 2962 if (ox_flag) { 2963 cr |= CRF_SO; 2964 } 2965 2966 return cr; 2967 } 2968 2969 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2970 { 2971 int cr; 2972 int i; 2973 int invalid = 0; 2974 bool ox_flag = false; 2975 ppc_avr_t ret = *b; 2976 2977 for (i = 0; i < 32; i++) { 2978 bcd_get_digit(b, i, &invalid); 2979 2980 if (unlikely(invalid)) { 2981 return CRF_SO; 2982 } 2983 } 2984 2985 i = a->VsrSB(7); 2986 if (i >= 32) { 2987 ox_flag = true; 2988 ret.VsrD(1) = ret.VsrD(0) = 0; 2989 } else if (i <= -32) { 2990 ret.VsrD(1) = ret.VsrD(0) = 0; 2991 } else if (i > 0) { 2992 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2993 } else { 2994 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2995 } 2996 *r = ret; 2997 2998 cr = bcd_cmp_zero(r); 2999 if (ox_flag) { 3000 cr |= CRF_SO; 3001 } 3002 3003 return 
cr; 3004 } 3005 3006 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3007 { 3008 int cr; 3009 int unused = 0; 3010 int invalid = 0; 3011 bool ox_flag = false; 3012 int sgnb = bcd_get_sgn(b); 3013 ppc_avr_t ret = *b; 3014 ret.VsrD(1) &= ~0xf; 3015 3016 int i = a->VsrSB(7); 3017 ppc_avr_t bcd_one; 3018 3019 bcd_one.VsrD(0) = 0; 3020 bcd_one.VsrD(1) = 0x10; 3021 3022 if (bcd_is_valid(b) == false) { 3023 return CRF_SO; 3024 } 3025 3026 if (unlikely(i > 31)) { 3027 i = 31; 3028 } else if (unlikely(i < -31)) { 3029 i = -31; 3030 } 3031 3032 if (i > 0) { 3033 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 3034 } else { 3035 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 3036 3037 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 3038 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 3039 } 3040 } 3041 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 3042 3043 cr = bcd_cmp_zero(&ret); 3044 if (ox_flag) { 3045 cr |= CRF_SO; 3046 } 3047 *r = ret; 3048 3049 return cr; 3050 } 3051 3052 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3053 { 3054 uint64_t mask; 3055 uint32_t ox_flag = 0; 3056 int i = a->VsrSH(3) + 1; 3057 ppc_avr_t ret = *b; 3058 3059 if (bcd_is_valid(b) == false) { 3060 return CRF_SO; 3061 } 3062 3063 if (i > 16 && i < 32) { 3064 mask = (uint64_t)-1 >> (128 - i * 4); 3065 if (ret.VsrD(0) & ~mask) { 3066 ox_flag = CRF_SO; 3067 } 3068 3069 ret.VsrD(0) &= mask; 3070 } else if (i >= 0 && i <= 16) { 3071 mask = (uint64_t)-1 >> (64 - i * 4); 3072 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 3073 ox_flag = CRF_SO; 3074 } 3075 3076 ret.VsrD(1) &= mask; 3077 ret.VsrD(0) = 0; 3078 } 3079 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 3080 *r = ret; 3081 3082 return bcd_cmp_zero(&ret) | ox_flag; 3083 } 3084 3085 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3086 { 3087 int i; 3088 uint64_t mask; 3089 uint32_t ox_flag = 0; 3090 int invalid = 0; 3091 ppc_avr_t ret = *b; 3092 3093 for (i = 0; i < 32; i++) { 3094 bcd_get_digit(b, i, &invalid); 3095 3096 if (unlikely(invalid)) { 3097 return CRF_SO; 3098 } 3099 } 3100 3101 i = a->VsrSH(3); 3102 if (i > 16 && i < 33) { 3103 mask = (uint64_t)-1 >> (128 - i * 4); 3104 if (ret.VsrD(0) & ~mask) { 3105 ox_flag = CRF_SO; 3106 } 3107 3108 ret.VsrD(0) &= mask; 3109 } else if (i > 0 && i <= 16) { 3110 mask = (uint64_t)-1 >> (64 - i * 4); 3111 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 3112 ox_flag = CRF_SO; 3113 } 3114 3115 ret.VsrD(1) &= mask; 3116 ret.VsrD(0) = 0; 3117 } else if (i == 0) { 3118 if (ret.VsrD(0) || ret.VsrD(1)) { 3119 ox_flag = CRF_SO; 3120 } 3121 ret.VsrD(0) = ret.VsrD(1) = 0; 3122 } 3123 3124 *r = ret; 3125 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) { 3126 return ox_flag | CRF_EQ; 3127 } 3128 3129 return ox_flag | CRF_GT; 3130 } 3131 3132 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 3133 { 3134 int i; 3135 VECTOR_FOR_INORDER_I(i, u8) { 3136 r->u8[i] = AES_sbox[a->u8[i]]; 3137 } 3138 } 3139 3140 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3141 { 3142 ppc_avr_t result; 3143 int i; 3144 3145 VECTOR_FOR_INORDER_I(i, u32) { 3146 result.VsrW(i) = b->VsrW(i) ^ 3147 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^ 3148 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^ 3149 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^ 3150 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]); 3151 } 3152 *r = result; 3153 } 3154 3155 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3156 { 3157 ppc_avr_t result; 3158 int i; 3159 3160 
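    /*
     * Last AES encryption round: ShiftRows and SubBytes of a (via
     * AES_shifts and AES_sbox), then AddRoundKey with b; MixColumns
     * is omitted in the final round.
     */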
VECTOR_FOR_INORDER_I(i, u8) { 3161 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]); 3162 } 3163 *r = result; 3164 } 3165 3166 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3167 { 3168 /* This differs from what is written in ISA V2.07. The RTL is */ 3169 /* incorrect and will be fixed in V2.07B. */ 3170 int i; 3171 ppc_avr_t tmp; 3172 3173 VECTOR_FOR_INORDER_I(i, u8) { 3174 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])]; 3175 } 3176 3177 VECTOR_FOR_INORDER_I(i, u32) { 3178 r->VsrW(i) = 3179 AES_imc[tmp.VsrB(4 * i + 0)][0] ^ 3180 AES_imc[tmp.VsrB(4 * i + 1)][1] ^ 3181 AES_imc[tmp.VsrB(4 * i + 2)][2] ^ 3182 AES_imc[tmp.VsrB(4 * i + 3)][3]; 3183 } 3184 } 3185 3186 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3187 { 3188 ppc_avr_t result; 3189 int i; 3190 3191 VECTOR_FOR_INORDER_I(i, u8) { 3192 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]); 3193 } 3194 *r = result; 3195 } 3196 3197 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3198 { 3199 int st = (st_six & 0x10) != 0; 3200 int six = st_six & 0xF; 3201 int i; 3202 3203 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 3204 if (st == 0) { 3205 if ((six & (0x8 >> i)) == 0) { 3206 r->VsrW(i) = ror32(a->VsrW(i), 7) ^ 3207 ror32(a->VsrW(i), 18) ^ 3208 (a->VsrW(i) >> 3); 3209 } else { /* six.bit[i] == 1 */ 3210 r->VsrW(i) = ror32(a->VsrW(i), 17) ^ 3211 ror32(a->VsrW(i), 19) ^ 3212 (a->VsrW(i) >> 10); 3213 } 3214 } else { /* st == 1 */ 3215 if ((six & (0x8 >> i)) == 0) { 3216 r->VsrW(i) = ror32(a->VsrW(i), 2) ^ 3217 ror32(a->VsrW(i), 13) ^ 3218 ror32(a->VsrW(i), 22); 3219 } else { /* six.bit[i] == 1 */ 3220 r->VsrW(i) = ror32(a->VsrW(i), 6) ^ 3221 ror32(a->VsrW(i), 11) ^ 3222 ror32(a->VsrW(i), 25); 3223 } 3224 } 3225 } 3226 } 3227 3228 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3229 { 3230 int st = (st_six & 0x10) != 0; 3231 int six = st_six & 0xF; 3232 int i; 3233 3234 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 3235 if (st == 0) { 3236 if ((six & (0x8 >> (2 * i))) == 0) { 3237 r->VsrD(i) = ror64(a->VsrD(i), 1) ^ 3238 ror64(a->VsrD(i), 8) ^ 3239 (a->VsrD(i) >> 7); 3240 } else { /* six.bit[2*i] == 1 */ 3241 r->VsrD(i) = ror64(a->VsrD(i), 19) ^ 3242 ror64(a->VsrD(i), 61) ^ 3243 (a->VsrD(i) >> 6); 3244 } 3245 } else { /* st == 1 */ 3246 if ((six & (0x8 >> (2 * i))) == 0) { 3247 r->VsrD(i) = ror64(a->VsrD(i), 28) ^ 3248 ror64(a->VsrD(i), 34) ^ 3249 ror64(a->VsrD(i), 39); 3250 } else { /* six.bit[2*i] == 1 */ 3251 r->VsrD(i) = ror64(a->VsrD(i), 14) ^ 3252 ror64(a->VsrD(i), 18) ^ 3253 ror64(a->VsrD(i), 41); 3254 } 3255 } 3256 } 3257 } 3258 3259 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 3260 { 3261 ppc_avr_t result; 3262 int i; 3263 3264 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 3265 int indexA = c->VsrB(i) >> 4; 3266 int indexB = c->VsrB(i) & 0xF; 3267 3268 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB); 3269 } 3270 *r = result; 3271 } 3272 3273 #undef VECTOR_FOR_INORDER_I 3274 3275 /*****************************************************************************/ 3276 /* SPE extension helpers */ 3277 /* Use a table to make this quicker */ 3278 static const uint8_t hbrev[16] = { 3279 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE, 3280 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF, 3281 }; 3282 3283 static inline uint8_t byte_reverse(uint8_t val) 3284 { 3285 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4); 3286 } 3287 3288 static inline uint32_t word_reverse(uint32_t val) 3289 { 3290 return 
byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) | 3291 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24); 3292 } 3293 3294 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */ 3295 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2) 3296 { 3297 uint32_t a, b, d, mask; 3298 3299 mask = UINT32_MAX >> (32 - MASKBITS); 3300 a = arg1 & mask; 3301 b = arg2 & mask; 3302 d = word_reverse(1 + word_reverse(a | ~b)); 3303 return (arg1 & ~mask) | (d & b); 3304 } 3305 3306 uint32_t helper_cntlsw32(uint32_t val) 3307 { 3308 if (val & 0x80000000) { 3309 return clz32(~val); 3310 } else { 3311 return clz32(val); 3312 } 3313 } 3314 3315 uint32_t helper_cntlzw32(uint32_t val) 3316 { 3317 return clz32(val); 3318 } 3319 3320 /* 440 specific */ 3321 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 3322 target_ulong low, uint32_t update_Rc) 3323 { 3324 target_ulong mask; 3325 int i; 3326 3327 i = 1; 3328 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3329 if ((high & mask) == 0) { 3330 if (update_Rc) { 3331 env->crf[0] = 0x4; 3332 } 3333 goto done; 3334 } 3335 i++; 3336 } 3337 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3338 if ((low & mask) == 0) { 3339 if (update_Rc) { 3340 env->crf[0] = 0x8; 3341 } 3342 goto done; 3343 } 3344 i++; 3345 } 3346 i = 8; 3347 if (update_Rc) { 3348 env->crf[0] = 0x2; 3349 } 3350 done: 3351 env->xer = (env->xer & ~0x7F) | i; 3352 if (update_Rc) { 3353 env->crf[0] |= xer_so; 3354 } 3355 return i; 3356 } 3357