/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    if (unlikely(rb == 0 || ra >= rb)) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divu128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    uint64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = 0;

    if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divs128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) *
(~(target_ulong)0 / 0xff)) 148 149 /* 150 * subtract 1 from each byte, and with inverse, check if MSB is set at each 151 * byte. 152 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80 153 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 154 */ 155 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 156 157 /* When you XOR the pattern and there is a match, that byte will be zero */ 158 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 159 160 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 161 { 162 return hasvalue(rb, ra) ? CRF_GT : 0; 163 } 164 165 #undef pattern 166 #undef haszero 167 #undef hasvalue 168 169 /* 170 * Return a random number. 171 */ 172 uint64_t helper_darn32(void) 173 { 174 Error *err = NULL; 175 uint32_t ret; 176 177 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 178 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 179 error_get_pretty(err)); 180 error_free(err); 181 return -1; 182 } 183 184 return ret; 185 } 186 187 uint64_t helper_darn64(void) 188 { 189 Error *err = NULL; 190 uint64_t ret; 191 192 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 193 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 194 error_get_pretty(err)); 195 error_free(err); 196 return -1; 197 } 198 199 return ret; 200 } 201 202 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 203 { 204 int i; 205 uint64_t ra = 0; 206 207 for (i = 0; i < 8; i++) { 208 int index = (rs >> (i * 8)) & 0xFF; 209 if (index < 64) { 210 if (rb & PPC_BIT(index)) { 211 ra |= 1 << i; 212 } 213 } 214 } 215 return ra; 216 } 217 218 #endif 219 220 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 221 { 222 target_ulong mask = 0xff; 223 target_ulong ra = 0; 224 int i; 225 226 for (i = 0; i < sizeof(target_ulong); i++) { 227 if ((rs & mask) == (rb & mask)) { 228 ra |= mask; 229 } 230 mask <<= 8; 231 } 232 return ra; 233 } 234 235 /* shift right arithmetic helper */ 236 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 237 target_ulong shift) 238 { 239 int32_t ret; 240 241 if (likely(!(shift & 0x20))) { 242 if (likely((uint32_t)shift != 0)) { 243 shift &= 0x1f; 244 ret = (int32_t)value >> shift; 245 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 246 env->ca32 = env->ca = 0; 247 } else { 248 env->ca32 = env->ca = 1; 249 } 250 } else { 251 ret = (int32_t)value; 252 env->ca32 = env->ca = 0; 253 } 254 } else { 255 ret = (int32_t)value >> 31; 256 env->ca32 = env->ca = (ret != 0); 257 } 258 return (target_long)ret; 259 } 260 261 #if defined(TARGET_PPC64) 262 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 263 target_ulong shift) 264 { 265 int64_t ret; 266 267 if (likely(!(shift & 0x40))) { 268 if (likely((uint64_t)shift != 0)) { 269 shift &= 0x3f; 270 ret = (int64_t)value >> shift; 271 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 272 env->ca32 = env->ca = 0; 273 } else { 274 env->ca32 = env->ca = 1; 275 } 276 } else { 277 ret = (int64_t)value; 278 env->ca32 = env->ca = 0; 279 } 280 } else { 281 ret = (int64_t)value >> 63; 282 env->ca32 = env->ca = (ret != 0); 283 } 284 return ret; 285 } 286 #endif 287 288 #if defined(TARGET_PPC64) 289 target_ulong helper_popcntb(target_ulong val) 290 { 291 /* Note that we don't fold past bytes */ 292 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 293 0x5555555555555555ULL); 294 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 295 0x3333333333333333ULL); 296 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 297 0x0f0f0f0f0f0f0f0fULL); 298 return val; 299 } 300 301 target_ulong 
helper_popcntw(target_ulong val) 302 { 303 /* Note that we don't fold past words. */ 304 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 305 0x5555555555555555ULL); 306 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 307 0x3333333333333333ULL); 308 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 309 0x0f0f0f0f0f0f0f0fULL); 310 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 311 0x00ff00ff00ff00ffULL); 312 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 313 0x0000ffff0000ffffULL); 314 return val; 315 } 316 #else 317 target_ulong helper_popcntb(target_ulong val) 318 { 319 /* Note that we don't fold past bytes */ 320 val = (val & 0x55555555) + ((val >> 1) & 0x55555555); 321 val = (val & 0x33333333) + ((val >> 2) & 0x33333333); 322 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); 323 return val; 324 } 325 #endif 326 327 uint64_t helper_CFUGED(uint64_t src, uint64_t mask) 328 { 329 /* 330 * Instead of processing the mask bit-by-bit from the most significant to 331 * the least significant bit, as described in PowerISA, we'll handle it in 332 * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use 333 * ctz or cto, we negate the mask at the end of the loop. 334 */ 335 target_ulong m, left = 0, right = 0; 336 unsigned int n, i = 64; 337 bool bit = false; /* tracks if we are processing zeros or ones */ 338 339 if (mask == 0 || mask == -1) { 340 return src; 341 } 342 343 /* Processes the mask in blocks, from LSB to MSB */ 344 while (i) { 345 /* Find how many bits we should take */ 346 n = ctz64(mask); 347 if (n > i) { 348 n = i; 349 } 350 351 /* 352 * Extracts 'n' trailing bits of src and put them on the leading 'n' 353 * bits of 'right' or 'left', pushing down the previously extracted 354 * values. 355 */ 356 m = (1ll << n) - 1; 357 if (bit) { 358 right = ror64(right | (src & m), n); 359 } else { 360 left = ror64(left | (src & m), n); 361 } 362 363 /* 364 * Discards the processed bits from 'src' and 'mask'. Note that we are 365 * removing 'n' trailing zeros from 'mask', but the logical shift will 366 * add 'n' leading zeros back, so the population count of 'mask' is kept 367 * the same. 368 */ 369 src >>= n; 370 mask >>= n; 371 i -= n; 372 bit = !bit; 373 mask = ~mask; 374 } 375 376 /* 377 * At the end, right was ror'ed ctpop(mask) times. To put it back in place, 378 * we'll shift it more 64-ctpop(mask) times. 
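     *
     * As an illustrative aside (an 8-bit analogue, not taken from the ISA
     * text): with src = abcdefgh and mask = 01100101, the bits selected by
     * the mask ones (b, c, f, h) are gathered, in order, into the low end
     * of the result and the remaining bits (a, d, e, g) into the high end,
     * so the result reads adegbcfh.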
379 */ 380 if (bit) { 381 n = ctpop64(mask); 382 } else { 383 n = 64 - ctpop64(mask); 384 } 385 386 return left | (right >> n); 387 } 388 389 uint64_t helper_PDEPD(uint64_t src, uint64_t mask) 390 { 391 int i, o; 392 uint64_t result = 0; 393 394 if (mask == -1) { 395 return src; 396 } 397 398 for (i = 0; mask != 0; i++) { 399 o = ctz64(mask); 400 mask &= mask - 1; 401 result |= ((src >> i) & 1) << o; 402 } 403 404 return result; 405 } 406 407 uint64_t helper_PEXTD(uint64_t src, uint64_t mask) 408 { 409 int i, o; 410 uint64_t result = 0; 411 412 if (mask == -1) { 413 return src; 414 } 415 416 for (o = 0; mask != 0; o++) { 417 i = ctz64(mask); 418 mask &= mask - 1; 419 result |= ((src >> i) & 1) << o; 420 } 421 422 return result; 423 } 424 425 /*****************************************************************************/ 426 /* PowerPC 601 specific instructions (POWER bridge) */ 427 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2) 428 { 429 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 430 431 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 432 (int32_t)arg2 == 0) { 433 env->spr[SPR_MQ] = 0; 434 return INT32_MIN; 435 } else { 436 env->spr[SPR_MQ] = tmp % arg2; 437 return tmp / (int32_t)arg2; 438 } 439 } 440 441 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1, 442 target_ulong arg2) 443 { 444 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 445 446 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 447 (int32_t)arg2 == 0) { 448 env->so = env->ov = 1; 449 env->spr[SPR_MQ] = 0; 450 return INT32_MIN; 451 } else { 452 env->spr[SPR_MQ] = tmp % arg2; 453 tmp /= (int32_t)arg2; 454 if ((int32_t)tmp != tmp) { 455 env->so = env->ov = 1; 456 } else { 457 env->ov = 0; 458 } 459 return tmp; 460 } 461 } 462 463 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1, 464 target_ulong arg2) 465 { 466 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 467 (int32_t)arg2 == 0) { 468 env->spr[SPR_MQ] = 0; 469 return INT32_MIN; 470 } else { 471 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 472 return (int32_t)arg1 / (int32_t)arg2; 473 } 474 } 475 476 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1, 477 target_ulong arg2) 478 { 479 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 480 (int32_t)arg2 == 0) { 481 env->so = env->ov = 1; 482 env->spr[SPR_MQ] = 0; 483 return INT32_MIN; 484 } else { 485 env->ov = 0; 486 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 487 return (int32_t)arg1 / (int32_t)arg2; 488 } 489 } 490 491 /*****************************************************************************/ 492 /* 602 specific instructions */ 493 /* mfrom is the most crazy instruction ever seen, imho ! */ 494 /* Real implementation uses a ROM table. 
Do the same */
/*
 * Extremely decomposed:
 *   return 256 * log10(10 ^ (-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c.inc"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers. */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                            \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
{
    ppc_store_vscr(env, vscr);
}

uint32_t helper_mfvscr(CPUPPCState *env)
{
    return ppc_get_vscr(env);
}

static inline void set_vscr_sat(CPUPPCState *env)
{
    /* The choice of non-zero value is arbitrary.
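     * Any non-zero value stored here is reported as VSCR[SAT] = 1 when the
     * guest reads VSCR back (helper_mfvscr() -> ppc_get_vscr()), so only the
     * zero/non-zero distinction matters.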
*/ 578 env->vscr_sat.u32[0] = 1; 579 } 580 581 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 582 { 583 int i; 584 585 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 586 r->u32[i] = ~a->u32[i] < b->u32[i]; 587 } 588 } 589 590 /* vprtybw */ 591 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b) 592 { 593 int i; 594 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 595 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16); 596 res ^= res >> 8; 597 r->u32[i] = res & 1; 598 } 599 } 600 601 /* vprtybd */ 602 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b) 603 { 604 int i; 605 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 606 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32); 607 res ^= res >> 16; 608 res ^= res >> 8; 609 r->u64[i] = res & 1; 610 } 611 } 612 613 /* vprtybq */ 614 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b) 615 { 616 uint64_t res = b->u64[0] ^ b->u64[1]; 617 res ^= res >> 32; 618 res ^= res >> 16; 619 res ^= res >> 8; 620 r->VsrD(1) = res & 1; 621 r->VsrD(0) = 0; 622 } 623 624 #define VARITHFP(suffix, func) \ 625 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 626 ppc_avr_t *b) \ 627 { \ 628 int i; \ 629 \ 630 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 631 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 632 } \ 633 } 634 VARITHFP(addfp, float32_add) 635 VARITHFP(subfp, float32_sub) 636 VARITHFP(minfp, float32_min) 637 VARITHFP(maxfp, float32_max) 638 #undef VARITHFP 639 640 #define VARITHFPFMA(suffix, type) \ 641 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 642 ppc_avr_t *b, ppc_avr_t *c) \ 643 { \ 644 int i; \ 645 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 646 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 647 type, &env->vec_status); \ 648 } \ 649 } 650 VARITHFPFMA(maddfp, 0); 651 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 652 #undef VARITHFPFMA 653 654 #define VARITHSAT_CASE(type, op, cvt, element) \ 655 { \ 656 type result = (type)a->element[i] op (type)b->element[i]; \ 657 r->element[i] = cvt(result, &sat); \ 658 } 659 660 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 661 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 662 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 663 { \ 664 int sat = 0; \ 665 int i; \ 666 \ 667 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 668 VARITHSAT_CASE(optype, op, cvt, element); \ 669 } \ 670 if (sat) { \ 671 vscr_sat->u32[0] = 1; \ 672 } \ 673 } 674 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 675 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 676 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 677 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 678 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 679 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 680 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 681 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 682 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 683 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 684 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 685 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 686 #undef VARITHSAT_CASE 687 #undef VARITHSAT_DO 688 #undef VARITHSAT_SIGNED 689 #undef VARITHSAT_UNSIGNED 690 691 #define VAVG_DO(name, element, etype) \ 692 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 693 { \ 694 int i; \ 695 \ 696 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 697 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 698 r->element[i] = x >> 1; \ 699 } \ 700 } 701 702 #define VAVG(type, 
signed_element, signed_type, unsigned_element, \ 703 unsigned_type) \ 704 VAVG_DO(avgs##type, signed_element, signed_type) \ 705 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 706 VAVG(b, s8, int16_t, u8, uint16_t) 707 VAVG(h, s16, int32_t, u16, uint32_t) 708 VAVG(w, s32, int64_t, u32, uint64_t) 709 #undef VAVG_DO 710 #undef VAVG 711 712 #define VABSDU_DO(name, element) \ 713 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 714 { \ 715 int i; \ 716 \ 717 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 718 r->element[i] = (a->element[i] > b->element[i]) ? \ 719 (a->element[i] - b->element[i]) : \ 720 (b->element[i] - a->element[i]); \ 721 } \ 722 } 723 724 /* 725 * VABSDU - Vector absolute difference unsigned 726 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 727 * element - element type to access from vector 728 */ 729 #define VABSDU(type, element) \ 730 VABSDU_DO(absdu##type, element) 731 VABSDU(b, u8) 732 VABSDU(h, u16) 733 VABSDU(w, u32) 734 #undef VABSDU_DO 735 #undef VABSDU 736 737 #define VCF(suffix, cvt, element) \ 738 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 739 ppc_avr_t *b, uint32_t uim) \ 740 { \ 741 int i; \ 742 \ 743 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 744 float32 t = cvt(b->element[i], &env->vec_status); \ 745 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 746 } \ 747 } 748 VCF(ux, uint32_to_float32, u32) 749 VCF(sx, int32_to_float32, s32) 750 #undef VCF 751 752 #define VCMP_DO(suffix, compare, element, record) \ 753 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 754 ppc_avr_t *a, ppc_avr_t *b) \ 755 { \ 756 uint64_t ones = (uint64_t)-1; \ 757 uint64_t all = ones; \ 758 uint64_t none = 0; \ 759 int i; \ 760 \ 761 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 762 uint64_t result = (a->element[i] compare b->element[i] ? \ 763 ones : 0x0); \ 764 switch (sizeof(a->element[0])) { \ 765 case 8: \ 766 r->u64[i] = result; \ 767 break; \ 768 case 4: \ 769 r->u32[i] = result; \ 770 break; \ 771 case 2: \ 772 r->u16[i] = result; \ 773 break; \ 774 case 1: \ 775 r->u8[i] = result; \ 776 break; \ 777 } \ 778 all &= result; \ 779 none |= result; \ 780 } \ 781 if (record) { \ 782 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 783 } \ 784 } 785 #define VCMP(suffix, compare, element) \ 786 VCMP_DO(suffix, compare, element, 0) \ 787 VCMP_DO(suffix##_dot, compare, element, 1) 788 VCMP(equb, ==, u8) 789 VCMP(equh, ==, u16) 790 VCMP(equw, ==, u32) 791 VCMP(equd, ==, u64) 792 VCMP(gtub, >, u8) 793 VCMP(gtuh, >, u16) 794 VCMP(gtuw, >, u32) 795 VCMP(gtud, >, u64) 796 VCMP(gtsb, >, s8) 797 VCMP(gtsh, >, s16) 798 VCMP(gtsw, >, s32) 799 VCMP(gtsd, >, s64) 800 #undef VCMP_DO 801 #undef VCMP 802 803 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \ 804 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \ 805 ppc_avr_t *a, ppc_avr_t *b) \ 806 { \ 807 etype ones = (etype)-1; \ 808 etype all = ones; \ 809 etype result, none = 0; \ 810 int i; \ 811 \ 812 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 813 if (cmpzero) { \ 814 result = ((a->element[i] == 0) \ 815 || (b->element[i] == 0) \ 816 || (a->element[i] != b->element[i]) ? \ 817 ones : 0x0); \ 818 } else { \ 819 result = (a->element[i] != b->element[i]) ? 
ones : 0x0; \ 820 } \ 821 r->element[i] = result; \ 822 all &= result; \ 823 none |= result; \ 824 } \ 825 if (record) { \ 826 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 827 } \ 828 } 829 830 /* 831 * VCMPNEZ - Vector compare not equal to zero 832 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word) 833 * element - element type to access from vector 834 */ 835 #define VCMPNE(suffix, element, etype, cmpzero) \ 836 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \ 837 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1) 838 VCMPNE(zb, u8, uint8_t, 1) 839 VCMPNE(zh, u16, uint16_t, 1) 840 VCMPNE(zw, u32, uint32_t, 1) 841 VCMPNE(b, u8, uint8_t, 0) 842 VCMPNE(h, u16, uint16_t, 0) 843 VCMPNE(w, u32, uint32_t, 0) 844 #undef VCMPNE_DO 845 #undef VCMPNE 846 847 #define VCMPFP_DO(suffix, compare, order, record) \ 848 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 849 ppc_avr_t *a, ppc_avr_t *b) \ 850 { \ 851 uint32_t ones = (uint32_t)-1; \ 852 uint32_t all = ones; \ 853 uint32_t none = 0; \ 854 int i; \ 855 \ 856 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 857 uint32_t result; \ 858 FloatRelation rel = \ 859 float32_compare_quiet(a->f32[i], b->f32[i], \ 860 &env->vec_status); \ 861 if (rel == float_relation_unordered) { \ 862 result = 0; \ 863 } else if (rel compare order) { \ 864 result = ones; \ 865 } else { \ 866 result = 0; \ 867 } \ 868 r->u32[i] = result; \ 869 all &= result; \ 870 none |= result; \ 871 } \ 872 if (record) { \ 873 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 874 } \ 875 } 876 #define VCMPFP(suffix, compare, order) \ 877 VCMPFP_DO(suffix, compare, order, 0) \ 878 VCMPFP_DO(suffix##_dot, compare, order, 1) 879 VCMPFP(eqfp, ==, float_relation_equal) 880 VCMPFP(gefp, !=, float_relation_less) 881 VCMPFP(gtfp, ==, float_relation_greater) 882 #undef VCMPFP_DO 883 #undef VCMPFP 884 885 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 886 ppc_avr_t *a, ppc_avr_t *b, int record) 887 { 888 int i; 889 int all_in = 0; 890 891 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 892 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 893 &env->vec_status); 894 if (le_rel == float_relation_unordered) { 895 r->u32[i] = 0xc0000000; 896 all_in = 1; 897 } else { 898 float32 bneg = float32_chs(b->f32[i]); 899 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, 900 &env->vec_status); 901 int le = le_rel != float_relation_greater; 902 int ge = ge_rel != float_relation_less; 903 904 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 905 all_in |= (!le | !ge); 906 } 907 } 908 if (record) { 909 env->crf[6] = (all_in == 0) << 1; 910 } 911 } 912 913 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 914 { 915 vcmpbfp_internal(env, r, a, b, 0); 916 } 917 918 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 919 ppc_avr_t *b) 920 { 921 vcmpbfp_internal(env, r, a, b, 1); 922 } 923 924 #define VCT(suffix, satcvt, element) \ 925 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 926 ppc_avr_t *b, uint32_t uim) \ 927 { \ 928 int i; \ 929 int sat = 0; \ 930 float_status s = env->vec_status; \ 931 \ 932 set_float_rounding_mode(float_round_to_zero, &s); \ 933 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 934 if (float32_is_any_nan(b->f32[i])) { \ 935 r->element[i] = 0; \ 936 } else { \ 937 float64 t = float32_to_float64(b->f32[i], &s); \ 938 int64_t j; \ 939 \ 940 t = float64_scalbn(t, uim, &s); \ 941 j = float64_to_int64(t, &s); \ 942 r->element[i] = satcvt(j, &sat); \ 943 } \ 944 
} \ 945 if (sat) { \ 946 set_vscr_sat(env); \ 947 } \ 948 } 949 VCT(uxs, cvtsduw, u32) 950 VCT(sxs, cvtsdsw, s32) 951 #undef VCT 952 953 target_ulong helper_vclzlsbb(ppc_avr_t *r) 954 { 955 target_ulong count = 0; 956 int i; 957 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 958 if (r->VsrB(i) & 0x01) { 959 break; 960 } 961 count++; 962 } 963 return count; 964 } 965 966 target_ulong helper_vctzlsbb(ppc_avr_t *r) 967 { 968 target_ulong count = 0; 969 int i; 970 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 971 if (r->VsrB(i) & 0x01) { 972 break; 973 } 974 count++; 975 } 976 return count; 977 } 978 979 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 980 ppc_avr_t *b, ppc_avr_t *c) 981 { 982 int sat = 0; 983 int i; 984 985 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 986 int32_t prod = a->s16[i] * b->s16[i]; 987 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 988 989 r->s16[i] = cvtswsh(t, &sat); 990 } 991 992 if (sat) { 993 set_vscr_sat(env); 994 } 995 } 996 997 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 998 ppc_avr_t *b, ppc_avr_t *c) 999 { 1000 int sat = 0; 1001 int i; 1002 1003 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1004 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 1005 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 1006 r->s16[i] = cvtswsh(t, &sat); 1007 } 1008 1009 if (sat) { 1010 set_vscr_sat(env); 1011 } 1012 } 1013 1014 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1015 { 1016 int i; 1017 1018 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1019 int32_t prod = a->s16[i] * b->s16[i]; 1020 r->s16[i] = (int16_t) (prod + c->s16[i]); 1021 } 1022 } 1023 1024 #define VMRG_DO(name, element, access, ofs) \ 1025 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1026 { \ 1027 ppc_avr_t result; \ 1028 int i, half = ARRAY_SIZE(r->element) / 2; \ 1029 \ 1030 for (i = 0; i < half; i++) { \ 1031 result.access(i * 2 + 0) = a->access(i + ofs); \ 1032 result.access(i * 2 + 1) = b->access(i + ofs); \ 1033 } \ 1034 *r = result; \ 1035 } 1036 1037 #define VMRG(suffix, element, access) \ 1038 VMRG_DO(mrgl##suffix, element, access, half) \ 1039 VMRG_DO(mrgh##suffix, element, access, 0) 1040 VMRG(b, u8, VsrB) 1041 VMRG(h, u16, VsrH) 1042 VMRG(w, u32, VsrW) 1043 #undef VMRG_DO 1044 #undef VMRG 1045 1046 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1047 ppc_avr_t *b, ppc_avr_t *c) 1048 { 1049 int32_t prod[16]; 1050 int i; 1051 1052 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 1053 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 1054 } 1055 1056 VECTOR_FOR_INORDER_I(i, s32) { 1057 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 1058 prod[4 * i + 2] + prod[4 * i + 3]; 1059 } 1060 } 1061 1062 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1063 ppc_avr_t *b, ppc_avr_t *c) 1064 { 1065 int32_t prod[8]; 1066 int i; 1067 1068 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1069 prod[i] = a->s16[i] * b->s16[i]; 1070 } 1071 1072 VECTOR_FOR_INORDER_I(i, s32) { 1073 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1074 } 1075 } 1076 1077 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1078 ppc_avr_t *b, ppc_avr_t *c) 1079 { 1080 int32_t prod[8]; 1081 int i; 1082 int sat = 0; 1083 1084 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1085 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1086 } 1087 1088 VECTOR_FOR_INORDER_I(i, s32) { 1089 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1090 1091 r->u32[i] = cvtsdsw(t, &sat); 1092 } 1093 1094 if (sat) { 1095 
set_vscr_sat(env); 1096 } 1097 } 1098 1099 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1100 ppc_avr_t *b, ppc_avr_t *c) 1101 { 1102 uint16_t prod[16]; 1103 int i; 1104 1105 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1106 prod[i] = a->u8[i] * b->u8[i]; 1107 } 1108 1109 VECTOR_FOR_INORDER_I(i, u32) { 1110 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1111 prod[4 * i + 2] + prod[4 * i + 3]; 1112 } 1113 } 1114 1115 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1116 ppc_avr_t *b, ppc_avr_t *c) 1117 { 1118 uint32_t prod[8]; 1119 int i; 1120 1121 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1122 prod[i] = a->u16[i] * b->u16[i]; 1123 } 1124 1125 VECTOR_FOR_INORDER_I(i, u32) { 1126 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1127 } 1128 } 1129 1130 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1131 ppc_avr_t *b, ppc_avr_t *c) 1132 { 1133 uint32_t prod[8]; 1134 int i; 1135 int sat = 0; 1136 1137 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1138 prod[i] = a->u16[i] * b->u16[i]; 1139 } 1140 1141 VECTOR_FOR_INORDER_I(i, s32) { 1142 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1143 1144 r->u32[i] = cvtuduw(t, &sat); 1145 } 1146 1147 if (sat) { 1148 set_vscr_sat(env); 1149 } 1150 } 1151 1152 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 1153 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1154 { \ 1155 int i; \ 1156 \ 1157 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1158 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 1159 (cast)b->mul_access(i); \ 1160 } \ 1161 } 1162 1163 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 1164 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1165 { \ 1166 int i; \ 1167 \ 1168 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1169 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1170 (cast)b->mul_access(i + 1); \ 1171 } \ 1172 } 1173 1174 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1175 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \ 1176 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast) 1177 VMUL(sb, s8, VsrSB, VsrSH, int16_t) 1178 VMUL(sh, s16, VsrSH, VsrSW, int32_t) 1179 VMUL(sw, s32, VsrSW, VsrSD, int64_t) 1180 VMUL(ub, u8, VsrB, VsrH, uint16_t) 1181 VMUL(uh, u16, VsrH, VsrW, uint32_t) 1182 VMUL(uw, u32, VsrW, VsrD, uint64_t) 1183 #undef VMUL_DO_EVN 1184 #undef VMUL_DO_ODD 1185 #undef VMUL 1186 1187 void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1188 { 1189 int i; 1190 1191 for (i = 0; i < 4; i++) { 1192 r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32); 1193 } 1194 } 1195 1196 void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1197 { 1198 int i; 1199 1200 for (i = 0; i < 4; i++) { 1201 r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] * 1202 (uint64_t)b->u32[i]) >> 32); 1203 } 1204 } 1205 1206 void helper_vmulhsd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1207 { 1208 uint64_t discard; 1209 1210 muls64(&discard, &r->u64[0], a->s64[0], b->s64[0]); 1211 muls64(&discard, &r->u64[1], a->s64[1], b->s64[1]); 1212 } 1213 1214 void helper_vmulhud(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1215 { 1216 uint64_t discard; 1217 1218 mulu64(&discard, &r->u64[0], a->u64[0], b->u64[0]); 1219 mulu64(&discard, &r->u64[1], a->u64[1], b->u64[1]); 1220 } 1221 1222 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1223 ppc_avr_t *c) 1224 { 1225 
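    /*
     * Note (added for clarity): each result byte i is selected by the low
     * 5 bits of byte i of c, indexing into the 32-byte concatenation of
     * a (indices 0..15) and b (indices 16..31); the loop below implements
     * exactly that.
     */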
ppc_avr_t result; 1226 int i; 1227 1228 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1229 int s = c->VsrB(i) & 0x1f; 1230 int index = s & 0xf; 1231 1232 if (s & 0x10) { 1233 result.VsrB(i) = b->VsrB(index); 1234 } else { 1235 result.VsrB(i) = a->VsrB(index); 1236 } 1237 } 1238 *r = result; 1239 } 1240 1241 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1242 ppc_avr_t *c) 1243 { 1244 ppc_avr_t result; 1245 int i; 1246 1247 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1248 int s = c->VsrB(i) & 0x1f; 1249 int index = 15 - (s & 0xf); 1250 1251 if (s & 0x10) { 1252 result.VsrB(i) = a->VsrB(index); 1253 } else { 1254 result.VsrB(i) = b->VsrB(index); 1255 } 1256 } 1257 *r = result; 1258 } 1259 1260 #if defined(HOST_WORDS_BIGENDIAN) 1261 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1262 #define VBPERMD_INDEX(i) (i) 1263 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1264 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) 1265 #else 1266 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1267 #define VBPERMD_INDEX(i) (1 - i) 1268 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1269 #define EXTRACT_BIT(avr, i, index) \ 1270 (extract64((avr)->u64[1 - i], 63 - index, 1)) 1271 #endif 1272 1273 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1274 { 1275 int i, j; 1276 ppc_avr_t result = { .u64 = { 0, 0 } }; 1277 VECTOR_FOR_INORDER_I(i, u64) { 1278 for (j = 0; j < 8; j++) { 1279 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1280 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1281 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1282 } 1283 } 1284 } 1285 *r = result; 1286 } 1287 1288 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1289 { 1290 int i; 1291 uint64_t perm = 0; 1292 1293 VECTOR_FOR_INORDER_I(i, u8) { 1294 int index = VBPERMQ_INDEX(b, i); 1295 1296 if (index < 128) { 1297 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1298 if (a->u64[VBPERMQ_DW(index)] & mask) { 1299 perm |= (0x8000 >> i); 1300 } 1301 } 1302 } 1303 1304 r->VsrD(0) = perm; 1305 r->VsrD(1) = 0; 1306 } 1307 1308 #undef VBPERMQ_INDEX 1309 #undef VBPERMQ_DW 1310 1311 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1312 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1313 { \ 1314 int i, j; \ 1315 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ 1316 \ 1317 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1318 prod[i] = 0; \ 1319 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1320 if (a->srcfld[i] & (1ull << j)) { \ 1321 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1322 } \ 1323 } \ 1324 } \ 1325 \ 1326 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1327 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ 1328 } \ 1329 } 1330 1331 PMSUM(vpmsumb, u8, u16, uint16_t) 1332 PMSUM(vpmsumh, u16, u32, uint32_t) 1333 PMSUM(vpmsumw, u32, u64, uint64_t) 1334 1335 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1336 { 1337 1338 #ifdef CONFIG_INT128 1339 int i, j; 1340 __uint128_t prod[2]; 1341 1342 VECTOR_FOR_INORDER_I(i, u64) { 1343 prod[i] = 0; 1344 for (j = 0; j < 64; j++) { 1345 if (a->u64[i] & (1ull << j)) { 1346 prod[i] ^= (((__uint128_t)b->u64[i]) << j); 1347 } 1348 } 1349 } 1350 1351 r->u128 = prod[0] ^ prod[1]; 1352 1353 #else 1354 int i, j; 1355 ppc_avr_t prod[2]; 1356 1357 VECTOR_FOR_INORDER_I(i, u64) { 1358 prod[i].VsrD(1) = prod[i].VsrD(0) = 0; 1359 for (j = 0; j < 64; j++) { 1360 if (a->u64[i] & (1ull << j)) { 1361 ppc_avr_t bshift; 1362 if (j == 0) { 1363 bshift.VsrD(0) = 0; 1364 bshift.VsrD(1) = b->u64[i]; 1365 } else { 1366 bshift.VsrD(0) = 
b->u64[i] >> (64 - j); 1367 bshift.VsrD(1) = b->u64[i] << j; 1368 } 1369 prod[i].VsrD(1) ^= bshift.VsrD(1); 1370 prod[i].VsrD(0) ^= bshift.VsrD(0); 1371 } 1372 } 1373 } 1374 1375 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1); 1376 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0); 1377 #endif 1378 } 1379 1380 1381 #if defined(HOST_WORDS_BIGENDIAN) 1382 #define PKBIG 1 1383 #else 1384 #define PKBIG 0 1385 #endif 1386 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1387 { 1388 int i, j; 1389 ppc_avr_t result; 1390 #if defined(HOST_WORDS_BIGENDIAN) 1391 const ppc_avr_t *x[2] = { a, b }; 1392 #else 1393 const ppc_avr_t *x[2] = { b, a }; 1394 #endif 1395 1396 VECTOR_FOR_INORDER_I(i, u64) { 1397 VECTOR_FOR_INORDER_I(j, u32) { 1398 uint32_t e = x[i]->u32[j]; 1399 1400 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1401 ((e >> 6) & 0x3e0) | 1402 ((e >> 3) & 0x1f)); 1403 } 1404 } 1405 *r = result; 1406 } 1407 1408 #define VPK(suffix, from, to, cvt, dosat) \ 1409 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1410 ppc_avr_t *a, ppc_avr_t *b) \ 1411 { \ 1412 int i; \ 1413 int sat = 0; \ 1414 ppc_avr_t result; \ 1415 ppc_avr_t *a0 = PKBIG ? a : b; \ 1416 ppc_avr_t *a1 = PKBIG ? b : a; \ 1417 \ 1418 VECTOR_FOR_INORDER_I(i, from) { \ 1419 result.to[i] = cvt(a0->from[i], &sat); \ 1420 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1421 } \ 1422 *r = result; \ 1423 if (dosat && sat) { \ 1424 set_vscr_sat(env); \ 1425 } \ 1426 } 1427 #define I(x, y) (x) 1428 VPK(shss, s16, s8, cvtshsb, 1) 1429 VPK(shus, s16, u8, cvtshub, 1) 1430 VPK(swss, s32, s16, cvtswsh, 1) 1431 VPK(swus, s32, u16, cvtswuh, 1) 1432 VPK(sdss, s64, s32, cvtsdsw, 1) 1433 VPK(sdus, s64, u32, cvtsduw, 1) 1434 VPK(uhus, u16, u8, cvtuhub, 1) 1435 VPK(uwus, u32, u16, cvtuwuh, 1) 1436 VPK(udus, u64, u32, cvtuduw, 1) 1437 VPK(uhum, u16, u8, I, 0) 1438 VPK(uwum, u32, u16, I, 0) 1439 VPK(udum, u64, u32, I, 0) 1440 #undef I 1441 #undef VPK 1442 #undef PKBIG 1443 1444 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1445 { 1446 int i; 1447 1448 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1449 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1450 } 1451 } 1452 1453 #define VRFI(suffix, rounding) \ 1454 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1455 ppc_avr_t *b) \ 1456 { \ 1457 int i; \ 1458 float_status s = env->vec_status; \ 1459 \ 1460 set_float_rounding_mode(rounding, &s); \ 1461 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1462 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1463 } \ 1464 } 1465 VRFI(n, float_round_nearest_even) 1466 VRFI(m, float_round_down) 1467 VRFI(p, float_round_up) 1468 VRFI(z, float_round_to_zero) 1469 #undef VRFI 1470 1471 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1472 { 1473 int i; 1474 1475 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1476 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1477 1478 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1479 } 1480 } 1481 1482 #define VRLMI(name, size, element, insert) \ 1483 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1484 { \ 1485 int i; \ 1486 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1487 uint##size##_t src1 = a->element[i]; \ 1488 uint##size##_t src2 = b->element[i]; \ 1489 uint##size##_t src3 = r->element[i]; \ 1490 uint##size##_t begin, end, shift, mask, rot_val; \ 1491 \ 1492 shift = extract##size(src2, 0, 6); \ 1493 end = extract##size(src2, 8, 6); \ 1494 begin = extract##size(src2, 16, 6); \ 1495 rot_val = 
rol##size(src1, shift); \ 1496 mask = mask_u##size(begin, end); \ 1497 if (insert) { \ 1498 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1499 } else { \ 1500 r->element[i] = (rot_val & mask); \ 1501 } \ 1502 } \ 1503 } 1504 1505 VRLMI(vrldmi, 64, u64, 1); 1506 VRLMI(vrlwmi, 32, u32, 1); 1507 VRLMI(vrldnm, 64, u64, 0); 1508 VRLMI(vrlwnm, 32, u32, 0); 1509 1510 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1511 ppc_avr_t *c) 1512 { 1513 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]); 1514 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]); 1515 } 1516 1517 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1518 { 1519 int i; 1520 1521 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1522 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status); 1523 } 1524 } 1525 1526 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1527 { 1528 int i; 1529 1530 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1531 r->f32[i] = float32_log2(b->f32[i], &env->vec_status); 1532 } 1533 } 1534 1535 #define VEXTU_X_DO(name, size, left) \ 1536 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1537 { \ 1538 int index = (a & 0xf) * 8; \ 1539 if (left) { \ 1540 index = 128 - index - size; \ 1541 } \ 1542 return int128_getlo(int128_rshift(b->s128, index)) & \ 1543 MAKE_64BIT_MASK(0, size); \ 1544 } 1545 VEXTU_X_DO(vextublx, 8, 1) 1546 VEXTU_X_DO(vextuhlx, 16, 1) 1547 VEXTU_X_DO(vextuwlx, 32, 1) 1548 VEXTU_X_DO(vextubrx, 8, 0) 1549 VEXTU_X_DO(vextuhrx, 16, 0) 1550 VEXTU_X_DO(vextuwrx, 32, 0) 1551 #undef VEXTU_X_DO 1552 1553 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1554 { 1555 int i; 1556 unsigned int shift, bytes, size; 1557 1558 size = ARRAY_SIZE(r->u8); 1559 for (i = 0; i < size; i++) { 1560 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1561 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */ 1562 (((i + 1) < size) ? a->VsrB(i + 1) : 0); 1563 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */ 1564 } 1565 } 1566 1567 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1568 { 1569 int i; 1570 unsigned int shift, bytes; 1571 1572 /* 1573 * Use reverse order, as destination and source register can be 1574 * same. Its being modified in place saving temporary, reverse 1575 * order will guarantee that computed result is not fed back. 1576 */ 1577 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1578 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1579 bytes = ((i ? 
a->VsrB(i - 1) : 0) << 8) + a->VsrB(i); 1580 /* extract adjacent bytes */ 1581 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */ 1582 } 1583 } 1584 1585 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1586 { 1587 int sh = shift & 0xf; 1588 int i; 1589 ppc_avr_t result; 1590 1591 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1592 int index = sh + i; 1593 if (index > 0xf) { 1594 result.VsrB(i) = b->VsrB(index - 0x10); 1595 } else { 1596 result.VsrB(i) = a->VsrB(index); 1597 } 1598 } 1599 *r = result; 1600 } 1601 1602 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1603 { 1604 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1605 1606 #if defined(HOST_WORDS_BIGENDIAN) 1607 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1608 memset(&r->u8[16 - sh], 0, sh); 1609 #else 1610 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1611 memset(&r->u8[0], 0, sh); 1612 #endif 1613 } 1614 1615 #if defined(HOST_WORDS_BIGENDIAN) 1616 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX]) 1617 #else 1618 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1) 1619 #endif 1620 1621 #define VINSX(SUFFIX, TYPE) \ 1622 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \ 1623 uint64_t val, target_ulong index) \ 1624 { \ 1625 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \ 1626 target_long idx = index; \ 1627 \ 1628 if (idx < 0 || idx > maxidx) { \ 1629 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \ 1630 qemu_log_mask(LOG_GUEST_ERROR, \ 1631 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \ 1632 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \ 1633 } else { \ 1634 TYPE src = val; \ 1635 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \ 1636 } \ 1637 } 1638 VINSX(B, uint8_t) 1639 VINSX(H, uint16_t) 1640 VINSX(W, uint32_t) 1641 VINSX(D, uint64_t) 1642 #undef ELEM_ADDR 1643 #undef VINSX 1644 #if defined(HOST_WORDS_BIGENDIAN) 1645 #define VEXTDVLX(NAME, SIZE) \ 1646 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1647 target_ulong index) \ 1648 { \ 1649 const target_long idx = index; \ 1650 ppc_avr_t tmp[2] = { *a, *b }; \ 1651 memset(t, 0, sizeof(*t)); \ 1652 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1653 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \ 1654 } else { \ 1655 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1656 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1657 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \ 1658 } \ 1659 } 1660 #else 1661 #define VEXTDVLX(NAME, SIZE) \ 1662 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1663 target_ulong index) \ 1664 { \ 1665 const target_long idx = index; \ 1666 ppc_avr_t tmp[2] = { *b, *a }; \ 1667 memset(t, 0, sizeof(*t)); \ 1668 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1669 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \ 1670 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \ 1671 } else { \ 1672 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1673 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1674 env->nip, idx < 0 ? 
SIZE - idx : idx, 32 - SIZE); \ 1675 } \ 1676 } 1677 #endif 1678 VEXTDVLX(VEXTDUBVLX, 1) 1679 VEXTDVLX(VEXTDUHVLX, 2) 1680 VEXTDVLX(VEXTDUWVLX, 4) 1681 VEXTDVLX(VEXTDDVLX, 8) 1682 #undef VEXTDVLX 1683 #if defined(HOST_WORDS_BIGENDIAN) 1684 #define VEXTRACT(suffix, element) \ 1685 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1686 { \ 1687 uint32_t es = sizeof(r->element[0]); \ 1688 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1689 memset(&r->u8[8], 0, 8); \ 1690 memset(&r->u8[0], 0, 8 - es); \ 1691 } 1692 #else 1693 #define VEXTRACT(suffix, element) \ 1694 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1695 { \ 1696 uint32_t es = sizeof(r->element[0]); \ 1697 uint32_t s = (16 - index) - es; \ 1698 memmove(&r->u8[8], &b->u8[s], es); \ 1699 memset(&r->u8[0], 0, 8); \ 1700 memset(&r->u8[8 + es], 0, 8 - es); \ 1701 } 1702 #endif 1703 VEXTRACT(ub, u8) 1704 VEXTRACT(uh, u16) 1705 VEXTRACT(uw, u32) 1706 VEXTRACT(d, u64) 1707 #undef VEXTRACT 1708 1709 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt, 1710 ppc_vsr_t *xb, uint32_t index) 1711 { 1712 ppc_vsr_t t = { }; 1713 size_t es = sizeof(uint32_t); 1714 uint32_t ext_index; 1715 int i; 1716 1717 ext_index = index; 1718 for (i = 0; i < es; i++, ext_index++) { 1719 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16); 1720 } 1721 1722 *xt = t; 1723 } 1724 1725 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt, 1726 ppc_vsr_t *xb, uint32_t index) 1727 { 1728 ppc_vsr_t t = *xt; 1729 size_t es = sizeof(uint32_t); 1730 int ins_index, i = 0; 1731 1732 ins_index = index; 1733 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1734 t.VsrB(ins_index) = xb->VsrB(8 - es + i); 1735 } 1736 1737 *xt = t; 1738 } 1739 1740 #define XXBLEND(name, sz) \ 1741 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1742 ppc_avr_t *c, uint32_t desc) \ 1743 { \ 1744 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \ 1745 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? 
\ 1746 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \ 1747 } \ 1748 } 1749 XXBLEND(B, 8) 1750 XXBLEND(H, 16) 1751 XXBLEND(W, 32) 1752 XXBLEND(D, 64) 1753 #undef XXBLEND 1754 1755 #define VEXT_SIGNED(name, element, cast) \ 1756 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1757 { \ 1758 int i; \ 1759 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1760 r->element[i] = (cast)b->element[i]; \ 1761 } \ 1762 } 1763 VEXT_SIGNED(vextsb2w, s32, int8_t) 1764 VEXT_SIGNED(vextsb2d, s64, int8_t) 1765 VEXT_SIGNED(vextsh2w, s32, int16_t) 1766 VEXT_SIGNED(vextsh2d, s64, int16_t) 1767 VEXT_SIGNED(vextsw2d, s64, int32_t) 1768 #undef VEXT_SIGNED 1769 1770 #define VNEG(name, element) \ 1771 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1772 { \ 1773 int i; \ 1774 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1775 r->element[i] = -b->element[i]; \ 1776 } \ 1777 } 1778 VNEG(vnegw, s32) 1779 VNEG(vnegd, s64) 1780 #undef VNEG 1781 1782 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1783 { 1784 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1785 1786 #if defined(HOST_WORDS_BIGENDIAN) 1787 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1788 memset(&r->u8[0], 0, sh); 1789 #else 1790 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1791 memset(&r->u8[16 - sh], 0, sh); 1792 #endif 1793 } 1794 1795 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1796 { 1797 int i; 1798 1799 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1800 r->u32[i] = a->u32[i] >= b->u32[i]; 1801 } 1802 } 1803 1804 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1805 { 1806 int64_t t; 1807 int i, upper; 1808 ppc_avr_t result; 1809 int sat = 0; 1810 1811 upper = ARRAY_SIZE(r->s32) - 1; 1812 t = (int64_t)b->VsrSW(upper); 1813 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1814 t += a->VsrSW(i); 1815 result.VsrSW(i) = 0; 1816 } 1817 result.VsrSW(upper) = cvtsdsw(t, &sat); 1818 *r = result; 1819 1820 if (sat) { 1821 set_vscr_sat(env); 1822 } 1823 } 1824 1825 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1826 { 1827 int i, j, upper; 1828 ppc_avr_t result; 1829 int sat = 0; 1830 1831 upper = 1; 1832 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1833 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 1834 1835 result.VsrD(i) = 0; 1836 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 1837 t += a->VsrSW(2 * i + j); 1838 } 1839 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 1840 } 1841 1842 *r = result; 1843 if (sat) { 1844 set_vscr_sat(env); 1845 } 1846 } 1847 1848 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1849 { 1850 int i, j; 1851 int sat = 0; 1852 1853 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1854 int64_t t = (int64_t)b->s32[i]; 1855 1856 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 1857 t += a->s8[4 * i + j]; 1858 } 1859 r->s32[i] = cvtsdsw(t, &sat); 1860 } 1861 1862 if (sat) { 1863 set_vscr_sat(env); 1864 } 1865 } 1866 1867 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1868 { 1869 int sat = 0; 1870 int i; 1871 1872 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1873 int64_t t = (int64_t)b->s32[i]; 1874 1875 t += a->s16[2 * i] + a->s16[2 * i + 1]; 1876 r->s32[i] = cvtsdsw(t, &sat); 1877 } 1878 1879 if (sat) { 1880 set_vscr_sat(env); 1881 } 1882 } 1883 1884 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1885 { 1886 int i, j; 1887 int sat = 0; 1888 1889 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1890 uint64_t t = (uint64_t)b->u32[i]; 1891 1892 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 1893 t += a->u8[4 
* i + j]; 1894 } 1895 r->u32[i] = cvtuduw(t, &sat); 1896 } 1897 1898 if (sat) { 1899 set_vscr_sat(env); 1900 } 1901 } 1902 1903 #if defined(HOST_WORDS_BIGENDIAN) 1904 #define UPKHI 1 1905 #define UPKLO 0 1906 #else 1907 #define UPKHI 0 1908 #define UPKLO 1 1909 #endif 1910 #define VUPKPX(suffix, hi) \ 1911 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1912 { \ 1913 int i; \ 1914 ppc_avr_t result; \ 1915 \ 1916 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 1917 uint16_t e = b->u16[hi ? i : i + 4]; \ 1918 uint8_t a = (e >> 15) ? 0xff : 0; \ 1919 uint8_t r = (e >> 10) & 0x1f; \ 1920 uint8_t g = (e >> 5) & 0x1f; \ 1921 uint8_t b = e & 0x1f; \ 1922 \ 1923 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 1924 } \ 1925 *r = result; \ 1926 } 1927 VUPKPX(lpx, UPKLO) 1928 VUPKPX(hpx, UPKHI) 1929 #undef VUPKPX 1930 1931 #define VUPK(suffix, unpacked, packee, hi) \ 1932 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1933 { \ 1934 int i; \ 1935 ppc_avr_t result; \ 1936 \ 1937 if (hi) { \ 1938 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 1939 result.unpacked[i] = b->packee[i]; \ 1940 } \ 1941 } else { \ 1942 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 1943 i++) { \ 1944 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 1945 } \ 1946 } \ 1947 *r = result; \ 1948 } 1949 VUPK(hsb, s16, s8, UPKHI) 1950 VUPK(hsh, s32, s16, UPKHI) 1951 VUPK(hsw, s64, s32, UPKHI) 1952 VUPK(lsb, s16, s8, UPKLO) 1953 VUPK(lsh, s32, s16, UPKLO) 1954 VUPK(lsw, s64, s32, UPKLO) 1955 #undef VUPK 1956 #undef UPKHI 1957 #undef UPKLO 1958 1959 #define VGENERIC_DO(name, element) \ 1960 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 1961 { \ 1962 int i; \ 1963 \ 1964 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1965 r->element[i] = name(b->element[i]); \ 1966 } \ 1967 } 1968 1969 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 1970 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 1971 1972 VGENERIC_DO(clzb, u8) 1973 VGENERIC_DO(clzh, u16) 1974 1975 #undef clzb 1976 #undef clzh 1977 1978 #define ctzb(v) ((v) ? ctz32(v) : 8) 1979 #define ctzh(v) ((v) ? 
ctz32(v) : 16) 1980 #define ctzw(v) ctz32((v)) 1981 #define ctzd(v) ctz64((v)) 1982 1983 VGENERIC_DO(ctzb, u8) 1984 VGENERIC_DO(ctzh, u16) 1985 VGENERIC_DO(ctzw, u32) 1986 VGENERIC_DO(ctzd, u64) 1987 1988 #undef ctzb 1989 #undef ctzh 1990 #undef ctzw 1991 #undef ctzd 1992 1993 #define popcntb(v) ctpop8(v) 1994 #define popcnth(v) ctpop16(v) 1995 #define popcntw(v) ctpop32(v) 1996 #define popcntd(v) ctpop64(v) 1997 1998 VGENERIC_DO(popcntb, u8) 1999 VGENERIC_DO(popcnth, u16) 2000 VGENERIC_DO(popcntw, u32) 2001 VGENERIC_DO(popcntd, u64) 2002 2003 #undef popcntb 2004 #undef popcnth 2005 #undef popcntw 2006 #undef popcntd 2007 2008 #undef VGENERIC_DO 2009 2010 #if defined(HOST_WORDS_BIGENDIAN) 2011 #define QW_ONE { .u64 = { 0, 1 } } 2012 #else 2013 #define QW_ONE { .u64 = { 1, 0 } } 2014 #endif 2015 2016 #ifndef CONFIG_INT128 2017 2018 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) 2019 { 2020 t->u64[0] = ~a.u64[0]; 2021 t->u64[1] = ~a.u64[1]; 2022 } 2023 2024 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) 2025 { 2026 if (a.VsrD(0) < b.VsrD(0)) { 2027 return -1; 2028 } else if (a.VsrD(0) > b.VsrD(0)) { 2029 return 1; 2030 } else if (a.VsrD(1) < b.VsrD(1)) { 2031 return -1; 2032 } else if (a.VsrD(1) > b.VsrD(1)) { 2033 return 1; 2034 } else { 2035 return 0; 2036 } 2037 } 2038 2039 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2040 { 2041 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 2042 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 2043 (~a.VsrD(1) < b.VsrD(1)); 2044 } 2045 2046 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2047 { 2048 ppc_avr_t not_a; 2049 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 2050 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 2051 (~a.VsrD(1) < b.VsrD(1)); 2052 avr_qw_not(¬_a, a); 2053 return avr_qw_cmpu(not_a, b) < 0; 2054 } 2055 2056 #endif 2057 2058 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2059 { 2060 #ifdef CONFIG_INT128 2061 r->u128 = a->u128 + b->u128; 2062 #else 2063 avr_qw_add(r, *a, *b); 2064 #endif 2065 } 2066 2067 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2068 { 2069 #ifdef CONFIG_INT128 2070 r->u128 = a->u128 + b->u128 + (c->u128 & 1); 2071 #else 2072 2073 if (c->VsrD(1) & 1) { 2074 ppc_avr_t tmp; 2075 2076 tmp.VsrD(0) = 0; 2077 tmp.VsrD(1) = c->VsrD(1) & 1; 2078 avr_qw_add(&tmp, *a, tmp); 2079 avr_qw_add(r, tmp, *b); 2080 } else { 2081 avr_qw_add(r, *a, *b); 2082 } 2083 #endif 2084 } 2085 2086 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2087 { 2088 #ifdef CONFIG_INT128 2089 r->u128 = (~a->u128 < b->u128); 2090 #else 2091 ppc_avr_t not_a; 2092 2093 avr_qw_not(¬_a, *a); 2094 2095 r->VsrD(0) = 0; 2096 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0); 2097 #endif 2098 } 2099 2100 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2101 { 2102 #ifdef CONFIG_INT128 2103 int carry_out = (~a->u128 < b->u128); 2104 if (!carry_out && (c->u128 & 1)) { 2105 carry_out = ((a->u128 + b->u128 + 1) == 0) && 2106 ((a->u128 != 0) || (b->u128 != 0)); 2107 } 2108 r->u128 = carry_out; 2109 #else 2110 2111 int carry_in = c->VsrD(1) & 1; 2112 int carry_out = 0; 2113 ppc_avr_t tmp; 2114 2115 carry_out = avr_qw_addc(&tmp, *a, *b); 2116 2117 if (!carry_out && carry_in) { 2118 ppc_avr_t one = QW_ONE; 2119 carry_out = avr_qw_addc(&tmp, tmp, one); 2120 } 2121 r->VsrD(0) = 0; 2122 r->VsrD(1) = carry_out; 2123 #endif 2124 } 2125 2126 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2127 { 2128 #ifdef CONFIG_INT128 2129 r->u128 = a->u128 - b->u128; 2130 #else 2131 ppc_avr_t 
tmp; 2132 ppc_avr_t one = QW_ONE; 2133 2134 avr_qw_not(&tmp, *b); 2135 avr_qw_add(&tmp, *a, tmp); 2136 avr_qw_add(r, tmp, one); 2137 #endif 2138 } 2139 2140 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2141 { 2142 #ifdef CONFIG_INT128 2143 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); 2144 #else 2145 ppc_avr_t tmp, sum; 2146 2147 avr_qw_not(&tmp, *b); 2148 avr_qw_add(&sum, *a, tmp); 2149 2150 tmp.VsrD(0) = 0; 2151 tmp.VsrD(1) = c->VsrD(1) & 1; 2152 avr_qw_add(r, sum, tmp); 2153 #endif 2154 } 2155 2156 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2157 { 2158 #ifdef CONFIG_INT128 2159 r->u128 = (~a->u128 < ~b->u128) || 2160 (a->u128 + ~b->u128 == (__uint128_t)-1); 2161 #else 2162 int carry = (avr_qw_cmpu(*a, *b) > 0); 2163 if (!carry) { 2164 ppc_avr_t tmp; 2165 avr_qw_not(&tmp, *b); 2166 avr_qw_add(&tmp, *a, tmp); 2167 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull)); 2168 } 2169 r->VsrD(0) = 0; 2170 r->VsrD(1) = carry; 2171 #endif 2172 } 2173 2174 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2175 { 2176 #ifdef CONFIG_INT128 2177 r->u128 = 2178 (~a->u128 < ~b->u128) || 2179 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); 2180 #else 2181 int carry_in = c->VsrD(1) & 1; 2182 int carry_out = (avr_qw_cmpu(*a, *b) > 0); 2183 if (!carry_out && carry_in) { 2184 ppc_avr_t tmp; 2185 avr_qw_not(&tmp, *b); 2186 avr_qw_add(&tmp, *a, tmp); 2187 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull)); 2188 } 2189 2190 r->VsrD(0) = 0; 2191 r->VsrD(1) = carry_out; 2192 #endif 2193 } 2194 2195 #define BCD_PLUS_PREF_1 0xC 2196 #define BCD_PLUS_PREF_2 0xF 2197 #define BCD_PLUS_ALT_1 0xA 2198 #define BCD_NEG_PREF 0xD 2199 #define BCD_NEG_ALT 0xB 2200 #define BCD_PLUS_ALT_2 0xE 2201 #define NATIONAL_PLUS 0x2B 2202 #define NATIONAL_NEG 0x2D 2203 2204 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2205 2206 static int bcd_get_sgn(ppc_avr_t *bcd) 2207 { 2208 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) { 2209 case BCD_PLUS_PREF_1: 2210 case BCD_PLUS_PREF_2: 2211 case BCD_PLUS_ALT_1: 2212 case BCD_PLUS_ALT_2: 2213 { 2214 return 1; 2215 } 2216 2217 case BCD_NEG_PREF: 2218 case BCD_NEG_ALT: 2219 { 2220 return -1; 2221 } 2222 2223 default: 2224 { 2225 return 0; 2226 } 2227 } 2228 } 2229 2230 static int bcd_preferred_sgn(int sgn, int ps) 2231 { 2232 if (sgn >= 0) { 2233 return (ps == 0) ? 
static int bcd_preferred_sgn(int sgn, int ps)
{
    if (sgn >= 0) {
        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
    } else {
        return BCD_NEG_PREF;
    }
}

static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
{
    uint8_t result;
    if (n & 1) {
        result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
    } else {
        result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
    }

    if (unlikely(result > 9)) {
        *invalid = true;
    }
    return result;
}

static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
{
    if (n & 1) {
        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
        bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
    } else {
        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
        bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
    }
}

static bool bcd_is_valid(ppc_avr_t *bcd)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(bcd) == 0) {
        return false;
    }

    for (i = 1; i < 32; i++) {
        bcd_get_digit(bcd, i, &invalid);
        if (unlikely(invalid)) {
            return false;
        }
    }
    return true;
}

static int bcd_cmp_zero(ppc_avr_t *bcd)
{
    if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
        return CRF_EQ;
    } else {
        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
    }
}

static uint16_t get_national_digit(ppc_avr_t *reg, int n)
{
    return reg->VsrH(7 - n);
}

static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
{
    reg->VsrH(7 - n) = val;
}

static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;
    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}

static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
    return is_zero;
}

static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                        int *overflow)
{
    int carry = 0;
    int i;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
                        bcd_get_digit(b, i, invalid) + carry;
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
}

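/*
 * bcdadd/bcdsub below work on sign-and-magnitude operands: equal signs add
 * the magnitudes, differing signs subtract the smaller magnitude from the
 * larger and take the sign of the larger.  As a worked example (digits
 * only), 38 + 45 in bcd_add_mag(): digit 1 is 8 + 5 = 13 -> store 3 and
 * carry 1; digit 2 is 3 + 4 + 1 = 8 -> result 83, no overflow carry.
 */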
uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{

    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    int zero = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? CRF_GT : CRF_LT;
        } else {
            int magnitude = bcd_cmp_mag(a, b);
            if (magnitude > 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
                bcd_sub_mag(&result, a, b, &invalid, &overflow);
                cr = (sgna > 0) ? CRF_GT : CRF_LT;
            } else if (magnitude < 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
                bcd_sub_mag(&result, b, a, &invalid, &overflow);
                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
            } else {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
                cr = CRF_EQ;
            }
        }
    }

    if (unlikely(invalid)) {
        result.VsrD(0) = result.VsrD(1) = -1;
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    } else if (zero) {
        cr |= CRF_EQ;
    }

    *r = result;

    return cr;
}

uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

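/*
 * Format note: helper_bcdcfn/helper_bcdctn above convert between packed
 * decimal and the "national" format (one 16-bit character per digit,
 * '0'..'9', with a leading 0x2B/0x2D sign), while helper_bcdcfz and
 * helper_bcdctz below handle the zoned format, where each byte carries a
 * digit in its low nibble and a zone (0x3, or 0xF when PS is set) in its
 * high nibble, with the sign encoded in the zone of the least significant
 * byte.
 */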
uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
        digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
        (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->VsrD(0) >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

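/*
 * The signed-quadword conversions below (bcdcfsq/bcdctsq) move between a
 * 128-bit two's complement integer and the 31-digit packed decimal form.
 * Only values with abs(src) <= 10^31 - 1 are representable; bcdcfsq splits
 * the magnitude with a single divu128() by 10^15, so the remainder supplies
 * digits 1..15 and the quotient supplies the remaining digits.
 */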
/**
 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
 *
 * Returns:
 * > 0 if ahi|alo > bhi|blo,
 * 0 if ahi|alo == bhi|blo,
 * < 0 if ahi|alo < bhi|blo
 */
static inline int ucmp128(uint64_t alo, uint64_t ahi,
                          uint64_t blo, uint64_t bhi)
{
    return (ahi == bhi) ?
        (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
        (ahi > bhi ? 1 : -1);
}

uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr;
    uint64_t lo_value;
    uint64_t hi_value;
    uint64_t rem;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->VsrSD(0) < 0) {
        lo_value = -b->VsrSD(1);
        hi_value = ~b->VsrD(0) + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);

        cr = CRF_LT;
    } else {
        lo_value = b->VsrD(1);
        hi_value = b->VsrD(0);
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);

        if (hi_value == 0 && lo_value == 0) {
            cr = CRF_EQ;
        } else {
            cr = CRF_GT;
        }
    }

    /*
     * Check src limits: abs(src) <= 10^31 - 1
     *
     * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
     */
    if (ucmp128(lo_value, hi_value,
                0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
        cr |= CRF_SO;

        /*
         * According to the ISA, if src wouldn't fit in the destination
         * register, the result is undefined.
         * In that case, we leave r unchanged.
         */
    } else {
        rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);

        for (i = 1; i < 16; rem /= 10, i++) {
            bcd_put_digit(&ret, rem % 10, i);
        }

        for (; i < 32; lo_value /= 10, i++) {
            bcd_put_digit(&ret, lo_value % 10, i);
        }

        *r = ret;
    }

    return cr;
}

uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->VsrSD(1) = -lo_value;
        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
    } else {
        r->VsrSD(1) = lo_value;
        r->VsrSD(0) = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);

    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i = a->VsrSB(7);
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

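/*
 * bcds (above) and bcdus/bcdsr (below) shift the packed decimal magnitude
 * by a signed digit count taken from byte 7 of ra; each digit occupies one
 * nibble, hence the i * 4 bit shifts.  bcds preserves a preferred sign
 * nibble, bcdus treats the operand as unsigned, and bcdsr rounds the value
 * when shifting right.
 */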
uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSB(7);
    if (i >= 32) {
        ox_flag = true;
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i <= -32) {
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    int i = a->VsrSB(7);
    ppc_avr_t bcd_one;

    bcd_one.VsrD(0) = 0;
    bcd_one.VsrD(1) = 0x10;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);

        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}

uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
    int i = a->VsrSH(3) + 1;
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i >= 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}

uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSH(3);
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    } else if (i == 0) {
        if (ret.VsrD(0) || ret.VsrD(1)) {
            ox_flag = CRF_SO;
        }
        ret.VsrD(0) = ret.VsrD(1) = 0;
    }

    *r = ret;
    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}

void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.VsrW(i) = b->VsrW(i) ^
            (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}

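/*
 * helper_vcipher above performs one full AES encryption round (SubBytes,
 * ShiftRows, MixColumns, AddRoundKey) using the combined AES_Te* lookup
 * tables from crypto/aes.h; helper_vcipherlast below is the final round,
 * which omits MixColumns and therefore only needs the plain S-box.
 */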
void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
    }
    *r = result;
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07. The RTL is */
    /* incorrect and will be fixed in V2.07B. */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->VsrW(i) =
            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
            AES_imc[tmp.VsrB(4 * i + 3)][3];
    }
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
    }
    *r = result;
}

void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}

void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

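/*
 * byte_reverse() mirrors the bit order of a byte using two nibble lookups,
 * e.g. 0xA2 (1010 0010) -> hbrev[0xA] | (hbrev[0x2] << 4) = 0x45 (0100 0101).
 */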
static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}