1 /* 2 * PowerPC integer and vector emulation helpers for QEMU. 3 * 4 * Copyright (c) 2003-2007 Jocelyn Mayer 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "cpu.h" 22 #include "internal.h" 23 #include "qemu/host-utils.h" 24 #include "qemu/main-loop.h" 25 #include "qemu/log.h" 26 #include "exec/helper-proto.h" 27 #include "crypto/aes.h" 28 #include "fpu/softfloat.h" 29 #include "qapi/error.h" 30 #include "qemu/guest-random.h" 31 32 #include "helper_regs.h" 33 /*****************************************************************************/ 34 /* Fixed point operations helpers */ 35 36 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov) 37 { 38 if (unlikely(ov)) { 39 env->so = env->ov = 1; 40 } else { 41 env->ov = 0; 42 } 43 } 44 45 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, 46 uint32_t oe) 47 { 48 uint64_t rt = 0; 49 int overflow = 0; 50 51 uint64_t dividend = (uint64_t)ra << 32; 52 uint64_t divisor = (uint32_t)rb; 53 54 if (unlikely(divisor == 0)) { 55 overflow = 1; 56 } else { 57 rt = dividend / divisor; 58 overflow = rt > UINT32_MAX; 59 } 60 61 if (unlikely(overflow)) { 62 rt = 0; /* Undefined */ 63 } 64 65 if (oe) { 66 helper_update_ov_legacy(env, overflow); 67 } 68 69 return (target_ulong)rt; 70 } 71 72 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, 73 uint32_t oe) 74 { 75 int64_t rt = 0; 76 int overflow = 0; 77 78 int64_t dividend = (int64_t)ra << 32; 79 int64_t divisor = (int64_t)((int32_t)rb); 80 81 if (unlikely((divisor == 0) || 82 ((divisor == -1ull) && (dividend == INT64_MIN)))) { 83 overflow = 1; 84 } else { 85 rt = dividend / divisor; 86 overflow = rt != (int32_t)rt; 87 } 88 89 if (unlikely(overflow)) { 90 rt = 0; /* Undefined */ 91 } 92 93 if (oe) { 94 helper_update_ov_legacy(env, overflow); 95 } 96 97 return (target_ulong)rt; 98 } 99 100 #if defined(TARGET_PPC64) 101 102 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) 103 { 104 uint64_t rt = 0; 105 int overflow = 0; 106 107 if (unlikely(rb == 0 || ra >= rb)) { 108 overflow = 1; 109 rt = 0; /* Undefined */ 110 } else { 111 divu128(&rt, &ra, rb); 112 } 113 114 if (oe) { 115 helper_update_ov_legacy(env, overflow); 116 } 117 118 return rt; 119 } 120 121 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) 122 { 123 uint64_t rt = 0; 124 int64_t ra = (int64_t)rau; 125 int64_t rb = (int64_t)rbu; 126 int overflow = 0; 127 128 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) { 129 overflow = 1; 130 rt = 0; /* Undefined */ 131 } else { 132 divs128(&rt, &ra, rb); 133 } 134 135 if (oe) { 136 helper_update_ov_legacy(env, overflow); 137 } 138 139 return rt; 140 } 141 142 #endif 143 144 145 #if defined(TARGET_PPC64) 146 /* if x = 0xab, returns 0xababababababababa */ 147 #define pattern(x) (((x) & 0xff) * 
(~(target_ulong)0 / 0xff)) 148 149 /* 150 * subtract 1 from each byte, and with inverse, check if MSB is set at each 151 * byte. 152 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80 153 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 154 */ 155 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 156 157 /* When you XOR the pattern and there is a match, that byte will be zero */ 158 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 159 160 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 161 { 162 return hasvalue(rb, ra) ? CRF_GT : 0; 163 } 164 165 #undef pattern 166 #undef haszero 167 #undef hasvalue 168 169 /* 170 * Return a random number. 171 */ 172 uint64_t helper_darn32(void) 173 { 174 Error *err = NULL; 175 uint32_t ret; 176 177 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 178 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 179 error_get_pretty(err)); 180 error_free(err); 181 return -1; 182 } 183 184 return ret; 185 } 186 187 uint64_t helper_darn64(void) 188 { 189 Error *err = NULL; 190 uint64_t ret; 191 192 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 193 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 194 error_get_pretty(err)); 195 error_free(err); 196 return -1; 197 } 198 199 return ret; 200 } 201 202 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 203 { 204 int i; 205 uint64_t ra = 0; 206 207 for (i = 0; i < 8; i++) { 208 int index = (rs >> (i * 8)) & 0xFF; 209 if (index < 64) { 210 if (rb & PPC_BIT(index)) { 211 ra |= 1 << i; 212 } 213 } 214 } 215 return ra; 216 } 217 218 #endif 219 220 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 221 { 222 target_ulong mask = 0xff; 223 target_ulong ra = 0; 224 int i; 225 226 for (i = 0; i < sizeof(target_ulong); i++) { 227 if ((rs & mask) == (rb & mask)) { 228 ra |= mask; 229 } 230 mask <<= 8; 231 } 232 return ra; 233 } 234 235 /* shift right arithmetic helper */ 236 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 237 target_ulong shift) 238 { 239 int32_t ret; 240 241 if (likely(!(shift & 0x20))) { 242 if (likely((uint32_t)shift != 0)) { 243 shift &= 0x1f; 244 ret = (int32_t)value >> shift; 245 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 246 env->ca32 = env->ca = 0; 247 } else { 248 env->ca32 = env->ca = 1; 249 } 250 } else { 251 ret = (int32_t)value; 252 env->ca32 = env->ca = 0; 253 } 254 } else { 255 ret = (int32_t)value >> 31; 256 env->ca32 = env->ca = (ret != 0); 257 } 258 return (target_long)ret; 259 } 260 261 #if defined(TARGET_PPC64) 262 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 263 target_ulong shift) 264 { 265 int64_t ret; 266 267 if (likely(!(shift & 0x40))) { 268 if (likely((uint64_t)shift != 0)) { 269 shift &= 0x3f; 270 ret = (int64_t)value >> shift; 271 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 272 env->ca32 = env->ca = 0; 273 } else { 274 env->ca32 = env->ca = 1; 275 } 276 } else { 277 ret = (int64_t)value; 278 env->ca32 = env->ca = 0; 279 } 280 } else { 281 ret = (int64_t)value >> 63; 282 env->ca32 = env->ca = (ret != 0); 283 } 284 return ret; 285 } 286 #endif 287 288 #if defined(TARGET_PPC64) 289 target_ulong helper_popcntb(target_ulong val) 290 { 291 /* Note that we don't fold past bytes */ 292 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 293 0x5555555555555555ULL); 294 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 295 0x3333333333333333ULL); 296 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 297 0x0f0f0f0f0f0f0f0fULL); 298 return val; 299 } 300 301 target_ulong 
helper_popcntw(target_ulong val) 302 { 303 /* Note that we don't fold past words. */ 304 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 305 0x5555555555555555ULL); 306 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 307 0x3333333333333333ULL); 308 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 309 0x0f0f0f0f0f0f0f0fULL); 310 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 311 0x00ff00ff00ff00ffULL); 312 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 313 0x0000ffff0000ffffULL); 314 return val; 315 } 316 #else 317 target_ulong helper_popcntb(target_ulong val) 318 { 319 /* Note that we don't fold past bytes */ 320 val = (val & 0x55555555) + ((val >> 1) & 0x55555555); 321 val = (val & 0x33333333) + ((val >> 2) & 0x33333333); 322 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); 323 return val; 324 } 325 #endif 326 327 uint64_t helper_CFUGED(uint64_t src, uint64_t mask) 328 { 329 /* 330 * Instead of processing the mask bit-by-bit from the most significant to 331 * the least significant bit, as described in PowerISA, we'll handle it in 332 * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use 333 * ctz or cto, we negate the mask at the end of the loop. 334 */ 335 target_ulong m, left = 0, right = 0; 336 unsigned int n, i = 64; 337 bool bit = false; /* tracks if we are processing zeros or ones */ 338 339 if (mask == 0 || mask == -1) { 340 return src; 341 } 342 343 /* Processes the mask in blocks, from LSB to MSB */ 344 while (i) { 345 /* Find how many bits we should take */ 346 n = ctz64(mask); 347 if (n > i) { 348 n = i; 349 } 350 351 /* 352 * Extracts 'n' trailing bits of src and put them on the leading 'n' 353 * bits of 'right' or 'left', pushing down the previously extracted 354 * values. 355 */ 356 m = (1ll << n) - 1; 357 if (bit) { 358 right = ror64(right | (src & m), n); 359 } else { 360 left = ror64(left | (src & m), n); 361 } 362 363 /* 364 * Discards the processed bits from 'src' and 'mask'. Note that we are 365 * removing 'n' trailing zeros from 'mask', but the logical shift will 366 * add 'n' leading zeros back, so the population count of 'mask' is kept 367 * the same. 368 */ 369 src >>= n; 370 mask >>= n; 371 i -= n; 372 bit = !bit; 373 mask = ~mask; 374 } 375 376 /* 377 * At the end, right was ror'ed ctpop(mask) times. To put it back in place, 378 * we'll shift it more 64-ctpop(mask) times. 
379 */ 380 if (bit) { 381 n = ctpop64(mask); 382 } else { 383 n = 64 - ctpop64(mask); 384 } 385 386 return left | (right >> n); 387 } 388 389 uint64_t helper_PDEPD(uint64_t src, uint64_t mask) 390 { 391 int i, o; 392 uint64_t result = 0; 393 394 if (mask == -1) { 395 return src; 396 } 397 398 for (i = 0; mask != 0; i++) { 399 o = ctz64(mask); 400 mask &= mask - 1; 401 result |= ((src >> i) & 1) << o; 402 } 403 404 return result; 405 } 406 407 uint64_t helper_PEXTD(uint64_t src, uint64_t mask) 408 { 409 int i, o; 410 uint64_t result = 0; 411 412 if (mask == -1) { 413 return src; 414 } 415 416 for (o = 0; mask != 0; o++) { 417 i = ctz64(mask); 418 mask &= mask - 1; 419 result |= ((src >> i) & 1) << o; 420 } 421 422 return result; 423 } 424 425 /*****************************************************************************/ 426 /* PowerPC 601 specific instructions (POWER bridge) */ 427 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2) 428 { 429 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 430 431 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 432 (int32_t)arg2 == 0) { 433 env->spr[SPR_MQ] = 0; 434 return INT32_MIN; 435 } else { 436 env->spr[SPR_MQ] = tmp % arg2; 437 return tmp / (int32_t)arg2; 438 } 439 } 440 441 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1, 442 target_ulong arg2) 443 { 444 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 445 446 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 447 (int32_t)arg2 == 0) { 448 env->so = env->ov = 1; 449 env->spr[SPR_MQ] = 0; 450 return INT32_MIN; 451 } else { 452 env->spr[SPR_MQ] = tmp % arg2; 453 tmp /= (int32_t)arg2; 454 if ((int32_t)tmp != tmp) { 455 env->so = env->ov = 1; 456 } else { 457 env->ov = 0; 458 } 459 return tmp; 460 } 461 } 462 463 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1, 464 target_ulong arg2) 465 { 466 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 467 (int32_t)arg2 == 0) { 468 env->spr[SPR_MQ] = 0; 469 return INT32_MIN; 470 } else { 471 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 472 return (int32_t)arg1 / (int32_t)arg2; 473 } 474 } 475 476 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1, 477 target_ulong arg2) 478 { 479 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 480 (int32_t)arg2 == 0) { 481 env->so = env->ov = 1; 482 env->spr[SPR_MQ] = 0; 483 return INT32_MIN; 484 } else { 485 env->ov = 0; 486 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 487 return (int32_t)arg1 / (int32_t)arg2; 488 } 489 } 490 491 /*****************************************************************************/ 492 /* Altivec extension helpers */ 493 #if defined(HOST_WORDS_BIGENDIAN) 494 #define VECTOR_FOR_INORDER_I(index, element) \ 495 for (index = 0; index < ARRAY_SIZE(r->element); index++) 496 #else 497 #define VECTOR_FOR_INORDER_I(index, element) \ 498 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--) 499 #endif 500 501 /* Saturating arithmetic helpers. 
*/ 502 #define SATCVT(from, to, from_type, to_type, min, max) \ 503 static inline to_type cvt##from##to(from_type x, int *sat) \ 504 { \ 505 to_type r; \ 506 \ 507 if (x < (from_type)min) { \ 508 r = min; \ 509 *sat = 1; \ 510 } else if (x > (from_type)max) { \ 511 r = max; \ 512 *sat = 1; \ 513 } else { \ 514 r = x; \ 515 } \ 516 return r; \ 517 } 518 #define SATCVTU(from, to, from_type, to_type, min, max) \ 519 static inline to_type cvt##from##to(from_type x, int *sat) \ 520 { \ 521 to_type r; \ 522 \ 523 if (x > (from_type)max) { \ 524 r = max; \ 525 *sat = 1; \ 526 } else { \ 527 r = x; \ 528 } \ 529 return r; \ 530 } 531 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX) 532 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX) 533 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX) 534 535 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX) 536 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX) 537 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX) 538 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX) 539 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX) 540 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX) 541 #undef SATCVT 542 #undef SATCVTU 543 544 void helper_mtvscr(CPUPPCState *env, uint32_t vscr) 545 { 546 ppc_store_vscr(env, vscr); 547 } 548 549 uint32_t helper_mfvscr(CPUPPCState *env) 550 { 551 return ppc_get_vscr(env); 552 } 553 554 static inline void set_vscr_sat(CPUPPCState *env) 555 { 556 /* The choice of non-zero value is arbitrary. */ 557 env->vscr_sat.u32[0] = 1; 558 } 559 560 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 561 { 562 int i; 563 564 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 565 r->u32[i] = ~a->u32[i] < b->u32[i]; 566 } 567 } 568 569 /* vprtybw */ 570 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b) 571 { 572 int i; 573 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 574 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16); 575 res ^= res >> 8; 576 r->u32[i] = res & 1; 577 } 578 } 579 580 /* vprtybd */ 581 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b) 582 { 583 int i; 584 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 585 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32); 586 res ^= res >> 16; 587 res ^= res >> 8; 588 r->u64[i] = res & 1; 589 } 590 } 591 592 /* vprtybq */ 593 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b) 594 { 595 uint64_t res = b->u64[0] ^ b->u64[1]; 596 res ^= res >> 32; 597 res ^= res >> 16; 598 res ^= res >> 8; 599 r->VsrD(1) = res & 1; 600 r->VsrD(0) = 0; 601 } 602 603 #define VARITHFP(suffix, func) \ 604 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 605 ppc_avr_t *b) \ 606 { \ 607 int i; \ 608 \ 609 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 610 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 611 } \ 612 } 613 VARITHFP(addfp, float32_add) 614 VARITHFP(subfp, float32_sub) 615 VARITHFP(minfp, float32_min) 616 VARITHFP(maxfp, float32_max) 617 #undef VARITHFP 618 619 #define VARITHFPFMA(suffix, type) \ 620 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 621 ppc_avr_t *b, ppc_avr_t *c) \ 622 { \ 623 int i; \ 624 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 625 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 626 type, &env->vec_status); \ 627 } \ 628 } 629 VARITHFPFMA(maddfp, 0); 630 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 631 #undef VARITHFPFMA 632 633 #define VARITHSAT_CASE(type, op, cvt, element) \ 634 { \ 635 type result = (type)a->element[i] op (type)b->element[i]; \ 636 r->element[i] = cvt(result, &sat); \ 637 } 638 639 #define 
VARITHSAT_DO(name, op, optype, cvt, element) \ 640 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 641 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 642 { \ 643 int sat = 0; \ 644 int i; \ 645 \ 646 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 647 VARITHSAT_CASE(optype, op, cvt, element); \ 648 } \ 649 if (sat) { \ 650 vscr_sat->u32[0] = 1; \ 651 } \ 652 } 653 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 654 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 655 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 656 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 657 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 658 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 659 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 660 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 661 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 662 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 663 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 664 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 665 #undef VARITHSAT_CASE 666 #undef VARITHSAT_DO 667 #undef VARITHSAT_SIGNED 668 #undef VARITHSAT_UNSIGNED 669 670 #define VAVG_DO(name, element, etype) \ 671 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 672 { \ 673 int i; \ 674 \ 675 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 676 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 677 r->element[i] = x >> 1; \ 678 } \ 679 } 680 681 #define VAVG(type, signed_element, signed_type, unsigned_element, \ 682 unsigned_type) \ 683 VAVG_DO(avgs##type, signed_element, signed_type) \ 684 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 685 VAVG(b, s8, int16_t, u8, uint16_t) 686 VAVG(h, s16, int32_t, u16, uint32_t) 687 VAVG(w, s32, int64_t, u32, uint64_t) 688 #undef VAVG_DO 689 #undef VAVG 690 691 #define VABSDU_DO(name, element) \ 692 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 693 { \ 694 int i; \ 695 \ 696 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 697 r->element[i] = (a->element[i] > b->element[i]) ? \ 698 (a->element[i] - b->element[i]) : \ 699 (b->element[i] - a->element[i]); \ 700 } \ 701 } 702 703 /* 704 * VABSDU - Vector absolute difference unsigned 705 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 706 * element - element type to access from vector 707 */ 708 #define VABSDU(type, element) \ 709 VABSDU_DO(absdu##type, element) 710 VABSDU(b, u8) 711 VABSDU(h, u16) 712 VABSDU(w, u32) 713 #undef VABSDU_DO 714 #undef VABSDU 715 716 #define VCF(suffix, cvt, element) \ 717 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 718 ppc_avr_t *b, uint32_t uim) \ 719 { \ 720 int i; \ 721 \ 722 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 723 float32 t = cvt(b->element[i], &env->vec_status); \ 724 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 725 } \ 726 } 727 VCF(ux, uint32_to_float32, u32) 728 VCF(sx, int32_to_float32, s32) 729 #undef VCF 730 731 #define VCMP_DO(suffix, compare, element, record) \ 732 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 733 ppc_avr_t *a, ppc_avr_t *b) \ 734 { \ 735 uint64_t ones = (uint64_t)-1; \ 736 uint64_t all = ones; \ 737 uint64_t none = 0; \ 738 int i; \ 739 \ 740 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 741 uint64_t result = (a->element[i] compare b->element[i] ? 
\ 742 ones : 0x0); \ 743 switch (sizeof(a->element[0])) { \ 744 case 8: \ 745 r->u64[i] = result; \ 746 break; \ 747 case 4: \ 748 r->u32[i] = result; \ 749 break; \ 750 case 2: \ 751 r->u16[i] = result; \ 752 break; \ 753 case 1: \ 754 r->u8[i] = result; \ 755 break; \ 756 } \ 757 all &= result; \ 758 none |= result; \ 759 } \ 760 if (record) { \ 761 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 762 } \ 763 } 764 #define VCMP(suffix, compare, element) \ 765 VCMP_DO(suffix, compare, element, 0) \ 766 VCMP_DO(suffix##_dot, compare, element, 1) 767 VCMP(equb, ==, u8) 768 VCMP(equh, ==, u16) 769 VCMP(equw, ==, u32) 770 VCMP(equd, ==, u64) 771 VCMP(gtub, >, u8) 772 VCMP(gtuh, >, u16) 773 VCMP(gtuw, >, u32) 774 VCMP(gtud, >, u64) 775 VCMP(gtsb, >, s8) 776 VCMP(gtsh, >, s16) 777 VCMP(gtsw, >, s32) 778 VCMP(gtsd, >, s64) 779 #undef VCMP_DO 780 #undef VCMP 781 782 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \ 783 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \ 784 ppc_avr_t *a, ppc_avr_t *b) \ 785 { \ 786 etype ones = (etype)-1; \ 787 etype all = ones; \ 788 etype result, none = 0; \ 789 int i; \ 790 \ 791 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 792 if (cmpzero) { \ 793 result = ((a->element[i] == 0) \ 794 || (b->element[i] == 0) \ 795 || (a->element[i] != b->element[i]) ? \ 796 ones : 0x0); \ 797 } else { \ 798 result = (a->element[i] != b->element[i]) ? ones : 0x0; \ 799 } \ 800 r->element[i] = result; \ 801 all &= result; \ 802 none |= result; \ 803 } \ 804 if (record) { \ 805 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 806 } \ 807 } 808 809 /* 810 * VCMPNEZ - Vector compare not equal to zero 811 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word) 812 * element - element type to access from vector 813 */ 814 #define VCMPNE(suffix, element, etype, cmpzero) \ 815 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \ 816 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1) 817 VCMPNE(zb, u8, uint8_t, 1) 818 VCMPNE(zh, u16, uint16_t, 1) 819 VCMPNE(zw, u32, uint32_t, 1) 820 VCMPNE(b, u8, uint8_t, 0) 821 VCMPNE(h, u16, uint16_t, 0) 822 VCMPNE(w, u32, uint32_t, 0) 823 #undef VCMPNE_DO 824 #undef VCMPNE 825 826 #define VCMPFP_DO(suffix, compare, order, record) \ 827 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 828 ppc_avr_t *a, ppc_avr_t *b) \ 829 { \ 830 uint32_t ones = (uint32_t)-1; \ 831 uint32_t all = ones; \ 832 uint32_t none = 0; \ 833 int i; \ 834 \ 835 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 836 uint32_t result; \ 837 FloatRelation rel = \ 838 float32_compare_quiet(a->f32[i], b->f32[i], \ 839 &env->vec_status); \ 840 if (rel == float_relation_unordered) { \ 841 result = 0; \ 842 } else if (rel compare order) { \ 843 result = ones; \ 844 } else { \ 845 result = 0; \ 846 } \ 847 r->u32[i] = result; \ 848 all &= result; \ 849 none |= result; \ 850 } \ 851 if (record) { \ 852 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 853 } \ 854 } 855 #define VCMPFP(suffix, compare, order) \ 856 VCMPFP_DO(suffix, compare, order, 0) \ 857 VCMPFP_DO(suffix##_dot, compare, order, 1) 858 VCMPFP(eqfp, ==, float_relation_equal) 859 VCMPFP(gefp, !=, float_relation_less) 860 VCMPFP(gtfp, ==, float_relation_greater) 861 #undef VCMPFP_DO 862 #undef VCMPFP 863 864 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 865 ppc_avr_t *a, ppc_avr_t *b, int record) 866 { 867 int i; 868 int all_in = 0; 869 870 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 871 FloatRelation le_rel = float32_compare_quiet(a->f32[i], 
b->f32[i], 872 &env->vec_status); 873 if (le_rel == float_relation_unordered) { 874 r->u32[i] = 0xc0000000; 875 all_in = 1; 876 } else { 877 float32 bneg = float32_chs(b->f32[i]); 878 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, 879 &env->vec_status); 880 int le = le_rel != float_relation_greater; 881 int ge = ge_rel != float_relation_less; 882 883 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 884 all_in |= (!le | !ge); 885 } 886 } 887 if (record) { 888 env->crf[6] = (all_in == 0) << 1; 889 } 890 } 891 892 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 893 { 894 vcmpbfp_internal(env, r, a, b, 0); 895 } 896 897 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 898 ppc_avr_t *b) 899 { 900 vcmpbfp_internal(env, r, a, b, 1); 901 } 902 903 #define VCT(suffix, satcvt, element) \ 904 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 905 ppc_avr_t *b, uint32_t uim) \ 906 { \ 907 int i; \ 908 int sat = 0; \ 909 float_status s = env->vec_status; \ 910 \ 911 set_float_rounding_mode(float_round_to_zero, &s); \ 912 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 913 if (float32_is_any_nan(b->f32[i])) { \ 914 r->element[i] = 0; \ 915 } else { \ 916 float64 t = float32_to_float64(b->f32[i], &s); \ 917 int64_t j; \ 918 \ 919 t = float64_scalbn(t, uim, &s); \ 920 j = float64_to_int64(t, &s); \ 921 r->element[i] = satcvt(j, &sat); \ 922 } \ 923 } \ 924 if (sat) { \ 925 set_vscr_sat(env); \ 926 } \ 927 } 928 VCT(uxs, cvtsduw, u32) 929 VCT(sxs, cvtsdsw, s32) 930 #undef VCT 931 932 target_ulong helper_vclzlsbb(ppc_avr_t *r) 933 { 934 target_ulong count = 0; 935 int i; 936 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 937 if (r->VsrB(i) & 0x01) { 938 break; 939 } 940 count++; 941 } 942 return count; 943 } 944 945 target_ulong helper_vctzlsbb(ppc_avr_t *r) 946 { 947 target_ulong count = 0; 948 int i; 949 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 950 if (r->VsrB(i) & 0x01) { 951 break; 952 } 953 count++; 954 } 955 return count; 956 } 957 958 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 959 ppc_avr_t *b, ppc_avr_t *c) 960 { 961 int sat = 0; 962 int i; 963 964 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 965 int32_t prod = a->s16[i] * b->s16[i]; 966 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 967 968 r->s16[i] = cvtswsh(t, &sat); 969 } 970 971 if (sat) { 972 set_vscr_sat(env); 973 } 974 } 975 976 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 977 ppc_avr_t *b, ppc_avr_t *c) 978 { 979 int sat = 0; 980 int i; 981 982 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 983 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 984 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 985 r->s16[i] = cvtswsh(t, &sat); 986 } 987 988 if (sat) { 989 set_vscr_sat(env); 990 } 991 } 992 993 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 994 { 995 int i; 996 997 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 998 int32_t prod = a->s16[i] * b->s16[i]; 999 r->s16[i] = (int16_t) (prod + c->s16[i]); 1000 } 1001 } 1002 1003 #define VMRG_DO(name, element, access, ofs) \ 1004 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1005 { \ 1006 ppc_avr_t result; \ 1007 int i, half = ARRAY_SIZE(r->element) / 2; \ 1008 \ 1009 for (i = 0; i < half; i++) { \ 1010 result.access(i * 2 + 0) = a->access(i + ofs); \ 1011 result.access(i * 2 + 1) = b->access(i + ofs); \ 1012 } \ 1013 *r = result; \ 1014 } 1015 1016 #define VMRG(suffix, element, access) \ 1017 VMRG_DO(mrgl##suffix, element, access, half) \ 1018 
VMRG_DO(mrgh##suffix, element, access, 0) 1019 VMRG(b, u8, VsrB) 1020 VMRG(h, u16, VsrH) 1021 VMRG(w, u32, VsrW) 1022 #undef VMRG_DO 1023 #undef VMRG 1024 1025 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1026 ppc_avr_t *b, ppc_avr_t *c) 1027 { 1028 int32_t prod[16]; 1029 int i; 1030 1031 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 1032 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 1033 } 1034 1035 VECTOR_FOR_INORDER_I(i, s32) { 1036 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 1037 prod[4 * i + 2] + prod[4 * i + 3]; 1038 } 1039 } 1040 1041 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1042 ppc_avr_t *b, ppc_avr_t *c) 1043 { 1044 int32_t prod[8]; 1045 int i; 1046 1047 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1048 prod[i] = a->s16[i] * b->s16[i]; 1049 } 1050 1051 VECTOR_FOR_INORDER_I(i, s32) { 1052 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1053 } 1054 } 1055 1056 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1057 ppc_avr_t *b, ppc_avr_t *c) 1058 { 1059 int32_t prod[8]; 1060 int i; 1061 int sat = 0; 1062 1063 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1064 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1065 } 1066 1067 VECTOR_FOR_INORDER_I(i, s32) { 1068 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1069 1070 r->u32[i] = cvtsdsw(t, &sat); 1071 } 1072 1073 if (sat) { 1074 set_vscr_sat(env); 1075 } 1076 } 1077 1078 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1079 ppc_avr_t *b, ppc_avr_t *c) 1080 { 1081 uint16_t prod[16]; 1082 int i; 1083 1084 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1085 prod[i] = a->u8[i] * b->u8[i]; 1086 } 1087 1088 VECTOR_FOR_INORDER_I(i, u32) { 1089 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1090 prod[4 * i + 2] + prod[4 * i + 3]; 1091 } 1092 } 1093 1094 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1095 ppc_avr_t *b, ppc_avr_t *c) 1096 { 1097 uint32_t prod[8]; 1098 int i; 1099 1100 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1101 prod[i] = a->u16[i] * b->u16[i]; 1102 } 1103 1104 VECTOR_FOR_INORDER_I(i, u32) { 1105 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1106 } 1107 } 1108 1109 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1110 ppc_avr_t *b, ppc_avr_t *c) 1111 { 1112 uint32_t prod[8]; 1113 int i; 1114 int sat = 0; 1115 1116 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1117 prod[i] = a->u16[i] * b->u16[i]; 1118 } 1119 1120 VECTOR_FOR_INORDER_I(i, s32) { 1121 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1122 1123 r->u32[i] = cvtuduw(t, &sat); 1124 } 1125 1126 if (sat) { 1127 set_vscr_sat(env); 1128 } 1129 } 1130 1131 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 1132 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1133 { \ 1134 int i; \ 1135 \ 1136 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1137 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 1138 (cast)b->mul_access(i); \ 1139 } \ 1140 } 1141 1142 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 1143 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1144 { \ 1145 int i; \ 1146 \ 1147 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1148 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1149 (cast)b->mul_access(i + 1); \ 1150 } \ 1151 } 1152 1153 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1154 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \ 1155 
VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast) 1156 VMUL(sb, s8, VsrSB, VsrSH, int16_t) 1157 VMUL(sh, s16, VsrSH, VsrSW, int32_t) 1158 VMUL(sw, s32, VsrSW, VsrSD, int64_t) 1159 VMUL(ub, u8, VsrB, VsrH, uint16_t) 1160 VMUL(uh, u16, VsrH, VsrW, uint32_t) 1161 VMUL(uw, u32, VsrW, VsrD, uint64_t) 1162 #undef VMUL_DO_EVN 1163 #undef VMUL_DO_ODD 1164 #undef VMUL 1165 1166 void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1167 { 1168 int i; 1169 1170 for (i = 0; i < 4; i++) { 1171 r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32); 1172 } 1173 } 1174 1175 void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1176 { 1177 int i; 1178 1179 for (i = 0; i < 4; i++) { 1180 r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] * 1181 (uint64_t)b->u32[i]) >> 32); 1182 } 1183 } 1184 1185 void helper_vmulhsd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1186 { 1187 uint64_t discard; 1188 1189 muls64(&discard, &r->u64[0], a->s64[0], b->s64[0]); 1190 muls64(&discard, &r->u64[1], a->s64[1], b->s64[1]); 1191 } 1192 1193 void helper_vmulhud(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1194 { 1195 uint64_t discard; 1196 1197 mulu64(&discard, &r->u64[0], a->u64[0], b->u64[0]); 1198 mulu64(&discard, &r->u64[1], a->u64[1], b->u64[1]); 1199 } 1200 1201 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1202 ppc_avr_t *c) 1203 { 1204 ppc_avr_t result; 1205 int i; 1206 1207 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1208 int s = c->VsrB(i) & 0x1f; 1209 int index = s & 0xf; 1210 1211 if (s & 0x10) { 1212 result.VsrB(i) = b->VsrB(index); 1213 } else { 1214 result.VsrB(i) = a->VsrB(index); 1215 } 1216 } 1217 *r = result; 1218 } 1219 1220 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1221 ppc_avr_t *c) 1222 { 1223 ppc_avr_t result; 1224 int i; 1225 1226 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1227 int s = c->VsrB(i) & 0x1f; 1228 int index = 15 - (s & 0xf); 1229 1230 if (s & 0x10) { 1231 result.VsrB(i) = a->VsrB(index); 1232 } else { 1233 result.VsrB(i) = b->VsrB(index); 1234 } 1235 } 1236 *r = result; 1237 } 1238 1239 #if defined(HOST_WORDS_BIGENDIAN) 1240 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1241 #define VBPERMD_INDEX(i) (i) 1242 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1243 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) 1244 #else 1245 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1246 #define VBPERMD_INDEX(i) (1 - i) 1247 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1248 #define EXTRACT_BIT(avr, i, index) \ 1249 (extract64((avr)->u64[1 - i], 63 - index, 1)) 1250 #endif 1251 1252 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1253 { 1254 int i, j; 1255 ppc_avr_t result = { .u64 = { 0, 0 } }; 1256 VECTOR_FOR_INORDER_I(i, u64) { 1257 for (j = 0; j < 8; j++) { 1258 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1259 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1260 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1261 } 1262 } 1263 } 1264 *r = result; 1265 } 1266 1267 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1268 { 1269 int i; 1270 uint64_t perm = 0; 1271 1272 VECTOR_FOR_INORDER_I(i, u8) { 1273 int index = VBPERMQ_INDEX(b, i); 1274 1275 if (index < 128) { 1276 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1277 if (a->u64[VBPERMQ_DW(index)] & mask) { 1278 perm |= (0x8000 >> i); 1279 } 1280 } 1281 } 1282 1283 r->VsrD(0) = perm; 1284 r->VsrD(1) = 0; 1285 } 1286 1287 #undef VBPERMQ_INDEX 1288 #undef VBPERMQ_DW 1289 1290 #define 
PMSUM(name, srcfld, trgfld, trgtyp) \ 1291 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1292 { \ 1293 int i, j; \ 1294 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ 1295 \ 1296 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1297 prod[i] = 0; \ 1298 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1299 if (a->srcfld[i] & (1ull << j)) { \ 1300 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1301 } \ 1302 } \ 1303 } \ 1304 \ 1305 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1306 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ 1307 } \ 1308 } 1309 1310 PMSUM(vpmsumb, u8, u16, uint16_t) 1311 PMSUM(vpmsumh, u16, u32, uint32_t) 1312 PMSUM(vpmsumw, u32, u64, uint64_t) 1313 1314 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1315 { 1316 1317 #ifdef CONFIG_INT128 1318 int i, j; 1319 __uint128_t prod[2]; 1320 1321 VECTOR_FOR_INORDER_I(i, u64) { 1322 prod[i] = 0; 1323 for (j = 0; j < 64; j++) { 1324 if (a->u64[i] & (1ull << j)) { 1325 prod[i] ^= (((__uint128_t)b->u64[i]) << j); 1326 } 1327 } 1328 } 1329 1330 r->u128 = prod[0] ^ prod[1]; 1331 1332 #else 1333 int i, j; 1334 ppc_avr_t prod[2]; 1335 1336 VECTOR_FOR_INORDER_I(i, u64) { 1337 prod[i].VsrD(1) = prod[i].VsrD(0) = 0; 1338 for (j = 0; j < 64; j++) { 1339 if (a->u64[i] & (1ull << j)) { 1340 ppc_avr_t bshift; 1341 if (j == 0) { 1342 bshift.VsrD(0) = 0; 1343 bshift.VsrD(1) = b->u64[i]; 1344 } else { 1345 bshift.VsrD(0) = b->u64[i] >> (64 - j); 1346 bshift.VsrD(1) = b->u64[i] << j; 1347 } 1348 prod[i].VsrD(1) ^= bshift.VsrD(1); 1349 prod[i].VsrD(0) ^= bshift.VsrD(0); 1350 } 1351 } 1352 } 1353 1354 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1); 1355 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0); 1356 #endif 1357 } 1358 1359 1360 #if defined(HOST_WORDS_BIGENDIAN) 1361 #define PKBIG 1 1362 #else 1363 #define PKBIG 0 1364 #endif 1365 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1366 { 1367 int i, j; 1368 ppc_avr_t result; 1369 #if defined(HOST_WORDS_BIGENDIAN) 1370 const ppc_avr_t *x[2] = { a, b }; 1371 #else 1372 const ppc_avr_t *x[2] = { b, a }; 1373 #endif 1374 1375 VECTOR_FOR_INORDER_I(i, u64) { 1376 VECTOR_FOR_INORDER_I(j, u32) { 1377 uint32_t e = x[i]->u32[j]; 1378 1379 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1380 ((e >> 6) & 0x3e0) | 1381 ((e >> 3) & 0x1f)); 1382 } 1383 } 1384 *r = result; 1385 } 1386 1387 #define VPK(suffix, from, to, cvt, dosat) \ 1388 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1389 ppc_avr_t *a, ppc_avr_t *b) \ 1390 { \ 1391 int i; \ 1392 int sat = 0; \ 1393 ppc_avr_t result; \ 1394 ppc_avr_t *a0 = PKBIG ? a : b; \ 1395 ppc_avr_t *a1 = PKBIG ? 
b : a; \ 1396 \ 1397 VECTOR_FOR_INORDER_I(i, from) { \ 1398 result.to[i] = cvt(a0->from[i], &sat); \ 1399 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1400 } \ 1401 *r = result; \ 1402 if (dosat && sat) { \ 1403 set_vscr_sat(env); \ 1404 } \ 1405 } 1406 #define I(x, y) (x) 1407 VPK(shss, s16, s8, cvtshsb, 1) 1408 VPK(shus, s16, u8, cvtshub, 1) 1409 VPK(swss, s32, s16, cvtswsh, 1) 1410 VPK(swus, s32, u16, cvtswuh, 1) 1411 VPK(sdss, s64, s32, cvtsdsw, 1) 1412 VPK(sdus, s64, u32, cvtsduw, 1) 1413 VPK(uhus, u16, u8, cvtuhub, 1) 1414 VPK(uwus, u32, u16, cvtuwuh, 1) 1415 VPK(udus, u64, u32, cvtuduw, 1) 1416 VPK(uhum, u16, u8, I, 0) 1417 VPK(uwum, u32, u16, I, 0) 1418 VPK(udum, u64, u32, I, 0) 1419 #undef I 1420 #undef VPK 1421 #undef PKBIG 1422 1423 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1424 { 1425 int i; 1426 1427 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1428 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1429 } 1430 } 1431 1432 #define VRFI(suffix, rounding) \ 1433 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1434 ppc_avr_t *b) \ 1435 { \ 1436 int i; \ 1437 float_status s = env->vec_status; \ 1438 \ 1439 set_float_rounding_mode(rounding, &s); \ 1440 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1441 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1442 } \ 1443 } 1444 VRFI(n, float_round_nearest_even) 1445 VRFI(m, float_round_down) 1446 VRFI(p, float_round_up) 1447 VRFI(z, float_round_to_zero) 1448 #undef VRFI 1449 1450 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1451 { 1452 int i; 1453 1454 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1455 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1456 1457 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1458 } 1459 } 1460 1461 #define VRLMI(name, size, element, insert) \ 1462 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1463 { \ 1464 int i; \ 1465 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1466 uint##size##_t src1 = a->element[i]; \ 1467 uint##size##_t src2 = b->element[i]; \ 1468 uint##size##_t src3 = r->element[i]; \ 1469 uint##size##_t begin, end, shift, mask, rot_val; \ 1470 \ 1471 shift = extract##size(src2, 0, 6); \ 1472 end = extract##size(src2, 8, 6); \ 1473 begin = extract##size(src2, 16, 6); \ 1474 rot_val = rol##size(src1, shift); \ 1475 mask = mask_u##size(begin, end); \ 1476 if (insert) { \ 1477 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1478 } else { \ 1479 r->element[i] = (rot_val & mask); \ 1480 } \ 1481 } \ 1482 } 1483 1484 VRLMI(vrldmi, 64, u64, 1); 1485 VRLMI(vrlwmi, 32, u32, 1); 1486 VRLMI(vrldnm, 64, u64, 0); 1487 VRLMI(vrlwnm, 32, u32, 0); 1488 1489 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1490 ppc_avr_t *c) 1491 { 1492 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]); 1493 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]); 1494 } 1495 1496 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1497 { 1498 int i; 1499 1500 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1501 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status); 1502 } 1503 } 1504 1505 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1506 { 1507 int i; 1508 1509 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1510 r->f32[i] = float32_log2(b->f32[i], &env->vec_status); 1511 } 1512 } 1513 1514 #define VEXTU_X_DO(name, size, left) \ 1515 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1516 { \ 1517 int index = (a & 
0xf) * 8; \ 1518 if (left) { \ 1519 index = 128 - index - size; \ 1520 } \ 1521 return int128_getlo(int128_rshift(b->s128, index)) & \ 1522 MAKE_64BIT_MASK(0, size); \ 1523 } 1524 VEXTU_X_DO(vextublx, 8, 1) 1525 VEXTU_X_DO(vextuhlx, 16, 1) 1526 VEXTU_X_DO(vextuwlx, 32, 1) 1527 VEXTU_X_DO(vextubrx, 8, 0) 1528 VEXTU_X_DO(vextuhrx, 16, 0) 1529 VEXTU_X_DO(vextuwrx, 32, 0) 1530 #undef VEXTU_X_DO 1531 1532 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1533 { 1534 int i; 1535 unsigned int shift, bytes, size; 1536 1537 size = ARRAY_SIZE(r->u8); 1538 for (i = 0; i < size; i++) { 1539 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1540 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */ 1541 (((i + 1) < size) ? a->VsrB(i + 1) : 0); 1542 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */ 1543 } 1544 } 1545 1546 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1547 { 1548 int i; 1549 unsigned int shift, bytes; 1550 1551 /* 1552 * Use reverse order, as destination and source register can be 1553 * same. Its being modified in place saving temporary, reverse 1554 * order will guarantee that computed result is not fed back. 1555 */ 1556 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1557 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1558 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i); 1559 /* extract adjacent bytes */ 1560 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */ 1561 } 1562 } 1563 1564 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1565 { 1566 int sh = shift & 0xf; 1567 int i; 1568 ppc_avr_t result; 1569 1570 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1571 int index = sh + i; 1572 if (index > 0xf) { 1573 result.VsrB(i) = b->VsrB(index - 0x10); 1574 } else { 1575 result.VsrB(i) = a->VsrB(index); 1576 } 1577 } 1578 *r = result; 1579 } 1580 1581 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1582 { 1583 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1584 1585 #if defined(HOST_WORDS_BIGENDIAN) 1586 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1587 memset(&r->u8[16 - sh], 0, sh); 1588 #else 1589 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1590 memset(&r->u8[0], 0, sh); 1591 #endif 1592 } 1593 1594 #if defined(HOST_WORDS_BIGENDIAN) 1595 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX]) 1596 #else 1597 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1) 1598 #endif 1599 1600 #define VINSX(SUFFIX, TYPE) \ 1601 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \ 1602 uint64_t val, target_ulong index) \ 1603 { \ 1604 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \ 1605 target_long idx = index; \ 1606 \ 1607 if (idx < 0 || idx > maxidx) { \ 1608 idx = idx < 0 ? 
sizeof(TYPE) - idx : idx; \ 1609 qemu_log_mask(LOG_GUEST_ERROR, \ 1610 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \ 1611 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \ 1612 } else { \ 1613 TYPE src = val; \ 1614 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \ 1615 } \ 1616 } 1617 VINSX(B, uint8_t) 1618 VINSX(H, uint16_t) 1619 VINSX(W, uint32_t) 1620 VINSX(D, uint64_t) 1621 #undef ELEM_ADDR 1622 #undef VINSX 1623 #if defined(HOST_WORDS_BIGENDIAN) 1624 #define VEXTDVLX(NAME, SIZE) \ 1625 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1626 target_ulong index) \ 1627 { \ 1628 const target_long idx = index; \ 1629 ppc_avr_t tmp[2] = { *a, *b }; \ 1630 memset(t, 0, sizeof(*t)); \ 1631 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1632 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \ 1633 } else { \ 1634 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1635 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1636 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \ 1637 } \ 1638 } 1639 #else 1640 #define VEXTDVLX(NAME, SIZE) \ 1641 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1642 target_ulong index) \ 1643 { \ 1644 const target_long idx = index; \ 1645 ppc_avr_t tmp[2] = { *b, *a }; \ 1646 memset(t, 0, sizeof(*t)); \ 1647 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1648 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \ 1649 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \ 1650 } else { \ 1651 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1652 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1653 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \ 1654 } \ 1655 } 1656 #endif 1657 VEXTDVLX(VEXTDUBVLX, 1) 1658 VEXTDVLX(VEXTDUHVLX, 2) 1659 VEXTDVLX(VEXTDUWVLX, 4) 1660 VEXTDVLX(VEXTDDVLX, 8) 1661 #undef VEXTDVLX 1662 #if defined(HOST_WORDS_BIGENDIAN) 1663 #define VEXTRACT(suffix, element) \ 1664 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1665 { \ 1666 uint32_t es = sizeof(r->element[0]); \ 1667 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1668 memset(&r->u8[8], 0, 8); \ 1669 memset(&r->u8[0], 0, 8 - es); \ 1670 } 1671 #else 1672 #define VEXTRACT(suffix, element) \ 1673 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1674 { \ 1675 uint32_t es = sizeof(r->element[0]); \ 1676 uint32_t s = (16 - index) - es; \ 1677 memmove(&r->u8[8], &b->u8[s], es); \ 1678 memset(&r->u8[0], 0, 8); \ 1679 memset(&r->u8[8 + es], 0, 8 - es); \ 1680 } 1681 #endif 1682 VEXTRACT(ub, u8) 1683 VEXTRACT(uh, u16) 1684 VEXTRACT(uw, u32) 1685 VEXTRACT(d, u64) 1686 #undef VEXTRACT 1687 1688 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt, 1689 ppc_vsr_t *xb, uint32_t index) 1690 { 1691 ppc_vsr_t t = { }; 1692 size_t es = sizeof(uint32_t); 1693 uint32_t ext_index; 1694 int i; 1695 1696 ext_index = index; 1697 for (i = 0; i < es; i++, ext_index++) { 1698 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16); 1699 } 1700 1701 *xt = t; 1702 } 1703 1704 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt, 1705 ppc_vsr_t *xb, uint32_t index) 1706 { 1707 ppc_vsr_t t = *xt; 1708 size_t es = sizeof(uint32_t); 1709 int ins_index, i = 0; 1710 1711 ins_index = index; 1712 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1713 t.VsrB(ins_index) = xb->VsrB(8 - es + i); 1714 } 1715 1716 *xt = t; 1717 } 1718 1719 #define XXBLEND(name, sz) \ 1720 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, 
ppc_avr_t *a, ppc_avr_t *b, \ 1721 ppc_avr_t *c, uint32_t desc) \ 1722 { \ 1723 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \ 1724 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \ 1725 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \ 1726 } \ 1727 } 1728 XXBLEND(B, 8) 1729 XXBLEND(H, 16) 1730 XXBLEND(W, 32) 1731 XXBLEND(D, 64) 1732 #undef XXBLEND 1733 1734 #define VEXT_SIGNED(name, element, cast) \ 1735 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1736 { \ 1737 int i; \ 1738 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1739 r->element[i] = (cast)b->element[i]; \ 1740 } \ 1741 } 1742 VEXT_SIGNED(vextsb2w, s32, int8_t) 1743 VEXT_SIGNED(vextsb2d, s64, int8_t) 1744 VEXT_SIGNED(vextsh2w, s32, int16_t) 1745 VEXT_SIGNED(vextsh2d, s64, int16_t) 1746 VEXT_SIGNED(vextsw2d, s64, int32_t) 1747 #undef VEXT_SIGNED 1748 1749 #define VNEG(name, element) \ 1750 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1751 { \ 1752 int i; \ 1753 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1754 r->element[i] = -b->element[i]; \ 1755 } \ 1756 } 1757 VNEG(vnegw, s32) 1758 VNEG(vnegd, s64) 1759 #undef VNEG 1760 1761 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1762 { 1763 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1764 1765 #if defined(HOST_WORDS_BIGENDIAN) 1766 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1767 memset(&r->u8[0], 0, sh); 1768 #else 1769 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1770 memset(&r->u8[16 - sh], 0, sh); 1771 #endif 1772 } 1773 1774 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1775 { 1776 int i; 1777 1778 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1779 r->u32[i] = a->u32[i] >= b->u32[i]; 1780 } 1781 } 1782 1783 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1784 { 1785 int64_t t; 1786 int i, upper; 1787 ppc_avr_t result; 1788 int sat = 0; 1789 1790 upper = ARRAY_SIZE(r->s32) - 1; 1791 t = (int64_t)b->VsrSW(upper); 1792 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1793 t += a->VsrSW(i); 1794 result.VsrSW(i) = 0; 1795 } 1796 result.VsrSW(upper) = cvtsdsw(t, &sat); 1797 *r = result; 1798 1799 if (sat) { 1800 set_vscr_sat(env); 1801 } 1802 } 1803 1804 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1805 { 1806 int i, j, upper; 1807 ppc_avr_t result; 1808 int sat = 0; 1809 1810 upper = 1; 1811 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1812 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 1813 1814 result.VsrD(i) = 0; 1815 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 1816 t += a->VsrSW(2 * i + j); 1817 } 1818 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 1819 } 1820 1821 *r = result; 1822 if (sat) { 1823 set_vscr_sat(env); 1824 } 1825 } 1826 1827 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1828 { 1829 int i, j; 1830 int sat = 0; 1831 1832 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1833 int64_t t = (int64_t)b->s32[i]; 1834 1835 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 1836 t += a->s8[4 * i + j]; 1837 } 1838 r->s32[i] = cvtsdsw(t, &sat); 1839 } 1840 1841 if (sat) { 1842 set_vscr_sat(env); 1843 } 1844 } 1845 1846 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1847 { 1848 int sat = 0; 1849 int i; 1850 1851 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1852 int64_t t = (int64_t)b->s32[i]; 1853 1854 t += a->s16[2 * i] + a->s16[2 * i + 1]; 1855 r->s32[i] = cvtsdsw(t, &sat); 1856 } 1857 1858 if (sat) { 1859 set_vscr_sat(env); 1860 } 1861 } 1862 1863 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1864 { 
1865 int i, j; 1866 int sat = 0; 1867 1868 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1869 uint64_t t = (uint64_t)b->u32[i]; 1870 1871 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 1872 t += a->u8[4 * i + j]; 1873 } 1874 r->u32[i] = cvtuduw(t, &sat); 1875 } 1876 1877 if (sat) { 1878 set_vscr_sat(env); 1879 } 1880 } 1881 1882 #if defined(HOST_WORDS_BIGENDIAN) 1883 #define UPKHI 1 1884 #define UPKLO 0 1885 #else 1886 #define UPKHI 0 1887 #define UPKLO 1 1888 #endif 1889 #define VUPKPX(suffix, hi) \ 1890 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1891 { \ 1892 int i; \ 1893 ppc_avr_t result; \ 1894 \ 1895 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 1896 uint16_t e = b->u16[hi ? i : i + 4]; \ 1897 uint8_t a = (e >> 15) ? 0xff : 0; \ 1898 uint8_t r = (e >> 10) & 0x1f; \ 1899 uint8_t g = (e >> 5) & 0x1f; \ 1900 uint8_t b = e & 0x1f; \ 1901 \ 1902 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 1903 } \ 1904 *r = result; \ 1905 } 1906 VUPKPX(lpx, UPKLO) 1907 VUPKPX(hpx, UPKHI) 1908 #undef VUPKPX 1909 1910 #define VUPK(suffix, unpacked, packee, hi) \ 1911 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1912 { \ 1913 int i; \ 1914 ppc_avr_t result; \ 1915 \ 1916 if (hi) { \ 1917 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 1918 result.unpacked[i] = b->packee[i]; \ 1919 } \ 1920 } else { \ 1921 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 1922 i++) { \ 1923 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 1924 } \ 1925 } \ 1926 *r = result; \ 1927 } 1928 VUPK(hsb, s16, s8, UPKHI) 1929 VUPK(hsh, s32, s16, UPKHI) 1930 VUPK(hsw, s64, s32, UPKHI) 1931 VUPK(lsb, s16, s8, UPKLO) 1932 VUPK(lsh, s32, s16, UPKLO) 1933 VUPK(lsw, s64, s32, UPKLO) 1934 #undef VUPK 1935 #undef UPKHI 1936 #undef UPKLO 1937 1938 #define VGENERIC_DO(name, element) \ 1939 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 1940 { \ 1941 int i; \ 1942 \ 1943 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1944 r->element[i] = name(b->element[i]); \ 1945 } \ 1946 } 1947 1948 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 1949 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 1950 1951 VGENERIC_DO(clzb, u8) 1952 VGENERIC_DO(clzh, u16) 1953 1954 #undef clzb 1955 #undef clzh 1956 1957 #define ctzb(v) ((v) ? ctz32(v) : 8) 1958 #define ctzh(v) ((v) ? 
ctz32(v) : 16) 1959 #define ctzw(v) ctz32((v)) 1960 #define ctzd(v) ctz64((v)) 1961 1962 VGENERIC_DO(ctzb, u8) 1963 VGENERIC_DO(ctzh, u16) 1964 VGENERIC_DO(ctzw, u32) 1965 VGENERIC_DO(ctzd, u64) 1966 1967 #undef ctzb 1968 #undef ctzh 1969 #undef ctzw 1970 #undef ctzd 1971 1972 #define popcntb(v) ctpop8(v) 1973 #define popcnth(v) ctpop16(v) 1974 #define popcntw(v) ctpop32(v) 1975 #define popcntd(v) ctpop64(v) 1976 1977 VGENERIC_DO(popcntb, u8) 1978 VGENERIC_DO(popcnth, u16) 1979 VGENERIC_DO(popcntw, u32) 1980 VGENERIC_DO(popcntd, u64) 1981 1982 #undef popcntb 1983 #undef popcnth 1984 #undef popcntw 1985 #undef popcntd 1986 1987 #undef VGENERIC_DO 1988 1989 #if defined(HOST_WORDS_BIGENDIAN) 1990 #define QW_ONE { .u64 = { 0, 1 } } 1991 #else 1992 #define QW_ONE { .u64 = { 1, 0 } } 1993 #endif 1994 1995 #ifndef CONFIG_INT128 1996 1997 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) 1998 { 1999 t->u64[0] = ~a.u64[0]; 2000 t->u64[1] = ~a.u64[1]; 2001 } 2002 2003 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) 2004 { 2005 if (a.VsrD(0) < b.VsrD(0)) { 2006 return -1; 2007 } else if (a.VsrD(0) > b.VsrD(0)) { 2008 return 1; 2009 } else if (a.VsrD(1) < b.VsrD(1)) { 2010 return -1; 2011 } else if (a.VsrD(1) > b.VsrD(1)) { 2012 return 1; 2013 } else { 2014 return 0; 2015 } 2016 } 2017 2018 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2019 { 2020 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 2021 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 2022 (~a.VsrD(1) < b.VsrD(1)); 2023 } 2024 2025 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2026 { 2027 ppc_avr_t not_a; 2028 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 2029 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 2030 (~a.VsrD(1) < b.VsrD(1)); 2031 avr_qw_not(¬_a, a); 2032 return avr_qw_cmpu(not_a, b) < 0; 2033 } 2034 2035 #endif 2036 2037 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2038 { 2039 #ifdef CONFIG_INT128 2040 r->u128 = a->u128 + b->u128; 2041 #else 2042 avr_qw_add(r, *a, *b); 2043 #endif 2044 } 2045 2046 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2047 { 2048 #ifdef CONFIG_INT128 2049 r->u128 = a->u128 + b->u128 + (c->u128 & 1); 2050 #else 2051 2052 if (c->VsrD(1) & 1) { 2053 ppc_avr_t tmp; 2054 2055 tmp.VsrD(0) = 0; 2056 tmp.VsrD(1) = c->VsrD(1) & 1; 2057 avr_qw_add(&tmp, *a, tmp); 2058 avr_qw_add(r, tmp, *b); 2059 } else { 2060 avr_qw_add(r, *a, *b); 2061 } 2062 #endif 2063 } 2064 2065 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2066 { 2067 #ifdef CONFIG_INT128 2068 r->u128 = (~a->u128 < b->u128); 2069 #else 2070 ppc_avr_t not_a; 2071 2072 avr_qw_not(¬_a, *a); 2073 2074 r->VsrD(0) = 0; 2075 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0); 2076 #endif 2077 } 2078 2079 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2080 { 2081 #ifdef CONFIG_INT128 2082 int carry_out = (~a->u128 < b->u128); 2083 if (!carry_out && (c->u128 & 1)) { 2084 carry_out = ((a->u128 + b->u128 + 1) == 0) && 2085 ((a->u128 != 0) || (b->u128 != 0)); 2086 } 2087 r->u128 = carry_out; 2088 #else 2089 2090 int carry_in = c->VsrD(1) & 1; 2091 int carry_out = 0; 2092 ppc_avr_t tmp; 2093 2094 carry_out = avr_qw_addc(&tmp, *a, *b); 2095 2096 if (!carry_out && carry_in) { 2097 ppc_avr_t one = QW_ONE; 2098 carry_out = avr_qw_addc(&tmp, tmp, one); 2099 } 2100 r->VsrD(0) = 0; 2101 r->VsrD(1) = carry_out; 2102 #endif 2103 } 2104 2105 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2106 { 2107 #ifdef CONFIG_INT128 2108 r->u128 = a->u128 - b->u128; 2109 #else 2110 ppc_avr_t 
tmp; 2111 ppc_avr_t one = QW_ONE; 2112 2113 avr_qw_not(&tmp, *b); 2114 avr_qw_add(&tmp, *a, tmp); 2115 avr_qw_add(r, tmp, one); 2116 #endif 2117 } 2118 2119 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2120 { 2121 #ifdef CONFIG_INT128 2122 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); 2123 #else 2124 ppc_avr_t tmp, sum; 2125 2126 avr_qw_not(&tmp, *b); 2127 avr_qw_add(&sum, *a, tmp); 2128 2129 tmp.VsrD(0) = 0; 2130 tmp.VsrD(1) = c->VsrD(1) & 1; 2131 avr_qw_add(r, sum, tmp); 2132 #endif 2133 } 2134 2135 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2136 { 2137 #ifdef CONFIG_INT128 2138 r->u128 = (~a->u128 < ~b->u128) || 2139 (a->u128 + ~b->u128 == (__uint128_t)-1); 2140 #else 2141 int carry = (avr_qw_cmpu(*a, *b) > 0); 2142 if (!carry) { 2143 ppc_avr_t tmp; 2144 avr_qw_not(&tmp, *b); 2145 avr_qw_add(&tmp, *a, tmp); 2146 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull)); 2147 } 2148 r->VsrD(0) = 0; 2149 r->VsrD(1) = carry; 2150 #endif 2151 } 2152 2153 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2154 { 2155 #ifdef CONFIG_INT128 2156 r->u128 = 2157 (~a->u128 < ~b->u128) || 2158 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); 2159 #else 2160 int carry_in = c->VsrD(1) & 1; 2161 int carry_out = (avr_qw_cmpu(*a, *b) > 0); 2162 if (!carry_out && carry_in) { 2163 ppc_avr_t tmp; 2164 avr_qw_not(&tmp, *b); 2165 avr_qw_add(&tmp, *a, tmp); 2166 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull)); 2167 } 2168 2169 r->VsrD(0) = 0; 2170 r->VsrD(1) = carry_out; 2171 #endif 2172 } 2173 2174 #define BCD_PLUS_PREF_1 0xC 2175 #define BCD_PLUS_PREF_2 0xF 2176 #define BCD_PLUS_ALT_1 0xA 2177 #define BCD_NEG_PREF 0xD 2178 #define BCD_NEG_ALT 0xB 2179 #define BCD_PLUS_ALT_2 0xE 2180 #define NATIONAL_PLUS 0x2B 2181 #define NATIONAL_NEG 0x2D 2182 2183 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2184 2185 static int bcd_get_sgn(ppc_avr_t *bcd) 2186 { 2187 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) { 2188 case BCD_PLUS_PREF_1: 2189 case BCD_PLUS_PREF_2: 2190 case BCD_PLUS_ALT_1: 2191 case BCD_PLUS_ALT_2: 2192 { 2193 return 1; 2194 } 2195 2196 case BCD_NEG_PREF: 2197 case BCD_NEG_ALT: 2198 { 2199 return -1; 2200 } 2201 2202 default: 2203 { 2204 return 0; 2205 } 2206 } 2207 } 2208 2209 static int bcd_preferred_sgn(int sgn, int ps) 2210 { 2211 if (sgn >= 0) { 2212 return (ps == 0) ? 

static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
{
    uint8_t result;
    if (n & 1) {
        result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
    } else {
        result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
    }

    if (unlikely(result > 9)) {
        *invalid = true;
    }
    return result;
}

static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
{
    if (n & 1) {
        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
        bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
    } else {
        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
        bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
    }
}

static bool bcd_is_valid(ppc_avr_t *bcd)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(bcd) == 0) {
        return false;
    }

    for (i = 1; i < 32; i++) {
        bcd_get_digit(bcd, i, &invalid);
        if (unlikely(invalid)) {
            return false;
        }
    }
    return true;
}

static int bcd_cmp_zero(ppc_avr_t *bcd)
{
    if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
        return CRF_EQ;
    } else {
        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
    }
}

static uint16_t get_national_digit(ppc_avr_t *reg, int n)
{
    return reg->VsrH(7 - n);
}

static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
{
    reg->VsrH(7 - n) = val;
}

static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;
    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}

static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
    return is_zero;
}

static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                        int *overflow)
{
    int carry = 0;
    int i;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
                        bcd_get_digit(b, i, invalid) + carry;
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
}
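
/*
 * The BCD add helper works on sign-and-magnitude form: when the operands
 * have the same sign, the magnitudes are added and the result keeps that
 * sign; otherwise the smaller magnitude is subtracted from the larger and
 * the result takes the sign of the larger operand.  The returned CR field
 * reflects the sign of the result (LT/GT), EQ for a zero result, and SO
 * for invalid input or overflow.
 */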

uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{

    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    int zero = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? CRF_GT : CRF_LT;
        } else {
            int magnitude = bcd_cmp_mag(a, b);
            if (magnitude > 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
                bcd_sub_mag(&result, a, b, &invalid, &overflow);
                cr = (sgna > 0) ? CRF_GT : CRF_LT;
            } else if (magnitude < 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
                bcd_sub_mag(&result, b, a, &invalid, &overflow);
                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
            } else {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
                cr = CRF_EQ;
            }
        }
    }

    if (unlikely(invalid)) {
        result.VsrD(0) = result.VsrD(1) = -1;
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    } else if (zero) {
        cr |= CRF_EQ;
    }

    *r = result;

    return cr;
}

uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
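
/*
 * bcdcfz converts zoned-decimal input to packed BCD: every byte of b is
 * expected to carry a zone nibble (0x3, or 0xF when PS is set) plus one
 * decimal digit, and the sign is taken from the zone nibble of the least
 * significant byte.
 */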

uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
        digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
            (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->VsrD(0) >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

/**
 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
 *
 * Returns:
 * > 0 if ahi|alo > bhi|blo,
 * 0 if ahi|alo == bhi|blo,
 * < 0 if ahi|alo < bhi|blo
 */
static inline int ucmp128(uint64_t alo, uint64_t ahi,
                          uint64_t blo, uint64_t bhi)
{
    return (ahi == bhi) ?
        (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
        (ahi > bhi ? 1 : -1);
}
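
/*
 * bcdcfsq treats b as a signed 128-bit integer.  The magnitude is split by
 * dividing by 10^15: the remainder supplies digits 1..15, and the quotient
 * (which fits in 64 bits because the accepted range is below 10^31)
 * supplies digits 16..31 of the packed BCD result.
 */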

uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr;
    uint64_t lo_value;
    uint64_t hi_value;
    uint64_t rem;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->VsrSD(0) < 0) {
        lo_value = -b->VsrSD(1);
        hi_value = ~b->VsrD(0) + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);

        cr = CRF_LT;
    } else {
        lo_value = b->VsrD(1);
        hi_value = b->VsrD(0);
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);

        if (hi_value == 0 && lo_value == 0) {
            cr = CRF_EQ;
        } else {
            cr = CRF_GT;
        }
    }

    /*
     * Check src limits: abs(src) <= 10^31 - 1
     *
     * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
     */
    if (ucmp128(lo_value, hi_value,
                0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
        cr |= CRF_SO;

        /*
         * According to the ISA, if src wouldn't fit in the destination
         * register, the result is undefined.
         * In that case, we leave r unchanged.
         */
    } else {
        rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);

        for (i = 1; i < 16; rem /= 10, i++) {
            bcd_put_digit(&ret, rem % 10, i);
        }

        for (; i < 32; lo_value /= 10, i++) {
            bcd_put_digit(&ret, lo_value % 10, i);
        }

        *r = ret;
    }

    return cr;
}

uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->VsrSD(1) = -lo_value;
        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
    } else {
        r->VsrSD(1) = lo_value;
        r->VsrSD(0) = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);

    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i = a->VsrSB(7);
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSB(7);
    if (i >= 32) {
        ox_flag = true;
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i <= -32) {
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}
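
/*
 * bcdsr is the rounded-shift variant: on a right shift, the most
 * significant discarded digit ends up in the (cleared) sign position, and
 * if it is 5 or more the remaining digits are rounded up by adding bcd_one
 * (a 1 in digit position 1) before the preferred sign code is written back.
 */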

uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    int i = a->VsrSB(7);
    ppc_avr_t bcd_one;

    bcd_one.VsrD(0) = 0;
    bcd_one.VsrD(1) = 0x10;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);

        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}

uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
    int i = a->VsrSH(3) + 1;
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i >= 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}

uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSH(3);
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    } else if (i == 0) {
        if (ret.VsrD(0) || ret.VsrD(1)) {
            ox_flag = CRF_SO;
        }
        ret.VsrD(0) = ret.VsrD(1) = 0;
    }

    *r = ret;
    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}

void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.VsrW(i) = b->VsrW(i) ^
            (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}
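
/*
 * vcipherlast is the final AES encryption round: SubBytes, ShiftRows and
 * AddRoundKey only, with no MixColumns step, hence the plain S-box lookup
 * here instead of the combined Te tables used by vcipher above.
 */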

void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
    }
    *r = result;
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07.  The RTL is */
    /* incorrect and will be fixed in V2.07B. */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->VsrW(i) =
            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
            AES_imc[tmp.VsrB(4 * i + 3)][3];
    }
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
    }
    *r = result;
}

void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}

void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}
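
/*
 * helper_brinc implements the SPE brinc (bit-reversed increment)
 * instruction, typically used for bit-reversed (FFT-style) addressing:
 * within the low MASKBITS bits, the bits of arg1 selected by the arg2 mask
 * are incremented in bit-reversed order (the a | ~b term lets the carry
 * skip over bits outside the mask), and bits outside the mask are passed
 * through from arg1 unchanged.
 */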

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}