1 /* 2 * PowerPC integer and vector emulation helpers for QEMU. 3 * 4 * Copyright (c) 2003-2007 Jocelyn Mayer 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "cpu.h" 22 #include "internal.h" 23 #include "qemu/host-utils.h" 24 #include "qemu/main-loop.h" 25 #include "qemu/log.h" 26 #include "exec/helper-proto.h" 27 #include "crypto/aes.h" 28 #include "fpu/softfloat.h" 29 #include "qapi/error.h" 30 #include "qemu/guest-random.h" 31 32 #include "helper_regs.h" 33 /*****************************************************************************/ 34 /* Fixed point operations helpers */ 35 36 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov) 37 { 38 if (unlikely(ov)) { 39 env->so = env->ov = 1; 40 } else { 41 env->ov = 0; 42 } 43 } 44 45 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, 46 uint32_t oe) 47 { 48 uint64_t rt = 0; 49 int overflow = 0; 50 51 uint64_t dividend = (uint64_t)ra << 32; 52 uint64_t divisor = (uint32_t)rb; 53 54 if (unlikely(divisor == 0)) { 55 overflow = 1; 56 } else { 57 rt = dividend / divisor; 58 overflow = rt > UINT32_MAX; 59 } 60 61 if (unlikely(overflow)) { 62 rt = 0; /* Undefined */ 63 } 64 65 if (oe) { 66 helper_update_ov_legacy(env, overflow); 67 } 68 69 return (target_ulong)rt; 70 } 71 72 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, 73 uint32_t oe) 74 { 75 int64_t rt = 0; 76 int overflow = 0; 77 78 int64_t dividend = (int64_t)ra << 32; 79 int64_t divisor = (int64_t)((int32_t)rb); 80 81 if (unlikely((divisor == 0) || 82 ((divisor == -1ull) && (dividend == INT64_MIN)))) { 83 overflow = 1; 84 } else { 85 rt = dividend / divisor; 86 overflow = rt != (int32_t)rt; 87 } 88 89 if (unlikely(overflow)) { 90 rt = 0; /* Undefined */ 91 } 92 93 if (oe) { 94 helper_update_ov_legacy(env, overflow); 95 } 96 97 return (target_ulong)rt; 98 } 99 100 #if defined(TARGET_PPC64) 101 102 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) 103 { 104 uint64_t rt = 0; 105 int overflow = 0; 106 107 overflow = divu128(&rt, &ra, rb); 108 109 if (unlikely(overflow)) { 110 rt = 0; /* Undefined */ 111 } 112 113 if (oe) { 114 helper_update_ov_legacy(env, overflow); 115 } 116 117 return rt; 118 } 119 120 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) 121 { 122 int64_t rt = 0; 123 int64_t ra = (int64_t)rau; 124 int64_t rb = (int64_t)rbu; 125 int overflow = divs128(&rt, &ra, rb); 126 127 if (unlikely(overflow)) { 128 rt = 0; /* Undefined */ 129 } 130 131 if (oe) { 132 helper_update_ov_legacy(env, overflow); 133 } 134 135 return rt; 136 } 137 138 #endif 139 140 141 #if defined(TARGET_PPC64) 142 /* if x = 0xab, returns 0xababababababababa */ 143 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff)) 144 145 /* 146 * subtract 1 from each byte, and with inverse, check if MSB is set at each 147 * byte. 148 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80 149 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 150 */ 151 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 152 153 /* When you XOR the pattern and there is a match, that byte will be zero */ 154 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 155 156 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 157 { 158 return hasvalue(rb, ra) ? CRF_GT : 0; 159 } 160 161 #undef pattern 162 #undef haszero 163 #undef hasvalue 164 165 /* 166 * Return a random number. 167 */ 168 uint64_t helper_darn32(void) 169 { 170 Error *err = NULL; 171 uint32_t ret; 172 173 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 174 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 175 error_get_pretty(err)); 176 error_free(err); 177 return -1; 178 } 179 180 return ret; 181 } 182 183 uint64_t helper_darn64(void) 184 { 185 Error *err = NULL; 186 uint64_t ret; 187 188 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 189 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 190 error_get_pretty(err)); 191 error_free(err); 192 return -1; 193 } 194 195 return ret; 196 } 197 198 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 199 { 200 int i; 201 uint64_t ra = 0; 202 203 for (i = 0; i < 8; i++) { 204 int index = (rs >> (i * 8)) & 0xFF; 205 if (index < 64) { 206 if (rb & PPC_BIT(index)) { 207 ra |= 1 << i; 208 } 209 } 210 } 211 return ra; 212 } 213 214 #endif 215 216 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 217 { 218 target_ulong mask = 0xff; 219 target_ulong ra = 0; 220 int i; 221 222 for (i = 0; i < sizeof(target_ulong); i++) { 223 if ((rs & mask) == (rb & mask)) { 224 ra |= mask; 225 } 226 mask <<= 8; 227 } 228 return ra; 229 } 230 231 /* shift right arithmetic helper */ 232 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 233 target_ulong shift) 234 { 235 int32_t ret; 236 237 if (likely(!(shift & 0x20))) { 238 if (likely((uint32_t)shift != 0)) { 239 shift &= 0x1f; 240 ret = (int32_t)value >> shift; 241 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 242 env->ca32 = env->ca = 0; 243 } else { 244 env->ca32 = env->ca = 1; 245 } 246 } else { 247 ret = (int32_t)value; 248 env->ca32 = env->ca = 0; 249 } 250 } else { 251 ret = (int32_t)value >> 31; 252 env->ca32 = env->ca = (ret != 0); 253 } 254 return (target_long)ret; 255 } 256 257 #if defined(TARGET_PPC64) 258 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 259 target_ulong shift) 260 { 261 int64_t ret; 262 263 if (likely(!(shift & 0x40))) { 264 if (likely((uint64_t)shift != 0)) { 265 shift &= 0x3f; 266 ret = (int64_t)value >> shift; 267 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 268 env->ca32 = env->ca = 0; 269 } else { 270 env->ca32 = env->ca = 1; 271 } 272 } else { 273 ret = (int64_t)value; 274 env->ca32 = env->ca = 0; 275 } 276 } else { 277 ret = (int64_t)value >> 63; 278 env->ca32 = env->ca = (ret != 0); 279 } 280 return ret; 281 } 282 #endif 283 284 #if defined(TARGET_PPC64) 285 target_ulong helper_popcntb(target_ulong val) 286 { 287 /* Note that we don't fold past bytes */ 288 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 289 0x5555555555555555ULL); 290 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 291 0x3333333333333333ULL); 292 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 293 0x0f0f0f0f0f0f0f0fULL); 294 return val; 295 } 296 297 target_ulong helper_popcntw(target_ulong val) 298 { 299 /* Note that we don't fold past words. */ 300 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 301 0x5555555555555555ULL); 302 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 303 0x3333333333333333ULL); 304 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 305 0x0f0f0f0f0f0f0f0fULL); 306 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 307 0x00ff00ff00ff00ffULL); 308 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 309 0x0000ffff0000ffffULL); 310 return val; 311 } 312 #else 313 target_ulong helper_popcntb(target_ulong val) 314 { 315 /* Note that we don't fold past bytes */ 316 val = (val & 0x55555555) + ((val >> 1) & 0x55555555); 317 val = (val & 0x33333333) + ((val >> 2) & 0x33333333); 318 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); 319 return val; 320 } 321 #endif 322 323 uint64_t helper_cfuged(uint64_t src, uint64_t mask) 324 { 325 /* 326 * Instead of processing the mask bit-by-bit from the most significant to 327 * the least significant bit, as described in PowerISA, we'll handle it in 328 * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use 329 * ctz or cto, we negate the mask at the end of the loop. 330 */ 331 target_ulong m, left = 0, right = 0; 332 unsigned int n, i = 64; 333 bool bit = false; /* tracks if we are processing zeros or ones */ 334 335 if (mask == 0 || mask == -1) { 336 return src; 337 } 338 339 /* Processes the mask in blocks, from LSB to MSB */ 340 while (i) { 341 /* Find how many bits we should take */ 342 n = ctz64(mask); 343 if (n > i) { 344 n = i; 345 } 346 347 /* 348 * Extracts 'n' trailing bits of src and put them on the leading 'n' 349 * bits of 'right' or 'left', pushing down the previously extracted 350 * values. 351 */ 352 m = (1ll << n) - 1; 353 if (bit) { 354 right = ror64(right | (src & m), n); 355 } else { 356 left = ror64(left | (src & m), n); 357 } 358 359 /* 360 * Discards the processed bits from 'src' and 'mask'. Note that we are 361 * removing 'n' trailing zeros from 'mask', but the logical shift will 362 * add 'n' leading zeros back, so the population count of 'mask' is kept 363 * the same. 364 */ 365 src >>= n; 366 mask >>= n; 367 i -= n; 368 bit = !bit; 369 mask = ~mask; 370 } 371 372 /* 373 * At the end, right was ror'ed ctpop(mask) times. To put it back in place, 374 * we'll shift it more 64-ctpop(mask) times. 375 */ 376 if (bit) { 377 n = ctpop64(mask); 378 } else { 379 n = 64 - ctpop64(mask); 380 } 381 382 return left | (right >> n); 383 } 384 385 /*****************************************************************************/ 386 /* PowerPC 601 specific instructions (POWER bridge) */ 387 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2) 388 { 389 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 390 391 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 392 (int32_t)arg2 == 0) { 393 env->spr[SPR_MQ] = 0; 394 return INT32_MIN; 395 } else { 396 env->spr[SPR_MQ] = tmp % arg2; 397 return tmp / (int32_t)arg2; 398 } 399 } 400 401 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1, 402 target_ulong arg2) 403 { 404 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 405 406 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 407 (int32_t)arg2 == 0) { 408 env->so = env->ov = 1; 409 env->spr[SPR_MQ] = 0; 410 return INT32_MIN; 411 } else { 412 env->spr[SPR_MQ] = tmp % arg2; 413 tmp /= (int32_t)arg2; 414 if ((int32_t)tmp != tmp) { 415 env->so = env->ov = 1; 416 } else { 417 env->ov = 0; 418 } 419 return tmp; 420 } 421 } 422 423 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1, 424 target_ulong arg2) 425 { 426 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 427 (int32_t)arg2 == 0) { 428 env->spr[SPR_MQ] = 0; 429 return INT32_MIN; 430 } else { 431 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 432 return (int32_t)arg1 / (int32_t)arg2; 433 } 434 } 435 436 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1, 437 target_ulong arg2) 438 { 439 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 440 (int32_t)arg2 == 0) { 441 env->so = env->ov = 1; 442 env->spr[SPR_MQ] = 0; 443 return INT32_MIN; 444 } else { 445 env->ov = 0; 446 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 447 return (int32_t)arg1 / (int32_t)arg2; 448 } 449 } 450 451 /*****************************************************************************/ 452 /* 602 specific instructions */ 453 /* mfrom is the most crazy instruction ever seen, imho ! */ 454 /* Real implementation uses a ROM table. Do the same */ 455 /* 456 * Extremely decomposed: 457 * -arg / 256 458 * return 256 * log10(10 + 1.0) + 0.5 459 */ 460 #if !defined(CONFIG_USER_ONLY) 461 target_ulong helper_602_mfrom(target_ulong arg) 462 { 463 if (likely(arg < 602)) { 464 #include "mfrom_table.c.inc" 465 return mfrom_ROM_table[arg]; 466 } else { 467 return 0; 468 } 469 } 470 #endif 471 472 /*****************************************************************************/ 473 /* Altivec extension helpers */ 474 #if defined(HOST_WORDS_BIGENDIAN) 475 #define VECTOR_FOR_INORDER_I(index, element) \ 476 for (index = 0; index < ARRAY_SIZE(r->element); index++) 477 #else 478 #define VECTOR_FOR_INORDER_I(index, element) \ 479 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--) 480 #endif 481 482 /* Saturating arithmetic helpers. */ 483 #define SATCVT(from, to, from_type, to_type, min, max) \ 484 static inline to_type cvt##from##to(from_type x, int *sat) \ 485 { \ 486 to_type r; \ 487 \ 488 if (x < (from_type)min) { \ 489 r = min; \ 490 *sat = 1; \ 491 } else if (x > (from_type)max) { \ 492 r = max; \ 493 *sat = 1; \ 494 } else { \ 495 r = x; \ 496 } \ 497 return r; \ 498 } 499 #define SATCVTU(from, to, from_type, to_type, min, max) \ 500 static inline to_type cvt##from##to(from_type x, int *sat) \ 501 { \ 502 to_type r; \ 503 \ 504 if (x > (from_type)max) { \ 505 r = max; \ 506 *sat = 1; \ 507 } else { \ 508 r = x; \ 509 } \ 510 return r; \ 511 } 512 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX) 513 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX) 514 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX) 515 516 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX) 517 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX) 518 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX) 519 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX) 520 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX) 521 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX) 522 #undef SATCVT 523 #undef SATCVTU 524 525 void helper_mtvscr(CPUPPCState *env, uint32_t vscr) 526 { 527 ppc_store_vscr(env, vscr); 528 } 529 530 uint32_t helper_mfvscr(CPUPPCState *env) 531 { 532 return ppc_get_vscr(env); 533 } 534 535 static inline void set_vscr_sat(CPUPPCState *env) 536 { 537 /* The choice of non-zero value is arbitrary. */ 538 env->vscr_sat.u32[0] = 1; 539 } 540 541 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 542 { 543 int i; 544 545 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 546 r->u32[i] = ~a->u32[i] < b->u32[i]; 547 } 548 } 549 550 /* vprtybw */ 551 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b) 552 { 553 int i; 554 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 555 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16); 556 res ^= res >> 8; 557 r->u32[i] = res & 1; 558 } 559 } 560 561 /* vprtybd */ 562 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b) 563 { 564 int i; 565 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 566 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32); 567 res ^= res >> 16; 568 res ^= res >> 8; 569 r->u64[i] = res & 1; 570 } 571 } 572 573 /* vprtybq */ 574 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b) 575 { 576 uint64_t res = b->u64[0] ^ b->u64[1]; 577 res ^= res >> 32; 578 res ^= res >> 16; 579 res ^= res >> 8; 580 r->VsrD(1) = res & 1; 581 r->VsrD(0) = 0; 582 } 583 584 #define VARITHFP(suffix, func) \ 585 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 586 ppc_avr_t *b) \ 587 { \ 588 int i; \ 589 \ 590 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 591 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 592 } \ 593 } 594 VARITHFP(addfp, float32_add) 595 VARITHFP(subfp, float32_sub) 596 VARITHFP(minfp, float32_min) 597 VARITHFP(maxfp, float32_max) 598 #undef VARITHFP 599 600 #define VARITHFPFMA(suffix, type) \ 601 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 602 ppc_avr_t *b, ppc_avr_t *c) \ 603 { \ 604 int i; \ 605 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 606 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 607 type, &env->vec_status); \ 608 } \ 609 } 610 VARITHFPFMA(maddfp, 0); 611 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 612 #undef VARITHFPFMA 613 614 #define VARITHSAT_CASE(type, op, cvt, element) \ 615 { \ 616 type result = (type)a->element[i] op (type)b->element[i]; \ 617 r->element[i] = cvt(result, &sat); \ 618 } 619 620 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 621 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 622 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 623 { \ 624 int sat = 0; \ 625 int i; \ 626 \ 627 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 628 VARITHSAT_CASE(optype, op, cvt, element); \ 629 } \ 630 if (sat) { \ 631 vscr_sat->u32[0] = 1; \ 632 } \ 633 } 634 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 635 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 636 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 637 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 638 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 639 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 640 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 641 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 642 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 643 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 644 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 645 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 646 #undef VARITHSAT_CASE 647 #undef VARITHSAT_DO 648 #undef VARITHSAT_SIGNED 649 #undef VARITHSAT_UNSIGNED 650 651 #define VAVG_DO(name, element, etype) \ 652 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 653 { \ 654 int i; \ 655 \ 656 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 657 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 658 r->element[i] = x >> 1; \ 659 } \ 660 } 661 662 #define VAVG(type, signed_element, signed_type, unsigned_element, \ 663 unsigned_type) \ 664 VAVG_DO(avgs##type, signed_element, signed_type) \ 665 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 666 VAVG(b, s8, int16_t, u8, uint16_t) 667 VAVG(h, s16, int32_t, u16, uint32_t) 668 VAVG(w, s32, int64_t, u32, uint64_t) 669 #undef VAVG_DO 670 #undef VAVG 671 672 #define VABSDU_DO(name, element) \ 673 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 674 { \ 675 int i; \ 676 \ 677 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 678 r->element[i] = (a->element[i] > b->element[i]) ? \ 679 (a->element[i] - b->element[i]) : \ 680 (b->element[i] - a->element[i]); \ 681 } \ 682 } 683 684 /* 685 * VABSDU - Vector absolute difference unsigned 686 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 687 * element - element type to access from vector 688 */ 689 #define VABSDU(type, element) \ 690 VABSDU_DO(absdu##type, element) 691 VABSDU(b, u8) 692 VABSDU(h, u16) 693 VABSDU(w, u32) 694 #undef VABSDU_DO 695 #undef VABSDU 696 697 #define VCF(suffix, cvt, element) \ 698 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 699 ppc_avr_t *b, uint32_t uim) \ 700 { \ 701 int i; \ 702 \ 703 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 704 float32 t = cvt(b->element[i], &env->vec_status); \ 705 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 706 } \ 707 } 708 VCF(ux, uint32_to_float32, u32) 709 VCF(sx, int32_to_float32, s32) 710 #undef VCF 711 712 #define VCMP_DO(suffix, compare, element, record) \ 713 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 714 ppc_avr_t *a, ppc_avr_t *b) \ 715 { \ 716 uint64_t ones = (uint64_t)-1; \ 717 uint64_t all = ones; \ 718 uint64_t none = 0; \ 719 int i; \ 720 \ 721 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 722 uint64_t result = (a->element[i] compare b->element[i] ? \ 723 ones : 0x0); \ 724 switch (sizeof(a->element[0])) { \ 725 case 8: \ 726 r->u64[i] = result; \ 727 break; \ 728 case 4: \ 729 r->u32[i] = result; \ 730 break; \ 731 case 2: \ 732 r->u16[i] = result; \ 733 break; \ 734 case 1: \ 735 r->u8[i] = result; \ 736 break; \ 737 } \ 738 all &= result; \ 739 none |= result; \ 740 } \ 741 if (record) { \ 742 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 743 } \ 744 } 745 #define VCMP(suffix, compare, element) \ 746 VCMP_DO(suffix, compare, element, 0) \ 747 VCMP_DO(suffix##_dot, compare, element, 1) 748 VCMP(equb, ==, u8) 749 VCMP(equh, ==, u16) 750 VCMP(equw, ==, u32) 751 VCMP(equd, ==, u64) 752 VCMP(gtub, >, u8) 753 VCMP(gtuh, >, u16) 754 VCMP(gtuw, >, u32) 755 VCMP(gtud, >, u64) 756 VCMP(gtsb, >, s8) 757 VCMP(gtsh, >, s16) 758 VCMP(gtsw, >, s32) 759 VCMP(gtsd, >, s64) 760 #undef VCMP_DO 761 #undef VCMP 762 763 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \ 764 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \ 765 ppc_avr_t *a, ppc_avr_t *b) \ 766 { \ 767 etype ones = (etype)-1; \ 768 etype all = ones; \ 769 etype result, none = 0; \ 770 int i; \ 771 \ 772 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 773 if (cmpzero) { \ 774 result = ((a->element[i] == 0) \ 775 || (b->element[i] == 0) \ 776 || (a->element[i] != b->element[i]) ? \ 777 ones : 0x0); \ 778 } else { \ 779 result = (a->element[i] != b->element[i]) ? ones : 0x0; \ 780 } \ 781 r->element[i] = result; \ 782 all &= result; \ 783 none |= result; \ 784 } \ 785 if (record) { \ 786 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 787 } \ 788 } 789 790 /* 791 * VCMPNEZ - Vector compare not equal to zero 792 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word) 793 * element - element type to access from vector 794 */ 795 #define VCMPNE(suffix, element, etype, cmpzero) \ 796 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \ 797 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1) 798 VCMPNE(zb, u8, uint8_t, 1) 799 VCMPNE(zh, u16, uint16_t, 1) 800 VCMPNE(zw, u32, uint32_t, 1) 801 VCMPNE(b, u8, uint8_t, 0) 802 VCMPNE(h, u16, uint16_t, 0) 803 VCMPNE(w, u32, uint32_t, 0) 804 #undef VCMPNE_DO 805 #undef VCMPNE 806 807 #define VCMPFP_DO(suffix, compare, order, record) \ 808 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 809 ppc_avr_t *a, ppc_avr_t *b) \ 810 { \ 811 uint32_t ones = (uint32_t)-1; \ 812 uint32_t all = ones; \ 813 uint32_t none = 0; \ 814 int i; \ 815 \ 816 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 817 uint32_t result; \ 818 FloatRelation rel = \ 819 float32_compare_quiet(a->f32[i], b->f32[i], \ 820 &env->vec_status); \ 821 if (rel == float_relation_unordered) { \ 822 result = 0; \ 823 } else if (rel compare order) { \ 824 result = ones; \ 825 } else { \ 826 result = 0; \ 827 } \ 828 r->u32[i] = result; \ 829 all &= result; \ 830 none |= result; \ 831 } \ 832 if (record) { \ 833 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 834 } \ 835 } 836 #define VCMPFP(suffix, compare, order) \ 837 VCMPFP_DO(suffix, compare, order, 0) \ 838 VCMPFP_DO(suffix##_dot, compare, order, 1) 839 VCMPFP(eqfp, ==, float_relation_equal) 840 VCMPFP(gefp, !=, float_relation_less) 841 VCMPFP(gtfp, ==, float_relation_greater) 842 #undef VCMPFP_DO 843 #undef VCMPFP 844 845 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 846 ppc_avr_t *a, ppc_avr_t *b, int record) 847 { 848 int i; 849 int all_in = 0; 850 851 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 852 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 853 &env->vec_status); 854 if (le_rel == float_relation_unordered) { 855 r->u32[i] = 0xc0000000; 856 all_in = 1; 857 } else { 858 float32 bneg = float32_chs(b->f32[i]); 859 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, 860 &env->vec_status); 861 int le = le_rel != float_relation_greater; 862 int ge = ge_rel != float_relation_less; 863 864 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 865 all_in |= (!le | !ge); 866 } 867 } 868 if (record) { 869 env->crf[6] = (all_in == 0) << 1; 870 } 871 } 872 873 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 874 { 875 vcmpbfp_internal(env, r, a, b, 0); 876 } 877 878 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 879 ppc_avr_t *b) 880 { 881 vcmpbfp_internal(env, r, a, b, 1); 882 } 883 884 #define VCT(suffix, satcvt, element) \ 885 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 886 ppc_avr_t *b, uint32_t uim) \ 887 { \ 888 int i; \ 889 int sat = 0; \ 890 float_status s = env->vec_status; \ 891 \ 892 set_float_rounding_mode(float_round_to_zero, &s); \ 893 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 894 if (float32_is_any_nan(b->f32[i])) { \ 895 r->element[i] = 0; \ 896 } else { \ 897 float64 t = float32_to_float64(b->f32[i], &s); \ 898 int64_t j; \ 899 \ 900 t = float64_scalbn(t, uim, &s); \ 901 j = float64_to_int64(t, &s); \ 902 r->element[i] = satcvt(j, &sat); \ 903 } \ 904 } \ 905 if (sat) { \ 906 set_vscr_sat(env); \ 907 } \ 908 } 909 VCT(uxs, cvtsduw, u32) 910 VCT(sxs, cvtsdsw, s32) 911 #undef VCT 912 913 target_ulong helper_vclzlsbb(ppc_avr_t *r) 914 { 915 target_ulong count = 0; 916 int i; 917 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 918 if (r->VsrB(i) & 0x01) { 919 break; 920 } 921 count++; 922 } 923 return count; 924 } 925 926 target_ulong helper_vctzlsbb(ppc_avr_t *r) 927 { 928 target_ulong count = 0; 929 int i; 930 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 931 if (r->VsrB(i) & 0x01) { 932 break; 933 } 934 count++; 935 } 936 return count; 937 } 938 939 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 940 ppc_avr_t *b, ppc_avr_t *c) 941 { 942 int sat = 0; 943 int i; 944 945 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 946 int32_t prod = a->s16[i] * b->s16[i]; 947 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 948 949 r->s16[i] = cvtswsh(t, &sat); 950 } 951 952 if (sat) { 953 set_vscr_sat(env); 954 } 955 } 956 957 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 958 ppc_avr_t *b, ppc_avr_t *c) 959 { 960 int sat = 0; 961 int i; 962 963 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 964 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 965 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 966 r->s16[i] = cvtswsh(t, &sat); 967 } 968 969 if (sat) { 970 set_vscr_sat(env); 971 } 972 } 973 974 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 975 { 976 int i; 977 978 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 979 int32_t prod = a->s16[i] * b->s16[i]; 980 r->s16[i] = (int16_t) (prod + c->s16[i]); 981 } 982 } 983 984 #define VMRG_DO(name, element, access, ofs) \ 985 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 986 { \ 987 ppc_avr_t result; \ 988 int i, half = ARRAY_SIZE(r->element) / 2; \ 989 \ 990 for (i = 0; i < half; i++) { \ 991 result.access(i * 2 + 0) = a->access(i + ofs); \ 992 result.access(i * 2 + 1) = b->access(i + ofs); \ 993 } \ 994 *r = result; \ 995 } 996 997 #define VMRG(suffix, element, access) \ 998 VMRG_DO(mrgl##suffix, element, access, half) \ 999 VMRG_DO(mrgh##suffix, element, access, 0) 1000 VMRG(b, u8, VsrB) 1001 VMRG(h, u16, VsrH) 1002 VMRG(w, u32, VsrW) 1003 #undef VMRG_DO 1004 #undef VMRG 1005 1006 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1007 ppc_avr_t *b, ppc_avr_t *c) 1008 { 1009 int32_t prod[16]; 1010 int i; 1011 1012 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 1013 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 1014 } 1015 1016 VECTOR_FOR_INORDER_I(i, s32) { 1017 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 1018 prod[4 * i + 2] + prod[4 * i + 3]; 1019 } 1020 } 1021 1022 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1023 ppc_avr_t *b, ppc_avr_t *c) 1024 { 1025 int32_t prod[8]; 1026 int i; 1027 1028 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1029 prod[i] = a->s16[i] * b->s16[i]; 1030 } 1031 1032 VECTOR_FOR_INORDER_I(i, s32) { 1033 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1034 } 1035 } 1036 1037 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1038 ppc_avr_t *b, ppc_avr_t *c) 1039 { 1040 int32_t prod[8]; 1041 int i; 1042 int sat = 0; 1043 1044 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1045 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1046 } 1047 1048 VECTOR_FOR_INORDER_I(i, s32) { 1049 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1050 1051 r->u32[i] = cvtsdsw(t, &sat); 1052 } 1053 1054 if (sat) { 1055 set_vscr_sat(env); 1056 } 1057 } 1058 1059 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1060 ppc_avr_t *b, ppc_avr_t *c) 1061 { 1062 uint16_t prod[16]; 1063 int i; 1064 1065 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1066 prod[i] = a->u8[i] * b->u8[i]; 1067 } 1068 1069 VECTOR_FOR_INORDER_I(i, u32) { 1070 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1071 prod[4 * i + 2] + prod[4 * i + 3]; 1072 } 1073 } 1074 1075 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1076 ppc_avr_t *b, ppc_avr_t *c) 1077 { 1078 uint32_t prod[8]; 1079 int i; 1080 1081 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1082 prod[i] = a->u16[i] * b->u16[i]; 1083 } 1084 1085 VECTOR_FOR_INORDER_I(i, u32) { 1086 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1087 } 1088 } 1089 1090 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1091 ppc_avr_t *b, ppc_avr_t *c) 1092 { 1093 uint32_t prod[8]; 1094 int i; 1095 int sat = 0; 1096 1097 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1098 prod[i] = a->u16[i] * b->u16[i]; 1099 } 1100 1101 VECTOR_FOR_INORDER_I(i, s32) { 1102 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1103 1104 r->u32[i] = cvtuduw(t, &sat); 1105 } 1106 1107 if (sat) { 1108 set_vscr_sat(env); 1109 } 1110 } 1111 1112 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 1113 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1114 { \ 1115 int i; \ 1116 \ 1117 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1118 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 1119 (cast)b->mul_access(i); \ 1120 } \ 1121 } 1122 1123 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 1124 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1125 { \ 1126 int i; \ 1127 \ 1128 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1129 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1130 (cast)b->mul_access(i + 1); \ 1131 } \ 1132 } 1133 1134 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1135 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \ 1136 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast) 1137 VMUL(sb, s8, VsrSB, VsrSH, int16_t) 1138 VMUL(sh, s16, VsrSH, VsrSW, int32_t) 1139 VMUL(sw, s32, VsrSW, VsrSD, int64_t) 1140 VMUL(ub, u8, VsrB, VsrH, uint16_t) 1141 VMUL(uh, u16, VsrH, VsrW, uint32_t) 1142 VMUL(uw, u32, VsrW, VsrD, uint64_t) 1143 #undef VMUL_DO_EVN 1144 #undef VMUL_DO_ODD 1145 #undef VMUL 1146 1147 void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1148 { 1149 int i; 1150 1151 for (i = 0; i < 4; i++) { 1152 r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32); 1153 } 1154 } 1155 1156 void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1157 { 1158 int i; 1159 1160 for (i = 0; i < 4; i++) { 1161 r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] * 1162 (uint64_t)b->u32[i]) >> 32); 1163 } 1164 } 1165 1166 void helper_vmulhsd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1167 { 1168 uint64_t discard; 1169 1170 muls64(&discard, &r->u64[0], a->s64[0], b->s64[0]); 1171 muls64(&discard, &r->u64[1], a->s64[1], b->s64[1]); 1172 } 1173 1174 void helper_vmulhud(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1175 { 1176 uint64_t discard; 1177 1178 mulu64(&discard, &r->u64[0], a->u64[0], b->u64[0]); 1179 mulu64(&discard, &r->u64[1], a->u64[1], b->u64[1]); 1180 } 1181 1182 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1183 ppc_avr_t *c) 1184 { 1185 ppc_avr_t result; 1186 int i; 1187 1188 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1189 int s = c->VsrB(i) & 0x1f; 1190 int index = s & 0xf; 1191 1192 if (s & 0x10) { 1193 result.VsrB(i) = b->VsrB(index); 1194 } else { 1195 result.VsrB(i) = a->VsrB(index); 1196 } 1197 } 1198 *r = result; 1199 } 1200 1201 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1202 ppc_avr_t *c) 1203 { 1204 ppc_avr_t result; 1205 int i; 1206 1207 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1208 int s = c->VsrB(i) & 0x1f; 1209 int index = 15 - (s & 0xf); 1210 1211 if (s & 0x10) { 1212 result.VsrB(i) = a->VsrB(index); 1213 } else { 1214 result.VsrB(i) = b->VsrB(index); 1215 } 1216 } 1217 *r = result; 1218 } 1219 1220 #if defined(HOST_WORDS_BIGENDIAN) 1221 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1222 #define VBPERMD_INDEX(i) (i) 1223 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1224 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) 1225 #else 1226 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1227 #define VBPERMD_INDEX(i) (1 - i) 1228 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1229 #define EXTRACT_BIT(avr, i, index) \ 1230 (extract64((avr)->u64[1 - i], 63 - index, 1)) 1231 #endif 1232 1233 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1234 { 1235 int i, j; 1236 ppc_avr_t result = { .u64 = { 0, 0 } }; 1237 VECTOR_FOR_INORDER_I(i, u64) { 1238 for (j = 0; j < 8; j++) { 1239 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1240 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1241 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1242 } 1243 } 1244 } 1245 *r = result; 1246 } 1247 1248 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1249 { 1250 int i; 1251 uint64_t perm = 0; 1252 1253 VECTOR_FOR_INORDER_I(i, u8) { 1254 int index = VBPERMQ_INDEX(b, i); 1255 1256 if (index < 128) { 1257 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1258 if (a->u64[VBPERMQ_DW(index)] & mask) { 1259 perm |= (0x8000 >> i); 1260 } 1261 } 1262 } 1263 1264 r->VsrD(0) = perm; 1265 r->VsrD(1) = 0; 1266 } 1267 1268 #undef VBPERMQ_INDEX 1269 #undef VBPERMQ_DW 1270 1271 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1272 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1273 { \ 1274 int i, j; \ 1275 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ 1276 \ 1277 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1278 prod[i] = 0; \ 1279 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1280 if (a->srcfld[i] & (1ull << j)) { \ 1281 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1282 } \ 1283 } \ 1284 } \ 1285 \ 1286 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1287 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ 1288 } \ 1289 } 1290 1291 PMSUM(vpmsumb, u8, u16, uint16_t) 1292 PMSUM(vpmsumh, u16, u32, uint32_t) 1293 PMSUM(vpmsumw, u32, u64, uint64_t) 1294 1295 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1296 { 1297 1298 #ifdef CONFIG_INT128 1299 int i, j; 1300 __uint128_t prod[2]; 1301 1302 VECTOR_FOR_INORDER_I(i, u64) { 1303 prod[i] = 0; 1304 for (j = 0; j < 64; j++) { 1305 if (a->u64[i] & (1ull << j)) { 1306 prod[i] ^= (((__uint128_t)b->u64[i]) << j); 1307 } 1308 } 1309 } 1310 1311 r->u128 = prod[0] ^ prod[1]; 1312 1313 #else 1314 int i, j; 1315 ppc_avr_t prod[2]; 1316 1317 VECTOR_FOR_INORDER_I(i, u64) { 1318 prod[i].VsrD(1) = prod[i].VsrD(0) = 0; 1319 for (j = 0; j < 64; j++) { 1320 if (a->u64[i] & (1ull << j)) { 1321 ppc_avr_t bshift; 1322 if (j == 0) { 1323 bshift.VsrD(0) = 0; 1324 bshift.VsrD(1) = b->u64[i]; 1325 } else { 1326 bshift.VsrD(0) = b->u64[i] >> (64 - j); 1327 bshift.VsrD(1) = b->u64[i] << j; 1328 } 1329 prod[i].VsrD(1) ^= bshift.VsrD(1); 1330 prod[i].VsrD(0) ^= bshift.VsrD(0); 1331 } 1332 } 1333 } 1334 1335 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1); 1336 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0); 1337 #endif 1338 } 1339 1340 1341 #if defined(HOST_WORDS_BIGENDIAN) 1342 #define PKBIG 1 1343 #else 1344 #define PKBIG 0 1345 #endif 1346 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1347 { 1348 int i, j; 1349 ppc_avr_t result; 1350 #if defined(HOST_WORDS_BIGENDIAN) 1351 const ppc_avr_t *x[2] = { a, b }; 1352 #else 1353 const ppc_avr_t *x[2] = { b, a }; 1354 #endif 1355 1356 VECTOR_FOR_INORDER_I(i, u64) { 1357 VECTOR_FOR_INORDER_I(j, u32) { 1358 uint32_t e = x[i]->u32[j]; 1359 1360 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1361 ((e >> 6) & 0x3e0) | 1362 ((e >> 3) & 0x1f)); 1363 } 1364 } 1365 *r = result; 1366 } 1367 1368 #define VPK(suffix, from, to, cvt, dosat) \ 1369 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1370 ppc_avr_t *a, ppc_avr_t *b) \ 1371 { \ 1372 int i; \ 1373 int sat = 0; \ 1374 ppc_avr_t result; \ 1375 ppc_avr_t *a0 = PKBIG ? a : b; \ 1376 ppc_avr_t *a1 = PKBIG ? b : a; \ 1377 \ 1378 VECTOR_FOR_INORDER_I(i, from) { \ 1379 result.to[i] = cvt(a0->from[i], &sat); \ 1380 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1381 } \ 1382 *r = result; \ 1383 if (dosat && sat) { \ 1384 set_vscr_sat(env); \ 1385 } \ 1386 } 1387 #define I(x, y) (x) 1388 VPK(shss, s16, s8, cvtshsb, 1) 1389 VPK(shus, s16, u8, cvtshub, 1) 1390 VPK(swss, s32, s16, cvtswsh, 1) 1391 VPK(swus, s32, u16, cvtswuh, 1) 1392 VPK(sdss, s64, s32, cvtsdsw, 1) 1393 VPK(sdus, s64, u32, cvtsduw, 1) 1394 VPK(uhus, u16, u8, cvtuhub, 1) 1395 VPK(uwus, u32, u16, cvtuwuh, 1) 1396 VPK(udus, u64, u32, cvtuduw, 1) 1397 VPK(uhum, u16, u8, I, 0) 1398 VPK(uwum, u32, u16, I, 0) 1399 VPK(udum, u64, u32, I, 0) 1400 #undef I 1401 #undef VPK 1402 #undef PKBIG 1403 1404 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1405 { 1406 int i; 1407 1408 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1409 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1410 } 1411 } 1412 1413 #define VRFI(suffix, rounding) \ 1414 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1415 ppc_avr_t *b) \ 1416 { \ 1417 int i; \ 1418 float_status s = env->vec_status; \ 1419 \ 1420 set_float_rounding_mode(rounding, &s); \ 1421 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1422 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1423 } \ 1424 } 1425 VRFI(n, float_round_nearest_even) 1426 VRFI(m, float_round_down) 1427 VRFI(p, float_round_up) 1428 VRFI(z, float_round_to_zero) 1429 #undef VRFI 1430 1431 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1432 { 1433 int i; 1434 1435 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1436 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1437 1438 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1439 } 1440 } 1441 1442 #define VRLMI(name, size, element, insert) \ 1443 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1444 { \ 1445 int i; \ 1446 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1447 uint##size##_t src1 = a->element[i]; \ 1448 uint##size##_t src2 = b->element[i]; \ 1449 uint##size##_t src3 = r->element[i]; \ 1450 uint##size##_t begin, end, shift, mask, rot_val; \ 1451 \ 1452 shift = extract##size(src2, 0, 6); \ 1453 end = extract##size(src2, 8, 6); \ 1454 begin = extract##size(src2, 16, 6); \ 1455 rot_val = rol##size(src1, shift); \ 1456 mask = mask_u##size(begin, end); \ 1457 if (insert) { \ 1458 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1459 } else { \ 1460 r->element[i] = (rot_val & mask); \ 1461 } \ 1462 } \ 1463 } 1464 1465 VRLMI(vrldmi, 64, u64, 1); 1466 VRLMI(vrlwmi, 32, u32, 1); 1467 VRLMI(vrldnm, 64, u64, 0); 1468 VRLMI(vrlwnm, 32, u32, 0); 1469 1470 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1471 ppc_avr_t *c) 1472 { 1473 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]); 1474 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]); 1475 } 1476 1477 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1478 { 1479 int i; 1480 1481 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1482 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status); 1483 } 1484 } 1485 1486 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1487 { 1488 int i; 1489 1490 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1491 r->f32[i] = float32_log2(b->f32[i], &env->vec_status); 1492 } 1493 } 1494 1495 #if defined(HOST_WORDS_BIGENDIAN) 1496 #define VEXTU_X_DO(name, size, left) \ 1497 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1498 { \ 1499 int index; \ 1500 if (left) { \ 1501 index = (a & 0xf) * 8; \ 1502 } else { \ 1503 index = ((15 - (a & 0xf) + 1) * 8) - size; \ 1504 } \ 1505 return int128_getlo(int128_rshift(b->s128, index)) & \ 1506 MAKE_64BIT_MASK(0, size); \ 1507 } 1508 #else 1509 #define VEXTU_X_DO(name, size, left) \ 1510 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1511 { \ 1512 int index; \ 1513 if (left) { \ 1514 index = ((15 - (a & 0xf) + 1) * 8) - size; \ 1515 } else { \ 1516 index = (a & 0xf) * 8; \ 1517 } \ 1518 return int128_getlo(int128_rshift(b->s128, index)) & \ 1519 MAKE_64BIT_MASK(0, size); \ 1520 } 1521 #endif 1522 1523 VEXTU_X_DO(vextublx, 8, 1) 1524 VEXTU_X_DO(vextuhlx, 16, 1) 1525 VEXTU_X_DO(vextuwlx, 32, 1) 1526 VEXTU_X_DO(vextubrx, 8, 0) 1527 VEXTU_X_DO(vextuhrx, 16, 0) 1528 VEXTU_X_DO(vextuwrx, 32, 0) 1529 #undef VEXTU_X_DO 1530 1531 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1532 { 1533 int i; 1534 unsigned int shift, bytes, size; 1535 1536 size = ARRAY_SIZE(r->u8); 1537 for (i = 0; i < size; i++) { 1538 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1539 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */ 1540 (((i + 1) < size) ? a->VsrB(i + 1) : 0); 1541 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */ 1542 } 1543 } 1544 1545 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1546 { 1547 int i; 1548 unsigned int shift, bytes; 1549 1550 /* 1551 * Use reverse order, as destination and source register can be 1552 * same. Its being modified in place saving temporary, reverse 1553 * order will guarantee that computed result is not fed back. 1554 */ 1555 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1556 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1557 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i); 1558 /* extract adjacent bytes */ 1559 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */ 1560 } 1561 } 1562 1563 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1564 { 1565 int sh = shift & 0xf; 1566 int i; 1567 ppc_avr_t result; 1568 1569 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1570 int index = sh + i; 1571 if (index > 0xf) { 1572 result.VsrB(i) = b->VsrB(index - 0x10); 1573 } else { 1574 result.VsrB(i) = a->VsrB(index); 1575 } 1576 } 1577 *r = result; 1578 } 1579 1580 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1581 { 1582 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1583 1584 #if defined(HOST_WORDS_BIGENDIAN) 1585 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1586 memset(&r->u8[16 - sh], 0, sh); 1587 #else 1588 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1589 memset(&r->u8[0], 0, sh); 1590 #endif 1591 } 1592 1593 #if defined(HOST_WORDS_BIGENDIAN) 1594 #define VINSERT(suffix, element) \ 1595 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1596 { \ 1597 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \ 1598 sizeof(r->element[0])); \ 1599 } 1600 #else 1601 #define VINSERT(suffix, element) \ 1602 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1603 { \ 1604 uint32_t d = (16 - index) - sizeof(r->element[0]); \ 1605 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \ 1606 } 1607 #endif 1608 VINSERT(b, u8) 1609 VINSERT(h, u16) 1610 VINSERT(w, u32) 1611 VINSERT(d, u64) 1612 #undef VINSERT 1613 #if defined(HOST_WORDS_BIGENDIAN) 1614 #define VEXTRACT(suffix, element) \ 1615 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1616 { \ 1617 uint32_t es = sizeof(r->element[0]); \ 1618 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1619 memset(&r->u8[8], 0, 8); \ 1620 memset(&r->u8[0], 0, 8 - es); \ 1621 } 1622 #else 1623 #define VEXTRACT(suffix, element) \ 1624 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1625 { \ 1626 uint32_t es = sizeof(r->element[0]); \ 1627 uint32_t s = (16 - index) - es; \ 1628 memmove(&r->u8[8], &b->u8[s], es); \ 1629 memset(&r->u8[0], 0, 8); \ 1630 memset(&r->u8[8 + es], 0, 8 - es); \ 1631 } 1632 #endif 1633 VEXTRACT(ub, u8) 1634 VEXTRACT(uh, u16) 1635 VEXTRACT(uw, u32) 1636 VEXTRACT(d, u64) 1637 #undef VEXTRACT 1638 1639 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt, 1640 ppc_vsr_t *xb, uint32_t index) 1641 { 1642 ppc_vsr_t t = { }; 1643 size_t es = sizeof(uint32_t); 1644 uint32_t ext_index; 1645 int i; 1646 1647 ext_index = index; 1648 for (i = 0; i < es; i++, ext_index++) { 1649 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16); 1650 } 1651 1652 *xt = t; 1653 } 1654 1655 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt, 1656 ppc_vsr_t *xb, uint32_t index) 1657 { 1658 ppc_vsr_t t = *xt; 1659 size_t es = sizeof(uint32_t); 1660 int ins_index, i = 0; 1661 1662 ins_index = index; 1663 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1664 t.VsrB(ins_index) = xb->VsrB(8 - es + i); 1665 } 1666 1667 *xt = t; 1668 } 1669 1670 #define VEXT_SIGNED(name, element, cast) \ 1671 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1672 { \ 1673 int i; \ 1674 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1675 r->element[i] = (cast)b->element[i]; \ 1676 } \ 1677 } 1678 VEXT_SIGNED(vextsb2w, s32, int8_t) 1679 VEXT_SIGNED(vextsb2d, s64, int8_t) 1680 VEXT_SIGNED(vextsh2w, s32, int16_t) 1681 VEXT_SIGNED(vextsh2d, s64, int16_t) 1682 VEXT_SIGNED(vextsw2d, s64, int32_t) 1683 #undef VEXT_SIGNED 1684 1685 #define VNEG(name, element) \ 1686 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1687 { \ 1688 int i; \ 1689 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1690 r->element[i] = -b->element[i]; \ 1691 } \ 1692 } 1693 VNEG(vnegw, s32) 1694 VNEG(vnegd, s64) 1695 #undef VNEG 1696 1697 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1698 { 1699 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1700 1701 #if defined(HOST_WORDS_BIGENDIAN) 1702 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1703 memset(&r->u8[0], 0, sh); 1704 #else 1705 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1706 memset(&r->u8[16 - sh], 0, sh); 1707 #endif 1708 } 1709 1710 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1711 { 1712 int i; 1713 1714 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1715 r->u32[i] = a->u32[i] >= b->u32[i]; 1716 } 1717 } 1718 1719 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1720 { 1721 int64_t t; 1722 int i, upper; 1723 ppc_avr_t result; 1724 int sat = 0; 1725 1726 upper = ARRAY_SIZE(r->s32) - 1; 1727 t = (int64_t)b->VsrSW(upper); 1728 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1729 t += a->VsrSW(i); 1730 result.VsrSW(i) = 0; 1731 } 1732 result.VsrSW(upper) = cvtsdsw(t, &sat); 1733 *r = result; 1734 1735 if (sat) { 1736 set_vscr_sat(env); 1737 } 1738 } 1739 1740 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1741 { 1742 int i, j, upper; 1743 ppc_avr_t result; 1744 int sat = 0; 1745 1746 upper = 1; 1747 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1748 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 1749 1750 result.VsrD(i) = 0; 1751 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 1752 t += a->VsrSW(2 * i + j); 1753 } 1754 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 1755 } 1756 1757 *r = result; 1758 if (sat) { 1759 set_vscr_sat(env); 1760 } 1761 } 1762 1763 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1764 { 1765 int i, j; 1766 int sat = 0; 1767 1768 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1769 int64_t t = (int64_t)b->s32[i]; 1770 1771 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 1772 t += a->s8[4 * i + j]; 1773 } 1774 r->s32[i] = cvtsdsw(t, &sat); 1775 } 1776 1777 if (sat) { 1778 set_vscr_sat(env); 1779 } 1780 } 1781 1782 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1783 { 1784 int sat = 0; 1785 int i; 1786 1787 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1788 int64_t t = (int64_t)b->s32[i]; 1789 1790 t += a->s16[2 * i] + a->s16[2 * i + 1]; 1791 r->s32[i] = cvtsdsw(t, &sat); 1792 } 1793 1794 if (sat) { 1795 set_vscr_sat(env); 1796 } 1797 } 1798 1799 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1800 { 1801 int i, j; 1802 int sat = 0; 1803 1804 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1805 uint64_t t = (uint64_t)b->u32[i]; 1806 1807 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 1808 t += a->u8[4 * i + j]; 1809 } 1810 r->u32[i] = cvtuduw(t, &sat); 1811 } 1812 1813 if (sat) { 1814 set_vscr_sat(env); 1815 } 1816 } 1817 1818 #if defined(HOST_WORDS_BIGENDIAN) 1819 #define UPKHI 1 1820 #define UPKLO 0 1821 #else 1822 #define UPKHI 0 1823 #define UPKLO 1 1824 #endif 1825 #define VUPKPX(suffix, hi) \ 1826 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1827 { \ 1828 int i; \ 1829 ppc_avr_t result; \ 1830 \ 1831 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 1832 uint16_t e = b->u16[hi ? i : i + 4]; \ 1833 uint8_t a = (e >> 15) ? 0xff : 0; \ 1834 uint8_t r = (e >> 10) & 0x1f; \ 1835 uint8_t g = (e >> 5) & 0x1f; \ 1836 uint8_t b = e & 0x1f; \ 1837 \ 1838 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 1839 } \ 1840 *r = result; \ 1841 } 1842 VUPKPX(lpx, UPKLO) 1843 VUPKPX(hpx, UPKHI) 1844 #undef VUPKPX 1845 1846 #define VUPK(suffix, unpacked, packee, hi) \ 1847 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1848 { \ 1849 int i; \ 1850 ppc_avr_t result; \ 1851 \ 1852 if (hi) { \ 1853 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 1854 result.unpacked[i] = b->packee[i]; \ 1855 } \ 1856 } else { \ 1857 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 1858 i++) { \ 1859 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 1860 } \ 1861 } \ 1862 *r = result; \ 1863 } 1864 VUPK(hsb, s16, s8, UPKHI) 1865 VUPK(hsh, s32, s16, UPKHI) 1866 VUPK(hsw, s64, s32, UPKHI) 1867 VUPK(lsb, s16, s8, UPKLO) 1868 VUPK(lsh, s32, s16, UPKLO) 1869 VUPK(lsw, s64, s32, UPKLO) 1870 #undef VUPK 1871 #undef UPKHI 1872 #undef UPKLO 1873 1874 #define VGENERIC_DO(name, element) \ 1875 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 1876 { \ 1877 int i; \ 1878 \ 1879 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1880 r->element[i] = name(b->element[i]); \ 1881 } \ 1882 } 1883 1884 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 1885 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 1886 1887 VGENERIC_DO(clzb, u8) 1888 VGENERIC_DO(clzh, u16) 1889 1890 #undef clzb 1891 #undef clzh 1892 1893 #define ctzb(v) ((v) ? ctz32(v) : 8) 1894 #define ctzh(v) ((v) ? ctz32(v) : 16) 1895 #define ctzw(v) ctz32((v)) 1896 #define ctzd(v) ctz64((v)) 1897 1898 VGENERIC_DO(ctzb, u8) 1899 VGENERIC_DO(ctzh, u16) 1900 VGENERIC_DO(ctzw, u32) 1901 VGENERIC_DO(ctzd, u64) 1902 1903 #undef ctzb 1904 #undef ctzh 1905 #undef ctzw 1906 #undef ctzd 1907 1908 #define popcntb(v) ctpop8(v) 1909 #define popcnth(v) ctpop16(v) 1910 #define popcntw(v) ctpop32(v) 1911 #define popcntd(v) ctpop64(v) 1912 1913 VGENERIC_DO(popcntb, u8) 1914 VGENERIC_DO(popcnth, u16) 1915 VGENERIC_DO(popcntw, u32) 1916 VGENERIC_DO(popcntd, u64) 1917 1918 #undef popcntb 1919 #undef popcnth 1920 #undef popcntw 1921 #undef popcntd 1922 1923 #undef VGENERIC_DO 1924 1925 #if defined(HOST_WORDS_BIGENDIAN) 1926 #define QW_ONE { .u64 = { 0, 1 } } 1927 #else 1928 #define QW_ONE { .u64 = { 1, 0 } } 1929 #endif 1930 1931 #ifndef CONFIG_INT128 1932 1933 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) 1934 { 1935 t->u64[0] = ~a.u64[0]; 1936 t->u64[1] = ~a.u64[1]; 1937 } 1938 1939 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) 1940 { 1941 if (a.VsrD(0) < b.VsrD(0)) { 1942 return -1; 1943 } else if (a.VsrD(0) > b.VsrD(0)) { 1944 return 1; 1945 } else if (a.VsrD(1) < b.VsrD(1)) { 1946 return -1; 1947 } else if (a.VsrD(1) > b.VsrD(1)) { 1948 return 1; 1949 } else { 1950 return 0; 1951 } 1952 } 1953 1954 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 1955 { 1956 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 1957 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 1958 (~a.VsrD(1) < b.VsrD(1)); 1959 } 1960 1961 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 1962 { 1963 ppc_avr_t not_a; 1964 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 1965 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 1966 (~a.VsrD(1) < b.VsrD(1)); 1967 avr_qw_not(¬_a, a); 1968 return avr_qw_cmpu(not_a, b) < 0; 1969 } 1970 1971 #endif 1972 1973 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1974 { 1975 #ifdef CONFIG_INT128 1976 r->u128 = a->u128 + b->u128; 1977 #else 1978 avr_qw_add(r, *a, *b); 1979 #endif 1980 } 1981 1982 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1983 { 1984 #ifdef CONFIG_INT128 1985 r->u128 = a->u128 + b->u128 + (c->u128 & 1); 1986 #else 1987 1988 if (c->VsrD(1) & 1) { 1989 ppc_avr_t tmp; 1990 1991 tmp.VsrD(0) = 0; 1992 tmp.VsrD(1) = c->VsrD(1) & 1; 1993 avr_qw_add(&tmp, *a, tmp); 1994 avr_qw_add(r, tmp, *b); 1995 } else { 1996 avr_qw_add(r, *a, *b); 1997 } 1998 #endif 1999 } 2000 2001 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2002 { 2003 #ifdef CONFIG_INT128 2004 r->u128 = (~a->u128 < b->u128); 2005 #else 2006 ppc_avr_t not_a; 2007 2008 avr_qw_not(¬_a, *a); 2009 2010 r->VsrD(0) = 0; 2011 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0); 2012 #endif 2013 } 2014 2015 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2016 { 2017 #ifdef CONFIG_INT128 2018 int carry_out = (~a->u128 < b->u128); 2019 if (!carry_out && (c->u128 & 1)) { 2020 carry_out = ((a->u128 + b->u128 + 1) == 0) && 2021 ((a->u128 != 0) || (b->u128 != 0)); 2022 } 2023 r->u128 = carry_out; 2024 #else 2025 2026 int carry_in = c->VsrD(1) & 1; 2027 int carry_out = 0; 2028 ppc_avr_t tmp; 2029 2030 carry_out = avr_qw_addc(&tmp, *a, *b); 2031 2032 if (!carry_out && carry_in) { 2033 ppc_avr_t one = QW_ONE; 2034 carry_out = avr_qw_addc(&tmp, tmp, one); 2035 } 2036 r->VsrD(0) = 0; 2037 r->VsrD(1) = carry_out; 2038 #endif 2039 } 2040 2041 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2042 { 2043 #ifdef CONFIG_INT128 2044 r->u128 = a->u128 - b->u128; 2045 #else 2046 ppc_avr_t tmp; 2047 ppc_avr_t one = QW_ONE; 2048 2049 avr_qw_not(&tmp, *b); 2050 avr_qw_add(&tmp, *a, tmp); 2051 avr_qw_add(r, tmp, one); 2052 #endif 2053 } 2054 2055 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2056 { 2057 #ifdef CONFIG_INT128 2058 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); 2059 #else 2060 ppc_avr_t tmp, sum; 2061 2062 avr_qw_not(&tmp, *b); 2063 avr_qw_add(&sum, *a, tmp); 2064 2065 tmp.VsrD(0) = 0; 2066 tmp.VsrD(1) = c->VsrD(1) & 1; 2067 avr_qw_add(r, sum, tmp); 2068 #endif 2069 } 2070 2071 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2072 { 2073 #ifdef CONFIG_INT128 2074 r->u128 = (~a->u128 < ~b->u128) || 2075 (a->u128 + ~b->u128 == (__uint128_t)-1); 2076 #else 2077 int carry = (avr_qw_cmpu(*a, *b) > 0); 2078 if (!carry) { 2079 ppc_avr_t tmp; 2080 avr_qw_not(&tmp, *b); 2081 avr_qw_add(&tmp, *a, tmp); 2082 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull)); 2083 } 2084 r->VsrD(0) = 0; 2085 r->VsrD(1) = carry; 2086 #endif 2087 } 2088 2089 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2090 { 2091 #ifdef CONFIG_INT128 2092 r->u128 = 2093 (~a->u128 < ~b->u128) || 2094 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); 2095 #else 2096 int carry_in = c->VsrD(1) & 1; 2097 int carry_out = (avr_qw_cmpu(*a, *b) > 0); 2098 if (!carry_out && carry_in) { 2099 ppc_avr_t tmp; 2100 avr_qw_not(&tmp, *b); 2101 avr_qw_add(&tmp, *a, tmp); 2102 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull)); 2103 } 2104 2105 r->VsrD(0) = 0; 2106 r->VsrD(1) = carry_out; 2107 #endif 2108 } 2109 2110 #define BCD_PLUS_PREF_1 0xC 2111 #define BCD_PLUS_PREF_2 0xF 2112 #define BCD_PLUS_ALT_1 0xA 2113 #define BCD_NEG_PREF 0xD 2114 #define BCD_NEG_ALT 0xB 2115 #define BCD_PLUS_ALT_2 0xE 2116 #define NATIONAL_PLUS 0x2B 2117 #define NATIONAL_NEG 0x2D 2118 2119 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2120 2121 static int bcd_get_sgn(ppc_avr_t *bcd) 2122 { 2123 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) { 2124 case BCD_PLUS_PREF_1: 2125 case BCD_PLUS_PREF_2: 2126 case BCD_PLUS_ALT_1: 2127 case BCD_PLUS_ALT_2: 2128 { 2129 return 1; 2130 } 2131 2132 case BCD_NEG_PREF: 2133 case BCD_NEG_ALT: 2134 { 2135 return -1; 2136 } 2137 2138 default: 2139 { 2140 return 0; 2141 } 2142 } 2143 } 2144 2145 static int bcd_preferred_sgn(int sgn, int ps) 2146 { 2147 if (sgn >= 0) { 2148 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2149 } else { 2150 return BCD_NEG_PREF; 2151 } 2152 } 2153 2154 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2155 { 2156 uint8_t result; 2157 if (n & 1) { 2158 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4; 2159 } else { 2160 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF; 2161 } 2162 2163 if (unlikely(result > 9)) { 2164 *invalid = true; 2165 } 2166 return result; 2167 } 2168 2169 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2170 { 2171 if (n & 1) { 2172 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F; 2173 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4); 2174 } else { 2175 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0; 2176 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit; 2177 } 2178 } 2179 2180 static bool bcd_is_valid(ppc_avr_t *bcd) 2181 { 2182 int i; 2183 int invalid = 0; 2184 2185 if (bcd_get_sgn(bcd) == 0) { 2186 return false; 2187 } 2188 2189 for (i = 1; i < 32; i++) { 2190 bcd_get_digit(bcd, i, &invalid); 2191 if (unlikely(invalid)) { 2192 return false; 2193 } 2194 } 2195 return true; 2196 } 2197 2198 static int bcd_cmp_zero(ppc_avr_t *bcd) 2199 { 2200 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) { 2201 return CRF_EQ; 2202 } else { 2203 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT; 2204 } 2205 } 2206 2207 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2208 { 2209 return reg->VsrH(7 - n); 2210 } 2211 2212 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2213 { 2214 reg->VsrH(7 - n) = val; 2215 } 2216 2217 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2218 { 2219 int i; 2220 int invalid = 0; 2221 for (i = 31; i > 0; i--) { 2222 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2223 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2224 if (unlikely(invalid)) { 2225 return 0; /* doesn't matter */ 2226 } else if (dig_a > dig_b) { 2227 return 1; 2228 } else if (dig_a < dig_b) { 2229 return -1; 2230 } 2231 } 2232 2233 return 0; 2234 } 2235 2236 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2237 int *overflow) 2238 { 2239 int carry = 0; 2240 int i; 2241 int is_zero = 1; 2242 2243 for (i = 1; i <= 31; i++) { 2244 uint8_t digit = bcd_get_digit(a, i, invalid) + 2245 bcd_get_digit(b, i, invalid) + carry; 2246 is_zero &= (digit == 0); 2247 if (digit > 9) { 2248 carry = 1; 2249 digit -= 10; 2250 } else { 2251 carry = 0; 2252 } 2253 2254 bcd_put_digit(t, digit, i); 2255 } 2256 2257 *overflow = carry; 2258 return is_zero; 2259 } 2260 2261 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2262 int *overflow) 2263 { 2264 int carry = 0; 2265 int i; 2266 2267 for (i = 1; i <= 31; i++) { 2268 uint8_t digit = bcd_get_digit(a, i, invalid) - 2269 bcd_get_digit(b, i, invalid) + carry; 2270 if (digit & 0x80) { 2271 carry = -1; 2272 digit += 10; 2273 } else { 2274 carry = 0; 2275 } 2276 2277 bcd_put_digit(t, digit, i); 2278 } 2279 2280 *overflow = carry; 2281 } 2282 2283 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2284 { 2285 2286 int sgna = bcd_get_sgn(a); 2287 int sgnb = bcd_get_sgn(b); 2288 int invalid = (sgna == 0) || (sgnb == 0); 2289 int overflow = 0; 2290 int zero = 0; 2291 uint32_t cr = 0; 2292 ppc_avr_t result = { .u64 = { 0, 0 } }; 2293 2294 if (!invalid) { 2295 if (sgna == sgnb) { 2296 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2297 zero = bcd_add_mag(&result, a, b, &invalid, &overflow); 2298 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2299 } else { 2300 int magnitude = bcd_cmp_mag(a, b); 2301 if (magnitude > 0) { 2302 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2303 bcd_sub_mag(&result, a, b, &invalid, &overflow); 2304 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2305 } else if (magnitude < 0) { 2306 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps); 2307 bcd_sub_mag(&result, b, a, &invalid, &overflow); 2308 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2309 } else { 2310 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps); 2311 cr = CRF_EQ; 2312 } 2313 } 2314 } 2315 2316 if (unlikely(invalid)) { 2317 result.VsrD(0) = result.VsrD(1) = -1; 2318 cr = CRF_SO; 2319 } else if (overflow) { 2320 cr |= CRF_SO; 2321 } else if (zero) { 2322 cr |= CRF_EQ; 2323 } 2324 2325 *r = result; 2326 2327 return cr; 2328 } 2329 2330 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2331 { 2332 ppc_avr_t bcopy = *b; 2333 int sgnb = bcd_get_sgn(b); 2334 if (sgnb < 0) { 2335 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2336 } else if (sgnb > 0) { 2337 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2338 } 2339 /* else invalid ... defer to bcdadd code for proper handling */ 2340 2341 return helper_bcdadd(r, a, &bcopy, ps); 2342 } 2343 2344 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2345 { 2346 int i; 2347 int cr = 0; 2348 uint16_t national = 0; 2349 uint16_t sgnb = get_national_digit(b, 0); 2350 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2351 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2352 2353 for (i = 1; i < 8; i++) { 2354 national = get_national_digit(b, i); 2355 if (unlikely(national < 0x30 || national > 0x39)) { 2356 invalid = 1; 2357 break; 2358 } 2359 2360 bcd_put_digit(&ret, national & 0xf, i); 2361 } 2362 2363 if (sgnb == NATIONAL_PLUS) { 2364 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2365 } else { 2366 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2367 } 2368 2369 cr = bcd_cmp_zero(&ret); 2370 2371 if (unlikely(invalid)) { 2372 cr = CRF_SO; 2373 } 2374 2375 *r = ret; 2376 2377 return cr; 2378 } 2379 2380 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2381 { 2382 int i; 2383 int cr = 0; 2384 int sgnb = bcd_get_sgn(b); 2385 int invalid = (sgnb == 0); 2386 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2387 2388 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0); 2389 2390 for (i = 1; i < 8; i++) { 2391 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2392 2393 if (unlikely(invalid)) { 2394 break; 2395 } 2396 } 2397 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2398 2399 cr = bcd_cmp_zero(b); 2400 2401 if (ox_flag) { 2402 cr |= CRF_SO; 2403 } 2404 2405 if (unlikely(invalid)) { 2406 cr = CRF_SO; 2407 } 2408 2409 *r = ret; 2410 2411 return cr; 2412 } 2413 2414 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2415 { 2416 int i; 2417 int cr = 0; 2418 int invalid = 0; 2419 int zone_digit = 0; 2420 int zone_lead = ps ? 0xF : 0x3; 2421 int digit = 0; 2422 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2423 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4; 2424 2425 if (unlikely((sgnb < 0xA) && ps)) { 2426 invalid = 1; 2427 } 2428 2429 for (i = 0; i < 16; i++) { 2430 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead; 2431 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF; 2432 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2433 invalid = 1; 2434 break; 2435 } 2436 2437 bcd_put_digit(&ret, digit, i + 1); 2438 } 2439 2440 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2441 (!ps && (sgnb & 0x4))) { 2442 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2443 } else { 2444 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2445 } 2446 2447 cr = bcd_cmp_zero(&ret); 2448 2449 if (unlikely(invalid)) { 2450 cr = CRF_SO; 2451 } 2452 2453 *r = ret; 2454 2455 return cr; 2456 } 2457 2458 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2459 { 2460 int i; 2461 int cr = 0; 2462 uint8_t digit = 0; 2463 int sgnb = bcd_get_sgn(b); 2464 int zone_lead = (ps) ? 0xF0 : 0x30; 2465 int invalid = (sgnb == 0); 2466 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2467 2468 int ox_flag = ((b->VsrD(0) >> 4) != 0); 2469 2470 for (i = 0; i < 16; i++) { 2471 digit = bcd_get_digit(b, i + 1, &invalid); 2472 2473 if (unlikely(invalid)) { 2474 break; 2475 } 2476 2477 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit; 2478 } 2479 2480 if (ps) { 2481 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2482 } else { 2483 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1); 2484 } 2485 2486 cr = bcd_cmp_zero(b); 2487 2488 if (ox_flag) { 2489 cr |= CRF_SO; 2490 } 2491 2492 if (unlikely(invalid)) { 2493 cr = CRF_SO; 2494 } 2495 2496 *r = ret; 2497 2498 return cr; 2499 } 2500 2501 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2502 { 2503 int i; 2504 int cr = 0; 2505 uint64_t lo_value; 2506 uint64_t hi_value; 2507 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2508 2509 if (b->VsrSD(0) < 0) { 2510 lo_value = -b->VsrSD(1); 2511 hi_value = ~b->VsrD(0) + !lo_value; 2512 bcd_put_digit(&ret, 0xD, 0); 2513 } else { 2514 lo_value = b->VsrD(1); 2515 hi_value = b->VsrD(0); 2516 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); 2517 } 2518 2519 if (divu128(&lo_value, &hi_value, 1000000000000000ULL) || 2520 lo_value > 9999999999999999ULL) { 2521 cr = CRF_SO; 2522 } 2523 2524 for (i = 1; i < 16; hi_value /= 10, i++) { 2525 bcd_put_digit(&ret, hi_value % 10, i); 2526 } 2527 2528 for (; i < 32; lo_value /= 10, i++) { 2529 bcd_put_digit(&ret, lo_value % 10, i); 2530 } 2531 2532 cr |= bcd_cmp_zero(&ret); 2533 2534 *r = ret; 2535 2536 return cr; 2537 } 2538 2539 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2540 { 2541 uint8_t i; 2542 int cr; 2543 uint64_t carry; 2544 uint64_t unused; 2545 uint64_t lo_value; 2546 uint64_t hi_value = 0; 2547 int sgnb = bcd_get_sgn(b); 2548 int invalid = (sgnb == 0); 2549 2550 lo_value = bcd_get_digit(b, 31, &invalid); 2551 for (i = 30; i > 0; i--) { 2552 mulu64(&lo_value, &carry, lo_value, 10ULL); 2553 mulu64(&hi_value, &unused, hi_value, 10ULL); 2554 lo_value += bcd_get_digit(b, i, &invalid); 2555 hi_value += carry; 2556 2557 if (unlikely(invalid)) { 2558 break; 2559 } 2560 } 2561 2562 if (sgnb == -1) { 2563 r->VsrSD(1) = -lo_value; 2564 r->VsrSD(0) = ~hi_value + !r->VsrSD(1); 2565 } else { 2566 r->VsrSD(1) = lo_value; 2567 r->VsrSD(0) = hi_value; 2568 } 2569 2570 cr = bcd_cmp_zero(b); 2571 2572 if (unlikely(invalid)) { 2573 cr = CRF_SO; 2574 } 2575 2576 return cr; 2577 } 2578 2579 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2580 { 2581 int i; 2582 int invalid = 0; 2583 2584 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 2585 return CRF_SO; 2586 } 2587 2588 *r = *a; 2589 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0); 2590 2591 for (i = 1; i < 32; i++) { 2592 bcd_get_digit(a, i, &invalid); 2593 bcd_get_digit(b, i, &invalid); 2594 if (unlikely(invalid)) { 2595 return CRF_SO; 2596 } 2597 } 2598 2599 return bcd_cmp_zero(r); 2600 } 2601 2602 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2603 { 2604 int sgnb = bcd_get_sgn(b); 2605 2606 *r = *b; 2607 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 2608 2609 if (bcd_is_valid(b) == false) { 2610 return CRF_SO; 2611 } 2612 2613 return bcd_cmp_zero(r); 2614 } 2615 2616 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2617 { 2618 int cr; 2619 int i = a->VsrSB(7); 2620 bool ox_flag = false; 2621 int sgnb = bcd_get_sgn(b); 2622 ppc_avr_t ret = *b; 2623 ret.VsrD(1) &= ~0xf; 2624 2625 if (bcd_is_valid(b) == false) { 2626 return CRF_SO; 2627 } 2628 2629 if (unlikely(i > 31)) { 2630 i = 31; 2631 } else if (unlikely(i < -31)) { 2632 i = -31; 2633 } 2634 2635 if (i > 0) { 2636 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2637 } else { 2638 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2639 } 2640 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2641 2642 *r = ret; 2643 2644 cr = bcd_cmp_zero(r); 2645 if (ox_flag) { 2646 cr |= CRF_SO; 2647 } 2648 2649 return cr; 2650 } 2651 2652 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2653 { 2654 int cr; 2655 int i; 2656 int invalid = 0; 2657 bool ox_flag = false; 2658 ppc_avr_t ret = *b; 2659 2660 for (i = 0; i < 32; i++) { 2661 bcd_get_digit(b, i, &invalid); 2662 2663 if (unlikely(invalid)) { 2664 return CRF_SO; 2665 } 2666 } 2667 2668 i = a->VsrSB(7); 2669 if (i >= 32) { 2670 ox_flag = true; 2671 ret.VsrD(1) = ret.VsrD(0) = 0; 2672 } else if (i <= -32) { 2673 ret.VsrD(1) = ret.VsrD(0) = 0; 2674 } else if (i > 0) { 2675 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2676 } else { 2677 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2678 } 2679 *r = ret; 2680 2681 cr = bcd_cmp_zero(r); 2682 if (ox_flag) { 2683 cr |= CRF_SO; 2684 } 2685 2686 return cr; 2687 } 2688 2689 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2690 { 2691 int cr; 2692 int unused = 0; 2693 int invalid = 0; 2694 bool ox_flag = false; 2695 int sgnb = bcd_get_sgn(b); 2696 ppc_avr_t ret = *b; 2697 ret.VsrD(1) &= ~0xf; 2698 2699 int i = a->VsrSB(7); 2700 ppc_avr_t bcd_one; 2701 2702 bcd_one.VsrD(0) = 0; 2703 bcd_one.VsrD(1) = 0x10; 2704 2705 if (bcd_is_valid(b) == false) { 2706 return CRF_SO; 2707 } 2708 2709 if (unlikely(i > 31)) { 2710 i = 31; 2711 } else if (unlikely(i < -31)) { 2712 i = -31; 2713 } 2714 2715 if (i > 0) { 2716 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2717 } else { 2718 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2719 2720 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 2721 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 2722 } 2723 } 2724 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2725 2726 cr = bcd_cmp_zero(&ret); 2727 if (ox_flag) { 2728 cr |= CRF_SO; 2729 } 2730 *r = ret; 2731 2732 return cr; 2733 } 2734 2735 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2736 { 2737 uint64_t mask; 2738 uint32_t ox_flag = 0; 2739 int i = a->VsrSH(3) + 1; 2740 ppc_avr_t ret = *b; 2741 2742 if (bcd_is_valid(b) == false) { 2743 return CRF_SO; 2744 } 2745 2746 if (i > 16 && i < 32) { 2747 mask = (uint64_t)-1 >> (128 - i * 4); 2748 if (ret.VsrD(0) & ~mask) { 2749 ox_flag = CRF_SO; 2750 } 2751 2752 ret.VsrD(0) &= mask; 2753 } else if (i >= 0 && i <= 16) { 2754 mask = (uint64_t)-1 >> (64 - i * 4); 2755 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 2756 ox_flag = CRF_SO; 2757 } 2758 2759 ret.VsrD(1) &= mask; 2760 ret.VsrD(0) = 0; 2761 } 2762 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 2763 *r = ret; 2764 2765 return bcd_cmp_zero(&ret) | ox_flag; 2766 } 2767 2768 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2769 { 2770 int i; 2771 uint64_t mask; 2772 uint32_t ox_flag = 0; 2773 int invalid = 0; 2774 ppc_avr_t ret = *b; 2775 2776 for (i = 0; i < 32; i++) { 2777 bcd_get_digit(b, i, &invalid); 2778 2779 if (unlikely(invalid)) { 2780 return CRF_SO; 2781 } 2782 } 2783 2784 i = a->VsrSH(3); 2785 if (i > 16 && i < 33) { 2786 mask = (uint64_t)-1 >> (128 - i * 4); 2787 if (ret.VsrD(0) & ~mask) { 2788 ox_flag = CRF_SO; 2789 } 2790 2791 ret.VsrD(0) &= mask; 2792 } else if (i > 0 && i <= 16) { 2793 mask = (uint64_t)-1 >> (64 - i * 4); 2794 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 2795 ox_flag = CRF_SO; 2796 } 2797 2798 ret.VsrD(1) &= mask; 2799 ret.VsrD(0) = 0; 2800 } else if (i == 0) { 2801 if (ret.VsrD(0) || ret.VsrD(1)) { 2802 ox_flag = CRF_SO; 2803 } 2804 ret.VsrD(0) = ret.VsrD(1) = 0; 2805 } 2806 2807 *r = ret; 2808 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) { 2809 return ox_flag | CRF_EQ; 2810 } 2811 2812 return ox_flag | CRF_GT; 2813 } 2814 2815 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 2816 { 2817 int i; 2818 VECTOR_FOR_INORDER_I(i, u8) { 2819 r->u8[i] = AES_sbox[a->u8[i]]; 2820 } 2821 } 2822 2823 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2824 { 2825 ppc_avr_t result; 2826 int i; 2827 2828 VECTOR_FOR_INORDER_I(i, u32) { 2829 result.VsrW(i) = b->VsrW(i) ^ 2830 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^ 2831 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^ 2832 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^ 2833 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]); 2834 } 2835 *r = result; 2836 } 2837 2838 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2839 { 2840 ppc_avr_t result; 2841 int i; 2842 2843 VECTOR_FOR_INORDER_I(i, u8) { 2844 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]); 2845 } 2846 *r = result; 2847 } 2848 2849 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2850 { 2851 /* This differs from what is written in ISA V2.07. The RTL is */ 2852 /* incorrect and will be fixed in V2.07B. */ 2853 int i; 2854 ppc_avr_t tmp; 2855 2856 VECTOR_FOR_INORDER_I(i, u8) { 2857 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])]; 2858 } 2859 2860 VECTOR_FOR_INORDER_I(i, u32) { 2861 r->VsrW(i) = 2862 AES_imc[tmp.VsrB(4 * i + 0)][0] ^ 2863 AES_imc[tmp.VsrB(4 * i + 1)][1] ^ 2864 AES_imc[tmp.VsrB(4 * i + 2)][2] ^ 2865 AES_imc[tmp.VsrB(4 * i + 3)][3]; 2866 } 2867 } 2868 2869 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2870 { 2871 ppc_avr_t result; 2872 int i; 2873 2874 VECTOR_FOR_INORDER_I(i, u8) { 2875 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]); 2876 } 2877 *r = result; 2878 } 2879 2880 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 2881 { 2882 int st = (st_six & 0x10) != 0; 2883 int six = st_six & 0xF; 2884 int i; 2885 2886 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2887 if (st == 0) { 2888 if ((six & (0x8 >> i)) == 0) { 2889 r->VsrW(i) = ror32(a->VsrW(i), 7) ^ 2890 ror32(a->VsrW(i), 18) ^ 2891 (a->VsrW(i) >> 3); 2892 } else { /* six.bit[i] == 1 */ 2893 r->VsrW(i) = ror32(a->VsrW(i), 17) ^ 2894 ror32(a->VsrW(i), 19) ^ 2895 (a->VsrW(i) >> 10); 2896 } 2897 } else { /* st == 1 */ 2898 if ((six & (0x8 >> i)) == 0) { 2899 r->VsrW(i) = ror32(a->VsrW(i), 2) ^ 2900 ror32(a->VsrW(i), 13) ^ 2901 ror32(a->VsrW(i), 22); 2902 } else { /* six.bit[i] == 1 */ 2903 r->VsrW(i) = ror32(a->VsrW(i), 6) ^ 2904 ror32(a->VsrW(i), 11) ^ 2905 ror32(a->VsrW(i), 25); 2906 } 2907 } 2908 } 2909 } 2910 2911 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 2912 { 2913 int st = (st_six & 0x10) != 0; 2914 int six = st_six & 0xF; 2915 int i; 2916 2917 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 2918 if (st == 0) { 2919 if ((six & (0x8 >> (2 * i))) == 0) { 2920 r->VsrD(i) = ror64(a->VsrD(i), 1) ^ 2921 ror64(a->VsrD(i), 8) ^ 2922 (a->VsrD(i) >> 7); 2923 } else { /* six.bit[2*i] == 1 */ 2924 r->VsrD(i) = ror64(a->VsrD(i), 19) ^ 2925 ror64(a->VsrD(i), 61) ^ 2926 (a->VsrD(i) >> 6); 2927 } 2928 } else { /* st == 1 */ 2929 if ((six & (0x8 >> (2 * i))) == 0) { 2930 r->VsrD(i) = ror64(a->VsrD(i), 28) ^ 2931 ror64(a->VsrD(i), 34) ^ 2932 ror64(a->VsrD(i), 39); 2933 } else { /* six.bit[2*i] == 1 */ 2934 r->VsrD(i) = ror64(a->VsrD(i), 14) ^ 2935 ror64(a->VsrD(i), 18) ^ 2936 ror64(a->VsrD(i), 41); 2937 } 2938 } 2939 } 2940 } 2941 2942 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2943 { 2944 ppc_avr_t result; 2945 int i; 2946 2947 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 2948 int indexA = c->VsrB(i) >> 4; 2949 int indexB = c->VsrB(i) & 0xF; 2950 2951 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB); 2952 } 2953 *r = result; 2954 } 2955 2956 #undef VECTOR_FOR_INORDER_I 2957 2958 /*****************************************************************************/ 2959 /* SPE extension helpers */ 2960 /* Use a table to make this quicker */ 2961 static const uint8_t hbrev[16] = { 2962 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE, 2963 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF, 2964 }; 2965 2966 static inline uint8_t byte_reverse(uint8_t val) 2967 { 2968 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4); 2969 } 2970 2971 static inline uint32_t word_reverse(uint32_t val) 2972 { 2973 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) | 2974 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24); 2975 } 2976 2977 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */ 2978 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2) 2979 { 2980 uint32_t a, b, d, mask; 2981 2982 mask = UINT32_MAX >> (32 - MASKBITS); 2983 a = arg1 & mask; 2984 b = arg2 & mask; 2985 d = word_reverse(1 + word_reverse(a | ~b)); 2986 return (arg1 & ~mask) | (d & b); 2987 } 2988 2989 uint32_t helper_cntlsw32(uint32_t val) 2990 { 2991 if (val & 0x80000000) { 2992 return clz32(~val); 2993 } else { 2994 return clz32(val); 2995 } 2996 } 2997 2998 uint32_t helper_cntlzw32(uint32_t val) 2999 { 3000 return clz32(val); 3001 } 3002 3003 /* 440 specific */ 3004 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 3005 target_ulong low, uint32_t update_Rc) 3006 { 3007 target_ulong mask; 3008 int i; 3009 3010 i = 1; 3011 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3012 if ((high & mask) == 0) { 3013 if (update_Rc) { 3014 env->crf[0] = 0x4; 3015 } 3016 goto done; 3017 } 3018 i++; 3019 } 3020 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3021 if ((low & mask) == 0) { 3022 if (update_Rc) { 3023 env->crf[0] = 0x8; 3024 } 3025 goto done; 3026 } 3027 i++; 3028 } 3029 i = 8; 3030 if (update_Rc) { 3031 env->crf[0] = 0x2; 3032 } 3033 done: 3034 env->xer = (env->xer & ~0x7F) | i; 3035 if (update_Rc) { 3036 env->crf[0] |= xer_so; 3037 } 3038 return i; 3039 } 3040