/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    if (unlikely(rb == 0 || ra >= rb)) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divu128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    uint64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = 0;

    if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divs128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif

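/*
 * Reading aid, not part of the original file: a minimal reference for the
 * 32-bit extended-divide helpers above, kept under the hypothetical guard
 * PPC_INT_HELPER_EXAMPLES (never defined) so it is always compiled out.
 * divweu computes floor((RA << 32) / RB) and flags overflow when RB is 0
 * or the quotient does not fit in 32 bits; in that case the result is
 * architecturally undefined and the helper above returns 0.
 */
#ifdef PPC_INT_HELPER_EXAMPLES
static uint32_t divweu_reference(uint32_t ra, uint32_t rb, bool *overflow)
{
    uint64_t dividend = (uint64_t)ra << 32;

    /* || short-circuits, so the division is never evaluated for rb == 0 */
    if (rb == 0 || dividend / rb > UINT32_MAX) {
        *overflow = true;
        return 0;
    }
    *overflow = false;
    return (uint32_t)(dividend / rb);
}
#endif
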
#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/*
 * subtract 1 from each byte, AND with the inverse, and check whether the MSB
 * is set in each byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n)  (haszero((x) ^ pattern(n)))

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

/*
 * Return a random number.
 */
uint64_t helper_darn32(void)
{
    Error *err = NULL;
    uint32_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_darn64(void)
{
    Error *err = NULL;
    uint64_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & PPC_BIT(index)) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca32 = env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca32 = env->ca = (ret != 0);
    }
    return ret;
}
#endif

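/*
 * Worked example (annotation only, not from the original source): for sraw,
 * CA/CA32 are set only when the source is negative and at least one 1-bit
 * is shifted out, which is what the "(value & ((1 << shift) - 1))" test
 * above checks.  E.g. with shift = 2:
 *   value = 0xFFFFFFF5 (-11): result = -3, bits shifted out = 0b01 -> CA = 1
 *   value = 0xFFFFFFF4 (-12): result = -3, bits shifted out = 0b00 -> CA = 0
 */
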
#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words. */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}
#endif

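/*
 * Reading aid, not used by any helper: a naive bit-by-bit centrifuge that
 * is meant to match the block-based helper_cfuged() below -- src bits
 * selected by mask = 1 gather at the low end of the result, bits selected
 * by mask = 0 at the high end, order preserved.  The guard macro
 * PPC_INT_HELPER_EXAMPLES is hypothetical and never defined.
 */
#ifdef PPC_INT_HELPER_EXAMPLES
static uint64_t cfuged_reference(uint64_t src, uint64_t mask)
{
    uint64_t lo = 0, hi = 0;
    int nlo = 0;
    int i;

    for (i = 63; i >= 0; i--) {         /* most significant bit first */
        uint64_t b = (src >> i) & 1;

        if ((mask >> i) & 1) {
            lo = (lo << 1) | b;
            nlo++;
        } else {
            hi = (hi << 1) | b;
        }
    }
    /* avoid the undefined 64-bit shift when every mask bit is set */
    return nlo == 64 ? lo : (hi << nlo) | lo;
}
#endif
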
uint64_t helper_cfuged(uint64_t src, uint64_t mask)
{
    /*
     * Instead of processing the mask bit-by-bit from the most significant to
     * the least significant bit, as described in PowerISA, we'll handle it in
     * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
     * ctz or cto, we negate the mask at the end of the loop.
     */
    target_ulong m, left = 0, right = 0;
    unsigned int n, i = 64;
    bool bit = false; /* tracks if we are processing zeros or ones */

    if (mask == 0 || mask == -1) {
        return src;
    }

    /* Processes the mask in blocks, from LSB to MSB */
    while (i) {
        /* Find how many bits we should take */
        n = ctz64(mask);
        if (n > i) {
            n = i;
        }

        /*
         * Extracts 'n' trailing bits of src and puts them on the leading 'n'
         * bits of 'right' or 'left', pushing down the previously extracted
         * values.
         */
        m = (1ll << n) - 1;
        if (bit) {
            right = ror64(right | (src & m), n);
        } else {
            left = ror64(left | (src & m), n);
        }

        /*
         * Discards the processed bits from 'src' and 'mask'. Note that we are
         * removing 'n' trailing zeros from 'mask', but the logical shift will
         * add 'n' leading zeros back, so the population count of 'mask' is
         * kept the same.
         */
        src >>= n;
        mask >>= n;
        i -= n;
        bit = !bit;
        mask = ~mask;
    }

    /*
     * At the end, 'right' was ror'ed ctpop(mask) times. To put it back in
     * place, we'll shift it 64 - ctpop(mask) more times.
     */
    if (bit) {
        n = ctpop64(mask);
    } else {
        n = 64 - ctpop64(mask);
    }

    return left | (right >> n);
}

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho! */
/* Real implementation uses a ROM table. Do the same */
/*
 * Extremely decomposed:
 *     return 256 * log10(10 ^ (-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c.inc"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)            \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)            \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

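/*
 * Usage note (annotation, not from the original file): VECTOR_FOR_INORDER_I
 * iterates 0..N-1 on big-endian hosts and N-1..0 on little-endian hosts, so
 * in both cases a loop such as
 *
 *     VECTOR_FOR_INORDER_I(i, u32) {
 *         r->u32[i] = ...;
 *     }
 *
 * visits the guest's architectural element 0 first, independent of how the
 * ppc_avr_t union is laid out on the host.
 */
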
/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
{
    ppc_store_vscr(env, vscr);
}

uint32_t helper_mfvscr(CPUPPCState *env)
{
    return ppc_get_vscr(env);
}

static inline void set_vscr_sat(CPUPPCState *env)
{
    /* The choice of non-zero value is arbitrary. */
    env->vscr_sat.u32[0] = 1;
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->VsrD(1) = res & 1;
    r->VsrD(0) = 0;
}

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status);   \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
                                       type, &env->vec_status);         \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define 
VARITHSAT_DO(name, op, optype, cvt, element) \ 625 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 626 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 627 { \ 628 int sat = 0; \ 629 int i; \ 630 \ 631 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 632 VARITHSAT_CASE(optype, op, cvt, element); \ 633 } \ 634 if (sat) { \ 635 vscr_sat->u32[0] = 1; \ 636 } \ 637 } 638 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 639 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 640 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 641 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 642 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 643 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 644 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 645 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 646 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 647 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 648 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 649 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 650 #undef VARITHSAT_CASE 651 #undef VARITHSAT_DO 652 #undef VARITHSAT_SIGNED 653 #undef VARITHSAT_UNSIGNED 654 655 #define VAVG_DO(name, element, etype) \ 656 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 657 { \ 658 int i; \ 659 \ 660 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 661 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 662 r->element[i] = x >> 1; \ 663 } \ 664 } 665 666 #define VAVG(type, signed_element, signed_type, unsigned_element, \ 667 unsigned_type) \ 668 VAVG_DO(avgs##type, signed_element, signed_type) \ 669 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 670 VAVG(b, s8, int16_t, u8, uint16_t) 671 VAVG(h, s16, int32_t, u16, uint32_t) 672 VAVG(w, s32, int64_t, u32, uint64_t) 673 #undef VAVG_DO 674 #undef VAVG 675 676 #define VABSDU_DO(name, element) \ 677 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 678 { \ 679 int i; \ 680 \ 681 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 682 r->element[i] = (a->element[i] > b->element[i]) ? \ 683 (a->element[i] - b->element[i]) : \ 684 (b->element[i] - a->element[i]); \ 685 } \ 686 } 687 688 /* 689 * VABSDU - Vector absolute difference unsigned 690 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 691 * element - element type to access from vector 692 */ 693 #define VABSDU(type, element) \ 694 VABSDU_DO(absdu##type, element) 695 VABSDU(b, u8) 696 VABSDU(h, u16) 697 VABSDU(w, u32) 698 #undef VABSDU_DO 699 #undef VABSDU 700 701 #define VCF(suffix, cvt, element) \ 702 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 703 ppc_avr_t *b, uint32_t uim) \ 704 { \ 705 int i; \ 706 \ 707 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 708 float32 t = cvt(b->element[i], &env->vec_status); \ 709 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 710 } \ 711 } 712 VCF(ux, uint32_to_float32, u32) 713 VCF(sx, int32_to_float32, s32) 714 #undef VCF 715 716 #define VCMP_DO(suffix, compare, element, record) \ 717 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 718 ppc_avr_t *a, ppc_avr_t *b) \ 719 { \ 720 uint64_t ones = (uint64_t)-1; \ 721 uint64_t all = ones; \ 722 uint64_t none = 0; \ 723 int i; \ 724 \ 725 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 726 uint64_t result = (a->element[i] compare b->element[i] ? 
\ 727 ones : 0x0); \ 728 switch (sizeof(a->element[0])) { \ 729 case 8: \ 730 r->u64[i] = result; \ 731 break; \ 732 case 4: \ 733 r->u32[i] = result; \ 734 break; \ 735 case 2: \ 736 r->u16[i] = result; \ 737 break; \ 738 case 1: \ 739 r->u8[i] = result; \ 740 break; \ 741 } \ 742 all &= result; \ 743 none |= result; \ 744 } \ 745 if (record) { \ 746 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 747 } \ 748 } 749 #define VCMP(suffix, compare, element) \ 750 VCMP_DO(suffix, compare, element, 0) \ 751 VCMP_DO(suffix##_dot, compare, element, 1) 752 VCMP(equb, ==, u8) 753 VCMP(equh, ==, u16) 754 VCMP(equw, ==, u32) 755 VCMP(equd, ==, u64) 756 VCMP(gtub, >, u8) 757 VCMP(gtuh, >, u16) 758 VCMP(gtuw, >, u32) 759 VCMP(gtud, >, u64) 760 VCMP(gtsb, >, s8) 761 VCMP(gtsh, >, s16) 762 VCMP(gtsw, >, s32) 763 VCMP(gtsd, >, s64) 764 #undef VCMP_DO 765 #undef VCMP 766 767 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \ 768 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \ 769 ppc_avr_t *a, ppc_avr_t *b) \ 770 { \ 771 etype ones = (etype)-1; \ 772 etype all = ones; \ 773 etype result, none = 0; \ 774 int i; \ 775 \ 776 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 777 if (cmpzero) { \ 778 result = ((a->element[i] == 0) \ 779 || (b->element[i] == 0) \ 780 || (a->element[i] != b->element[i]) ? \ 781 ones : 0x0); \ 782 } else { \ 783 result = (a->element[i] != b->element[i]) ? ones : 0x0; \ 784 } \ 785 r->element[i] = result; \ 786 all &= result; \ 787 none |= result; \ 788 } \ 789 if (record) { \ 790 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 791 } \ 792 } 793 794 /* 795 * VCMPNEZ - Vector compare not equal to zero 796 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word) 797 * element - element type to access from vector 798 */ 799 #define VCMPNE(suffix, element, etype, cmpzero) \ 800 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \ 801 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1) 802 VCMPNE(zb, u8, uint8_t, 1) 803 VCMPNE(zh, u16, uint16_t, 1) 804 VCMPNE(zw, u32, uint32_t, 1) 805 VCMPNE(b, u8, uint8_t, 0) 806 VCMPNE(h, u16, uint16_t, 0) 807 VCMPNE(w, u32, uint32_t, 0) 808 #undef VCMPNE_DO 809 #undef VCMPNE 810 811 #define VCMPFP_DO(suffix, compare, order, record) \ 812 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 813 ppc_avr_t *a, ppc_avr_t *b) \ 814 { \ 815 uint32_t ones = (uint32_t)-1; \ 816 uint32_t all = ones; \ 817 uint32_t none = 0; \ 818 int i; \ 819 \ 820 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 821 uint32_t result; \ 822 FloatRelation rel = \ 823 float32_compare_quiet(a->f32[i], b->f32[i], \ 824 &env->vec_status); \ 825 if (rel == float_relation_unordered) { \ 826 result = 0; \ 827 } else if (rel compare order) { \ 828 result = ones; \ 829 } else { \ 830 result = 0; \ 831 } \ 832 r->u32[i] = result; \ 833 all &= result; \ 834 none |= result; \ 835 } \ 836 if (record) { \ 837 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 838 } \ 839 } 840 #define VCMPFP(suffix, compare, order) \ 841 VCMPFP_DO(suffix, compare, order, 0) \ 842 VCMPFP_DO(suffix##_dot, compare, order, 1) 843 VCMPFP(eqfp, ==, float_relation_equal) 844 VCMPFP(gefp, !=, float_relation_less) 845 VCMPFP(gtfp, ==, float_relation_greater) 846 #undef VCMPFP_DO 847 #undef VCMPFP 848 849 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 850 ppc_avr_t *a, ppc_avr_t *b, int record) 851 { 852 int i; 853 int all_in = 0; 854 855 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 856 FloatRelation le_rel = float32_compare_quiet(a->f32[i], 
b->f32[i], 857 &env->vec_status); 858 if (le_rel == float_relation_unordered) { 859 r->u32[i] = 0xc0000000; 860 all_in = 1; 861 } else { 862 float32 bneg = float32_chs(b->f32[i]); 863 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, 864 &env->vec_status); 865 int le = le_rel != float_relation_greater; 866 int ge = ge_rel != float_relation_less; 867 868 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 869 all_in |= (!le | !ge); 870 } 871 } 872 if (record) { 873 env->crf[6] = (all_in == 0) << 1; 874 } 875 } 876 877 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 878 { 879 vcmpbfp_internal(env, r, a, b, 0); 880 } 881 882 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 883 ppc_avr_t *b) 884 { 885 vcmpbfp_internal(env, r, a, b, 1); 886 } 887 888 #define VCT(suffix, satcvt, element) \ 889 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 890 ppc_avr_t *b, uint32_t uim) \ 891 { \ 892 int i; \ 893 int sat = 0; \ 894 float_status s = env->vec_status; \ 895 \ 896 set_float_rounding_mode(float_round_to_zero, &s); \ 897 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 898 if (float32_is_any_nan(b->f32[i])) { \ 899 r->element[i] = 0; \ 900 } else { \ 901 float64 t = float32_to_float64(b->f32[i], &s); \ 902 int64_t j; \ 903 \ 904 t = float64_scalbn(t, uim, &s); \ 905 j = float64_to_int64(t, &s); \ 906 r->element[i] = satcvt(j, &sat); \ 907 } \ 908 } \ 909 if (sat) { \ 910 set_vscr_sat(env); \ 911 } \ 912 } 913 VCT(uxs, cvtsduw, u32) 914 VCT(sxs, cvtsdsw, s32) 915 #undef VCT 916 917 target_ulong helper_vclzlsbb(ppc_avr_t *r) 918 { 919 target_ulong count = 0; 920 int i; 921 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 922 if (r->VsrB(i) & 0x01) { 923 break; 924 } 925 count++; 926 } 927 return count; 928 } 929 930 target_ulong helper_vctzlsbb(ppc_avr_t *r) 931 { 932 target_ulong count = 0; 933 int i; 934 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 935 if (r->VsrB(i) & 0x01) { 936 break; 937 } 938 count++; 939 } 940 return count; 941 } 942 943 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 944 ppc_avr_t *b, ppc_avr_t *c) 945 { 946 int sat = 0; 947 int i; 948 949 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 950 int32_t prod = a->s16[i] * b->s16[i]; 951 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 952 953 r->s16[i] = cvtswsh(t, &sat); 954 } 955 956 if (sat) { 957 set_vscr_sat(env); 958 } 959 } 960 961 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 962 ppc_avr_t *b, ppc_avr_t *c) 963 { 964 int sat = 0; 965 int i; 966 967 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 968 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 969 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 970 r->s16[i] = cvtswsh(t, &sat); 971 } 972 973 if (sat) { 974 set_vscr_sat(env); 975 } 976 } 977 978 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 979 { 980 int i; 981 982 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 983 int32_t prod = a->s16[i] * b->s16[i]; 984 r->s16[i] = (int16_t) (prod + c->s16[i]); 985 } 986 } 987 988 #define VMRG_DO(name, element, access, ofs) \ 989 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 990 { \ 991 ppc_avr_t result; \ 992 int i, half = ARRAY_SIZE(r->element) / 2; \ 993 \ 994 for (i = 0; i < half; i++) { \ 995 result.access(i * 2 + 0) = a->access(i + ofs); \ 996 result.access(i * 2 + 1) = b->access(i + ofs); \ 997 } \ 998 *r = result; \ 999 } 1000 1001 #define VMRG(suffix, element, access) \ 1002 VMRG_DO(mrgl##suffix, element, access, half) \ 1003 
VMRG_DO(mrgh##suffix, element, access, 0) 1004 VMRG(b, u8, VsrB) 1005 VMRG(h, u16, VsrH) 1006 VMRG(w, u32, VsrW) 1007 #undef VMRG_DO 1008 #undef VMRG 1009 1010 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1011 ppc_avr_t *b, ppc_avr_t *c) 1012 { 1013 int32_t prod[16]; 1014 int i; 1015 1016 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 1017 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 1018 } 1019 1020 VECTOR_FOR_INORDER_I(i, s32) { 1021 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 1022 prod[4 * i + 2] + prod[4 * i + 3]; 1023 } 1024 } 1025 1026 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1027 ppc_avr_t *b, ppc_avr_t *c) 1028 { 1029 int32_t prod[8]; 1030 int i; 1031 1032 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1033 prod[i] = a->s16[i] * b->s16[i]; 1034 } 1035 1036 VECTOR_FOR_INORDER_I(i, s32) { 1037 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1038 } 1039 } 1040 1041 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1042 ppc_avr_t *b, ppc_avr_t *c) 1043 { 1044 int32_t prod[8]; 1045 int i; 1046 int sat = 0; 1047 1048 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1049 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1050 } 1051 1052 VECTOR_FOR_INORDER_I(i, s32) { 1053 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1054 1055 r->u32[i] = cvtsdsw(t, &sat); 1056 } 1057 1058 if (sat) { 1059 set_vscr_sat(env); 1060 } 1061 } 1062 1063 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1064 ppc_avr_t *b, ppc_avr_t *c) 1065 { 1066 uint16_t prod[16]; 1067 int i; 1068 1069 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1070 prod[i] = a->u8[i] * b->u8[i]; 1071 } 1072 1073 VECTOR_FOR_INORDER_I(i, u32) { 1074 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1075 prod[4 * i + 2] + prod[4 * i + 3]; 1076 } 1077 } 1078 1079 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1080 ppc_avr_t *b, ppc_avr_t *c) 1081 { 1082 uint32_t prod[8]; 1083 int i; 1084 1085 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1086 prod[i] = a->u16[i] * b->u16[i]; 1087 } 1088 1089 VECTOR_FOR_INORDER_I(i, u32) { 1090 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1091 } 1092 } 1093 1094 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1095 ppc_avr_t *b, ppc_avr_t *c) 1096 { 1097 uint32_t prod[8]; 1098 int i; 1099 int sat = 0; 1100 1101 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1102 prod[i] = a->u16[i] * b->u16[i]; 1103 } 1104 1105 VECTOR_FOR_INORDER_I(i, s32) { 1106 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1107 1108 r->u32[i] = cvtuduw(t, &sat); 1109 } 1110 1111 if (sat) { 1112 set_vscr_sat(env); 1113 } 1114 } 1115 1116 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 1117 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1118 { \ 1119 int i; \ 1120 \ 1121 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1122 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 1123 (cast)b->mul_access(i); \ 1124 } \ 1125 } 1126 1127 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 1128 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1129 { \ 1130 int i; \ 1131 \ 1132 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1133 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1134 (cast)b->mul_access(i + 1); \ 1135 } \ 1136 } 1137 1138 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1139 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \ 1140 
VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast) 1141 VMUL(sb, s8, VsrSB, VsrSH, int16_t) 1142 VMUL(sh, s16, VsrSH, VsrSW, int32_t) 1143 VMUL(sw, s32, VsrSW, VsrSD, int64_t) 1144 VMUL(ub, u8, VsrB, VsrH, uint16_t) 1145 VMUL(uh, u16, VsrH, VsrW, uint32_t) 1146 VMUL(uw, u32, VsrW, VsrD, uint64_t) 1147 #undef VMUL_DO_EVN 1148 #undef VMUL_DO_ODD 1149 #undef VMUL 1150 1151 void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1152 { 1153 int i; 1154 1155 for (i = 0; i < 4; i++) { 1156 r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32); 1157 } 1158 } 1159 1160 void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1161 { 1162 int i; 1163 1164 for (i = 0; i < 4; i++) { 1165 r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] * 1166 (uint64_t)b->u32[i]) >> 32); 1167 } 1168 } 1169 1170 void helper_vmulhsd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1171 { 1172 uint64_t discard; 1173 1174 muls64(&discard, &r->u64[0], a->s64[0], b->s64[0]); 1175 muls64(&discard, &r->u64[1], a->s64[1], b->s64[1]); 1176 } 1177 1178 void helper_vmulhud(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1179 { 1180 uint64_t discard; 1181 1182 mulu64(&discard, &r->u64[0], a->u64[0], b->u64[0]); 1183 mulu64(&discard, &r->u64[1], a->u64[1], b->u64[1]); 1184 } 1185 1186 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1187 ppc_avr_t *c) 1188 { 1189 ppc_avr_t result; 1190 int i; 1191 1192 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1193 int s = c->VsrB(i) & 0x1f; 1194 int index = s & 0xf; 1195 1196 if (s & 0x10) { 1197 result.VsrB(i) = b->VsrB(index); 1198 } else { 1199 result.VsrB(i) = a->VsrB(index); 1200 } 1201 } 1202 *r = result; 1203 } 1204 1205 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1206 ppc_avr_t *c) 1207 { 1208 ppc_avr_t result; 1209 int i; 1210 1211 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1212 int s = c->VsrB(i) & 0x1f; 1213 int index = 15 - (s & 0xf); 1214 1215 if (s & 0x10) { 1216 result.VsrB(i) = a->VsrB(index); 1217 } else { 1218 result.VsrB(i) = b->VsrB(index); 1219 } 1220 } 1221 *r = result; 1222 } 1223 1224 #if defined(HOST_WORDS_BIGENDIAN) 1225 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1226 #define VBPERMD_INDEX(i) (i) 1227 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1228 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) 1229 #else 1230 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1231 #define VBPERMD_INDEX(i) (1 - i) 1232 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1233 #define EXTRACT_BIT(avr, i, index) \ 1234 (extract64((avr)->u64[1 - i], 63 - index, 1)) 1235 #endif 1236 1237 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1238 { 1239 int i, j; 1240 ppc_avr_t result = { .u64 = { 0, 0 } }; 1241 VECTOR_FOR_INORDER_I(i, u64) { 1242 for (j = 0; j < 8; j++) { 1243 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1244 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1245 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1246 } 1247 } 1248 } 1249 *r = result; 1250 } 1251 1252 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1253 { 1254 int i; 1255 uint64_t perm = 0; 1256 1257 VECTOR_FOR_INORDER_I(i, u8) { 1258 int index = VBPERMQ_INDEX(b, i); 1259 1260 if (index < 128) { 1261 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1262 if (a->u64[VBPERMQ_DW(index)] & mask) { 1263 perm |= (0x8000 >> i); 1264 } 1265 } 1266 } 1267 1268 r->VsrD(0) = perm; 1269 r->VsrD(1) = 0; 1270 } 1271 1272 #undef VBPERMQ_INDEX 1273 #undef VBPERMQ_DW 1274 1275 #define 
PMSUM(name, srcfld, trgfld, trgtyp) \ 1276 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1277 { \ 1278 int i, j; \ 1279 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ 1280 \ 1281 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1282 prod[i] = 0; \ 1283 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1284 if (a->srcfld[i] & (1ull << j)) { \ 1285 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1286 } \ 1287 } \ 1288 } \ 1289 \ 1290 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1291 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ 1292 } \ 1293 } 1294 1295 PMSUM(vpmsumb, u8, u16, uint16_t) 1296 PMSUM(vpmsumh, u16, u32, uint32_t) 1297 PMSUM(vpmsumw, u32, u64, uint64_t) 1298 1299 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1300 { 1301 1302 #ifdef CONFIG_INT128 1303 int i, j; 1304 __uint128_t prod[2]; 1305 1306 VECTOR_FOR_INORDER_I(i, u64) { 1307 prod[i] = 0; 1308 for (j = 0; j < 64; j++) { 1309 if (a->u64[i] & (1ull << j)) { 1310 prod[i] ^= (((__uint128_t)b->u64[i]) << j); 1311 } 1312 } 1313 } 1314 1315 r->u128 = prod[0] ^ prod[1]; 1316 1317 #else 1318 int i, j; 1319 ppc_avr_t prod[2]; 1320 1321 VECTOR_FOR_INORDER_I(i, u64) { 1322 prod[i].VsrD(1) = prod[i].VsrD(0) = 0; 1323 for (j = 0; j < 64; j++) { 1324 if (a->u64[i] & (1ull << j)) { 1325 ppc_avr_t bshift; 1326 if (j == 0) { 1327 bshift.VsrD(0) = 0; 1328 bshift.VsrD(1) = b->u64[i]; 1329 } else { 1330 bshift.VsrD(0) = b->u64[i] >> (64 - j); 1331 bshift.VsrD(1) = b->u64[i] << j; 1332 } 1333 prod[i].VsrD(1) ^= bshift.VsrD(1); 1334 prod[i].VsrD(0) ^= bshift.VsrD(0); 1335 } 1336 } 1337 } 1338 1339 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1); 1340 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0); 1341 #endif 1342 } 1343 1344 1345 #if defined(HOST_WORDS_BIGENDIAN) 1346 #define PKBIG 1 1347 #else 1348 #define PKBIG 0 1349 #endif 1350 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1351 { 1352 int i, j; 1353 ppc_avr_t result; 1354 #if defined(HOST_WORDS_BIGENDIAN) 1355 const ppc_avr_t *x[2] = { a, b }; 1356 #else 1357 const ppc_avr_t *x[2] = { b, a }; 1358 #endif 1359 1360 VECTOR_FOR_INORDER_I(i, u64) { 1361 VECTOR_FOR_INORDER_I(j, u32) { 1362 uint32_t e = x[i]->u32[j]; 1363 1364 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1365 ((e >> 6) & 0x3e0) | 1366 ((e >> 3) & 0x1f)); 1367 } 1368 } 1369 *r = result; 1370 } 1371 1372 #define VPK(suffix, from, to, cvt, dosat) \ 1373 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1374 ppc_avr_t *a, ppc_avr_t *b) \ 1375 { \ 1376 int i; \ 1377 int sat = 0; \ 1378 ppc_avr_t result; \ 1379 ppc_avr_t *a0 = PKBIG ? a : b; \ 1380 ppc_avr_t *a1 = PKBIG ? 
b : a; \ 1381 \ 1382 VECTOR_FOR_INORDER_I(i, from) { \ 1383 result.to[i] = cvt(a0->from[i], &sat); \ 1384 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1385 } \ 1386 *r = result; \ 1387 if (dosat && sat) { \ 1388 set_vscr_sat(env); \ 1389 } \ 1390 } 1391 #define I(x, y) (x) 1392 VPK(shss, s16, s8, cvtshsb, 1) 1393 VPK(shus, s16, u8, cvtshub, 1) 1394 VPK(swss, s32, s16, cvtswsh, 1) 1395 VPK(swus, s32, u16, cvtswuh, 1) 1396 VPK(sdss, s64, s32, cvtsdsw, 1) 1397 VPK(sdus, s64, u32, cvtsduw, 1) 1398 VPK(uhus, u16, u8, cvtuhub, 1) 1399 VPK(uwus, u32, u16, cvtuwuh, 1) 1400 VPK(udus, u64, u32, cvtuduw, 1) 1401 VPK(uhum, u16, u8, I, 0) 1402 VPK(uwum, u32, u16, I, 0) 1403 VPK(udum, u64, u32, I, 0) 1404 #undef I 1405 #undef VPK 1406 #undef PKBIG 1407 1408 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1409 { 1410 int i; 1411 1412 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1413 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1414 } 1415 } 1416 1417 #define VRFI(suffix, rounding) \ 1418 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1419 ppc_avr_t *b) \ 1420 { \ 1421 int i; \ 1422 float_status s = env->vec_status; \ 1423 \ 1424 set_float_rounding_mode(rounding, &s); \ 1425 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1426 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1427 } \ 1428 } 1429 VRFI(n, float_round_nearest_even) 1430 VRFI(m, float_round_down) 1431 VRFI(p, float_round_up) 1432 VRFI(z, float_round_to_zero) 1433 #undef VRFI 1434 1435 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1436 { 1437 int i; 1438 1439 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1440 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1441 1442 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1443 } 1444 } 1445 1446 #define VRLMI(name, size, element, insert) \ 1447 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1448 { \ 1449 int i; \ 1450 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1451 uint##size##_t src1 = a->element[i]; \ 1452 uint##size##_t src2 = b->element[i]; \ 1453 uint##size##_t src3 = r->element[i]; \ 1454 uint##size##_t begin, end, shift, mask, rot_val; \ 1455 \ 1456 shift = extract##size(src2, 0, 6); \ 1457 end = extract##size(src2, 8, 6); \ 1458 begin = extract##size(src2, 16, 6); \ 1459 rot_val = rol##size(src1, shift); \ 1460 mask = mask_u##size(begin, end); \ 1461 if (insert) { \ 1462 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1463 } else { \ 1464 r->element[i] = (rot_val & mask); \ 1465 } \ 1466 } \ 1467 } 1468 1469 VRLMI(vrldmi, 64, u64, 1); 1470 VRLMI(vrlwmi, 32, u32, 1); 1471 VRLMI(vrldnm, 64, u64, 0); 1472 VRLMI(vrlwnm, 32, u32, 0); 1473 1474 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1475 ppc_avr_t *c) 1476 { 1477 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]); 1478 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]); 1479 } 1480 1481 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1482 { 1483 int i; 1484 1485 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1486 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status); 1487 } 1488 } 1489 1490 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1491 { 1492 int i; 1493 1494 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1495 r->f32[i] = float32_log2(b->f32[i], &env->vec_status); 1496 } 1497 } 1498 1499 #define VEXTU_X_DO(name, size, left) \ 1500 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1501 { \ 1502 int index = (a & 
0xf) * 8; \ 1503 if (left) { \ 1504 index = 128 - index - size; \ 1505 } \ 1506 return int128_getlo(int128_rshift(b->s128, index)) & \ 1507 MAKE_64BIT_MASK(0, size); \ 1508 } 1509 VEXTU_X_DO(vextublx, 8, 1) 1510 VEXTU_X_DO(vextuhlx, 16, 1) 1511 VEXTU_X_DO(vextuwlx, 32, 1) 1512 VEXTU_X_DO(vextubrx, 8, 0) 1513 VEXTU_X_DO(vextuhrx, 16, 0) 1514 VEXTU_X_DO(vextuwrx, 32, 0) 1515 #undef VEXTU_X_DO 1516 1517 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1518 { 1519 int i; 1520 unsigned int shift, bytes, size; 1521 1522 size = ARRAY_SIZE(r->u8); 1523 for (i = 0; i < size; i++) { 1524 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1525 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */ 1526 (((i + 1) < size) ? a->VsrB(i + 1) : 0); 1527 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */ 1528 } 1529 } 1530 1531 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1532 { 1533 int i; 1534 unsigned int shift, bytes; 1535 1536 /* 1537 * Use reverse order, as destination and source register can be 1538 * same. Its being modified in place saving temporary, reverse 1539 * order will guarantee that computed result is not fed back. 1540 */ 1541 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1542 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1543 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i); 1544 /* extract adjacent bytes */ 1545 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */ 1546 } 1547 } 1548 1549 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1550 { 1551 int sh = shift & 0xf; 1552 int i; 1553 ppc_avr_t result; 1554 1555 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1556 int index = sh + i; 1557 if (index > 0xf) { 1558 result.VsrB(i) = b->VsrB(index - 0x10); 1559 } else { 1560 result.VsrB(i) = a->VsrB(index); 1561 } 1562 } 1563 *r = result; 1564 } 1565 1566 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1567 { 1568 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1569 1570 #if defined(HOST_WORDS_BIGENDIAN) 1571 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1572 memset(&r->u8[16 - sh], 0, sh); 1573 #else 1574 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1575 memset(&r->u8[0], 0, sh); 1576 #endif 1577 } 1578 1579 #if defined(HOST_WORDS_BIGENDIAN) 1580 #define VINSERT(suffix, element) \ 1581 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1582 { \ 1583 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \ 1584 sizeof(r->element[0])); \ 1585 } 1586 #else 1587 #define VINSERT(suffix, element) \ 1588 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1589 { \ 1590 uint32_t d = (16 - index) - sizeof(r->element[0]); \ 1591 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \ 1592 } 1593 #endif 1594 VINSERT(b, u8) 1595 VINSERT(h, u16) 1596 VINSERT(w, u32) 1597 VINSERT(d, u64) 1598 #undef VINSERT 1599 #if defined(HOST_WORDS_BIGENDIAN) 1600 #define VEXTRACT(suffix, element) \ 1601 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1602 { \ 1603 uint32_t es = sizeof(r->element[0]); \ 1604 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1605 memset(&r->u8[8], 0, 8); \ 1606 memset(&r->u8[0], 0, 8 - es); \ 1607 } 1608 #else 1609 #define VEXTRACT(suffix, element) \ 1610 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1611 { \ 1612 uint32_t es = sizeof(r->element[0]); \ 1613 uint32_t s = (16 - index) - es; \ 1614 memmove(&r->u8[8], &b->u8[s], es); \ 1615 memset(&r->u8[0], 0, 8); \ 1616 memset(&r->u8[8 + es], 0, 8 - es); \ 1617 } 
1618 #endif 1619 VEXTRACT(ub, u8) 1620 VEXTRACT(uh, u16) 1621 VEXTRACT(uw, u32) 1622 VEXTRACT(d, u64) 1623 #undef VEXTRACT 1624 1625 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt, 1626 ppc_vsr_t *xb, uint32_t index) 1627 { 1628 ppc_vsr_t t = { }; 1629 size_t es = sizeof(uint32_t); 1630 uint32_t ext_index; 1631 int i; 1632 1633 ext_index = index; 1634 for (i = 0; i < es; i++, ext_index++) { 1635 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16); 1636 } 1637 1638 *xt = t; 1639 } 1640 1641 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt, 1642 ppc_vsr_t *xb, uint32_t index) 1643 { 1644 ppc_vsr_t t = *xt; 1645 size_t es = sizeof(uint32_t); 1646 int ins_index, i = 0; 1647 1648 ins_index = index; 1649 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1650 t.VsrB(ins_index) = xb->VsrB(8 - es + i); 1651 } 1652 1653 *xt = t; 1654 } 1655 1656 #define VEXT_SIGNED(name, element, cast) \ 1657 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1658 { \ 1659 int i; \ 1660 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1661 r->element[i] = (cast)b->element[i]; \ 1662 } \ 1663 } 1664 VEXT_SIGNED(vextsb2w, s32, int8_t) 1665 VEXT_SIGNED(vextsb2d, s64, int8_t) 1666 VEXT_SIGNED(vextsh2w, s32, int16_t) 1667 VEXT_SIGNED(vextsh2d, s64, int16_t) 1668 VEXT_SIGNED(vextsw2d, s64, int32_t) 1669 #undef VEXT_SIGNED 1670 1671 #define VNEG(name, element) \ 1672 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1673 { \ 1674 int i; \ 1675 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1676 r->element[i] = -b->element[i]; \ 1677 } \ 1678 } 1679 VNEG(vnegw, s32) 1680 VNEG(vnegd, s64) 1681 #undef VNEG 1682 1683 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1684 { 1685 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1686 1687 #if defined(HOST_WORDS_BIGENDIAN) 1688 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1689 memset(&r->u8[0], 0, sh); 1690 #else 1691 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1692 memset(&r->u8[16 - sh], 0, sh); 1693 #endif 1694 } 1695 1696 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1697 { 1698 int i; 1699 1700 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1701 r->u32[i] = a->u32[i] >= b->u32[i]; 1702 } 1703 } 1704 1705 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1706 { 1707 int64_t t; 1708 int i, upper; 1709 ppc_avr_t result; 1710 int sat = 0; 1711 1712 upper = ARRAY_SIZE(r->s32) - 1; 1713 t = (int64_t)b->VsrSW(upper); 1714 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1715 t += a->VsrSW(i); 1716 result.VsrSW(i) = 0; 1717 } 1718 result.VsrSW(upper) = cvtsdsw(t, &sat); 1719 *r = result; 1720 1721 if (sat) { 1722 set_vscr_sat(env); 1723 } 1724 } 1725 1726 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1727 { 1728 int i, j, upper; 1729 ppc_avr_t result; 1730 int sat = 0; 1731 1732 upper = 1; 1733 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1734 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 1735 1736 result.VsrD(i) = 0; 1737 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 1738 t += a->VsrSW(2 * i + j); 1739 } 1740 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 1741 } 1742 1743 *r = result; 1744 if (sat) { 1745 set_vscr_sat(env); 1746 } 1747 } 1748 1749 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1750 { 1751 int i, j; 1752 int sat = 0; 1753 1754 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1755 int64_t t = (int64_t)b->s32[i]; 1756 1757 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 1758 t += a->s8[4 * i + j]; 1759 } 1760 r->s32[i] = cvtsdsw(t, &sat); 1761 } 1762 1763 if (sat) { 1764 
set_vscr_sat(env); 1765 } 1766 } 1767 1768 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1769 { 1770 int sat = 0; 1771 int i; 1772 1773 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1774 int64_t t = (int64_t)b->s32[i]; 1775 1776 t += a->s16[2 * i] + a->s16[2 * i + 1]; 1777 r->s32[i] = cvtsdsw(t, &sat); 1778 } 1779 1780 if (sat) { 1781 set_vscr_sat(env); 1782 } 1783 } 1784 1785 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1786 { 1787 int i, j; 1788 int sat = 0; 1789 1790 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1791 uint64_t t = (uint64_t)b->u32[i]; 1792 1793 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 1794 t += a->u8[4 * i + j]; 1795 } 1796 r->u32[i] = cvtuduw(t, &sat); 1797 } 1798 1799 if (sat) { 1800 set_vscr_sat(env); 1801 } 1802 } 1803 1804 #if defined(HOST_WORDS_BIGENDIAN) 1805 #define UPKHI 1 1806 #define UPKLO 0 1807 #else 1808 #define UPKHI 0 1809 #define UPKLO 1 1810 #endif 1811 #define VUPKPX(suffix, hi) \ 1812 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1813 { \ 1814 int i; \ 1815 ppc_avr_t result; \ 1816 \ 1817 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 1818 uint16_t e = b->u16[hi ? i : i + 4]; \ 1819 uint8_t a = (e >> 15) ? 0xff : 0; \ 1820 uint8_t r = (e >> 10) & 0x1f; \ 1821 uint8_t g = (e >> 5) & 0x1f; \ 1822 uint8_t b = e & 0x1f; \ 1823 \ 1824 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 1825 } \ 1826 *r = result; \ 1827 } 1828 VUPKPX(lpx, UPKLO) 1829 VUPKPX(hpx, UPKHI) 1830 #undef VUPKPX 1831 1832 #define VUPK(suffix, unpacked, packee, hi) \ 1833 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1834 { \ 1835 int i; \ 1836 ppc_avr_t result; \ 1837 \ 1838 if (hi) { \ 1839 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 1840 result.unpacked[i] = b->packee[i]; \ 1841 } \ 1842 } else { \ 1843 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 1844 i++) { \ 1845 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 1846 } \ 1847 } \ 1848 *r = result; \ 1849 } 1850 VUPK(hsb, s16, s8, UPKHI) 1851 VUPK(hsh, s32, s16, UPKHI) 1852 VUPK(hsw, s64, s32, UPKHI) 1853 VUPK(lsb, s16, s8, UPKLO) 1854 VUPK(lsh, s32, s16, UPKLO) 1855 VUPK(lsw, s64, s32, UPKLO) 1856 #undef VUPK 1857 #undef UPKHI 1858 #undef UPKLO 1859 1860 #define VGENERIC_DO(name, element) \ 1861 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 1862 { \ 1863 int i; \ 1864 \ 1865 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1866 r->element[i] = name(b->element[i]); \ 1867 } \ 1868 } 1869 1870 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 1871 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 1872 1873 VGENERIC_DO(clzb, u8) 1874 VGENERIC_DO(clzh, u16) 1875 1876 #undef clzb 1877 #undef clzh 1878 1879 #define ctzb(v) ((v) ? ctz32(v) : 8) 1880 #define ctzh(v) ((v) ? 
ctz32(v) : 16) 1881 #define ctzw(v) ctz32((v)) 1882 #define ctzd(v) ctz64((v)) 1883 1884 VGENERIC_DO(ctzb, u8) 1885 VGENERIC_DO(ctzh, u16) 1886 VGENERIC_DO(ctzw, u32) 1887 VGENERIC_DO(ctzd, u64) 1888 1889 #undef ctzb 1890 #undef ctzh 1891 #undef ctzw 1892 #undef ctzd 1893 1894 #define popcntb(v) ctpop8(v) 1895 #define popcnth(v) ctpop16(v) 1896 #define popcntw(v) ctpop32(v) 1897 #define popcntd(v) ctpop64(v) 1898 1899 VGENERIC_DO(popcntb, u8) 1900 VGENERIC_DO(popcnth, u16) 1901 VGENERIC_DO(popcntw, u32) 1902 VGENERIC_DO(popcntd, u64) 1903 1904 #undef popcntb 1905 #undef popcnth 1906 #undef popcntw 1907 #undef popcntd 1908 1909 #undef VGENERIC_DO 1910 1911 #if defined(HOST_WORDS_BIGENDIAN) 1912 #define QW_ONE { .u64 = { 0, 1 } } 1913 #else 1914 #define QW_ONE { .u64 = { 1, 0 } } 1915 #endif 1916 1917 #ifndef CONFIG_INT128 1918 1919 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) 1920 { 1921 t->u64[0] = ~a.u64[0]; 1922 t->u64[1] = ~a.u64[1]; 1923 } 1924 1925 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) 1926 { 1927 if (a.VsrD(0) < b.VsrD(0)) { 1928 return -1; 1929 } else if (a.VsrD(0) > b.VsrD(0)) { 1930 return 1; 1931 } else if (a.VsrD(1) < b.VsrD(1)) { 1932 return -1; 1933 } else if (a.VsrD(1) > b.VsrD(1)) { 1934 return 1; 1935 } else { 1936 return 0; 1937 } 1938 } 1939 1940 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 1941 { 1942 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 1943 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 1944 (~a.VsrD(1) < b.VsrD(1)); 1945 } 1946 1947 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 1948 { 1949 ppc_avr_t not_a; 1950 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 1951 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 1952 (~a.VsrD(1) < b.VsrD(1)); 1953 avr_qw_not(¬_a, a); 1954 return avr_qw_cmpu(not_a, b) < 0; 1955 } 1956 1957 #endif 1958 1959 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1960 { 1961 #ifdef CONFIG_INT128 1962 r->u128 = a->u128 + b->u128; 1963 #else 1964 avr_qw_add(r, *a, *b); 1965 #endif 1966 } 1967 1968 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1969 { 1970 #ifdef CONFIG_INT128 1971 r->u128 = a->u128 + b->u128 + (c->u128 & 1); 1972 #else 1973 1974 if (c->VsrD(1) & 1) { 1975 ppc_avr_t tmp; 1976 1977 tmp.VsrD(0) = 0; 1978 tmp.VsrD(1) = c->VsrD(1) & 1; 1979 avr_qw_add(&tmp, *a, tmp); 1980 avr_qw_add(r, tmp, *b); 1981 } else { 1982 avr_qw_add(r, *a, *b); 1983 } 1984 #endif 1985 } 1986 1987 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1988 { 1989 #ifdef CONFIG_INT128 1990 r->u128 = (~a->u128 < b->u128); 1991 #else 1992 ppc_avr_t not_a; 1993 1994 avr_qw_not(¬_a, *a); 1995 1996 r->VsrD(0) = 0; 1997 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0); 1998 #endif 1999 } 2000 2001 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2002 { 2003 #ifdef CONFIG_INT128 2004 int carry_out = (~a->u128 < b->u128); 2005 if (!carry_out && (c->u128 & 1)) { 2006 carry_out = ((a->u128 + b->u128 + 1) == 0) && 2007 ((a->u128 != 0) || (b->u128 != 0)); 2008 } 2009 r->u128 = carry_out; 2010 #else 2011 2012 int carry_in = c->VsrD(1) & 1; 2013 int carry_out = 0; 2014 ppc_avr_t tmp; 2015 2016 carry_out = avr_qw_addc(&tmp, *a, *b); 2017 2018 if (!carry_out && carry_in) { 2019 ppc_avr_t one = QW_ONE; 2020 carry_out = avr_qw_addc(&tmp, tmp, one); 2021 } 2022 r->VsrD(0) = 0; 2023 r->VsrD(1) = carry_out; 2024 #endif 2025 } 2026 2027 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2028 { 2029 #ifdef CONFIG_INT128 2030 r->u128 = a->u128 - b->u128; 2031 #else 2032 ppc_avr_t 
tmp; 2033 ppc_avr_t one = QW_ONE; 2034 2035 avr_qw_not(&tmp, *b); 2036 avr_qw_add(&tmp, *a, tmp); 2037 avr_qw_add(r, tmp, one); 2038 #endif 2039 } 2040 2041 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2042 { 2043 #ifdef CONFIG_INT128 2044 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); 2045 #else 2046 ppc_avr_t tmp, sum; 2047 2048 avr_qw_not(&tmp, *b); 2049 avr_qw_add(&sum, *a, tmp); 2050 2051 tmp.VsrD(0) = 0; 2052 tmp.VsrD(1) = c->VsrD(1) & 1; 2053 avr_qw_add(r, sum, tmp); 2054 #endif 2055 } 2056 2057 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2058 { 2059 #ifdef CONFIG_INT128 2060 r->u128 = (~a->u128 < ~b->u128) || 2061 (a->u128 + ~b->u128 == (__uint128_t)-1); 2062 #else 2063 int carry = (avr_qw_cmpu(*a, *b) > 0); 2064 if (!carry) { 2065 ppc_avr_t tmp; 2066 avr_qw_not(&tmp, *b); 2067 avr_qw_add(&tmp, *a, tmp); 2068 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull)); 2069 } 2070 r->VsrD(0) = 0; 2071 r->VsrD(1) = carry; 2072 #endif 2073 } 2074 2075 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2076 { 2077 #ifdef CONFIG_INT128 2078 r->u128 = 2079 (~a->u128 < ~b->u128) || 2080 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); 2081 #else 2082 int carry_in = c->VsrD(1) & 1; 2083 int carry_out = (avr_qw_cmpu(*a, *b) > 0); 2084 if (!carry_out && carry_in) { 2085 ppc_avr_t tmp; 2086 avr_qw_not(&tmp, *b); 2087 avr_qw_add(&tmp, *a, tmp); 2088 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull)); 2089 } 2090 2091 r->VsrD(0) = 0; 2092 r->VsrD(1) = carry_out; 2093 #endif 2094 } 2095 2096 #define BCD_PLUS_PREF_1 0xC 2097 #define BCD_PLUS_PREF_2 0xF 2098 #define BCD_PLUS_ALT_1 0xA 2099 #define BCD_NEG_PREF 0xD 2100 #define BCD_NEG_ALT 0xB 2101 #define BCD_PLUS_ALT_2 0xE 2102 #define NATIONAL_PLUS 0x2B 2103 #define NATIONAL_NEG 0x2D 2104 2105 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2106 2107 static int bcd_get_sgn(ppc_avr_t *bcd) 2108 { 2109 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) { 2110 case BCD_PLUS_PREF_1: 2111 case BCD_PLUS_PREF_2: 2112 case BCD_PLUS_ALT_1: 2113 case BCD_PLUS_ALT_2: 2114 { 2115 return 1; 2116 } 2117 2118 case BCD_NEG_PREF: 2119 case BCD_NEG_ALT: 2120 { 2121 return -1; 2122 } 2123 2124 default: 2125 { 2126 return 0; 2127 } 2128 } 2129 } 2130 2131 static int bcd_preferred_sgn(int sgn, int ps) 2132 { 2133 if (sgn >= 0) { 2134 return (ps == 0) ? 
BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2135 } else { 2136 return BCD_NEG_PREF; 2137 } 2138 } 2139 2140 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2141 { 2142 uint8_t result; 2143 if (n & 1) { 2144 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4; 2145 } else { 2146 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF; 2147 } 2148 2149 if (unlikely(result > 9)) { 2150 *invalid = true; 2151 } 2152 return result; 2153 } 2154 2155 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2156 { 2157 if (n & 1) { 2158 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F; 2159 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4); 2160 } else { 2161 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0; 2162 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit; 2163 } 2164 } 2165 2166 static bool bcd_is_valid(ppc_avr_t *bcd) 2167 { 2168 int i; 2169 int invalid = 0; 2170 2171 if (bcd_get_sgn(bcd) == 0) { 2172 return false; 2173 } 2174 2175 for (i = 1; i < 32; i++) { 2176 bcd_get_digit(bcd, i, &invalid); 2177 if (unlikely(invalid)) { 2178 return false; 2179 } 2180 } 2181 return true; 2182 } 2183 2184 static int bcd_cmp_zero(ppc_avr_t *bcd) 2185 { 2186 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) { 2187 return CRF_EQ; 2188 } else { 2189 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT; 2190 } 2191 } 2192 2193 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2194 { 2195 return reg->VsrH(7 - n); 2196 } 2197 2198 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2199 { 2200 reg->VsrH(7 - n) = val; 2201 } 2202 2203 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2204 { 2205 int i; 2206 int invalid = 0; 2207 for (i = 31; i > 0; i--) { 2208 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2209 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2210 if (unlikely(invalid)) { 2211 return 0; /* doesn't matter */ 2212 } else if (dig_a > dig_b) { 2213 return 1; 2214 } else if (dig_a < dig_b) { 2215 return -1; 2216 } 2217 } 2218 2219 return 0; 2220 } 2221 2222 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2223 int *overflow) 2224 { 2225 int carry = 0; 2226 int i; 2227 int is_zero = 1; 2228 2229 for (i = 1; i <= 31; i++) { 2230 uint8_t digit = bcd_get_digit(a, i, invalid) + 2231 bcd_get_digit(b, i, invalid) + carry; 2232 is_zero &= (digit == 0); 2233 if (digit > 9) { 2234 carry = 1; 2235 digit -= 10; 2236 } else { 2237 carry = 0; 2238 } 2239 2240 bcd_put_digit(t, digit, i); 2241 } 2242 2243 *overflow = carry; 2244 return is_zero; 2245 } 2246 2247 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2248 int *overflow) 2249 { 2250 int carry = 0; 2251 int i; 2252 2253 for (i = 1; i <= 31; i++) { 2254 uint8_t digit = bcd_get_digit(a, i, invalid) - 2255 bcd_get_digit(b, i, invalid) + carry; 2256 if (digit & 0x80) { 2257 carry = -1; 2258 digit += 10; 2259 } else { 2260 carry = 0; 2261 } 2262 2263 bcd_put_digit(t, digit, i); 2264 } 2265 2266 *overflow = carry; 2267 } 2268 2269 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2270 { 2271 2272 int sgna = bcd_get_sgn(a); 2273 int sgnb = bcd_get_sgn(b); 2274 int invalid = (sgna == 0) || (sgnb == 0); 2275 int overflow = 0; 2276 int zero = 0; 2277 uint32_t cr = 0; 2278 ppc_avr_t result = { .u64 = { 0, 0 } }; 2279 2280 if (!invalid) { 2281 if (sgna == sgnb) { 2282 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2283 zero = bcd_add_mag(&result, a, b, &invalid, &overflow); 2284 cr = (sgna > 0) ? 

uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{

    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    int zero = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? CRF_GT : CRF_LT;
        } else {
            int magnitude = bcd_cmp_mag(a, b);
            if (magnitude > 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
                bcd_sub_mag(&result, a, b, &invalid, &overflow);
                cr = (sgna > 0) ? CRF_GT : CRF_LT;
            } else if (magnitude < 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
                bcd_sub_mag(&result, b, a, &invalid, &overflow);
                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
            } else {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
                cr = CRF_EQ;
            }
        }
    }

    if (unlikely(invalid)) {
        result.VsrD(0) = result.VsrD(1) = -1;
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    } else if (zero) {
        cr |= CRF_EQ;
    }

    *r = result;

    return cr;
}

uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
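
/*
 * bcdcfz/bcdctz convert between zoned and packed decimal.  In the zoned
 * form each byte carries one digit in its low nibble under a zone nibble:
 * 0x3 when PS = 0 (ASCII-style zones, with bit 0x4 of the rightmost zone
 * flagging a negative value) and 0xF when PS = 1 (EBCDIC-style zones,
 * with 0xB or 0xD in the rightmost zone marking a negative value).
 */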

uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
        digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
            (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->VsrD(0) >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

/**
 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
 *
 * Returns:
 * > 0 if ahi|alo > bhi|blo,
 *   0 if ahi|alo == bhi|blo,
 * < 0 if ahi|alo < bhi|blo
 */
static inline int ucmp128(uint64_t alo, uint64_t ahi,
                          uint64_t blo, uint64_t bhi)
{
    return (ahi == bhi) ?
        (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
        (ahi > bhi ? 1 : -1);
}

uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr;
    uint64_t lo_value;
    uint64_t hi_value;
    uint64_t rem;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->VsrSD(0) < 0) {
        lo_value = -b->VsrSD(1);
        hi_value = ~b->VsrD(0) + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);

        cr = CRF_LT;
    } else {
        lo_value = b->VsrD(1);
        hi_value = b->VsrD(0);
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);

        if (hi_value == 0 && lo_value == 0) {
            cr = CRF_EQ;
        } else {
            cr = CRF_GT;
        }
    }

    /*
     * Check src limits: abs(src) <= 10^31 - 1
     *
     * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
     */
    if (ucmp128(lo_value, hi_value,
                0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
        cr |= CRF_SO;

        /*
         * According to the ISA, if src wouldn't fit in the destination
         * register, the result is undefined.
         * In that case, we leave r unchanged.
         */
    } else {
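        /*
         * Split the 128-bit magnitude with a single divu128 by 10^15:
         * the remainder supplies the 15 least significant digits and the
         * 64-bit quotient supplies the remaining 16, so both loops below
         * only need 64-bit arithmetic.
         */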
        rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);

        for (i = 1; i < 16; rem /= 10, i++) {
            bcd_put_digit(&ret, rem % 10, i);
        }

        for (; i < 32; lo_value /= 10, i++) {
            bcd_put_digit(&ret, lo_value % 10, i);
        }

        *r = ret;
    }

    return cr;
}

uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->VsrSD(1) = -lo_value;
        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
    } else {
        r->VsrSD(1) = lo_value;
        r->VsrSD(0) = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);

    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i = a->VsrSB(7);
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSB(7);
    if (i >= 32) {
        ox_flag = true;
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i <= -32) {
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}
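
/*
 * bcdsr: decimal shift and round.  The shift count is the signed value in
 * byte 7 of VRA, one digit (4 bits) per step; on a right shift the result
 * is rounded up by one when the most significant digit shifted out is 5
 * or greater.
 */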

uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    int i = a->VsrSB(7);
    ppc_avr_t bcd_one;

    bcd_one.VsrD(0) = 0;
    bcd_one.VsrD(1) = 0x10;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);

        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}

uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
    int i = a->VsrSH(3) + 1;
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i >= 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}

uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSH(3);
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    } else if (i == 0) {
        if (ret.VsrD(0) || ret.VsrD(1)) {
            ox_flag = CRF_SO;
        }
        ret.VsrD(0) = ret.VsrD(1) = 0;
    }

    *r = ret;
    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}
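
/*
 * AES acceleration helpers: vsbox applies the AES S-box to each byte,
 * while vcipher/vcipherlast and vncipher/vncipherlast perform one round
 * of AES encryption or decryption (the *last variants omit MixColumns),
 * using the lookup tables provided by crypto/aes.h.
 */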

void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.VsrW(i) = b->VsrW(i) ^
            (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}

void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
    }
    *r = result;
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /*
     * This differs from what is written in ISA V2.07.  The RTL is
     * incorrect and will be fixed in V2.07B.
     */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->VsrW(i) =
            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
            AES_imc[tmp.VsrB(4 * i + 3)][3];
    }
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
    }
    *r = result;
}
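
/*
 * vshasigmaw and vshasigmad compute the SHA-256 and SHA-512 sigma
 * functions.  Bit 4 of st_six selects the upper-case Sigma variants
 * (rotates only); the low four bits select, per element, between the
 * sigma0-style and sigma1-style rotate/shift combinations.
 */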

void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}

void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}