1 /* 2 * PowerPC integer and vector emulation helpers for QEMU. 3 * 4 * Copyright (c) 2003-2007 Jocelyn Mayer 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "cpu.h" 22 #include "internal.h" 23 #include "qemu/host-utils.h" 24 #include "qemu/main-loop.h" 25 #include "qemu/log.h" 26 #include "exec/helper-proto.h" 27 #include "crypto/aes.h" 28 #include "fpu/softfloat.h" 29 #include "qapi/error.h" 30 #include "qemu/guest-random.h" 31 32 #include "helper_regs.h" 33 /*****************************************************************************/ 34 /* Fixed point operations helpers */ 35 36 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov) 37 { 38 if (unlikely(ov)) { 39 env->so = env->ov = 1; 40 } else { 41 env->ov = 0; 42 } 43 } 44 45 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, 46 uint32_t oe) 47 { 48 uint64_t rt = 0; 49 int overflow = 0; 50 51 uint64_t dividend = (uint64_t)ra << 32; 52 uint64_t divisor = (uint32_t)rb; 53 54 if (unlikely(divisor == 0)) { 55 overflow = 1; 56 } else { 57 rt = dividend / divisor; 58 overflow = rt > UINT32_MAX; 59 } 60 61 if (unlikely(overflow)) { 62 rt = 0; /* Undefined */ 63 } 64 65 if (oe) { 66 helper_update_ov_legacy(env, overflow); 67 } 68 69 return (target_ulong)rt; 70 } 71 72 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, 73 uint32_t oe) 74 { 75 int64_t rt = 0; 76 int overflow = 0; 77 78 int64_t dividend = (int64_t)ra << 32; 79 int64_t divisor = (int64_t)((int32_t)rb); 80 81 if (unlikely((divisor == 0) || 82 ((divisor == -1ull) && (dividend == INT64_MIN)))) { 83 overflow = 1; 84 } else { 85 rt = dividend / divisor; 86 overflow = rt != (int32_t)rt; 87 } 88 89 if (unlikely(overflow)) { 90 rt = 0; /* Undefined */ 91 } 92 93 if (oe) { 94 helper_update_ov_legacy(env, overflow); 95 } 96 97 return (target_ulong)rt; 98 } 99 100 #if defined(TARGET_PPC64) 101 102 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) 103 { 104 uint64_t rt = 0; 105 int overflow = 0; 106 107 overflow = divu128(&rt, &ra, rb); 108 109 if (unlikely(overflow)) { 110 rt = 0; /* Undefined */ 111 } 112 113 if (oe) { 114 helper_update_ov_legacy(env, overflow); 115 } 116 117 return rt; 118 } 119 120 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) 121 { 122 int64_t rt = 0; 123 int64_t ra = (int64_t)rau; 124 int64_t rb = (int64_t)rbu; 125 int overflow = divs128(&rt, &ra, rb); 126 127 if (unlikely(overflow)) { 128 rt = 0; /* Undefined */ 129 } 130 131 if (oe) { 132 helper_update_ov_legacy(env, overflow); 133 } 134 135 return rt; 136 } 137 138 #endif 139 140 141 #if defined(TARGET_PPC64) 142 /* if x = 0xab, returns 0xababababababababa */ 143 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff)) 144 145 /* 146 * subtract 1 from each byte, and with inverse, check if MSB is set at each 147 * byte. 148 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80 149 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 150 */ 151 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 152 153 /* When you XOR the pattern and there is a match, that byte will be zero */ 154 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 155 156 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 157 { 158 return hasvalue(rb, ra) ? CRF_GT : 0; 159 } 160 161 #undef pattern 162 #undef haszero 163 #undef hasvalue 164 165 /* 166 * Return a random number. 167 */ 168 uint64_t helper_darn32(void) 169 { 170 Error *err = NULL; 171 uint32_t ret; 172 173 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 174 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 175 error_get_pretty(err)); 176 error_free(err); 177 return -1; 178 } 179 180 return ret; 181 } 182 183 uint64_t helper_darn64(void) 184 { 185 Error *err = NULL; 186 uint64_t ret; 187 188 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 189 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 190 error_get_pretty(err)); 191 error_free(err); 192 return -1; 193 } 194 195 return ret; 196 } 197 198 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 199 { 200 int i; 201 uint64_t ra = 0; 202 203 for (i = 0; i < 8; i++) { 204 int index = (rs >> (i * 8)) & 0xFF; 205 if (index < 64) { 206 if (rb & PPC_BIT(index)) { 207 ra |= 1 << i; 208 } 209 } 210 } 211 return ra; 212 } 213 214 #endif 215 216 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 217 { 218 target_ulong mask = 0xff; 219 target_ulong ra = 0; 220 int i; 221 222 for (i = 0; i < sizeof(target_ulong); i++) { 223 if ((rs & mask) == (rb & mask)) { 224 ra |= mask; 225 } 226 mask <<= 8; 227 } 228 return ra; 229 } 230 231 /* shift right arithmetic helper */ 232 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 233 target_ulong shift) 234 { 235 int32_t ret; 236 237 if (likely(!(shift & 0x20))) { 238 if (likely((uint32_t)shift != 0)) { 239 shift &= 0x1f; 240 ret = (int32_t)value >> shift; 241 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 242 env->ca32 = env->ca = 0; 243 } else { 244 env->ca32 = env->ca = 1; 245 } 246 } else { 247 ret = (int32_t)value; 248 env->ca32 = env->ca = 0; 249 } 250 } else { 251 ret = (int32_t)value >> 31; 252 env->ca32 = env->ca = (ret != 0); 253 } 254 return (target_long)ret; 255 } 256 257 #if defined(TARGET_PPC64) 258 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 259 target_ulong shift) 260 { 261 int64_t ret; 262 263 if (likely(!(shift & 0x40))) { 264 if (likely((uint64_t)shift != 0)) { 265 shift &= 0x3f; 266 ret = (int64_t)value >> shift; 267 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 268 env->ca32 = env->ca = 0; 269 } else { 270 env->ca32 = env->ca = 1; 271 } 272 } else { 273 ret = (int64_t)value; 274 env->ca32 = env->ca = 0; 275 } 276 } else { 277 ret = (int64_t)value >> 63; 278 env->ca32 = env->ca = (ret != 0); 279 } 280 return ret; 281 } 282 #endif 283 284 #if defined(TARGET_PPC64) 285 target_ulong helper_popcntb(target_ulong val) 286 { 287 /* Note that we don't fold past bytes */ 288 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 289 0x5555555555555555ULL); 290 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 291 0x3333333333333333ULL); 292 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 293 0x0f0f0f0f0f0f0f0fULL); 294 return val; 295 } 296 297 target_ulong helper_popcntw(target_ulong val) 298 { 299 /* Note that we don't fold past words. */ 300 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 301 0x5555555555555555ULL); 302 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 303 0x3333333333333333ULL); 304 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 305 0x0f0f0f0f0f0f0f0fULL); 306 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 307 0x00ff00ff00ff00ffULL); 308 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 309 0x0000ffff0000ffffULL); 310 return val; 311 } 312 #else 313 target_ulong helper_popcntb(target_ulong val) 314 { 315 /* Note that we don't fold past bytes */ 316 val = (val & 0x55555555) + ((val >> 1) & 0x55555555); 317 val = (val & 0x33333333) + ((val >> 2) & 0x33333333); 318 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); 319 return val; 320 } 321 #endif 322 323 uint64_t helper_cfuged(uint64_t src, uint64_t mask) 324 { 325 /* 326 * Instead of processing the mask bit-by-bit from the most significant to 327 * the least significant bit, as described in PowerISA, we'll handle it in 328 * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use 329 * ctz or cto, we negate the mask at the end of the loop. 330 */ 331 target_ulong m, left = 0, right = 0; 332 unsigned int n, i = 64; 333 bool bit = false; /* tracks if we are processing zeros or ones */ 334 335 if (mask == 0 || mask == -1) { 336 return src; 337 } 338 339 /* Processes the mask in blocks, from LSB to MSB */ 340 while (i) { 341 /* Find how many bits we should take */ 342 n = ctz64(mask); 343 if (n > i) { 344 n = i; 345 } 346 347 /* 348 * Extracts 'n' trailing bits of src and put them on the leading 'n' 349 * bits of 'right' or 'left', pushing down the previously extracted 350 * values. 351 */ 352 m = (1ll << n) - 1; 353 if (bit) { 354 right = ror64(right | (src & m), n); 355 } else { 356 left = ror64(left | (src & m), n); 357 } 358 359 /* 360 * Discards the processed bits from 'src' and 'mask'. Note that we are 361 * removing 'n' trailing zeros from 'mask', but the logical shift will 362 * add 'n' leading zeros back, so the population count of 'mask' is kept 363 * the same. 364 */ 365 src >>= n; 366 mask >>= n; 367 i -= n; 368 bit = !bit; 369 mask = ~mask; 370 } 371 372 /* 373 * At the end, right was ror'ed ctpop(mask) times. To put it back in place, 374 * we'll shift it more 64-ctpop(mask) times. 375 */ 376 if (bit) { 377 n = ctpop64(mask); 378 } else { 379 n = 64 - ctpop64(mask); 380 } 381 382 return left | (right >> n); 383 } 384 385 /*****************************************************************************/ 386 /* PowerPC 601 specific instructions (POWER bridge) */ 387 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2) 388 { 389 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 390 391 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 392 (int32_t)arg2 == 0) { 393 env->spr[SPR_MQ] = 0; 394 return INT32_MIN; 395 } else { 396 env->spr[SPR_MQ] = tmp % arg2; 397 return tmp / (int32_t)arg2; 398 } 399 } 400 401 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1, 402 target_ulong arg2) 403 { 404 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 405 406 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 407 (int32_t)arg2 == 0) { 408 env->so = env->ov = 1; 409 env->spr[SPR_MQ] = 0; 410 return INT32_MIN; 411 } else { 412 env->spr[SPR_MQ] = tmp % arg2; 413 tmp /= (int32_t)arg2; 414 if ((int32_t)tmp != tmp) { 415 env->so = env->ov = 1; 416 } else { 417 env->ov = 0; 418 } 419 return tmp; 420 } 421 } 422 423 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1, 424 target_ulong arg2) 425 { 426 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 427 (int32_t)arg2 == 0) { 428 env->spr[SPR_MQ] = 0; 429 return INT32_MIN; 430 } else { 431 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 432 return (int32_t)arg1 / (int32_t)arg2; 433 } 434 } 435 436 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1, 437 target_ulong arg2) 438 { 439 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 440 (int32_t)arg2 == 0) { 441 env->so = env->ov = 1; 442 env->spr[SPR_MQ] = 0; 443 return INT32_MIN; 444 } else { 445 env->ov = 0; 446 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 447 return (int32_t)arg1 / (int32_t)arg2; 448 } 449 } 450 451 /*****************************************************************************/ 452 /* 602 specific instructions */ 453 /* mfrom is the most crazy instruction ever seen, imho ! */ 454 /* Real implementation uses a ROM table. Do the same */ 455 /* 456 * Extremely decomposed: 457 * -arg / 256 458 * return 256 * log10(10 + 1.0) + 0.5 459 */ 460 #if !defined(CONFIG_USER_ONLY) 461 target_ulong helper_602_mfrom(target_ulong arg) 462 { 463 if (likely(arg < 602)) { 464 #include "mfrom_table.c.inc" 465 return mfrom_ROM_table[arg]; 466 } else { 467 return 0; 468 } 469 } 470 #endif 471 472 /*****************************************************************************/ 473 /* Altivec extension helpers */ 474 #if defined(HOST_WORDS_BIGENDIAN) 475 #define VECTOR_FOR_INORDER_I(index, element) \ 476 for (index = 0; index < ARRAY_SIZE(r->element); index++) 477 #else 478 #define VECTOR_FOR_INORDER_I(index, element) \ 479 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--) 480 #endif 481 482 /* Saturating arithmetic helpers. */ 483 #define SATCVT(from, to, from_type, to_type, min, max) \ 484 static inline to_type cvt##from##to(from_type x, int *sat) \ 485 { \ 486 to_type r; \ 487 \ 488 if (x < (from_type)min) { \ 489 r = min; \ 490 *sat = 1; \ 491 } else if (x > (from_type)max) { \ 492 r = max; \ 493 *sat = 1; \ 494 } else { \ 495 r = x; \ 496 } \ 497 return r; \ 498 } 499 #define SATCVTU(from, to, from_type, to_type, min, max) \ 500 static inline to_type cvt##from##to(from_type x, int *sat) \ 501 { \ 502 to_type r; \ 503 \ 504 if (x > (from_type)max) { \ 505 r = max; \ 506 *sat = 1; \ 507 } else { \ 508 r = x; \ 509 } \ 510 return r; \ 511 } 512 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX) 513 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX) 514 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX) 515 516 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX) 517 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX) 518 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX) 519 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX) 520 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX) 521 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX) 522 #undef SATCVT 523 #undef SATCVTU 524 525 void helper_mtvscr(CPUPPCState *env, uint32_t vscr) 526 { 527 ppc_store_vscr(env, vscr); 528 } 529 530 uint32_t helper_mfvscr(CPUPPCState *env) 531 { 532 return ppc_get_vscr(env); 533 } 534 535 static inline void set_vscr_sat(CPUPPCState *env) 536 { 537 /* The choice of non-zero value is arbitrary. */ 538 env->vscr_sat.u32[0] = 1; 539 } 540 541 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 542 { 543 int i; 544 545 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 546 r->u32[i] = ~a->u32[i] < b->u32[i]; 547 } 548 } 549 550 /* vprtybw */ 551 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b) 552 { 553 int i; 554 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 555 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16); 556 res ^= res >> 8; 557 r->u32[i] = res & 1; 558 } 559 } 560 561 /* vprtybd */ 562 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b) 563 { 564 int i; 565 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 566 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32); 567 res ^= res >> 16; 568 res ^= res >> 8; 569 r->u64[i] = res & 1; 570 } 571 } 572 573 /* vprtybq */ 574 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b) 575 { 576 uint64_t res = b->u64[0] ^ b->u64[1]; 577 res ^= res >> 32; 578 res ^= res >> 16; 579 res ^= res >> 8; 580 r->VsrD(1) = res & 1; 581 r->VsrD(0) = 0; 582 } 583 584 #define VARITHFP(suffix, func) \ 585 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 586 ppc_avr_t *b) \ 587 { \ 588 int i; \ 589 \ 590 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 591 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 592 } \ 593 } 594 VARITHFP(addfp, float32_add) 595 VARITHFP(subfp, float32_sub) 596 VARITHFP(minfp, float32_min) 597 VARITHFP(maxfp, float32_max) 598 #undef VARITHFP 599 600 #define VARITHFPFMA(suffix, type) \ 601 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 602 ppc_avr_t *b, ppc_avr_t *c) \ 603 { \ 604 int i; \ 605 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 606 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 607 type, &env->vec_status); \ 608 } \ 609 } 610 VARITHFPFMA(maddfp, 0); 611 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 612 #undef VARITHFPFMA 613 614 #define VARITHSAT_CASE(type, op, cvt, element) \ 615 { \ 616 type result = (type)a->element[i] op (type)b->element[i]; \ 617 r->element[i] = cvt(result, &sat); \ 618 } 619 620 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 621 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 622 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 623 { \ 624 int sat = 0; \ 625 int i; \ 626 \ 627 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 628 VARITHSAT_CASE(optype, op, cvt, element); \ 629 } \ 630 if (sat) { \ 631 vscr_sat->u32[0] = 1; \ 632 } \ 633 } 634 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 635 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 636 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 637 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 638 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 639 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 640 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 641 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 642 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 643 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 644 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 645 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 646 #undef VARITHSAT_CASE 647 #undef VARITHSAT_DO 648 #undef VARITHSAT_SIGNED 649 #undef VARITHSAT_UNSIGNED 650 651 #define VAVG_DO(name, element, etype) \ 652 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 653 { \ 654 int i; \ 655 \ 656 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 657 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 658 r->element[i] = x >> 1; \ 659 } \ 660 } 661 662 #define VAVG(type, signed_element, signed_type, unsigned_element, \ 663 unsigned_type) \ 664 VAVG_DO(avgs##type, signed_element, signed_type) \ 665 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 666 VAVG(b, s8, int16_t, u8, uint16_t) 667 VAVG(h, s16, int32_t, u16, uint32_t) 668 VAVG(w, s32, int64_t, u32, uint64_t) 669 #undef VAVG_DO 670 #undef VAVG 671 672 #define VABSDU_DO(name, element) \ 673 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 674 { \ 675 int i; \ 676 \ 677 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 678 r->element[i] = (a->element[i] > b->element[i]) ? \ 679 (a->element[i] - b->element[i]) : \ 680 (b->element[i] - a->element[i]); \ 681 } \ 682 } 683 684 /* 685 * VABSDU - Vector absolute difference unsigned 686 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 687 * element - element type to access from vector 688 */ 689 #define VABSDU(type, element) \ 690 VABSDU_DO(absdu##type, element) 691 VABSDU(b, u8) 692 VABSDU(h, u16) 693 VABSDU(w, u32) 694 #undef VABSDU_DO 695 #undef VABSDU 696 697 #define VCF(suffix, cvt, element) \ 698 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 699 ppc_avr_t *b, uint32_t uim) \ 700 { \ 701 int i; \ 702 \ 703 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 704 float32 t = cvt(b->element[i], &env->vec_status); \ 705 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 706 } \ 707 } 708 VCF(ux, uint32_to_float32, u32) 709 VCF(sx, int32_to_float32, s32) 710 #undef VCF 711 712 #define VCMP_DO(suffix, compare, element, record) \ 713 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 714 ppc_avr_t *a, ppc_avr_t *b) \ 715 { \ 716 uint64_t ones = (uint64_t)-1; \ 717 uint64_t all = ones; \ 718 uint64_t none = 0; \ 719 int i; \ 720 \ 721 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 722 uint64_t result = (a->element[i] compare b->element[i] ? \ 723 ones : 0x0); \ 724 switch (sizeof(a->element[0])) { \ 725 case 8: \ 726 r->u64[i] = result; \ 727 break; \ 728 case 4: \ 729 r->u32[i] = result; \ 730 break; \ 731 case 2: \ 732 r->u16[i] = result; \ 733 break; \ 734 case 1: \ 735 r->u8[i] = result; \ 736 break; \ 737 } \ 738 all &= result; \ 739 none |= result; \ 740 } \ 741 if (record) { \ 742 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 743 } \ 744 } 745 #define VCMP(suffix, compare, element) \ 746 VCMP_DO(suffix, compare, element, 0) \ 747 VCMP_DO(suffix##_dot, compare, element, 1) 748 VCMP(equb, ==, u8) 749 VCMP(equh, ==, u16) 750 VCMP(equw, ==, u32) 751 VCMP(equd, ==, u64) 752 VCMP(gtub, >, u8) 753 VCMP(gtuh, >, u16) 754 VCMP(gtuw, >, u32) 755 VCMP(gtud, >, u64) 756 VCMP(gtsb, >, s8) 757 VCMP(gtsh, >, s16) 758 VCMP(gtsw, >, s32) 759 VCMP(gtsd, >, s64) 760 #undef VCMP_DO 761 #undef VCMP 762 763 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \ 764 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \ 765 ppc_avr_t *a, ppc_avr_t *b) \ 766 { \ 767 etype ones = (etype)-1; \ 768 etype all = ones; \ 769 etype result, none = 0; \ 770 int i; \ 771 \ 772 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 773 if (cmpzero) { \ 774 result = ((a->element[i] == 0) \ 775 || (b->element[i] == 0) \ 776 || (a->element[i] != b->element[i]) ? \ 777 ones : 0x0); \ 778 } else { \ 779 result = (a->element[i] != b->element[i]) ? ones : 0x0; \ 780 } \ 781 r->element[i] = result; \ 782 all &= result; \ 783 none |= result; \ 784 } \ 785 if (record) { \ 786 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 787 } \ 788 } 789 790 /* 791 * VCMPNEZ - Vector compare not equal to zero 792 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word) 793 * element - element type to access from vector 794 */ 795 #define VCMPNE(suffix, element, etype, cmpzero) \ 796 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \ 797 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1) 798 VCMPNE(zb, u8, uint8_t, 1) 799 VCMPNE(zh, u16, uint16_t, 1) 800 VCMPNE(zw, u32, uint32_t, 1) 801 VCMPNE(b, u8, uint8_t, 0) 802 VCMPNE(h, u16, uint16_t, 0) 803 VCMPNE(w, u32, uint32_t, 0) 804 #undef VCMPNE_DO 805 #undef VCMPNE 806 807 #define VCMPFP_DO(suffix, compare, order, record) \ 808 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 809 ppc_avr_t *a, ppc_avr_t *b) \ 810 { \ 811 uint32_t ones = (uint32_t)-1; \ 812 uint32_t all = ones; \ 813 uint32_t none = 0; \ 814 int i; \ 815 \ 816 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 817 uint32_t result; \ 818 FloatRelation rel = \ 819 float32_compare_quiet(a->f32[i], b->f32[i], \ 820 &env->vec_status); \ 821 if (rel == float_relation_unordered) { \ 822 result = 0; \ 823 } else if (rel compare order) { \ 824 result = ones; \ 825 } else { \ 826 result = 0; \ 827 } \ 828 r->u32[i] = result; \ 829 all &= result; \ 830 none |= result; \ 831 } \ 832 if (record) { \ 833 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 834 } \ 835 } 836 #define VCMPFP(suffix, compare, order) \ 837 VCMPFP_DO(suffix, compare, order, 0) \ 838 VCMPFP_DO(suffix##_dot, compare, order, 1) 839 VCMPFP(eqfp, ==, float_relation_equal) 840 VCMPFP(gefp, !=, float_relation_less) 841 VCMPFP(gtfp, ==, float_relation_greater) 842 #undef VCMPFP_DO 843 #undef VCMPFP 844 845 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 846 ppc_avr_t *a, ppc_avr_t *b, int record) 847 { 848 int i; 849 int all_in = 0; 850 851 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 852 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 853 &env->vec_status); 854 if (le_rel == float_relation_unordered) { 855 r->u32[i] = 0xc0000000; 856 all_in = 1; 857 } else { 858 float32 bneg = float32_chs(b->f32[i]); 859 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, 860 &env->vec_status); 861 int le = le_rel != float_relation_greater; 862 int ge = ge_rel != float_relation_less; 863 864 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 865 all_in |= (!le | !ge); 866 } 867 } 868 if (record) { 869 env->crf[6] = (all_in == 0) << 1; 870 } 871 } 872 873 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 874 { 875 vcmpbfp_internal(env, r, a, b, 0); 876 } 877 878 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 879 ppc_avr_t *b) 880 { 881 vcmpbfp_internal(env, r, a, b, 1); 882 } 883 884 #define VCT(suffix, satcvt, element) \ 885 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 886 ppc_avr_t *b, uint32_t uim) \ 887 { \ 888 int i; \ 889 int sat = 0; \ 890 float_status s = env->vec_status; \ 891 \ 892 set_float_rounding_mode(float_round_to_zero, &s); \ 893 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 894 if (float32_is_any_nan(b->f32[i])) { \ 895 r->element[i] = 0; \ 896 } else { \ 897 float64 t = float32_to_float64(b->f32[i], &s); \ 898 int64_t j; \ 899 \ 900 t = float64_scalbn(t, uim, &s); \ 901 j = float64_to_int64(t, &s); \ 902 r->element[i] = satcvt(j, &sat); \ 903 } \ 904 } \ 905 if (sat) { \ 906 set_vscr_sat(env); \ 907 } \ 908 } 909 VCT(uxs, cvtsduw, u32) 910 VCT(sxs, cvtsdsw, s32) 911 #undef VCT 912 913 target_ulong helper_vclzlsbb(ppc_avr_t *r) 914 { 915 target_ulong count = 0; 916 int i; 917 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 918 if (r->VsrB(i) & 0x01) { 919 break; 920 } 921 count++; 922 } 923 return count; 924 } 925 926 target_ulong helper_vctzlsbb(ppc_avr_t *r) 927 { 928 target_ulong count = 0; 929 int i; 930 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 931 if (r->VsrB(i) & 0x01) { 932 break; 933 } 934 count++; 935 } 936 return count; 937 } 938 939 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 940 ppc_avr_t *b, ppc_avr_t *c) 941 { 942 int sat = 0; 943 int i; 944 945 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 946 int32_t prod = a->s16[i] * b->s16[i]; 947 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 948 949 r->s16[i] = cvtswsh(t, &sat); 950 } 951 952 if (sat) { 953 set_vscr_sat(env); 954 } 955 } 956 957 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 958 ppc_avr_t *b, ppc_avr_t *c) 959 { 960 int sat = 0; 961 int i; 962 963 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 964 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 965 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 966 r->s16[i] = cvtswsh(t, &sat); 967 } 968 969 if (sat) { 970 set_vscr_sat(env); 971 } 972 } 973 974 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 975 { 976 int i; 977 978 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 979 int32_t prod = a->s16[i] * b->s16[i]; 980 r->s16[i] = (int16_t) (prod + c->s16[i]); 981 } 982 } 983 984 #define VMRG_DO(name, element, access, ofs) \ 985 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 986 { \ 987 ppc_avr_t result; \ 988 int i, half = ARRAY_SIZE(r->element) / 2; \ 989 \ 990 for (i = 0; i < half; i++) { \ 991 result.access(i * 2 + 0) = a->access(i + ofs); \ 992 result.access(i * 2 + 1) = b->access(i + ofs); \ 993 } \ 994 *r = result; \ 995 } 996 997 #define VMRG(suffix, element, access) \ 998 VMRG_DO(mrgl##suffix, element, access, half) \ 999 VMRG_DO(mrgh##suffix, element, access, 0) 1000 VMRG(b, u8, VsrB) 1001 VMRG(h, u16, VsrH) 1002 VMRG(w, u32, VsrW) 1003 #undef VMRG_DO 1004 #undef VMRG 1005 1006 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1007 ppc_avr_t *b, ppc_avr_t *c) 1008 { 1009 int32_t prod[16]; 1010 int i; 1011 1012 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 1013 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 1014 } 1015 1016 VECTOR_FOR_INORDER_I(i, s32) { 1017 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 1018 prod[4 * i + 2] + prod[4 * i + 3]; 1019 } 1020 } 1021 1022 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1023 ppc_avr_t *b, ppc_avr_t *c) 1024 { 1025 int32_t prod[8]; 1026 int i; 1027 1028 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1029 prod[i] = a->s16[i] * b->s16[i]; 1030 } 1031 1032 VECTOR_FOR_INORDER_I(i, s32) { 1033 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1034 } 1035 } 1036 1037 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1038 ppc_avr_t *b, ppc_avr_t *c) 1039 { 1040 int32_t prod[8]; 1041 int i; 1042 int sat = 0; 1043 1044 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1045 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1046 } 1047 1048 VECTOR_FOR_INORDER_I(i, s32) { 1049 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1050 1051 r->u32[i] = cvtsdsw(t, &sat); 1052 } 1053 1054 if (sat) { 1055 set_vscr_sat(env); 1056 } 1057 } 1058 1059 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1060 ppc_avr_t *b, ppc_avr_t *c) 1061 { 1062 uint16_t prod[16]; 1063 int i; 1064 1065 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1066 prod[i] = a->u8[i] * b->u8[i]; 1067 } 1068 1069 VECTOR_FOR_INORDER_I(i, u32) { 1070 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1071 prod[4 * i + 2] + prod[4 * i + 3]; 1072 } 1073 } 1074 1075 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1076 ppc_avr_t *b, ppc_avr_t *c) 1077 { 1078 uint32_t prod[8]; 1079 int i; 1080 1081 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1082 prod[i] = a->u16[i] * b->u16[i]; 1083 } 1084 1085 VECTOR_FOR_INORDER_I(i, u32) { 1086 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1087 } 1088 } 1089 1090 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1091 ppc_avr_t *b, ppc_avr_t *c) 1092 { 1093 uint32_t prod[8]; 1094 int i; 1095 int sat = 0; 1096 1097 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1098 prod[i] = a->u16[i] * b->u16[i]; 1099 } 1100 1101 VECTOR_FOR_INORDER_I(i, s32) { 1102 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1103 1104 r->u32[i] = cvtuduw(t, &sat); 1105 } 1106 1107 if (sat) { 1108 set_vscr_sat(env); 1109 } 1110 } 1111 1112 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 1113 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1114 { \ 1115 int i; \ 1116 \ 1117 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1118 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 1119 (cast)b->mul_access(i); \ 1120 } \ 1121 } 1122 1123 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 1124 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1125 { \ 1126 int i; \ 1127 \ 1128 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1129 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1130 (cast)b->mul_access(i + 1); \ 1131 } \ 1132 } 1133 1134 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1135 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \ 1136 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast) 1137 VMUL(sb, s8, VsrSB, VsrSH, int16_t) 1138 VMUL(sh, s16, VsrSH, VsrSW, int32_t) 1139 VMUL(sw, s32, VsrSW, VsrSD, int64_t) 1140 VMUL(ub, u8, VsrB, VsrH, uint16_t) 1141 VMUL(uh, u16, VsrH, VsrW, uint32_t) 1142 VMUL(uw, u32, VsrW, VsrD, uint64_t) 1143 #undef VMUL_DO_EVN 1144 #undef VMUL_DO_ODD 1145 #undef VMUL 1146 1147 void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1148 { 1149 int i; 1150 1151 for (i = 0; i < 4; i++) { 1152 r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32); 1153 } 1154 } 1155 1156 void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1157 { 1158 int i; 1159 1160 for (i = 0; i < 4; i++) { 1161 r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] * 1162 (uint64_t)b->u32[i]) >> 32); 1163 } 1164 } 1165 1166 void helper_vmulhsd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1167 { 1168 uint64_t discard; 1169 1170 muls64(&discard, &r->u64[0], a->s64[0], b->s64[0]); 1171 muls64(&discard, &r->u64[1], a->s64[1], b->s64[1]); 1172 } 1173 1174 void helper_vmulhud(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1175 { 1176 uint64_t discard; 1177 1178 mulu64(&discard, &r->u64[0], a->u64[0], b->u64[0]); 1179 mulu64(&discard, &r->u64[1], a->u64[1], b->u64[1]); 1180 } 1181 1182 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1183 ppc_avr_t *c) 1184 { 1185 ppc_avr_t result; 1186 int i; 1187 1188 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1189 int s = c->VsrB(i) & 0x1f; 1190 int index = s & 0xf; 1191 1192 if (s & 0x10) { 1193 result.VsrB(i) = b->VsrB(index); 1194 } else { 1195 result.VsrB(i) = a->VsrB(index); 1196 } 1197 } 1198 *r = result; 1199 } 1200 1201 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1202 ppc_avr_t *c) 1203 { 1204 ppc_avr_t result; 1205 int i; 1206 1207 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1208 int s = c->VsrB(i) & 0x1f; 1209 int index = 15 - (s & 0xf); 1210 1211 if (s & 0x10) { 1212 result.VsrB(i) = a->VsrB(index); 1213 } else { 1214 result.VsrB(i) = b->VsrB(index); 1215 } 1216 } 1217 *r = result; 1218 } 1219 1220 #if defined(HOST_WORDS_BIGENDIAN) 1221 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1222 #define VBPERMD_INDEX(i) (i) 1223 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1224 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) 1225 #else 1226 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1227 #define VBPERMD_INDEX(i) (1 - i) 1228 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1229 #define EXTRACT_BIT(avr, i, index) \ 1230 (extract64((avr)->u64[1 - i], 63 - index, 1)) 1231 #endif 1232 1233 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1234 { 1235 int i, j; 1236 ppc_avr_t result = { .u64 = { 0, 0 } }; 1237 VECTOR_FOR_INORDER_I(i, u64) { 1238 for (j = 0; j < 8; j++) { 1239 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1240 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1241 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1242 } 1243 } 1244 } 1245 *r = result; 1246 } 1247 1248 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1249 { 1250 int i; 1251 uint64_t perm = 0; 1252 1253 VECTOR_FOR_INORDER_I(i, u8) { 1254 int index = VBPERMQ_INDEX(b, i); 1255 1256 if (index < 128) { 1257 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1258 if (a->u64[VBPERMQ_DW(index)] & mask) { 1259 perm |= (0x8000 >> i); 1260 } 1261 } 1262 } 1263 1264 r->VsrD(0) = perm; 1265 r->VsrD(1) = 0; 1266 } 1267 1268 #undef VBPERMQ_INDEX 1269 #undef VBPERMQ_DW 1270 1271 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1272 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1273 { \ 1274 int i, j; \ 1275 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ 1276 \ 1277 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1278 prod[i] = 0; \ 1279 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1280 if (a->srcfld[i] & (1ull << j)) { \ 1281 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1282 } \ 1283 } \ 1284 } \ 1285 \ 1286 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1287 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ 1288 } \ 1289 } 1290 1291 PMSUM(vpmsumb, u8, u16, uint16_t) 1292 PMSUM(vpmsumh, u16, u32, uint32_t) 1293 PMSUM(vpmsumw, u32, u64, uint64_t) 1294 1295 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1296 { 1297 1298 #ifdef CONFIG_INT128 1299 int i, j; 1300 __uint128_t prod[2]; 1301 1302 VECTOR_FOR_INORDER_I(i, u64) { 1303 prod[i] = 0; 1304 for (j = 0; j < 64; j++) { 1305 if (a->u64[i] & (1ull << j)) { 1306 prod[i] ^= (((__uint128_t)b->u64[i]) << j); 1307 } 1308 } 1309 } 1310 1311 r->u128 = prod[0] ^ prod[1]; 1312 1313 #else 1314 int i, j; 1315 ppc_avr_t prod[2]; 1316 1317 VECTOR_FOR_INORDER_I(i, u64) { 1318 prod[i].VsrD(1) = prod[i].VsrD(0) = 0; 1319 for (j = 0; j < 64; j++) { 1320 if (a->u64[i] & (1ull << j)) { 1321 ppc_avr_t bshift; 1322 if (j == 0) { 1323 bshift.VsrD(0) = 0; 1324 bshift.VsrD(1) = b->u64[i]; 1325 } else { 1326 bshift.VsrD(0) = b->u64[i] >> (64 - j); 1327 bshift.VsrD(1) = b->u64[i] << j; 1328 } 1329 prod[i].VsrD(1) ^= bshift.VsrD(1); 1330 prod[i].VsrD(0) ^= bshift.VsrD(0); 1331 } 1332 } 1333 } 1334 1335 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1); 1336 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0); 1337 #endif 1338 } 1339 1340 1341 #if defined(HOST_WORDS_BIGENDIAN) 1342 #define PKBIG 1 1343 #else 1344 #define PKBIG 0 1345 #endif 1346 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1347 { 1348 int i, j; 1349 ppc_avr_t result; 1350 #if defined(HOST_WORDS_BIGENDIAN) 1351 const ppc_avr_t *x[2] = { a, b }; 1352 #else 1353 const ppc_avr_t *x[2] = { b, a }; 1354 #endif 1355 1356 VECTOR_FOR_INORDER_I(i, u64) { 1357 VECTOR_FOR_INORDER_I(j, u32) { 1358 uint32_t e = x[i]->u32[j]; 1359 1360 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1361 ((e >> 6) & 0x3e0) | 1362 ((e >> 3) & 0x1f)); 1363 } 1364 } 1365 *r = result; 1366 } 1367 1368 #define VPK(suffix, from, to, cvt, dosat) \ 1369 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1370 ppc_avr_t *a, ppc_avr_t *b) \ 1371 { \ 1372 int i; \ 1373 int sat = 0; \ 1374 ppc_avr_t result; \ 1375 ppc_avr_t *a0 = PKBIG ? a : b; \ 1376 ppc_avr_t *a1 = PKBIG ? b : a; \ 1377 \ 1378 VECTOR_FOR_INORDER_I(i, from) { \ 1379 result.to[i] = cvt(a0->from[i], &sat); \ 1380 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1381 } \ 1382 *r = result; \ 1383 if (dosat && sat) { \ 1384 set_vscr_sat(env); \ 1385 } \ 1386 } 1387 #define I(x, y) (x) 1388 VPK(shss, s16, s8, cvtshsb, 1) 1389 VPK(shus, s16, u8, cvtshub, 1) 1390 VPK(swss, s32, s16, cvtswsh, 1) 1391 VPK(swus, s32, u16, cvtswuh, 1) 1392 VPK(sdss, s64, s32, cvtsdsw, 1) 1393 VPK(sdus, s64, u32, cvtsduw, 1) 1394 VPK(uhus, u16, u8, cvtuhub, 1) 1395 VPK(uwus, u32, u16, cvtuwuh, 1) 1396 VPK(udus, u64, u32, cvtuduw, 1) 1397 VPK(uhum, u16, u8, I, 0) 1398 VPK(uwum, u32, u16, I, 0) 1399 VPK(udum, u64, u32, I, 0) 1400 #undef I 1401 #undef VPK 1402 #undef PKBIG 1403 1404 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1405 { 1406 int i; 1407 1408 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1409 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1410 } 1411 } 1412 1413 #define VRFI(suffix, rounding) \ 1414 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1415 ppc_avr_t *b) \ 1416 { \ 1417 int i; \ 1418 float_status s = env->vec_status; \ 1419 \ 1420 set_float_rounding_mode(rounding, &s); \ 1421 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1422 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1423 } \ 1424 } 1425 VRFI(n, float_round_nearest_even) 1426 VRFI(m, float_round_down) 1427 VRFI(p, float_round_up) 1428 VRFI(z, float_round_to_zero) 1429 #undef VRFI 1430 1431 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1432 { 1433 int i; 1434 1435 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1436 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1437 1438 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1439 } 1440 } 1441 1442 #define VRLMI(name, size, element, insert) \ 1443 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1444 { \ 1445 int i; \ 1446 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1447 uint##size##_t src1 = a->element[i]; \ 1448 uint##size##_t src2 = b->element[i]; \ 1449 uint##size##_t src3 = r->element[i]; \ 1450 uint##size##_t begin, end, shift, mask, rot_val; \ 1451 \ 1452 shift = extract##size(src2, 0, 6); \ 1453 end = extract##size(src2, 8, 6); \ 1454 begin = extract##size(src2, 16, 6); \ 1455 rot_val = rol##size(src1, shift); \ 1456 mask = mask_u##size(begin, end); \ 1457 if (insert) { \ 1458 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1459 } else { \ 1460 r->element[i] = (rot_val & mask); \ 1461 } \ 1462 } \ 1463 } 1464 1465 VRLMI(vrldmi, 64, u64, 1); 1466 VRLMI(vrlwmi, 32, u32, 1); 1467 VRLMI(vrldnm, 64, u64, 0); 1468 VRLMI(vrlwnm, 32, u32, 0); 1469 1470 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1471 ppc_avr_t *c) 1472 { 1473 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]); 1474 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]); 1475 } 1476 1477 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1478 { 1479 int i; 1480 1481 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1482 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status); 1483 } 1484 } 1485 1486 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1487 { 1488 int i; 1489 1490 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1491 r->f32[i] = float32_log2(b->f32[i], &env->vec_status); 1492 } 1493 } 1494 1495 #define VEXTU_X_DO(name, size, left) \ 1496 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1497 { \ 1498 int index = (a & 0xf) * 8; \ 1499 if (left) { \ 1500 index = 128 - index - size; \ 1501 } \ 1502 return int128_getlo(int128_rshift(b->s128, index)) & \ 1503 MAKE_64BIT_MASK(0, size); \ 1504 } 1505 VEXTU_X_DO(vextublx, 8, 1) 1506 VEXTU_X_DO(vextuhlx, 16, 1) 1507 VEXTU_X_DO(vextuwlx, 32, 1) 1508 VEXTU_X_DO(vextubrx, 8, 0) 1509 VEXTU_X_DO(vextuhrx, 16, 0) 1510 VEXTU_X_DO(vextuwrx, 32, 0) 1511 #undef VEXTU_X_DO 1512 1513 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1514 { 1515 int i; 1516 unsigned int shift, bytes, size; 1517 1518 size = ARRAY_SIZE(r->u8); 1519 for (i = 0; i < size; i++) { 1520 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1521 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */ 1522 (((i + 1) < size) ? a->VsrB(i + 1) : 0); 1523 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */ 1524 } 1525 } 1526 1527 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1528 { 1529 int i; 1530 unsigned int shift, bytes; 1531 1532 /* 1533 * Use reverse order, as destination and source register can be 1534 * same. Its being modified in place saving temporary, reverse 1535 * order will guarantee that computed result is not fed back. 1536 */ 1537 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1538 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1539 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i); 1540 /* extract adjacent bytes */ 1541 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */ 1542 } 1543 } 1544 1545 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1546 { 1547 int sh = shift & 0xf; 1548 int i; 1549 ppc_avr_t result; 1550 1551 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1552 int index = sh + i; 1553 if (index > 0xf) { 1554 result.VsrB(i) = b->VsrB(index - 0x10); 1555 } else { 1556 result.VsrB(i) = a->VsrB(index); 1557 } 1558 } 1559 *r = result; 1560 } 1561 1562 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1563 { 1564 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1565 1566 #if defined(HOST_WORDS_BIGENDIAN) 1567 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1568 memset(&r->u8[16 - sh], 0, sh); 1569 #else 1570 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1571 memset(&r->u8[0], 0, sh); 1572 #endif 1573 } 1574 1575 #if defined(HOST_WORDS_BIGENDIAN) 1576 #define VINSERT(suffix, element) \ 1577 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1578 { \ 1579 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \ 1580 sizeof(r->element[0])); \ 1581 } 1582 #else 1583 #define VINSERT(suffix, element) \ 1584 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1585 { \ 1586 uint32_t d = (16 - index) - sizeof(r->element[0]); \ 1587 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \ 1588 } 1589 #endif 1590 VINSERT(b, u8) 1591 VINSERT(h, u16) 1592 VINSERT(w, u32) 1593 VINSERT(d, u64) 1594 #undef VINSERT 1595 #if defined(HOST_WORDS_BIGENDIAN) 1596 #define VEXTRACT(suffix, element) \ 1597 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1598 { \ 1599 uint32_t es = sizeof(r->element[0]); \ 1600 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1601 memset(&r->u8[8], 0, 8); \ 1602 memset(&r->u8[0], 0, 8 - es); \ 1603 } 1604 #else 1605 #define VEXTRACT(suffix, element) \ 1606 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1607 { \ 1608 uint32_t es = sizeof(r->element[0]); \ 1609 uint32_t s = (16 - index) - es; \ 1610 memmove(&r->u8[8], &b->u8[s], es); \ 1611 memset(&r->u8[0], 0, 8); \ 1612 memset(&r->u8[8 + es], 0, 8 - es); \ 1613 } 1614 #endif 1615 VEXTRACT(ub, u8) 1616 VEXTRACT(uh, u16) 1617 VEXTRACT(uw, u32) 1618 VEXTRACT(d, u64) 1619 #undef VEXTRACT 1620 1621 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt, 1622 ppc_vsr_t *xb, uint32_t index) 1623 { 1624 ppc_vsr_t t = { }; 1625 size_t es = sizeof(uint32_t); 1626 uint32_t ext_index; 1627 int i; 1628 1629 ext_index = index; 1630 for (i = 0; i < es; i++, ext_index++) { 1631 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16); 1632 } 1633 1634 *xt = t; 1635 } 1636 1637 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt, 1638 ppc_vsr_t *xb, uint32_t index) 1639 { 1640 ppc_vsr_t t = *xt; 1641 size_t es = sizeof(uint32_t); 1642 int ins_index, i = 0; 1643 1644 ins_index = index; 1645 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1646 t.VsrB(ins_index) = xb->VsrB(8 - es + i); 1647 } 1648 1649 *xt = t; 1650 } 1651 1652 #define VEXT_SIGNED(name, element, cast) \ 1653 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1654 { \ 1655 int i; \ 1656 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1657 r->element[i] = (cast)b->element[i]; \ 1658 } \ 1659 } 1660 VEXT_SIGNED(vextsb2w, s32, int8_t) 1661 VEXT_SIGNED(vextsb2d, s64, int8_t) 1662 VEXT_SIGNED(vextsh2w, s32, int16_t) 1663 VEXT_SIGNED(vextsh2d, s64, int16_t) 1664 VEXT_SIGNED(vextsw2d, s64, int32_t) 1665 #undef VEXT_SIGNED 1666 1667 #define VNEG(name, element) \ 1668 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1669 { \ 1670 int i; \ 1671 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1672 r->element[i] = -b->element[i]; \ 1673 } \ 1674 } 1675 VNEG(vnegw, s32) 1676 VNEG(vnegd, s64) 1677 #undef VNEG 1678 1679 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1680 { 1681 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1682 1683 #if defined(HOST_WORDS_BIGENDIAN) 1684 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1685 memset(&r->u8[0], 0, sh); 1686 #else 1687 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1688 memset(&r->u8[16 - sh], 0, sh); 1689 #endif 1690 } 1691 1692 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1693 { 1694 int i; 1695 1696 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1697 r->u32[i] = a->u32[i] >= b->u32[i]; 1698 } 1699 } 1700 1701 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1702 { 1703 int64_t t; 1704 int i, upper; 1705 ppc_avr_t result; 1706 int sat = 0; 1707 1708 upper = ARRAY_SIZE(r->s32) - 1; 1709 t = (int64_t)b->VsrSW(upper); 1710 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1711 t += a->VsrSW(i); 1712 result.VsrSW(i) = 0; 1713 } 1714 result.VsrSW(upper) = cvtsdsw(t, &sat); 1715 *r = result; 1716 1717 if (sat) { 1718 set_vscr_sat(env); 1719 } 1720 } 1721 1722 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1723 { 1724 int i, j, upper; 1725 ppc_avr_t result; 1726 int sat = 0; 1727 1728 upper = 1; 1729 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1730 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 1731 1732 result.VsrD(i) = 0; 1733 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 1734 t += a->VsrSW(2 * i + j); 1735 } 1736 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 1737 } 1738 1739 *r = result; 1740 if (sat) { 1741 set_vscr_sat(env); 1742 } 1743 } 1744 1745 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1746 { 1747 int i, j; 1748 int sat = 0; 1749 1750 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1751 int64_t t = (int64_t)b->s32[i]; 1752 1753 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 1754 t += a->s8[4 * i + j]; 1755 } 1756 r->s32[i] = cvtsdsw(t, &sat); 1757 } 1758 1759 if (sat) { 1760 set_vscr_sat(env); 1761 } 1762 } 1763 1764 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1765 { 1766 int sat = 0; 1767 int i; 1768 1769 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1770 int64_t t = (int64_t)b->s32[i]; 1771 1772 t += a->s16[2 * i] + a->s16[2 * i + 1]; 1773 r->s32[i] = cvtsdsw(t, &sat); 1774 } 1775 1776 if (sat) { 1777 set_vscr_sat(env); 1778 } 1779 } 1780 1781 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1782 { 1783 int i, j; 1784 int sat = 0; 1785 1786 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1787 uint64_t t = (uint64_t)b->u32[i]; 1788 1789 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 1790 t += a->u8[4 * i + j]; 1791 } 1792 r->u32[i] = cvtuduw(t, &sat); 1793 } 1794 1795 if (sat) { 1796 set_vscr_sat(env); 1797 } 1798 } 1799 1800 #if defined(HOST_WORDS_BIGENDIAN) 1801 #define UPKHI 1 1802 #define UPKLO 0 1803 #else 1804 #define UPKHI 0 1805 #define UPKLO 1 1806 #endif 1807 #define VUPKPX(suffix, hi) \ 1808 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1809 { \ 1810 int i; \ 1811 ppc_avr_t result; \ 1812 \ 1813 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 1814 uint16_t e = b->u16[hi ? i : i + 4]; \ 1815 uint8_t a = (e >> 15) ? 0xff : 0; \ 1816 uint8_t r = (e >> 10) & 0x1f; \ 1817 uint8_t g = (e >> 5) & 0x1f; \ 1818 uint8_t b = e & 0x1f; \ 1819 \ 1820 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 1821 } \ 1822 *r = result; \ 1823 } 1824 VUPKPX(lpx, UPKLO) 1825 VUPKPX(hpx, UPKHI) 1826 #undef VUPKPX 1827 1828 #define VUPK(suffix, unpacked, packee, hi) \ 1829 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1830 { \ 1831 int i; \ 1832 ppc_avr_t result; \ 1833 \ 1834 if (hi) { \ 1835 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 1836 result.unpacked[i] = b->packee[i]; \ 1837 } \ 1838 } else { \ 1839 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 1840 i++) { \ 1841 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 1842 } \ 1843 } \ 1844 *r = result; \ 1845 } 1846 VUPK(hsb, s16, s8, UPKHI) 1847 VUPK(hsh, s32, s16, UPKHI) 1848 VUPK(hsw, s64, s32, UPKHI) 1849 VUPK(lsb, s16, s8, UPKLO) 1850 VUPK(lsh, s32, s16, UPKLO) 1851 VUPK(lsw, s64, s32, UPKLO) 1852 #undef VUPK 1853 #undef UPKHI 1854 #undef UPKLO 1855 1856 #define VGENERIC_DO(name, element) \ 1857 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 1858 { \ 1859 int i; \ 1860 \ 1861 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1862 r->element[i] = name(b->element[i]); \ 1863 } \ 1864 } 1865 1866 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 1867 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 1868 1869 VGENERIC_DO(clzb, u8) 1870 VGENERIC_DO(clzh, u16) 1871 1872 #undef clzb 1873 #undef clzh 1874 1875 #define ctzb(v) ((v) ? ctz32(v) : 8) 1876 #define ctzh(v) ((v) ? ctz32(v) : 16) 1877 #define ctzw(v) ctz32((v)) 1878 #define ctzd(v) ctz64((v)) 1879 1880 VGENERIC_DO(ctzb, u8) 1881 VGENERIC_DO(ctzh, u16) 1882 VGENERIC_DO(ctzw, u32) 1883 VGENERIC_DO(ctzd, u64) 1884 1885 #undef ctzb 1886 #undef ctzh 1887 #undef ctzw 1888 #undef ctzd 1889 1890 #define popcntb(v) ctpop8(v) 1891 #define popcnth(v) ctpop16(v) 1892 #define popcntw(v) ctpop32(v) 1893 #define popcntd(v) ctpop64(v) 1894 1895 VGENERIC_DO(popcntb, u8) 1896 VGENERIC_DO(popcnth, u16) 1897 VGENERIC_DO(popcntw, u32) 1898 VGENERIC_DO(popcntd, u64) 1899 1900 #undef popcntb 1901 #undef popcnth 1902 #undef popcntw 1903 #undef popcntd 1904 1905 #undef VGENERIC_DO 1906 1907 #if defined(HOST_WORDS_BIGENDIAN) 1908 #define QW_ONE { .u64 = { 0, 1 } } 1909 #else 1910 #define QW_ONE { .u64 = { 1, 0 } } 1911 #endif 1912 1913 #ifndef CONFIG_INT128 1914 1915 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) 1916 { 1917 t->u64[0] = ~a.u64[0]; 1918 t->u64[1] = ~a.u64[1]; 1919 } 1920 1921 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) 1922 { 1923 if (a.VsrD(0) < b.VsrD(0)) { 1924 return -1; 1925 } else if (a.VsrD(0) > b.VsrD(0)) { 1926 return 1; 1927 } else if (a.VsrD(1) < b.VsrD(1)) { 1928 return -1; 1929 } else if (a.VsrD(1) > b.VsrD(1)) { 1930 return 1; 1931 } else { 1932 return 0; 1933 } 1934 } 1935 1936 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 1937 { 1938 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 1939 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 1940 (~a.VsrD(1) < b.VsrD(1)); 1941 } 1942 1943 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 1944 { 1945 ppc_avr_t not_a; 1946 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 1947 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 1948 (~a.VsrD(1) < b.VsrD(1)); 1949 avr_qw_not(¬_a, a); 1950 return avr_qw_cmpu(not_a, b) < 0; 1951 } 1952 1953 #endif 1954 1955 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1956 { 1957 #ifdef CONFIG_INT128 1958 r->u128 = a->u128 + b->u128; 1959 #else 1960 avr_qw_add(r, *a, *b); 1961 #endif 1962 } 1963 1964 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1965 { 1966 #ifdef CONFIG_INT128 1967 r->u128 = a->u128 + b->u128 + (c->u128 & 1); 1968 #else 1969 1970 if (c->VsrD(1) & 1) { 1971 ppc_avr_t tmp; 1972 1973 tmp.VsrD(0) = 0; 1974 tmp.VsrD(1) = c->VsrD(1) & 1; 1975 avr_qw_add(&tmp, *a, tmp); 1976 avr_qw_add(r, tmp, *b); 1977 } else { 1978 avr_qw_add(r, *a, *b); 1979 } 1980 #endif 1981 } 1982 1983 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1984 { 1985 #ifdef CONFIG_INT128 1986 r->u128 = (~a->u128 < b->u128); 1987 #else 1988 ppc_avr_t not_a; 1989 1990 avr_qw_not(¬_a, *a); 1991 1992 r->VsrD(0) = 0; 1993 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0); 1994 #endif 1995 } 1996 1997 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1998 { 1999 #ifdef CONFIG_INT128 2000 int carry_out = (~a->u128 < b->u128); 2001 if (!carry_out && (c->u128 & 1)) { 2002 carry_out = ((a->u128 + b->u128 + 1) == 0) && 2003 ((a->u128 != 0) || (b->u128 != 0)); 2004 } 2005 r->u128 = carry_out; 2006 #else 2007 2008 int carry_in = c->VsrD(1) & 1; 2009 int carry_out = 0; 2010 ppc_avr_t tmp; 2011 2012 carry_out = avr_qw_addc(&tmp, *a, *b); 2013 2014 if (!carry_out && carry_in) { 2015 ppc_avr_t one = QW_ONE; 2016 carry_out = avr_qw_addc(&tmp, tmp, one); 2017 } 2018 r->VsrD(0) = 0; 2019 r->VsrD(1) = carry_out; 2020 #endif 2021 } 2022 2023 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2024 { 2025 #ifdef CONFIG_INT128 2026 r->u128 = a->u128 - b->u128; 2027 #else 2028 ppc_avr_t tmp; 2029 ppc_avr_t one = QW_ONE; 2030 2031 avr_qw_not(&tmp, *b); 2032 avr_qw_add(&tmp, *a, tmp); 2033 avr_qw_add(r, tmp, one); 2034 #endif 2035 } 2036 2037 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2038 { 2039 #ifdef CONFIG_INT128 2040 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); 2041 #else 2042 ppc_avr_t tmp, sum; 2043 2044 avr_qw_not(&tmp, *b); 2045 avr_qw_add(&sum, *a, tmp); 2046 2047 tmp.VsrD(0) = 0; 2048 tmp.VsrD(1) = c->VsrD(1) & 1; 2049 avr_qw_add(r, sum, tmp); 2050 #endif 2051 } 2052 2053 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2054 { 2055 #ifdef CONFIG_INT128 2056 r->u128 = (~a->u128 < ~b->u128) || 2057 (a->u128 + ~b->u128 == (__uint128_t)-1); 2058 #else 2059 int carry = (avr_qw_cmpu(*a, *b) > 0); 2060 if (!carry) { 2061 ppc_avr_t tmp; 2062 avr_qw_not(&tmp, *b); 2063 avr_qw_add(&tmp, *a, tmp); 2064 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull)); 2065 } 2066 r->VsrD(0) = 0; 2067 r->VsrD(1) = carry; 2068 #endif 2069 } 2070 2071 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2072 { 2073 #ifdef CONFIG_INT128 2074 r->u128 = 2075 (~a->u128 < ~b->u128) || 2076 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); 2077 #else 2078 int carry_in = c->VsrD(1) & 1; 2079 int carry_out = (avr_qw_cmpu(*a, *b) > 0); 2080 if (!carry_out && carry_in) { 2081 ppc_avr_t tmp; 2082 avr_qw_not(&tmp, *b); 2083 avr_qw_add(&tmp, *a, tmp); 2084 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull)); 2085 } 2086 2087 r->VsrD(0) = 0; 2088 r->VsrD(1) = carry_out; 2089 #endif 2090 } 2091 2092 #define BCD_PLUS_PREF_1 0xC 2093 #define BCD_PLUS_PREF_2 0xF 2094 #define BCD_PLUS_ALT_1 0xA 2095 #define BCD_NEG_PREF 0xD 2096 #define BCD_NEG_ALT 0xB 2097 #define BCD_PLUS_ALT_2 0xE 2098 #define NATIONAL_PLUS 0x2B 2099 #define NATIONAL_NEG 0x2D 2100 2101 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2102 2103 static int bcd_get_sgn(ppc_avr_t *bcd) 2104 { 2105 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) { 2106 case BCD_PLUS_PREF_1: 2107 case BCD_PLUS_PREF_2: 2108 case BCD_PLUS_ALT_1: 2109 case BCD_PLUS_ALT_2: 2110 { 2111 return 1; 2112 } 2113 2114 case BCD_NEG_PREF: 2115 case BCD_NEG_ALT: 2116 { 2117 return -1; 2118 } 2119 2120 default: 2121 { 2122 return 0; 2123 } 2124 } 2125 } 2126 2127 static int bcd_preferred_sgn(int sgn, int ps) 2128 { 2129 if (sgn >= 0) { 2130 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2131 } else { 2132 return BCD_NEG_PREF; 2133 } 2134 } 2135 2136 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2137 { 2138 uint8_t result; 2139 if (n & 1) { 2140 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4; 2141 } else { 2142 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF; 2143 } 2144 2145 if (unlikely(result > 9)) { 2146 *invalid = true; 2147 } 2148 return result; 2149 } 2150 2151 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2152 { 2153 if (n & 1) { 2154 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F; 2155 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4); 2156 } else { 2157 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0; 2158 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit; 2159 } 2160 } 2161 2162 static bool bcd_is_valid(ppc_avr_t *bcd) 2163 { 2164 int i; 2165 int invalid = 0; 2166 2167 if (bcd_get_sgn(bcd) == 0) { 2168 return false; 2169 } 2170 2171 for (i = 1; i < 32; i++) { 2172 bcd_get_digit(bcd, i, &invalid); 2173 if (unlikely(invalid)) { 2174 return false; 2175 } 2176 } 2177 return true; 2178 } 2179 2180 static int bcd_cmp_zero(ppc_avr_t *bcd) 2181 { 2182 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) { 2183 return CRF_EQ; 2184 } else { 2185 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT; 2186 } 2187 } 2188 2189 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2190 { 2191 return reg->VsrH(7 - n); 2192 } 2193 2194 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2195 { 2196 reg->VsrH(7 - n) = val; 2197 } 2198 2199 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2200 { 2201 int i; 2202 int invalid = 0; 2203 for (i = 31; i > 0; i--) { 2204 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2205 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2206 if (unlikely(invalid)) { 2207 return 0; /* doesn't matter */ 2208 } else if (dig_a > dig_b) { 2209 return 1; 2210 } else if (dig_a < dig_b) { 2211 return -1; 2212 } 2213 } 2214 2215 return 0; 2216 } 2217 2218 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2219 int *overflow) 2220 { 2221 int carry = 0; 2222 int i; 2223 int is_zero = 1; 2224 2225 for (i = 1; i <= 31; i++) { 2226 uint8_t digit = bcd_get_digit(a, i, invalid) + 2227 bcd_get_digit(b, i, invalid) + carry; 2228 is_zero &= (digit == 0); 2229 if (digit > 9) { 2230 carry = 1; 2231 digit -= 10; 2232 } else { 2233 carry = 0; 2234 } 2235 2236 bcd_put_digit(t, digit, i); 2237 } 2238 2239 *overflow = carry; 2240 return is_zero; 2241 } 2242 2243 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2244 int *overflow) 2245 { 2246 int carry = 0; 2247 int i; 2248 2249 for (i = 1; i <= 31; i++) { 2250 uint8_t digit = bcd_get_digit(a, i, invalid) - 2251 bcd_get_digit(b, i, invalid) + carry; 2252 if (digit & 0x80) { 2253 carry = -1; 2254 digit += 10; 2255 } else { 2256 carry = 0; 2257 } 2258 2259 bcd_put_digit(t, digit, i); 2260 } 2261 2262 *overflow = carry; 2263 } 2264 2265 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2266 { 2267 2268 int sgna = bcd_get_sgn(a); 2269 int sgnb = bcd_get_sgn(b); 2270 int invalid = (sgna == 0) || (sgnb == 0); 2271 int overflow = 0; 2272 int zero = 0; 2273 uint32_t cr = 0; 2274 ppc_avr_t result = { .u64 = { 0, 0 } }; 2275 2276 if (!invalid) { 2277 if (sgna == sgnb) { 2278 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2279 zero = bcd_add_mag(&result, a, b, &invalid, &overflow); 2280 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2281 } else { 2282 int magnitude = bcd_cmp_mag(a, b); 2283 if (magnitude > 0) { 2284 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2285 bcd_sub_mag(&result, a, b, &invalid, &overflow); 2286 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2287 } else if (magnitude < 0) { 2288 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps); 2289 bcd_sub_mag(&result, b, a, &invalid, &overflow); 2290 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2291 } else { 2292 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps); 2293 cr = CRF_EQ; 2294 } 2295 } 2296 } 2297 2298 if (unlikely(invalid)) { 2299 result.VsrD(0) = result.VsrD(1) = -1; 2300 cr = CRF_SO; 2301 } else if (overflow) { 2302 cr |= CRF_SO; 2303 } else if (zero) { 2304 cr |= CRF_EQ; 2305 } 2306 2307 *r = result; 2308 2309 return cr; 2310 } 2311 2312 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2313 { 2314 ppc_avr_t bcopy = *b; 2315 int sgnb = bcd_get_sgn(b); 2316 if (sgnb < 0) { 2317 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2318 } else if (sgnb > 0) { 2319 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2320 } 2321 /* else invalid ... defer to bcdadd code for proper handling */ 2322 2323 return helper_bcdadd(r, a, &bcopy, ps); 2324 } 2325 2326 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2327 { 2328 int i; 2329 int cr = 0; 2330 uint16_t national = 0; 2331 uint16_t sgnb = get_national_digit(b, 0); 2332 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2333 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2334 2335 for (i = 1; i < 8; i++) { 2336 national = get_national_digit(b, i); 2337 if (unlikely(national < 0x30 || national > 0x39)) { 2338 invalid = 1; 2339 break; 2340 } 2341 2342 bcd_put_digit(&ret, national & 0xf, i); 2343 } 2344 2345 if (sgnb == NATIONAL_PLUS) { 2346 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2347 } else { 2348 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2349 } 2350 2351 cr = bcd_cmp_zero(&ret); 2352 2353 if (unlikely(invalid)) { 2354 cr = CRF_SO; 2355 } 2356 2357 *r = ret; 2358 2359 return cr; 2360 } 2361 2362 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2363 { 2364 int i; 2365 int cr = 0; 2366 int sgnb = bcd_get_sgn(b); 2367 int invalid = (sgnb == 0); 2368 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2369 2370 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0); 2371 2372 for (i = 1; i < 8; i++) { 2373 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2374 2375 if (unlikely(invalid)) { 2376 break; 2377 } 2378 } 2379 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2380 2381 cr = bcd_cmp_zero(b); 2382 2383 if (ox_flag) { 2384 cr |= CRF_SO; 2385 } 2386 2387 if (unlikely(invalid)) { 2388 cr = CRF_SO; 2389 } 2390 2391 *r = ret; 2392 2393 return cr; 2394 } 2395 2396 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2397 { 2398 int i; 2399 int cr = 0; 2400 int invalid = 0; 2401 int zone_digit = 0; 2402 int zone_lead = ps ? 0xF : 0x3; 2403 int digit = 0; 2404 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2405 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4; 2406 2407 if (unlikely((sgnb < 0xA) && ps)) { 2408 invalid = 1; 2409 } 2410 2411 for (i = 0; i < 16; i++) { 2412 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead; 2413 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF; 2414 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2415 invalid = 1; 2416 break; 2417 } 2418 2419 bcd_put_digit(&ret, digit, i + 1); 2420 } 2421 2422 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2423 (!ps && (sgnb & 0x4))) { 2424 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2425 } else { 2426 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2427 } 2428 2429 cr = bcd_cmp_zero(&ret); 2430 2431 if (unlikely(invalid)) { 2432 cr = CRF_SO; 2433 } 2434 2435 *r = ret; 2436 2437 return cr; 2438 } 2439 2440 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2441 { 2442 int i; 2443 int cr = 0; 2444 uint8_t digit = 0; 2445 int sgnb = bcd_get_sgn(b); 2446 int zone_lead = (ps) ? 0xF0 : 0x30; 2447 int invalid = (sgnb == 0); 2448 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2449 2450 int ox_flag = ((b->VsrD(0) >> 4) != 0); 2451 2452 for (i = 0; i < 16; i++) { 2453 digit = bcd_get_digit(b, i + 1, &invalid); 2454 2455 if (unlikely(invalid)) { 2456 break; 2457 } 2458 2459 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit; 2460 } 2461 2462 if (ps) { 2463 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2464 } else { 2465 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1); 2466 } 2467 2468 cr = bcd_cmp_zero(b); 2469 2470 if (ox_flag) { 2471 cr |= CRF_SO; 2472 } 2473 2474 if (unlikely(invalid)) { 2475 cr = CRF_SO; 2476 } 2477 2478 *r = ret; 2479 2480 return cr; 2481 } 2482 2483 /** 2484 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs 2485 * 2486 * Returns: 2487 * > 0 if ahi|alo > bhi|blo, 2488 * 0 if ahi|alo == bhi|blo, 2489 * < 0 if ahi|alo < bhi|blo 2490 */ 2491 static inline int ucmp128(uint64_t alo, uint64_t ahi, 2492 uint64_t blo, uint64_t bhi) 2493 { 2494 return (ahi == bhi) ? 2495 (alo > blo ? 1 : (alo == blo ? 0 : -1)) : 2496 (ahi > bhi ? 1 : -1); 2497 } 2498 2499 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2500 { 2501 int i; 2502 int cr; 2503 uint64_t lo_value; 2504 uint64_t hi_value; 2505 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2506 2507 if (b->VsrSD(0) < 0) { 2508 lo_value = -b->VsrSD(1); 2509 hi_value = ~b->VsrD(0) + !lo_value; 2510 bcd_put_digit(&ret, 0xD, 0); 2511 2512 cr = CRF_LT; 2513 } else { 2514 lo_value = b->VsrD(1); 2515 hi_value = b->VsrD(0); 2516 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); 2517 2518 if (hi_value == 0 && lo_value == 0) { 2519 cr = CRF_EQ; 2520 } else { 2521 cr = CRF_GT; 2522 } 2523 } 2524 2525 /* 2526 * Check src limits: abs(src) <= 10^31 - 1 2527 * 2528 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff 2529 */ 2530 if (ucmp128(lo_value, hi_value, 2531 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) { 2532 cr |= CRF_SO; 2533 2534 /* 2535 * According to the ISA, if src wouldn't fit in the destination 2536 * register, the result is undefined. 2537 * In that case, we leave r unchanged. 2538 */ 2539 } else { 2540 divu128(&lo_value, &hi_value, 1000000000000000ULL); 2541 2542 for (i = 1; i < 16; hi_value /= 10, i++) { 2543 bcd_put_digit(&ret, hi_value % 10, i); 2544 } 2545 2546 for (; i < 32; lo_value /= 10, i++) { 2547 bcd_put_digit(&ret, lo_value % 10, i); 2548 } 2549 2550 *r = ret; 2551 } 2552 2553 return cr; 2554 } 2555 2556 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2557 { 2558 uint8_t i; 2559 int cr; 2560 uint64_t carry; 2561 uint64_t unused; 2562 uint64_t lo_value; 2563 uint64_t hi_value = 0; 2564 int sgnb = bcd_get_sgn(b); 2565 int invalid = (sgnb == 0); 2566 2567 lo_value = bcd_get_digit(b, 31, &invalid); 2568 for (i = 30; i > 0; i--) { 2569 mulu64(&lo_value, &carry, lo_value, 10ULL); 2570 mulu64(&hi_value, &unused, hi_value, 10ULL); 2571 lo_value += bcd_get_digit(b, i, &invalid); 2572 hi_value += carry; 2573 2574 if (unlikely(invalid)) { 2575 break; 2576 } 2577 } 2578 2579 if (sgnb == -1) { 2580 r->VsrSD(1) = -lo_value; 2581 r->VsrSD(0) = ~hi_value + !r->VsrSD(1); 2582 } else { 2583 r->VsrSD(1) = lo_value; 2584 r->VsrSD(0) = hi_value; 2585 } 2586 2587 cr = bcd_cmp_zero(b); 2588 2589 if (unlikely(invalid)) { 2590 cr = CRF_SO; 2591 } 2592 2593 return cr; 2594 } 2595 2596 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2597 { 2598 int i; 2599 int invalid = 0; 2600 2601 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 2602 return CRF_SO; 2603 } 2604 2605 *r = *a; 2606 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0); 2607 2608 for (i = 1; i < 32; i++) { 2609 bcd_get_digit(a, i, &invalid); 2610 bcd_get_digit(b, i, &invalid); 2611 if (unlikely(invalid)) { 2612 return CRF_SO; 2613 } 2614 } 2615 2616 return bcd_cmp_zero(r); 2617 } 2618 2619 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2620 { 2621 int sgnb = bcd_get_sgn(b); 2622 2623 *r = *b; 2624 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 2625 2626 if (bcd_is_valid(b) == false) { 2627 return CRF_SO; 2628 } 2629 2630 return bcd_cmp_zero(r); 2631 } 2632 2633 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2634 { 2635 int cr; 2636 int i = a->VsrSB(7); 2637 bool ox_flag = false; 2638 int sgnb = bcd_get_sgn(b); 2639 ppc_avr_t ret = *b; 2640 ret.VsrD(1) &= ~0xf; 2641 2642 if (bcd_is_valid(b) == false) { 2643 return CRF_SO; 2644 } 2645 2646 if (unlikely(i > 31)) { 2647 i = 31; 2648 } else if (unlikely(i < -31)) { 2649 i = -31; 2650 } 2651 2652 if (i > 0) { 2653 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2654 } else { 2655 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2656 } 2657 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2658 2659 *r = ret; 2660 2661 cr = bcd_cmp_zero(r); 2662 if (ox_flag) { 2663 cr |= CRF_SO; 2664 } 2665 2666 return cr; 2667 } 2668 2669 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2670 { 2671 int cr; 2672 int i; 2673 int invalid = 0; 2674 bool ox_flag = false; 2675 ppc_avr_t ret = *b; 2676 2677 for (i = 0; i < 32; i++) { 2678 bcd_get_digit(b, i, &invalid); 2679 2680 if (unlikely(invalid)) { 2681 return CRF_SO; 2682 } 2683 } 2684 2685 i = a->VsrSB(7); 2686 if (i >= 32) { 2687 ox_flag = true; 2688 ret.VsrD(1) = ret.VsrD(0) = 0; 2689 } else if (i <= -32) { 2690 ret.VsrD(1) = ret.VsrD(0) = 0; 2691 } else if (i > 0) { 2692 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2693 } else { 2694 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2695 } 2696 *r = ret; 2697 2698 cr = bcd_cmp_zero(r); 2699 if (ox_flag) { 2700 cr |= CRF_SO; 2701 } 2702 2703 return cr; 2704 } 2705 2706 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2707 { 2708 int cr; 2709 int unused = 0; 2710 int invalid = 0; 2711 bool ox_flag = false; 2712 int sgnb = bcd_get_sgn(b); 2713 ppc_avr_t ret = *b; 2714 ret.VsrD(1) &= ~0xf; 2715 2716 int i = a->VsrSB(7); 2717 ppc_avr_t bcd_one; 2718 2719 bcd_one.VsrD(0) = 0; 2720 bcd_one.VsrD(1) = 0x10; 2721 2722 if (bcd_is_valid(b) == false) { 2723 return CRF_SO; 2724 } 2725 2726 if (unlikely(i > 31)) { 2727 i = 31; 2728 } else if (unlikely(i < -31)) { 2729 i = -31; 2730 } 2731 2732 if (i > 0) { 2733 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2734 } else { 2735 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2736 2737 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 2738 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 2739 } 2740 } 2741 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2742 2743 cr = bcd_cmp_zero(&ret); 2744 if (ox_flag) { 2745 cr |= CRF_SO; 2746 } 2747 *r = ret; 2748 2749 return cr; 2750 } 2751 2752 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2753 { 2754 uint64_t mask; 2755 uint32_t ox_flag = 0; 2756 int i = a->VsrSH(3) + 1; 2757 ppc_avr_t ret = *b; 2758 2759 if (bcd_is_valid(b) == false) { 2760 return CRF_SO; 2761 } 2762 2763 if (i > 16 && i < 32) { 2764 mask = (uint64_t)-1 >> (128 - i * 4); 2765 if (ret.VsrD(0) & ~mask) { 2766 ox_flag = CRF_SO; 2767 } 2768 2769 ret.VsrD(0) &= mask; 2770 } else if (i >= 0 && i <= 16) { 2771 mask = (uint64_t)-1 >> (64 - i * 4); 2772 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 2773 ox_flag = CRF_SO; 2774 } 2775 2776 ret.VsrD(1) &= mask; 2777 ret.VsrD(0) = 0; 2778 } 2779 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 2780 *r = ret; 2781 2782 return bcd_cmp_zero(&ret) | ox_flag; 2783 } 2784 2785 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2786 { 2787 int i; 2788 uint64_t mask; 2789 uint32_t ox_flag = 0; 2790 int invalid = 0; 2791 ppc_avr_t ret = *b; 2792 2793 for (i = 0; i < 32; i++) { 2794 bcd_get_digit(b, i, &invalid); 2795 2796 if (unlikely(invalid)) { 2797 return CRF_SO; 2798 } 2799 } 2800 2801 i = a->VsrSH(3); 2802 if (i > 16 && i < 33) { 2803 mask = (uint64_t)-1 >> (128 - i * 4); 2804 if (ret.VsrD(0) & ~mask) { 2805 ox_flag = CRF_SO; 2806 } 2807 2808 ret.VsrD(0) &= mask; 2809 } else if (i > 0 && i <= 16) { 2810 mask = (uint64_t)-1 >> (64 - i * 4); 2811 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 2812 ox_flag = CRF_SO; 2813 } 2814 2815 ret.VsrD(1) &= mask; 2816 ret.VsrD(0) = 0; 2817 } else if (i == 0) { 2818 if (ret.VsrD(0) || ret.VsrD(1)) { 2819 ox_flag = CRF_SO; 2820 } 2821 ret.VsrD(0) = ret.VsrD(1) = 0; 2822 } 2823 2824 *r = ret; 2825 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) { 2826 return ox_flag | CRF_EQ; 2827 } 2828 2829 return ox_flag | CRF_GT; 2830 } 2831 2832 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 2833 { 2834 int i; 2835 VECTOR_FOR_INORDER_I(i, u8) { 2836 r->u8[i] = AES_sbox[a->u8[i]]; 2837 } 2838 } 2839 2840 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2841 { 2842 ppc_avr_t result; 2843 int i; 2844 2845 VECTOR_FOR_INORDER_I(i, u32) { 2846 result.VsrW(i) = b->VsrW(i) ^ 2847 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^ 2848 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^ 2849 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^ 2850 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]); 2851 } 2852 *r = result; 2853 } 2854 2855 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2856 { 2857 ppc_avr_t result; 2858 int i; 2859 2860 VECTOR_FOR_INORDER_I(i, u8) { 2861 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]); 2862 } 2863 *r = result; 2864 } 2865 2866 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2867 { 2868 /* This differs from what is written in ISA V2.07. The RTL is */ 2869 /* incorrect and will be fixed in V2.07B. */ 2870 int i; 2871 ppc_avr_t tmp; 2872 2873 VECTOR_FOR_INORDER_I(i, u8) { 2874 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])]; 2875 } 2876 2877 VECTOR_FOR_INORDER_I(i, u32) { 2878 r->VsrW(i) = 2879 AES_imc[tmp.VsrB(4 * i + 0)][0] ^ 2880 AES_imc[tmp.VsrB(4 * i + 1)][1] ^ 2881 AES_imc[tmp.VsrB(4 * i + 2)][2] ^ 2882 AES_imc[tmp.VsrB(4 * i + 3)][3]; 2883 } 2884 } 2885 2886 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2887 { 2888 ppc_avr_t result; 2889 int i; 2890 2891 VECTOR_FOR_INORDER_I(i, u8) { 2892 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]); 2893 } 2894 *r = result; 2895 } 2896 2897 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 2898 { 2899 int st = (st_six & 0x10) != 0; 2900 int six = st_six & 0xF; 2901 int i; 2902 2903 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2904 if (st == 0) { 2905 if ((six & (0x8 >> i)) == 0) { 2906 r->VsrW(i) = ror32(a->VsrW(i), 7) ^ 2907 ror32(a->VsrW(i), 18) ^ 2908 (a->VsrW(i) >> 3); 2909 } else { /* six.bit[i] == 1 */ 2910 r->VsrW(i) = ror32(a->VsrW(i), 17) ^ 2911 ror32(a->VsrW(i), 19) ^ 2912 (a->VsrW(i) >> 10); 2913 } 2914 } else { /* st == 1 */ 2915 if ((six & (0x8 >> i)) == 0) { 2916 r->VsrW(i) = ror32(a->VsrW(i), 2) ^ 2917 ror32(a->VsrW(i), 13) ^ 2918 ror32(a->VsrW(i), 22); 2919 } else { /* six.bit[i] == 1 */ 2920 r->VsrW(i) = ror32(a->VsrW(i), 6) ^ 2921 ror32(a->VsrW(i), 11) ^ 2922 ror32(a->VsrW(i), 25); 2923 } 2924 } 2925 } 2926 } 2927 2928 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 2929 { 2930 int st = (st_six & 0x10) != 0; 2931 int six = st_six & 0xF; 2932 int i; 2933 2934 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 2935 if (st == 0) { 2936 if ((six & (0x8 >> (2 * i))) == 0) { 2937 r->VsrD(i) = ror64(a->VsrD(i), 1) ^ 2938 ror64(a->VsrD(i), 8) ^ 2939 (a->VsrD(i) >> 7); 2940 } else { /* six.bit[2*i] == 1 */ 2941 r->VsrD(i) = ror64(a->VsrD(i), 19) ^ 2942 ror64(a->VsrD(i), 61) ^ 2943 (a->VsrD(i) >> 6); 2944 } 2945 } else { /* st == 1 */ 2946 if ((six & (0x8 >> (2 * i))) == 0) { 2947 r->VsrD(i) = ror64(a->VsrD(i), 28) ^ 2948 ror64(a->VsrD(i), 34) ^ 2949 ror64(a->VsrD(i), 39); 2950 } else { /* six.bit[2*i] == 1 */ 2951 r->VsrD(i) = ror64(a->VsrD(i), 14) ^ 2952 ror64(a->VsrD(i), 18) ^ 2953 ror64(a->VsrD(i), 41); 2954 } 2955 } 2956 } 2957 } 2958 2959 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2960 { 2961 ppc_avr_t result; 2962 int i; 2963 2964 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 2965 int indexA = c->VsrB(i) >> 4; 2966 int indexB = c->VsrB(i) & 0xF; 2967 2968 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB); 2969 } 2970 *r = result; 2971 } 2972 2973 #undef VECTOR_FOR_INORDER_I 2974 2975 /*****************************************************************************/ 2976 /* SPE extension helpers */ 2977 /* Use a table to make this quicker */ 2978 static const uint8_t hbrev[16] = { 2979 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE, 2980 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF, 2981 }; 2982 2983 static inline uint8_t byte_reverse(uint8_t val) 2984 { 2985 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4); 2986 } 2987 2988 static inline uint32_t word_reverse(uint32_t val) 2989 { 2990 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) | 2991 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24); 2992 } 2993 2994 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */ 2995 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2) 2996 { 2997 uint32_t a, b, d, mask; 2998 2999 mask = UINT32_MAX >> (32 - MASKBITS); 3000 a = arg1 & mask; 3001 b = arg2 & mask; 3002 d = word_reverse(1 + word_reverse(a | ~b)); 3003 return (arg1 & ~mask) | (d & b); 3004 } 3005 3006 uint32_t helper_cntlsw32(uint32_t val) 3007 { 3008 if (val & 0x80000000) { 3009 return clz32(~val); 3010 } else { 3011 return clz32(val); 3012 } 3013 } 3014 3015 uint32_t helper_cntlzw32(uint32_t val) 3016 { 3017 return clz32(val); 3018 } 3019 3020 /* 440 specific */ 3021 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 3022 target_ulong low, uint32_t update_Rc) 3023 { 3024 target_ulong mask; 3025 int i; 3026 3027 i = 1; 3028 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3029 if ((high & mask) == 0) { 3030 if (update_Rc) { 3031 env->crf[0] = 0x4; 3032 } 3033 goto done; 3034 } 3035 i++; 3036 } 3037 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3038 if ((low & mask) == 0) { 3039 if (update_Rc) { 3040 env->crf[0] = 0x8; 3041 } 3042 goto done; 3043 } 3044 i++; 3045 } 3046 i = 8; 3047 if (update_Rc) { 3048 env->crf[0] = 0x2; 3049 } 3050 done: 3051 env->xer = (env->xer & ~0x7F) | i; 3052 if (update_Rc) { 3053 env->crf[0] |= xer_so; 3054 } 3055 return i; 3056 } 3057