/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"
#include "tcg/tcg-gvec-desc.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    if (unlikely(rb == 0 || ra >= rb)) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divu128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    uint64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = 0;

    if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divs128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif

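/*
 * Illustrative note (not from the ISA text): divweu divides
 * (RA || 32 zero bits) by the low 32 bits of RB.  For example RA = 0x1,
 * RB = 0x10 gives 0x1_0000_0000 / 0x10 = 0x1000_0000 (no overflow),
 * while RA = 0x1, RB = 0x1 would yield 0x1_0000_0000, which does not
 * fit in 32 bits, so OV is set and the result is undefined.
 */
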
#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/*
 * subtract 1 from each byte, and with inverse, check if MSB is set at each
 * byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n) (haszero((x) ^ pattern(n)))

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

/*
 * Return a random number.
 */
uint64_t helper_darn32(void)
{
    Error *err = NULL;
    uint32_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_darn64(void)
{
    Error *err = NULL;
    uint64_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & PPC_BIT(index)) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca32 = env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca32 = env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

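/*
 * Worked example (illustrative, byte 0xB3 = 0b10110011, five bits set):
 * after the first step the byte holds 2-bit pair counts (0x62), after the
 * second step 4-bit nibble counts (0x32), and after the third step the
 * per-byte total 0x05.  The same steps run on all bytes in parallel.
 */
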
target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words.  */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}
#endif

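/*
 * Illustrative sketch of the centrifuge semantics (not from the ISA text):
 * with src bits "abcdefgh" and mask 0b01010101, the bits under mask ones
 * (b, d, f, h) are gathered, in order, at the low end and the bits under
 * mask zeros (a, c, e, g) at the high end, giving "acegbdfh".
 */
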
uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
{
    /*
     * Instead of processing the mask bit-by-bit from the most significant to
     * the least significant bit, as described in PowerISA, we'll handle it in
     * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
     * ctz or cto, we negate the mask at the end of the loop.
     */
    target_ulong m, left = 0, right = 0;
    unsigned int n, i = 64;
    bool bit = false; /* tracks if we are processing zeros or ones */

    if (mask == 0 || mask == -1) {
        return src;
    }

    /* Processes the mask in blocks, from LSB to MSB */
    while (i) {
        /* Find how many bits we should take */
        n = ctz64(mask);
        if (n > i) {
            n = i;
        }

        /*
         * Extracts 'n' trailing bits of src and put them on the leading 'n'
         * bits of 'right' or 'left', pushing down the previously extracted
         * values.
         */
        m = (1ll << n) - 1;
        if (bit) {
            right = ror64(right | (src & m), n);
        } else {
            left = ror64(left | (src & m), n);
        }

        /*
         * Discards the processed bits from 'src' and 'mask'. Note that we are
         * removing 'n' trailing zeros from 'mask', but the logical shift will
         * add 'n' leading zeros back, so the population count of 'mask' is
         * kept the same.
         */
        src >>= n;
        mask >>= n;
        i -= n;
        bit = !bit;
        mask = ~mask;
    }

    /*
     * At the end, right was ror'ed ctpop(mask) times. To put it back in
     * place, we'll shift it 64 - ctpop(mask) more times.
     */
    if (bit) {
        n = ctpop64(mask);
    } else {
        n = 64 - ctpop64(mask);
    }

    return left | (right >> n);
}

uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
{
    int i, o;
    uint64_t result = 0;

    if (mask == -1) {
        return src;
    }

    for (i = 0; mask != 0; i++) {
        o = ctz64(mask);
        mask &= mask - 1;
        result |= ((src >> i) & 1) << o;
    }

    return result;
}

uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
{
    int i, o;
    uint64_t result = 0;

    if (mask == -1) {
        return src;
    }

    for (o = 0; mask != 0; o++) {
        i = ctz64(mask);
        mask &= mask - 1;
        result |= ((src >> i) & 1) << o;
    }

    return result;
}

/*****************************************************************************/
/* Altivec extension helpers */
#if HOST_BIG_ENDIAN
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

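/*
 * Example of the generated converters (illustrative): cvtsdsw() clamps an
 * int64_t to the int32_t range, so cvtsdsw(0x100000000, &sat) returns
 * INT32_MAX and sets *sat, while in-range values pass through unchanged.
 */
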
void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
{
    ppc_store_vscr(env, vscr);
}

uint32_t helper_mfvscr(CPUPPCState *env)
{
    return ppc_get_vscr(env);
}

static inline void set_vscr_sat(CPUPPCState *env)
{
    /* The choice of non-zero value is arbitrary.  */
    env->vscr_sat.u32[0] = 1;
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->VsrD(1) = res & 1;
    r->VsrD(0) = 0;
}

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status);   \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
                                       type, &env->vec_status);         \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat,              \
                        ppc_avr_t *a, ppc_avr_t *b, uint32_t desc)      \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            VARITHSAT_CASE(optype, op, cvt, element);                   \
        }                                                               \
        if (sat) {                                                      \
            vscr_sat->u32[0] = 1;                                       \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

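/*
 * Note (illustrative): the averages below compute (a + b + 1) >> 1 in a
 * wider type, so ties round toward +infinity, e.g. vavgsb of 1 and 2
 * yields 2, and no intermediate overflow is possible.
 */
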
#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG

#define VABSDU_DO(name, element)                                        \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)           \
{                                                                       \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        r->element[i] = (a->element[i] > b->element[i]) ?               \
            (a->element[i] - b->element[i]) :                           \
            (b->element[i] - a->element[i]);                            \
    }                                                                   \
}

/*
 * VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element)                   \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU

#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f32[i] = float32_scalbn(t, -uim, &env->vec_status);      \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMPNEZ(NAME, ELEM) \
void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
{                                                                           \
    for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) {                         \
        t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) ||             \
                      (a->ELEM[i] != b->ELEM[i])) ? -1 : 0;                 \
    }                                                                       \
}
VCMPNEZ(VCMPNEZB, u8)
VCMPNEZ(VCMPNEZH, u16)
VCMPNEZ(VCMPNEZW, u32)
#undef VCMPNEZ

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            uint32_t result;                                            \
            FloatRelation rel =                                         \
                float32_compare_quiet(a->f32[i], b->f32[i],             \
                                      &env->vec_status);                \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

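/*
 * Illustrative note on the record forms above: CR field 6 is set to 0b1000
 * when every lane compared true and to 0b0010 when no lane compared true;
 * mixed results leave both bits clear.
 */
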
static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
                                                     &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f32[i]);
            FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
                                                         &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            if (float32_is_any_nan(b->f32[i])) {                        \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f32[i], &s);          \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            set_vscr_sat(env);                                          \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, access, ofs)                             \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i, half = ARRAY_SIZE(r->element) / 2;                       \
                                                                        \
        for (i = 0; i < half; i++) {                                    \
            result.access(i * 2 + 0) = a->access(i + ofs);              \
            result.access(i * 2 + 1) = b->access(i + ofs);              \
        }                                                               \
        *r = result;                                                    \
    }

#define VMRG(suffix, element, access)                   \
    VMRG_DO(mrgl##suffix, element, access, half)        \
    VMRG_DO(mrgh##suffix, element, access, 0)
VMRG(b, u8, VsrB)
VMRG(h, u16, VsrH)
VMRG(w, u32, VsrW)
#undef VMRG_DO
#undef VMRG

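/*
 * Note (illustrative): the multiply-sum helpers below form per-word sums of
 * narrow-element products and add the corresponding word of 'c'; vmsummbm
 * multiplies signed bytes of 'a' by unsigned bytes of 'b'.
 */
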
void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
    void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
                                     (cast)b->mul_access(i);            \
        }                                                               \
    }

#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast)   \
    void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) *       \
                                     (cast)b->mul_access(i + 1);        \
        }                                                               \
    }

#define VMUL(suffix, mul_element, mul_access, prod_access, cast)          \
    VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \
    VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
VMUL(SB, s8, VsrSB, VsrSH, int16_t)
VMUL(SH, s16, VsrSH, VsrSW, int32_t)
VMUL(SW, s32, VsrSW, VsrSD, int64_t)
VMUL(UB, u8, VsrB, VsrH, uint16_t)
VMUL(UH, u16, VsrH, VsrW, uint32_t)
VMUL(UW, u32, VsrW, VsrD, uint64_t)
#undef VMUL_DO_EVN
#undef VMUL_DO_ODD
#undef VMUL

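/*
 * Note on the permute helpers below (illustrative): each control byte
 * selects one byte from the 32-byte concatenation of the two source
 * registers via its low five bits; XXPERMX additionally requires the upper
 * three bits of the control byte to match the UIM immediate.
 */
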
void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
                    target_ulong uim)
{
    int i, idx;
    ppc_vsr_t tmp = { .u64 = {0, 0} };

    for (i = 0; i < ARRAY_SIZE(t->u8); i++) {
        if ((pcv->VsrB(i) >> 5) == uim) {
            idx = pcv->VsrB(i) & 0x1f;
            if (idx < ARRAY_SIZE(t->u8)) {
                tmp.VsrB(i) = s0->VsrB(idx);
            } else {
                tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8));
            }
        }
    }

    *t = tmp;
}

void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = s & 0xf;

        if (s & 0x10) {
            result.VsrB(i) = b->VsrB(index);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = 15 - (s & 0xf);

        if (s & 0x10) {
            result.VsrB(i) = a->VsrB(index);
        } else {
            result.VsrB(i) = b->VsrB(index);
        }
    }
    *r = result;
}

#define XXGENPCV_BE_EXP(NAME, SZ) \
void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
{                                                                   \
    ppc_vsr_t tmp;                                                  \
                                                                    \
    /* Initialize tmp with the result of an all-zeros mask */       \
    tmp.VsrD(0) = 0x1011121314151617;                               \
    tmp.VsrD(1) = 0x18191A1B1C1D1E1F;                               \
                                                                    \
    /* Iterate over the most significant byte of each element */    \
    for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) {        \
        if (b->VsrB(i) & 0x80) {                                    \
            /* Update each byte of the element */                   \
            for (int k = 0; k < SZ; k++) {                          \
                tmp.VsrB(i + k) = j + k;                            \
            }                                                       \
            j += SZ;                                                \
        }                                                           \
    }                                                               \
                                                                    \
    *t = tmp;                                                       \
}

#define XXGENPCV_BE_COMP(NAME, SZ) \
void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
{                                                                   \
    ppc_vsr_t tmp = { .u64 = { 0, 0 } };                            \
                                                                    \
    /* Iterate over the most significant byte of each element */    \
    for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) {        \
        if (b->VsrB(i) & 0x80) {                                    \
            /* Update each byte of the element */                   \
            for (int k = 0; k < SZ; k++) {                          \
                tmp.VsrB(j + k) = i + k;                            \
            }                                                       \
            j += SZ;                                                \
        }                                                           \
    }                                                               \
                                                                    \
    *t = tmp;                                                       \
}

#define XXGENPCV_LE_EXP(NAME, SZ) \
void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
{                                                                   \
    ppc_vsr_t tmp;                                                  \
                                                                    \
    /* Initialize tmp with the result of an all-zeros mask */       \
    tmp.VsrD(0) = 0x1F1E1D1C1B1A1918;                               \
    tmp.VsrD(1) = 0x1716151413121110;                               \
                                                                    \
    /* Iterate over the most significant byte of each element */    \
    for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) {        \
        /* Reverse indexing of "i" */                               \
        const int idx = ARRAY_SIZE(b->u8) - i - SZ;                 \
        if (b->VsrB(idx) & 0x80) {                                  \
            /* Update each byte of the element */                   \
            for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) {       \
                tmp.VsrB(idx + rk) = j + k;                         \
            }                                                       \
            j += SZ;                                                \
        }                                                           \
    }                                                               \
                                                                    \
    *t = tmp;                                                       \
}

#define XXGENPCV_LE_COMP(NAME, SZ) \
void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
{                                                                   \
    ppc_vsr_t tmp = { .u64 = { 0, 0 } };                            \
                                                                    \
    /* Iterate over the most significant byte of each element */    \
    for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) {        \
        if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) {           \
            /* Update each byte of the element */                   \
            for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) {       \
                /* Reverse indexing of "j" */                       \
                const int idx = ARRAY_SIZE(b->u8) - j - SZ;         \
                tmp.VsrB(idx + rk) = i + k;                         \
            }                                                       \
            j += SZ;                                                \
        }                                                           \
    }                                                               \
                                                                    \
    *t = tmp;                                                       \
}

#define XXGENPCV(NAME, SZ)      \
    XXGENPCV_BE_EXP(NAME, SZ)   \
    XXGENPCV_BE_COMP(NAME, SZ)  \
    XXGENPCV_LE_EXP(NAME, SZ)   \
    XXGENPCV_LE_COMP(NAME, SZ)  \

XXGENPCV(XXGENPCVBM, 1)
XXGENPCV(XXGENPCVHM, 2)
XXGENPCV(XXGENPCVWM, 4)
XXGENPCV(XXGENPCVDM, 8)

#undef XXGENPCV_BE_EXP
#undef XXGENPCV_BE_COMP
#undef XXGENPCV_LE_EXP
#undef XXGENPCV_LE_COMP
#undef XXGENPCV

#if HOST_BIG_ENDIAN
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index)                      \
    (extract64((avr)->u64[1 - i], 63 - index, 1))
#endif

void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}

void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63 - (index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->VsrD(0) = perm;
    r->VsrD(1) = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

#define PMSUM(name, srcfld, trgfld, trgtyp)                             \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i, j;                                                           \
    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])];              \
                                                                        \
    VECTOR_FOR_INORDER_I(i, srcfld) {                                   \
        prod[i] = 0;                                                    \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {                \
            if (a->srcfld[i] & (1ull << j)) {                           \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);                 \
            }                                                           \
        }                                                               \
    }                                                                   \
                                                                        \
    VECTOR_FOR_INORDER_I(i, trgfld) {                                   \
        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1];                   \
    }                                                                   \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

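/*
 * Illustrative example of the carry-less products above: multiplying
 * 0b11 by 0b11 with XOR accumulation gives 0b101, since the partial
 * products 0b11 and 0b110 are XORed rather than added.
 */
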
void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{

#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                ppc_avr_t bshift;
                if (j == 0) {
                    bshift.VsrD(0) = 0;
                    bshift.VsrD(1) = b->u64[i];
                } else {
                    bshift.VsrD(0) = b->u64[i] >> (64 - j);
                    bshift.VsrD(1) = b->u64[i] << j;
                }
                prod[i].VsrD(1) ^= bshift.VsrD(1);
                prod[i].VsrD(0) ^= bshift.VsrD(0);
            }
        }
    }

    r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
    r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
#endif
}


#if HOST_BIG_ENDIAN
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if HOST_BIG_ENDIAN
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
                                     ((e >> 6) & 0x3e0) |
                                     ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            set_vscr_sat(env);                                          \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(sdss, s64, s32, cvtsdsw, 1)
VPK(sdus, s64, u32, cvtsduw, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(udus, u64, u32, cvtuduw, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
VPK(udum, u64, u32, I, 0)
#undef I
#undef VPK
#undef PKBIG

void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
    }
}

#define VRFI(suffix, rounding)                                  \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
                             ppc_avr_t *b)                      \
    {                                                           \
        int i;                                                  \
        float_status s = env->vec_status;                       \
                                                                \
        set_float_rounding_mode(rounding, &s);                  \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {              \
            r->f32[i] = float32_round_to_int (b->f32[i], &s);   \
        }                                                       \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI

void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        float32 t = float32_sqrt(b->f32[i], &env->vec_status);

        r->f32[i] = float32_div(float32_one, t, &env->vec_status);
    }
}

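/*
 * Note on the rotate helpers below (illustrative): each element of the
 * second source packs three 6-bit fields; the low six bits give the rotate
 * amount, and the fields at bit offsets 8 and 16 give the mask end and
 * begin positions.  VRL*MI merges the rotated value into the target under
 * that mask, while VRL*NM simply ANDs with it.
 */
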
#define VRLMI(name, size, element, insert)                                  \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
{                                                                           \
    int i;                                                                  \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                          \
        uint##size##_t src1 = a->element[i];                                \
        uint##size##_t src2 = b->element[i];                                \
        uint##size##_t src3 = r->element[i];                                \
        uint##size##_t begin, end, shift, mask, rot_val;                    \
                                                                            \
        shift = extract##size(src2, 0, 6);                                  \
        end   = extract##size(src2, 8, 6);                                  \
        begin = extract##size(src2, 16, 6);                                 \
        rot_val = rol##size(src1, shift);                                   \
        mask = mask_u##size(begin, end);                                    \
        if (insert) {                                                       \
            r->element[i] = (rot_val & mask) | (src3 & ~mask);              \
        } else {                                                            \
            r->element[i] = (rot_val & mask);                               \
        }                                                                   \
    }                                                                       \
}

VRLMI(VRLDMI, 64, u64, 1);
VRLMI(VRLWMI, 32, u32, 1);
VRLMI(VRLDNM, 64, u64, 0);
VRLMI(VRLWNM, 32, u32, 0);

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
    }
}

#define VEXTU_X_DO(name, size, left)                            \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
{                                                               \
    int index = (a & 0xf) * 8;                                  \
    if (left) {                                                 \
        index = 128 - index - size;                             \
    }                                                           \
    return int128_getlo(int128_rshift(b->s128, index)) &        \
        MAKE_64BIT_MASK(0, size);                               \
}
VEXTU_X_DO(vextublx, 8, 1)
VEXTU_X_DO(vextuhlx, 16, 1)
VEXTU_X_DO(vextuwlx, 32, 1)
VEXTU_X_DO(vextubrx, 8, 0)
VEXTU_X_DO(vextuhrx, 16, 0)
VEXTU_X_DO(vextuwrx, 32, 0)
#undef VEXTU_X_DO

void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->VsrB(i) & 0x7;               /* extract shift value */
        bytes = (a->VsrB(i) << 8) +             /* extract adjacent bytes */
            (((i + 1) < size) ? a->VsrB(i + 1) : 0);
        r->VsrB(i) = (bytes << shift) >> 8;     /* shift and store result */
    }
}

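/*
 * Note (illustrative): vslv above shifts each byte left by the amount in
 * the matching byte of 'b', pulling in the high-order bits of the next
 * byte; vsrv below shifts right, pulling in the low-order bits of the
 * previous (more significant) byte.
 */
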
void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /*
     * Use reverse order, as the destination and source registers can be
     * the same. Since the vector is modified in place (saving a temporary),
     * processing in reverse order guarantees that a computed result is not
     * fed back into the remaining computation.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->VsrB(i) & 0x7;               /* extract shift value */
        bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
                                                /* extract adjacent bytes */
        r->VsrB(i) = (bytes >> shift) & 0xFF;   /* shift and store result */
    }
}

void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.VsrB(i) = b->VsrB(index - 0x10);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if HOST_BIG_ENDIAN
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

#if HOST_BIG_ENDIAN
#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
#else
#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
#endif

#define VINSX(SUFFIX, TYPE) \
void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t,     \
                                         uint64_t val, target_ulong index)   \
{                                                                            \
    const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE);                     \
    target_long idx = index;                                                 \
                                                                             \
    if (idx < 0 || idx > maxidx) {                                           \
        idx = idx < 0 ? sizeof(TYPE) - idx : idx;                            \
        qemu_log_mask(LOG_GUEST_ERROR,                                       \
            "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \
            ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx);       \
    } else {                                                                 \
        TYPE src = val;                                                      \
        memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE));         \
    }                                                                        \
}
VINSX(B, uint8_t)
VINSX(H, uint16_t)
VINSX(W, uint32_t)
VINSX(D, uint64_t)
#undef ELEM_ADDR
#undef VINSX
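/*
 * Note on the VEXTD*VLX helpers below (as implemented here, illustrative):
 * a SIZE-byte element at the byte offset taken from RC is read out of the
 * 32-byte concatenation of the two sources and placed in the low-order
 * bytes of doubleword 0 of the target, with everything else zeroed;
 * out-of-range offsets only log a guest error.
 */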
#if HOST_BIG_ENDIAN
#define VEXTDVLX(NAME, SIZE) \
void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
                   target_ulong index)                                         \
{                                                                              \
    const target_long idx = index;                                             \
    ppc_avr_t tmp[2] = { *a, *b };                                             \
    memset(t, 0, sizeof(*t));                                                  \
    if (idx >= 0 && idx + SIZE <= sizeof(tmp)) {                               \
        memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
    } else {                                                                   \
        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x"  \
                      TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n",         \
                      env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE);        \
    }                                                                          \
}
#else
#define VEXTDVLX(NAME, SIZE) \
void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
                   target_ulong index)                                         \
{                                                                              \
    const target_long idx = index;                                             \
    ppc_avr_t tmp[2] = { *b, *a };                                             \
    memset(t, 0, sizeof(*t));                                                  \
    if (idx >= 0 && idx + SIZE <= sizeof(tmp)) {                               \
        memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2],                                  \
               (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE);                  \
    } else {                                                                   \
        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x"  \
                      TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n",         \
                      env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE);        \
    }                                                                          \
}
#endif
VEXTDVLX(VEXTDUBVLX, 1)
VEXTDVLX(VEXTDUHVLX, 2)
VEXTDVLX(VEXTDUWVLX, 4)
VEXTDVLX(VEXTDDVLX, 8)
#undef VEXTDVLX
#if HOST_BIG_ENDIAN
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
        memset(&r->u8[8], 0, 8);                                             \
        memset(&r->u8[0], 0, 8 - es);                                        \
    }
#else
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        uint32_t s = (16 - index) - es;                                      \
        memmove(&r->u8[8], &b->u8[s], es);                                   \
        memset(&r->u8[0], 0, 8);                                             \
        memset(&r->u8[8 + es], 0, 8 - es);                                   \
    }
#endif
VEXTRACT(ub, u8)
VEXTRACT(uh, u16)
VEXTRACT(uw, u32)
VEXTRACT(d, u64)
#undef VEXTRACT

#define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
{                                                  \
    int i, idx, crf = 0;                           \
                                                   \
    for (i = 0; i < NUM_ELEMS; i++) {              \
        idx = LEFT ? i : NUM_ELEMS - i - 1;        \
        if (b->Vsr##ELEM(idx)) {                   \
            t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \
        } else {                                   \
            crf = 0b0010;                          \
            break;                                 \
        }                                          \
    }                                              \
                                                   \
    for (; i < NUM_ELEMS; i++) {                   \
        idx = LEFT ? i : NUM_ELEMS - i - 1;        \
        t->Vsr##ELEM(idx) = 0;                     \
    }                                              \
                                                   \
    return crf;                                    \
}
VSTRI(VSTRIBL, B, 16, true)
VSTRI(VSTRIBR, B, 16, false)
VSTRI(VSTRIHL, H, 8, true)
VSTRI(VSTRIHR, H, 8, false)
#undef VSTRI

void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
{
    ppc_vsr_t t = { };
    size_t es = sizeof(uint32_t);
    uint32_t ext_index;
    int i;

    ext_index = index;
    for (i = 0; i < es; i++, ext_index++) {
        t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
    }

    *xt = t;
}

void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
{
    ppc_vsr_t t = *xt;
    size_t es = sizeof(uint32_t);
    int ins_index, i = 0;

    ins_index = index;
    for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
        t.VsrB(ins_index) = xb->VsrB(8 - es + i);
    }

    *xt = t;
}

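/*
 * Note on XXEVAL below (illustrative): the 8-bit immediate is a truth table
 * over (a, b, c); each set bit enables one conjunction, e.g. imm = 0x01
 * enables only a & b & c, so the helper reduces to a three-way AND.
 */
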
void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
                   uint32_t desc)
{
    /*
     * Instead of processing imm bit-by-bit, we'll skip the computation of
     * conjunctions whose corresponding bit is unset.
     */
    int bit, imm = simd_data(desc);
    Int128 conj, disj = int128_zero();

    /* Iterate over set bits from the least to the most significant bit */
    while (imm) {
        /*
         * Get the next bit to be processed with ctzl. Invert the result of
         * ctzl to match the indexing used by PowerISA.
         */
        bit = 7 - ctzl(imm);
        if (bit & 0x4) {
            conj = a->s128;
        } else {
            conj = int128_not(a->s128);
        }
        if (bit & 0x2) {
            conj = int128_and(conj, b->s128);
        } else {
            conj = int128_and(conj, int128_not(b->s128));
        }
        if (bit & 0x1) {
            conj = int128_and(conj, c->s128);
        } else {
            conj = int128_and(conj, int128_not(c->s128));
        }
        disj = int128_or(disj, conj);

        /* Unset the least significant bit that is set */
        imm &= imm - 1;
    }

    t->s128 = disj;
}

#define XXBLEND(name, sz) \
void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b,  \
                                 ppc_avr_t *c, uint32_t desc)               \
{                                                                           \
    for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) {                  \
        t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ?               \
            b->glue(u, sz)[i] : a->glue(u, sz)[i];                          \
    }                                                                       \
}
XXBLEND(B, 8)
XXBLEND(H, 16)
XXBLEND(W, 32)
XXBLEND(D, 64)
#undef XXBLEND

#define VNEG(name, element)                                         \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
{                                                                   \
    int i;                                                          \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
        r->element[i] = -b->element[i];                             \
    }                                                               \
}
VNEG(vnegw, s32)
VNEG(vnegd, s64)
#undef VNEG

void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if HOST_BIG_ENDIAN
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = a->u32[i] >= b->u32[i];
    }
}

void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

    upper = ARRAY_SIZE(r->s32) - 1;
    t = (int64_t)b->VsrSW(upper);
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->VsrSW(i);
        result.VsrSW(i) = 0;
    }
    result.VsrSW(upper) = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

    upper = 1;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->VsrSW(upper + i * 2);

        result.VsrD(i) = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->VsrSW(2 * i + j);
        }
        result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

#if HOST_BIG_ENDIAN
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
#define VUPKPX(suffix, hi)                                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
            uint16_t e = b->u16[hi ? i : i + 4];                        \
            uint8_t a = (e >> 15) ? 0xff : 0;                           \
            uint8_t r = (e >> 10) & 0x1f;                               \
            uint8_t g = (e >> 5) & 0x1f;                                \
            uint8_t b = e & 0x1f;                                       \
                                                                        \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
        }                                                               \
        *r = result;                                                    \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

#define VUPK(suffix, unpacked, packee, hi)                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        if (hi) {                                                       \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
                result.unpacked[i] = b->packee[i];                      \
            }                                                           \
        } else {                                                        \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                                 \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(hsw, s64, s32, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
VUPK(lsw, s64, s32, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

#define VGENERIC_DO(name, element)                                      \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = name(b->element[i]);                        \
        }                                                               \
    }

#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)

#undef clzb
#undef clzh

#define ctzb(v) ((v) ? ctz32(v) : 8)
#define ctzh(v) ((v) ? ctz32(v) : 16)
#define ctzw(v) ctz32((v))
#define ctzd(v) ctz64((v))

VGENERIC_DO(ctzb, u8)
VGENERIC_DO(ctzh, u16)
VGENERIC_DO(ctzw, u32)
VGENERIC_DO(ctzd, u64)

#undef ctzb
#undef ctzh
#undef ctzw
#undef ctzd

#define popcntb(v) ctpop8(v)
#define popcnth(v) ctpop16(v)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)

VGENERIC_DO(popcntb, u8)
VGENERIC_DO(popcnth, u16)
VGENERIC_DO(popcntw, u32)
VGENERIC_DO(popcntd, u64)

#undef popcntb
#undef popcnth
#undef popcntw
#undef popcntd

#undef VGENERIC_DO

#if HOST_BIG_ENDIAN
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif

#ifndef CONFIG_INT128

static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
{
    t->u64[0] = ~a.u64[0];
    t->u64[1] = ~a.u64[1];
}

static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
{
    if (a.VsrD(0) < b.VsrD(0)) {
        return -1;
    } else if (a.VsrD(0) > b.VsrD(0)) {
        return 1;
    } else if (a.VsrD(1) < b.VsrD(1)) {
        return -1;
    } else if (a.VsrD(1) > b.VsrD(1)) {
        return 1;
    } else {
        return 0;
    }
}

static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
                 (~a.VsrD(1) < b.VsrD(1));
}

static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    ppc_avr_t not_a;
    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
                 (~a.VsrD(1) < b.VsrD(1));
    avr_qw_not(&not_a, a);
    return avr_qw_cmpu(not_a, b) < 0;
}

#endif

void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128;
#else
    avr_qw_add(r, *a, *b);
#endif
}

void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else

    if (c->VsrD(1) & 1) {
        ppc_avr_t tmp;

        tmp.VsrD(0) = 0;
        tmp.VsrD(1) = c->VsrD(1) & 1;
        avr_qw_add(&tmp, *a, tmp);
        avr_qw_add(r, tmp, *b);
    } else {
        avr_qw_add(r, *a, *b);
    }
#endif
}

void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < b->u128);
#else
    ppc_avr_t not_a;

    avr_qw_not(&not_a, *a);

    r->VsrD(0) = 0;
    r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
#endif
}

void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    int carry_out = (~a->u128 < b->u128);
    if (!carry_out && (c->u128 & 1)) {
        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
                    ((a->u128 != 0) || (b->u128 != 0));
    }
    r->u128 = carry_out;
#else

    int carry_in = c->VsrD(1) & 1;
    int carry_out = 0;
    ppc_avr_t tmp;

    carry_out = avr_qw_addc(&tmp, *a, *b);

    if (!carry_out && carry_in) {
        ppc_avr_t one = QW_ONE;
        carry_out = avr_qw_addc(&tmp, tmp, one);
    }
    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
#endif
}

void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 - b->u128;
#else
    ppc_avr_t tmp;
    ppc_avr_t one = QW_ONE;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&tmp, *a, tmp);
    avr_qw_add(r, tmp, one);
#endif
}

void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
#else
    ppc_avr_t tmp, sum;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&sum, *a, tmp);

    tmp.VsrD(0) = 0;
    tmp.VsrD(1) = c->VsrD(1) & 1;
    avr_qw_add(r, sum, tmp);
#endif
}

void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < ~b->u128) ||
              (a->u128 + ~b->u128 == (__uint128_t)-1);
#else
    int carry = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
    }
    r->VsrD(0) = 0;
    r->VsrD(1) = carry;
#endif
}

void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 =
        (~a->u128 < ~b->u128) ||
        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
#else
    int carry_in = c->VsrD(1) & 1;
    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry_out && carry_in) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
    }

    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
#endif
}

#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

#define BCD_DIG_BYTE(n) (15 - ((n) / 2))

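/*
 * Illustrative note on the packed-decimal layout used below: digit n lives
 * in byte 15 - n/2, low nibble for even n and high nibble for odd n, so
 * digit 0 is the sign nibble in the least significant byte and digit 31 is
 * the most significant digit.
 */
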
BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2199 } else { 2200 return BCD_NEG_PREF; 2201 } 2202 } 2203 2204 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2205 { 2206 uint8_t result; 2207 if (n & 1) { 2208 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4; 2209 } else { 2210 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF; 2211 } 2212 2213 if (unlikely(result > 9)) { 2214 *invalid = true; 2215 } 2216 return result; 2217 } 2218 2219 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2220 { 2221 if (n & 1) { 2222 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F; 2223 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4); 2224 } else { 2225 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0; 2226 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit; 2227 } 2228 } 2229 2230 static bool bcd_is_valid(ppc_avr_t *bcd) 2231 { 2232 int i; 2233 int invalid = 0; 2234 2235 if (bcd_get_sgn(bcd) == 0) { 2236 return false; 2237 } 2238 2239 for (i = 1; i < 32; i++) { 2240 bcd_get_digit(bcd, i, &invalid); 2241 if (unlikely(invalid)) { 2242 return false; 2243 } 2244 } 2245 return true; 2246 } 2247 2248 static int bcd_cmp_zero(ppc_avr_t *bcd) 2249 { 2250 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) { 2251 return CRF_EQ; 2252 } else { 2253 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT; 2254 } 2255 } 2256 2257 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2258 { 2259 return reg->VsrH(7 - n); 2260 } 2261 2262 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2263 { 2264 reg->VsrH(7 - n) = val; 2265 } 2266 2267 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2268 { 2269 int i; 2270 int invalid = 0; 2271 for (i = 31; i > 0; i--) { 2272 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2273 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2274 if (unlikely(invalid)) { 2275 return 0; /* doesn't matter */ 2276 } else if (dig_a > dig_b) { 2277 return 1; 2278 } else if (dig_a < dig_b) { 2279 return -1; 2280 } 2281 } 2282 2283 return 0; 2284 } 2285 2286 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2287 int *overflow) 2288 { 2289 int carry = 0; 2290 int i; 2291 int is_zero = 1; 2292 2293 for (i = 1; i <= 31; i++) { 2294 uint8_t digit = bcd_get_digit(a, i, invalid) + 2295 bcd_get_digit(b, i, invalid) + carry; 2296 is_zero &= (digit == 0); 2297 if (digit > 9) { 2298 carry = 1; 2299 digit -= 10; 2300 } else { 2301 carry = 0; 2302 } 2303 2304 bcd_put_digit(t, digit, i); 2305 } 2306 2307 *overflow = carry; 2308 return is_zero; 2309 } 2310 2311 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2312 int *overflow) 2313 { 2314 int carry = 0; 2315 int i; 2316 2317 for (i = 1; i <= 31; i++) { 2318 uint8_t digit = bcd_get_digit(a, i, invalid) - 2319 bcd_get_digit(b, i, invalid) + carry; 2320 if (digit & 0x80) { 2321 carry = -1; 2322 digit += 10; 2323 } else { 2324 carry = 0; 2325 } 2326 2327 bcd_put_digit(t, digit, i); 2328 } 2329 2330 *overflow = carry; 2331 } 2332 2333 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2334 { 2335 2336 int sgna = bcd_get_sgn(a); 2337 int sgnb = bcd_get_sgn(b); 2338 int invalid = (sgna == 0) || (sgnb == 0); 2339 int overflow = 0; 2340 int zero = 0; 2341 uint32_t cr = 0; 2342 ppc_avr_t result = { .u64 = { 0, 0 } }; 2343 2344 if (!invalid) { 2345 if (sgna == sgnb) { 2346 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2347 zero = bcd_add_mag(&result, a, b, &invalid, &overflow); 2348 cr = (sgna > 0) ? 
CRF_GT : CRF_LT; 2349 } else { 2350 int magnitude = bcd_cmp_mag(a, b); 2351 if (magnitude > 0) { 2352 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2353 bcd_sub_mag(&result, a, b, &invalid, &overflow); 2354 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2355 } else if (magnitude < 0) { 2356 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps); 2357 bcd_sub_mag(&result, b, a, &invalid, &overflow); 2358 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2359 } else { 2360 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps); 2361 cr = CRF_EQ; 2362 } 2363 } 2364 } 2365 2366 if (unlikely(invalid)) { 2367 result.VsrD(0) = result.VsrD(1) = -1; 2368 cr = CRF_SO; 2369 } else if (overflow) { 2370 cr |= CRF_SO; 2371 } else if (zero) { 2372 cr |= CRF_EQ; 2373 } 2374 2375 *r = result; 2376 2377 return cr; 2378 } 2379 2380 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2381 { 2382 ppc_avr_t bcopy = *b; 2383 int sgnb = bcd_get_sgn(b); 2384 if (sgnb < 0) { 2385 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2386 } else if (sgnb > 0) { 2387 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2388 } 2389 /* else invalid ... defer to bcdadd code for proper handling */ 2390 2391 return helper_bcdadd(r, a, &bcopy, ps); 2392 } 2393 2394 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2395 { 2396 int i; 2397 int cr = 0; 2398 uint16_t national = 0; 2399 uint16_t sgnb = get_national_digit(b, 0); 2400 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2401 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2402 2403 for (i = 1; i < 8; i++) { 2404 national = get_national_digit(b, i); 2405 if (unlikely(national < 0x30 || national > 0x39)) { 2406 invalid = 1; 2407 break; 2408 } 2409 2410 bcd_put_digit(&ret, national & 0xf, i); 2411 } 2412 2413 if (sgnb == NATIONAL_PLUS) { 2414 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2415 } else { 2416 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2417 } 2418 2419 cr = bcd_cmp_zero(&ret); 2420 2421 if (unlikely(invalid)) { 2422 cr = CRF_SO; 2423 } 2424 2425 *r = ret; 2426 2427 return cr; 2428 } 2429 2430 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2431 { 2432 int i; 2433 int cr = 0; 2434 int sgnb = bcd_get_sgn(b); 2435 int invalid = (sgnb == 0); 2436 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2437 2438 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0); 2439 2440 for (i = 1; i < 8; i++) { 2441 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2442 2443 if (unlikely(invalid)) { 2444 break; 2445 } 2446 } 2447 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2448 2449 cr = bcd_cmp_zero(b); 2450 2451 if (ox_flag) { 2452 cr |= CRF_SO; 2453 } 2454 2455 if (unlikely(invalid)) { 2456 cr = CRF_SO; 2457 } 2458 2459 *r = ret; 2460 2461 return cr; 2462 } 2463 2464 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2465 { 2466 int i; 2467 int cr = 0; 2468 int invalid = 0; 2469 int zone_digit = 0; 2470 int zone_lead = ps ? 0xF : 0x3; 2471 int digit = 0; 2472 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2473 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4; 2474 2475 if (unlikely((sgnb < 0xA) && ps)) { 2476 invalid = 1; 2477 } 2478 2479 for (i = 0; i < 16; i++) { 2480 zone_digit = i ? 
b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead; 2481 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF; 2482 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2483 invalid = 1; 2484 break; 2485 } 2486 2487 bcd_put_digit(&ret, digit, i + 1); 2488 } 2489 2490 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2491 (!ps && (sgnb & 0x4))) { 2492 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2493 } else { 2494 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2495 } 2496 2497 cr = bcd_cmp_zero(&ret); 2498 2499 if (unlikely(invalid)) { 2500 cr = CRF_SO; 2501 } 2502 2503 *r = ret; 2504 2505 return cr; 2506 } 2507 2508 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2509 { 2510 int i; 2511 int cr = 0; 2512 uint8_t digit = 0; 2513 int sgnb = bcd_get_sgn(b); 2514 int zone_lead = (ps) ? 0xF0 : 0x30; 2515 int invalid = (sgnb == 0); 2516 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2517 2518 int ox_flag = ((b->VsrD(0) >> 4) != 0); 2519 2520 for (i = 0; i < 16; i++) { 2521 digit = bcd_get_digit(b, i + 1, &invalid); 2522 2523 if (unlikely(invalid)) { 2524 break; 2525 } 2526 2527 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit; 2528 } 2529 2530 if (ps) { 2531 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2532 } else { 2533 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1); 2534 } 2535 2536 cr = bcd_cmp_zero(b); 2537 2538 if (ox_flag) { 2539 cr |= CRF_SO; 2540 } 2541 2542 if (unlikely(invalid)) { 2543 cr = CRF_SO; 2544 } 2545 2546 *r = ret; 2547 2548 return cr; 2549 } 2550 2551 /** 2552 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs 2553 * 2554 * Returns: 2555 * > 0 if ahi|alo > bhi|blo, 2556 * 0 if ahi|alo == bhi|blo, 2557 * < 0 if ahi|alo < bhi|blo 2558 */ 2559 static inline int ucmp128(uint64_t alo, uint64_t ahi, 2560 uint64_t blo, uint64_t bhi) 2561 { 2562 return (ahi == bhi) ? 2563 (alo > blo ? 1 : (alo == blo ? 0 : -1)) : 2564 (ahi > bhi ? 1 : -1); 2565 } 2566 2567 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2568 { 2569 int i; 2570 int cr; 2571 uint64_t lo_value; 2572 uint64_t hi_value; 2573 uint64_t rem; 2574 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2575 2576 if (b->VsrSD(0) < 0) { 2577 lo_value = -b->VsrSD(1); 2578 hi_value = ~b->VsrD(0) + !lo_value; 2579 bcd_put_digit(&ret, 0xD, 0); 2580 2581 cr = CRF_LT; 2582 } else { 2583 lo_value = b->VsrD(1); 2584 hi_value = b->VsrD(0); 2585 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); 2586 2587 if (hi_value == 0 && lo_value == 0) { 2588 cr = CRF_EQ; 2589 } else { 2590 cr = CRF_GT; 2591 } 2592 } 2593 2594 /* 2595 * Check src limits: abs(src) <= 10^31 - 1 2596 * 2597 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff 2598 */ 2599 if (ucmp128(lo_value, hi_value, 2600 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) { 2601 cr |= CRF_SO; 2602 2603 /* 2604 * According to the ISA, if src wouldn't fit in the destination 2605 * register, the result is undefined. 2606 * In that case, we leave r unchanged. 
2607 */ 2608 } else { 2609 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL); 2610 2611 for (i = 1; i < 16; rem /= 10, i++) { 2612 bcd_put_digit(&ret, rem % 10, i); 2613 } 2614 2615 for (; i < 32; lo_value /= 10, i++) { 2616 bcd_put_digit(&ret, lo_value % 10, i); 2617 } 2618 2619 *r = ret; 2620 } 2621 2622 return cr; 2623 } 2624 2625 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2626 { 2627 uint8_t i; 2628 int cr; 2629 uint64_t carry; 2630 uint64_t unused; 2631 uint64_t lo_value; 2632 uint64_t hi_value = 0; 2633 int sgnb = bcd_get_sgn(b); 2634 int invalid = (sgnb == 0); 2635 2636 lo_value = bcd_get_digit(b, 31, &invalid); 2637 for (i = 30; i > 0; i--) { 2638 mulu64(&lo_value, &carry, lo_value, 10ULL); 2639 mulu64(&hi_value, &unused, hi_value, 10ULL); 2640 lo_value += bcd_get_digit(b, i, &invalid); 2641 hi_value += carry; 2642 2643 if (unlikely(invalid)) { 2644 break; 2645 } 2646 } 2647 2648 if (sgnb == -1) { 2649 r->VsrSD(1) = -lo_value; 2650 r->VsrSD(0) = ~hi_value + !r->VsrSD(1); 2651 } else { 2652 r->VsrSD(1) = lo_value; 2653 r->VsrSD(0) = hi_value; 2654 } 2655 2656 cr = bcd_cmp_zero(b); 2657 2658 if (unlikely(invalid)) { 2659 cr = CRF_SO; 2660 } 2661 2662 return cr; 2663 } 2664 2665 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2666 { 2667 int i; 2668 int invalid = 0; 2669 2670 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 2671 return CRF_SO; 2672 } 2673 2674 *r = *a; 2675 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0); 2676 2677 for (i = 1; i < 32; i++) { 2678 bcd_get_digit(a, i, &invalid); 2679 bcd_get_digit(b, i, &invalid); 2680 if (unlikely(invalid)) { 2681 return CRF_SO; 2682 } 2683 } 2684 2685 return bcd_cmp_zero(r); 2686 } 2687 2688 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2689 { 2690 int sgnb = bcd_get_sgn(b); 2691 2692 *r = *b; 2693 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 2694 2695 if (bcd_is_valid(b) == false) { 2696 return CRF_SO; 2697 } 2698 2699 return bcd_cmp_zero(r); 2700 } 2701 2702 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2703 { 2704 int cr; 2705 int i = a->VsrSB(7); 2706 bool ox_flag = false; 2707 int sgnb = bcd_get_sgn(b); 2708 ppc_avr_t ret = *b; 2709 ret.VsrD(1) &= ~0xf; 2710 2711 if (bcd_is_valid(b) == false) { 2712 return CRF_SO; 2713 } 2714 2715 if (unlikely(i > 31)) { 2716 i = 31; 2717 } else if (unlikely(i < -31)) { 2718 i = -31; 2719 } 2720 2721 if (i > 0) { 2722 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2723 } else { 2724 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2725 } 2726 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2727 2728 *r = ret; 2729 2730 cr = bcd_cmp_zero(r); 2731 if (ox_flag) { 2732 cr |= CRF_SO; 2733 } 2734 2735 return cr; 2736 } 2737 2738 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2739 { 2740 int cr; 2741 int i; 2742 int invalid = 0; 2743 bool ox_flag = false; 2744 ppc_avr_t ret = *b; 2745 2746 for (i = 0; i < 32; i++) { 2747 bcd_get_digit(b, i, &invalid); 2748 2749 if (unlikely(invalid)) { 2750 return CRF_SO; 2751 } 2752 } 2753 2754 i = a->VsrSB(7); 2755 if (i >= 32) { 2756 ox_flag = true; 2757 ret.VsrD(1) = ret.VsrD(0) = 0; 2758 } else if (i <= -32) { 2759 ret.VsrD(1) = ret.VsrD(0) = 0; 2760 } else if (i > 0) { 2761 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2762 } else { 2763 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2764 } 2765 *r = ret; 2766 2767 cr = bcd_cmp_zero(r); 2768 if (ox_flag) { 2769 cr |= CRF_SO; 2770 } 2771 2772 return 
cr; 2773 } 2774 2775 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2776 { 2777 int cr; 2778 int unused = 0; 2779 int invalid = 0; 2780 bool ox_flag = false; 2781 int sgnb = bcd_get_sgn(b); 2782 ppc_avr_t ret = *b; 2783 ret.VsrD(1) &= ~0xf; 2784 2785 int i = a->VsrSB(7); 2786 ppc_avr_t bcd_one; 2787 2788 bcd_one.VsrD(0) = 0; 2789 bcd_one.VsrD(1) = 0x10; 2790 2791 if (bcd_is_valid(b) == false) { 2792 return CRF_SO; 2793 } 2794 2795 if (unlikely(i > 31)) { 2796 i = 31; 2797 } else if (unlikely(i < -31)) { 2798 i = -31; 2799 } 2800 2801 if (i > 0) { 2802 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2803 } else { 2804 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2805 2806 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 2807 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 2808 } 2809 } 2810 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2811 2812 cr = bcd_cmp_zero(&ret); 2813 if (ox_flag) { 2814 cr |= CRF_SO; 2815 } 2816 *r = ret; 2817 2818 return cr; 2819 } 2820 2821 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2822 { 2823 uint64_t mask; 2824 uint32_t ox_flag = 0; 2825 int i = a->VsrSH(3) + 1; 2826 ppc_avr_t ret = *b; 2827 2828 if (bcd_is_valid(b) == false) { 2829 return CRF_SO; 2830 } 2831 2832 if (i > 16 && i < 32) { 2833 mask = (uint64_t)-1 >> (128 - i * 4); 2834 if (ret.VsrD(0) & ~mask) { 2835 ox_flag = CRF_SO; 2836 } 2837 2838 ret.VsrD(0) &= mask; 2839 } else if (i >= 0 && i <= 16) { 2840 mask = (uint64_t)-1 >> (64 - i * 4); 2841 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 2842 ox_flag = CRF_SO; 2843 } 2844 2845 ret.VsrD(1) &= mask; 2846 ret.VsrD(0) = 0; 2847 } 2848 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 2849 *r = ret; 2850 2851 return bcd_cmp_zero(&ret) | ox_flag; 2852 } 2853 2854 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2855 { 2856 int i; 2857 uint64_t mask; 2858 uint32_t ox_flag = 0; 2859 int invalid = 0; 2860 ppc_avr_t ret = *b; 2861 2862 for (i = 0; i < 32; i++) { 2863 bcd_get_digit(b, i, &invalid); 2864 2865 if (unlikely(invalid)) { 2866 return CRF_SO; 2867 } 2868 } 2869 2870 i = a->VsrSH(3); 2871 if (i > 16 && i < 33) { 2872 mask = (uint64_t)-1 >> (128 - i * 4); 2873 if (ret.VsrD(0) & ~mask) { 2874 ox_flag = CRF_SO; 2875 } 2876 2877 ret.VsrD(0) &= mask; 2878 } else if (i > 0 && i <= 16) { 2879 mask = (uint64_t)-1 >> (64 - i * 4); 2880 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 2881 ox_flag = CRF_SO; 2882 } 2883 2884 ret.VsrD(1) &= mask; 2885 ret.VsrD(0) = 0; 2886 } else if (i == 0) { 2887 if (ret.VsrD(0) || ret.VsrD(1)) { 2888 ox_flag = CRF_SO; 2889 } 2890 ret.VsrD(0) = ret.VsrD(1) = 0; 2891 } 2892 2893 *r = ret; 2894 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) { 2895 return ox_flag | CRF_EQ; 2896 } 2897 2898 return ox_flag | CRF_GT; 2899 } 2900 2901 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 2902 { 2903 int i; 2904 VECTOR_FOR_INORDER_I(i, u8) { 2905 r->u8[i] = AES_sbox[a->u8[i]]; 2906 } 2907 } 2908 2909 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2910 { 2911 ppc_avr_t result; 2912 int i; 2913 2914 VECTOR_FOR_INORDER_I(i, u32) { 2915 result.VsrW(i) = b->VsrW(i) ^ 2916 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^ 2917 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^ 2918 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^ 2919 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]); 2920 } 2921 *r = result; 2922 } 2923 2924 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2925 { 2926 ppc_avr_t result; 2927 int i; 2928 2929 
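    /*
     * Final AES encryption round for each byte: ShiftRows (via the
     * AES_shifts permutation) and SubBytes (via AES_sbox) applied to a,
     * then XORed with the round key in b; the last round has no MixColumns.
     */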
VECTOR_FOR_INORDER_I(i, u8) { 2930 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]); 2931 } 2932 *r = result; 2933 } 2934 2935 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2936 { 2937 /* This differs from what is written in ISA V2.07. The RTL is */ 2938 /* incorrect and will be fixed in V2.07B. */ 2939 int i; 2940 ppc_avr_t tmp; 2941 2942 VECTOR_FOR_INORDER_I(i, u8) { 2943 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])]; 2944 } 2945 2946 VECTOR_FOR_INORDER_I(i, u32) { 2947 r->VsrW(i) = 2948 AES_imc[tmp.VsrB(4 * i + 0)][0] ^ 2949 AES_imc[tmp.VsrB(4 * i + 1)][1] ^ 2950 AES_imc[tmp.VsrB(4 * i + 2)][2] ^ 2951 AES_imc[tmp.VsrB(4 * i + 3)][3]; 2952 } 2953 } 2954 2955 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2956 { 2957 ppc_avr_t result; 2958 int i; 2959 2960 VECTOR_FOR_INORDER_I(i, u8) { 2961 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]); 2962 } 2963 *r = result; 2964 } 2965 2966 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 2967 { 2968 int st = (st_six & 0x10) != 0; 2969 int six = st_six & 0xF; 2970 int i; 2971 2972 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2973 if (st == 0) { 2974 if ((six & (0x8 >> i)) == 0) { 2975 r->VsrW(i) = ror32(a->VsrW(i), 7) ^ 2976 ror32(a->VsrW(i), 18) ^ 2977 (a->VsrW(i) >> 3); 2978 } else { /* six.bit[i] == 1 */ 2979 r->VsrW(i) = ror32(a->VsrW(i), 17) ^ 2980 ror32(a->VsrW(i), 19) ^ 2981 (a->VsrW(i) >> 10); 2982 } 2983 } else { /* st == 1 */ 2984 if ((six & (0x8 >> i)) == 0) { 2985 r->VsrW(i) = ror32(a->VsrW(i), 2) ^ 2986 ror32(a->VsrW(i), 13) ^ 2987 ror32(a->VsrW(i), 22); 2988 } else { /* six.bit[i] == 1 */ 2989 r->VsrW(i) = ror32(a->VsrW(i), 6) ^ 2990 ror32(a->VsrW(i), 11) ^ 2991 ror32(a->VsrW(i), 25); 2992 } 2993 } 2994 } 2995 } 2996 2997 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 2998 { 2999 int st = (st_six & 0x10) != 0; 3000 int six = st_six & 0xF; 3001 int i; 3002 3003 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 3004 if (st == 0) { 3005 if ((six & (0x8 >> (2 * i))) == 0) { 3006 r->VsrD(i) = ror64(a->VsrD(i), 1) ^ 3007 ror64(a->VsrD(i), 8) ^ 3008 (a->VsrD(i) >> 7); 3009 } else { /* six.bit[2*i] == 1 */ 3010 r->VsrD(i) = ror64(a->VsrD(i), 19) ^ 3011 ror64(a->VsrD(i), 61) ^ 3012 (a->VsrD(i) >> 6); 3013 } 3014 } else { /* st == 1 */ 3015 if ((six & (0x8 >> (2 * i))) == 0) { 3016 r->VsrD(i) = ror64(a->VsrD(i), 28) ^ 3017 ror64(a->VsrD(i), 34) ^ 3018 ror64(a->VsrD(i), 39); 3019 } else { /* six.bit[2*i] == 1 */ 3020 r->VsrD(i) = ror64(a->VsrD(i), 14) ^ 3021 ror64(a->VsrD(i), 18) ^ 3022 ror64(a->VsrD(i), 41); 3023 } 3024 } 3025 } 3026 } 3027 3028 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 3029 { 3030 ppc_avr_t result; 3031 int i; 3032 3033 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 3034 int indexA = c->VsrB(i) >> 4; 3035 int indexB = c->VsrB(i) & 0xF; 3036 3037 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB); 3038 } 3039 *r = result; 3040 } 3041 3042 #undef VECTOR_FOR_INORDER_I 3043 3044 /*****************************************************************************/ 3045 /* SPE extension helpers */ 3046 /* Use a table to make this quicker */ 3047 static const uint8_t hbrev[16] = { 3048 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE, 3049 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF, 3050 }; 3051 3052 static inline uint8_t byte_reverse(uint8_t val) 3053 { 3054 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4); 3055 } 3056 3057 static inline uint32_t word_reverse(uint32_t val) 3058 { 3059 return 
byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) | 3060 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24); 3061 } 3062 3063 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */ 3064 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2) 3065 { 3066 uint32_t a, b, d, mask; 3067 3068 mask = UINT32_MAX >> (32 - MASKBITS); 3069 a = arg1 & mask; 3070 b = arg2 & mask; 3071 d = word_reverse(1 + word_reverse(a | ~b)); 3072 return (arg1 & ~mask) | (d & b); 3073 } 3074 3075 uint32_t helper_cntlsw32(uint32_t val) 3076 { 3077 if (val & 0x80000000) { 3078 return clz32(~val); 3079 } else { 3080 return clz32(val); 3081 } 3082 } 3083 3084 uint32_t helper_cntlzw32(uint32_t val) 3085 { 3086 return clz32(val); 3087 } 3088 3089 /* 440 specific */ 3090 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 3091 target_ulong low, uint32_t update_Rc) 3092 { 3093 target_ulong mask; 3094 int i; 3095 3096 i = 1; 3097 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3098 if ((high & mask) == 0) { 3099 if (update_Rc) { 3100 env->crf[0] = 0x4; 3101 } 3102 goto done; 3103 } 3104 i++; 3105 } 3106 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3107 if ((low & mask) == 0) { 3108 if (update_Rc) { 3109 env->crf[0] = 0x8; 3110 } 3111 goto done; 3112 } 3113 i++; 3114 } 3115 i = 8; 3116 if (update_Rc) { 3117 env->crf[0] = 0x2; 3118 } 3119 done: 3120 env->xer = (env->xer & ~0x7F) | i; 3121 if (update_Rc) { 3122 env->crf[0] |= xer_so; 3123 } 3124 return i; 3125 } 3126
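/*
 * Worked example for helper_brinc above, with MASKBITS = 16 and
 * illustrative (hypothetical) arguments arg1 = 0x4, arg2 = 0x7, i.e. a
 * 3-bit (8-entry) bit-reversed index currently at the reversal of 1:
 *
 *     mask                     = 0x0000ffff
 *     a = 0x4, b = 0x7
 *     a | ~b                   = 0xfffffffc
 *     word_reverse(0xfffffffc) = 0x3fffffff
 *     + 1                      = 0x40000000
 *     word_reverse(0x40000000) = 0x00000002
 *     result = (arg1 & ~mask) | (0x2 & b) = 0x2
 *
 * 0x2 is the bit reversal of 2, continuing the 0, 4, 2, 6, 1, 5, 3, 7
 * order used for bit-reversed (FFT-style) addressing of an 8-entry buffer.
 */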
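/*
 * Worked example for helper_dlmzb above, with illustrative values: for
 * high = 0x41424300 ('A', 'B', 'C', 0x00) the first loop stops on the
 * fourth byte, so i = 4 is written to the low 7 bits of XER and returned;
 * with update_Rc set, CR0 = 0x4 OR'ed with the SO bit. If neither high
 * nor low contains a zero byte, i = 8 and CR0 = 0x2.
 */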