/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"
#include "tcg/tcg-gvec-desc.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    if (unlikely(rb == 0 || ra >= rb)) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divu128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    uint64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = 0;

    if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divs128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define 
pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff)) 149 150 /* 151 * subtract 1 from each byte, and with inverse, check if MSB is set at each 152 * byte. 153 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80 154 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 155 */ 156 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 157 158 /* When you XOR the pattern and there is a match, that byte will be zero */ 159 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 160 161 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 162 { 163 return hasvalue(rb, ra) ? CRF_GT : 0; 164 } 165 166 #undef pattern 167 #undef haszero 168 #undef hasvalue 169 170 /* 171 * Return a random number. 172 */ 173 uint64_t helper_darn32(void) 174 { 175 Error *err = NULL; 176 uint32_t ret; 177 178 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 179 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 180 error_get_pretty(err)); 181 error_free(err); 182 return -1; 183 } 184 185 return ret; 186 } 187 188 uint64_t helper_darn64(void) 189 { 190 Error *err = NULL; 191 uint64_t ret; 192 193 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 194 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 195 error_get_pretty(err)); 196 error_free(err); 197 return -1; 198 } 199 200 return ret; 201 } 202 203 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 204 { 205 int i; 206 uint64_t ra = 0; 207 208 for (i = 0; i < 8; i++) { 209 int index = (rs >> (i * 8)) & 0xFF; 210 if (index < 64) { 211 if (rb & PPC_BIT(index)) { 212 ra |= 1 << i; 213 } 214 } 215 } 216 return ra; 217 } 218 219 #endif 220 221 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 222 { 223 target_ulong mask = 0xff; 224 target_ulong ra = 0; 225 int i; 226 227 for (i = 0; i < sizeof(target_ulong); i++) { 228 if ((rs & mask) == (rb & mask)) { 229 ra |= mask; 230 } 231 mask <<= 8; 232 } 233 return ra; 234 } 235 236 /* shift right arithmetic helper */ 237 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 238 target_ulong shift) 239 { 240 int32_t ret; 241 242 if (likely(!(shift & 0x20))) { 243 if (likely((uint32_t)shift != 0)) { 244 shift &= 0x1f; 245 ret = (int32_t)value >> shift; 246 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 247 env->ca32 = env->ca = 0; 248 } else { 249 env->ca32 = env->ca = 1; 250 } 251 } else { 252 ret = (int32_t)value; 253 env->ca32 = env->ca = 0; 254 } 255 } else { 256 ret = (int32_t)value >> 31; 257 env->ca32 = env->ca = (ret != 0); 258 } 259 return (target_long)ret; 260 } 261 262 #if defined(TARGET_PPC64) 263 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 264 target_ulong shift) 265 { 266 int64_t ret; 267 268 if (likely(!(shift & 0x40))) { 269 if (likely((uint64_t)shift != 0)) { 270 shift &= 0x3f; 271 ret = (int64_t)value >> shift; 272 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 273 env->ca32 = env->ca = 0; 274 } else { 275 env->ca32 = env->ca = 1; 276 } 277 } else { 278 ret = (int64_t)value; 279 env->ca32 = env->ca = 0; 280 } 281 } else { 282 ret = (int64_t)value >> 63; 283 env->ca32 = env->ca = (ret != 0); 284 } 285 return ret; 286 } 287 #endif 288 289 #if defined(TARGET_PPC64) 290 target_ulong helper_popcntb(target_ulong val) 291 { 292 /* Note that we don't fold past bytes */ 293 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 294 0x5555555555555555ULL); 295 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 296 0x3333333333333333ULL); 297 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 298 0x0f0f0f0f0f0f0f0fULL); 299 return val; 300 } 301 302 
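
/*
 * For example, helper_popcntw computes an independent population count for
 * each 32-bit word of the operand: 0x00000003000000ffULL yields
 * 0x0000000200000008ULL (two bits set in the upper word, eight in the lower).
 */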
target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words. */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}
#endif

uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
{
    /*
     * Instead of processing the mask bit-by-bit from the most significant to
     * the least significant bit, as described in PowerISA, we'll handle it in
     * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
     * ctz or cto, we negate the mask at the end of the loop.
     */
    target_ulong m, left = 0, right = 0;
    unsigned int n, i = 64;
    bool bit = false; /* tracks if we are processing zeros or ones */

    if (mask == 0 || mask == -1) {
        return src;
    }

    /* Processes the mask in blocks, from LSB to MSB */
    while (i) {
        /* Find how many bits we should take */
        n = ctz64(mask);
        if (n > i) {
            n = i;
        }

        /*
         * Extracts 'n' trailing bits of src and puts them on the leading 'n'
         * bits of 'right' or 'left', pushing down the previously extracted
         * values.
         */
        m = (1ll << n) - 1;
        if (bit) {
            right = ror64(right | (src & m), n);
        } else {
            left = ror64(left | (src & m), n);
        }

        /*
         * Discards the processed bits from 'src' and 'mask'. Note that we are
         * removing 'n' trailing zeros from 'mask', but the logical shift will
         * add 'n' leading zeros back, so the population count of 'mask' is
         * kept the same.
         */
        src >>= n;
        mask >>= n;
        i -= n;
        bit = !bit;
        mask = ~mask;
    }

    /*
     * At the end, 'right' was ror'ed ctpop(mask) times. To put it back in
     * place, we shift it right by the remaining 64 - ctpop(mask) bits.
380 */ 381 if (bit) { 382 n = ctpop64(mask); 383 } else { 384 n = 64 - ctpop64(mask); 385 } 386 387 return left | (right >> n); 388 } 389 390 uint64_t helper_PDEPD(uint64_t src, uint64_t mask) 391 { 392 int i, o; 393 uint64_t result = 0; 394 395 if (mask == -1) { 396 return src; 397 } 398 399 for (i = 0; mask != 0; i++) { 400 o = ctz64(mask); 401 mask &= mask - 1; 402 result |= ((src >> i) & 1) << o; 403 } 404 405 return result; 406 } 407 408 uint64_t helper_PEXTD(uint64_t src, uint64_t mask) 409 { 410 int i, o; 411 uint64_t result = 0; 412 413 if (mask == -1) { 414 return src; 415 } 416 417 for (o = 0; mask != 0; o++) { 418 i = ctz64(mask); 419 mask &= mask - 1; 420 result |= ((src >> i) & 1) << o; 421 } 422 423 return result; 424 } 425 426 /*****************************************************************************/ 427 /* Altivec extension helpers */ 428 #if HOST_BIG_ENDIAN 429 #define VECTOR_FOR_INORDER_I(index, element) \ 430 for (index = 0; index < ARRAY_SIZE(r->element); index++) 431 #else 432 #define VECTOR_FOR_INORDER_I(index, element) \ 433 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--) 434 #endif 435 436 /* Saturating arithmetic helpers. */ 437 #define SATCVT(from, to, from_type, to_type, min, max) \ 438 static inline to_type cvt##from##to(from_type x, int *sat) \ 439 { \ 440 to_type r; \ 441 \ 442 if (x < (from_type)min) { \ 443 r = min; \ 444 *sat = 1; \ 445 } else if (x > (from_type)max) { \ 446 r = max; \ 447 *sat = 1; \ 448 } else { \ 449 r = x; \ 450 } \ 451 return r; \ 452 } 453 #define SATCVTU(from, to, from_type, to_type, min, max) \ 454 static inline to_type cvt##from##to(from_type x, int *sat) \ 455 { \ 456 to_type r; \ 457 \ 458 if (x > (from_type)max) { \ 459 r = max; \ 460 *sat = 1; \ 461 } else { \ 462 r = x; \ 463 } \ 464 return r; \ 465 } 466 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX) 467 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX) 468 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX) 469 470 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX) 471 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX) 472 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX) 473 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX) 474 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX) 475 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX) 476 #undef SATCVT 477 #undef SATCVTU 478 479 void helper_mtvscr(CPUPPCState *env, uint32_t vscr) 480 { 481 ppc_store_vscr(env, vscr); 482 } 483 484 uint32_t helper_mfvscr(CPUPPCState *env) 485 { 486 return ppc_get_vscr(env); 487 } 488 489 static inline void set_vscr_sat(CPUPPCState *env) 490 { 491 /* The choice of non-zero value is arbitrary. 
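     * Any nonzero value stored in vscr_sat is later folded into the sticky
     * VSCR[SAT] bit when the VSCR is read back (helper_mfvscr/ppc_get_vscr).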
*/ 492 env->vscr_sat.u32[0] = 1; 493 } 494 495 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 496 { 497 int i; 498 499 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 500 r->u32[i] = ~a->u32[i] < b->u32[i]; 501 } 502 } 503 504 /* vprtybw */ 505 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b) 506 { 507 int i; 508 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 509 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16); 510 res ^= res >> 8; 511 r->u32[i] = res & 1; 512 } 513 } 514 515 /* vprtybd */ 516 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b) 517 { 518 int i; 519 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 520 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32); 521 res ^= res >> 16; 522 res ^= res >> 8; 523 r->u64[i] = res & 1; 524 } 525 } 526 527 /* vprtybq */ 528 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b) 529 { 530 uint64_t res = b->u64[0] ^ b->u64[1]; 531 res ^= res >> 32; 532 res ^= res >> 16; 533 res ^= res >> 8; 534 r->VsrD(1) = res & 1; 535 r->VsrD(0) = 0; 536 } 537 538 #define VARITHFP(suffix, func) \ 539 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 540 ppc_avr_t *b) \ 541 { \ 542 int i; \ 543 \ 544 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 545 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 546 } \ 547 } 548 VARITHFP(addfp, float32_add) 549 VARITHFP(subfp, float32_sub) 550 VARITHFP(minfp, float32_min) 551 VARITHFP(maxfp, float32_max) 552 #undef VARITHFP 553 554 #define VARITHFPFMA(suffix, type) \ 555 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 556 ppc_avr_t *b, ppc_avr_t *c) \ 557 { \ 558 int i; \ 559 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 560 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 561 type, &env->vec_status); \ 562 } \ 563 } 564 VARITHFPFMA(maddfp, 0); 565 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 566 #undef VARITHFPFMA 567 568 #define VARITHSAT_CASE(type, op, cvt, element) \ 569 { \ 570 type result = (type)a->element[i] op (type)b->element[i]; \ 571 r->element[i] = cvt(result, &sat); \ 572 } 573 574 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 575 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 576 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 577 { \ 578 int sat = 0; \ 579 int i; \ 580 \ 581 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 582 VARITHSAT_CASE(optype, op, cvt, element); \ 583 } \ 584 if (sat) { \ 585 vscr_sat->u32[0] = 1; \ 586 } \ 587 } 588 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 589 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 590 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 591 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 592 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 593 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 594 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 595 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 596 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 597 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 598 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 599 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 600 #undef VARITHSAT_CASE 601 #undef VARITHSAT_DO 602 #undef VARITHSAT_SIGNED 603 #undef VARITHSAT_UNSIGNED 604 605 #define VAVG_DO(name, element, etype) \ 606 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 607 { \ 608 int i; \ 609 \ 610 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 611 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 612 r->element[i] = x >> 1; \ 613 } \ 614 } 615 616 #define VAVG(type, 
signed_element, signed_type, unsigned_element, \ 617 unsigned_type) \ 618 VAVG_DO(avgs##type, signed_element, signed_type) \ 619 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 620 VAVG(b, s8, int16_t, u8, uint16_t) 621 VAVG(h, s16, int32_t, u16, uint32_t) 622 VAVG(w, s32, int64_t, u32, uint64_t) 623 #undef VAVG_DO 624 #undef VAVG 625 626 #define VABSDU_DO(name, element) \ 627 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 628 { \ 629 int i; \ 630 \ 631 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 632 r->element[i] = (a->element[i] > b->element[i]) ? \ 633 (a->element[i] - b->element[i]) : \ 634 (b->element[i] - a->element[i]); \ 635 } \ 636 } 637 638 /* 639 * VABSDU - Vector absolute difference unsigned 640 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 641 * element - element type to access from vector 642 */ 643 #define VABSDU(type, element) \ 644 VABSDU_DO(absdu##type, element) 645 VABSDU(b, u8) 646 VABSDU(h, u16) 647 VABSDU(w, u32) 648 #undef VABSDU_DO 649 #undef VABSDU 650 651 #define VCF(suffix, cvt, element) \ 652 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 653 ppc_avr_t *b, uint32_t uim) \ 654 { \ 655 int i; \ 656 \ 657 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 658 float32 t = cvt(b->element[i], &env->vec_status); \ 659 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 660 } \ 661 } 662 VCF(ux, uint32_to_float32, u32) 663 VCF(sx, int32_to_float32, s32) 664 #undef VCF 665 666 #define VCMPNEZ(NAME, ELEM) \ 667 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \ 668 { \ 669 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \ 670 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \ 671 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \ 672 } \ 673 } 674 VCMPNEZ(VCMPNEZB, u8) 675 VCMPNEZ(VCMPNEZH, u16) 676 VCMPNEZ(VCMPNEZW, u32) 677 #undef VCMPNEZ 678 679 #define VCMPFP_DO(suffix, compare, order, record) \ 680 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 681 ppc_avr_t *a, ppc_avr_t *b) \ 682 { \ 683 uint32_t ones = (uint32_t)-1; \ 684 uint32_t all = ones; \ 685 uint32_t none = 0; \ 686 int i; \ 687 \ 688 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 689 uint32_t result; \ 690 FloatRelation rel = \ 691 float32_compare_quiet(a->f32[i], b->f32[i], \ 692 &env->vec_status); \ 693 if (rel == float_relation_unordered) { \ 694 result = 0; \ 695 } else if (rel compare order) { \ 696 result = ones; \ 697 } else { \ 698 result = 0; \ 699 } \ 700 r->u32[i] = result; \ 701 all &= result; \ 702 none |= result; \ 703 } \ 704 if (record) { \ 705 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 706 } \ 707 } 708 #define VCMPFP(suffix, compare, order) \ 709 VCMPFP_DO(suffix, compare, order, 0) \ 710 VCMPFP_DO(suffix##_dot, compare, order, 1) 711 VCMPFP(eqfp, ==, float_relation_equal) 712 VCMPFP(gefp, !=, float_relation_less) 713 VCMPFP(gtfp, ==, float_relation_greater) 714 #undef VCMPFP_DO 715 #undef VCMPFP 716 717 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 718 ppc_avr_t *a, ppc_avr_t *b, int record) 719 { 720 int i; 721 int all_in = 0; 722 723 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 724 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 725 &env->vec_status); 726 if (le_rel == float_relation_unordered) { 727 r->u32[i] = 0xc0000000; 728 all_in = 1; 729 } else { 730 float32 bneg = float32_chs(b->f32[i]); 731 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, 732 &env->vec_status); 733 int le = le_rel != float_relation_greater; 734 int ge = 
ge_rel != float_relation_less; 735 736 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 737 all_in |= (!le | !ge); 738 } 739 } 740 if (record) { 741 env->crf[6] = (all_in == 0) << 1; 742 } 743 } 744 745 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 746 { 747 vcmpbfp_internal(env, r, a, b, 0); 748 } 749 750 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 751 ppc_avr_t *b) 752 { 753 vcmpbfp_internal(env, r, a, b, 1); 754 } 755 756 #define VCT(suffix, satcvt, element) \ 757 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 758 ppc_avr_t *b, uint32_t uim) \ 759 { \ 760 int i; \ 761 int sat = 0; \ 762 float_status s = env->vec_status; \ 763 \ 764 set_float_rounding_mode(float_round_to_zero, &s); \ 765 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 766 if (float32_is_any_nan(b->f32[i])) { \ 767 r->element[i] = 0; \ 768 } else { \ 769 float64 t = float32_to_float64(b->f32[i], &s); \ 770 int64_t j; \ 771 \ 772 t = float64_scalbn(t, uim, &s); \ 773 j = float64_to_int64(t, &s); \ 774 r->element[i] = satcvt(j, &sat); \ 775 } \ 776 } \ 777 if (sat) { \ 778 set_vscr_sat(env); \ 779 } \ 780 } 781 VCT(uxs, cvtsduw, u32) 782 VCT(sxs, cvtsdsw, s32) 783 #undef VCT 784 785 target_ulong helper_vclzlsbb(ppc_avr_t *r) 786 { 787 target_ulong count = 0; 788 int i; 789 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 790 if (r->VsrB(i) & 0x01) { 791 break; 792 } 793 count++; 794 } 795 return count; 796 } 797 798 target_ulong helper_vctzlsbb(ppc_avr_t *r) 799 { 800 target_ulong count = 0; 801 int i; 802 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 803 if (r->VsrB(i) & 0x01) { 804 break; 805 } 806 count++; 807 } 808 return count; 809 } 810 811 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 812 ppc_avr_t *b, ppc_avr_t *c) 813 { 814 int sat = 0; 815 int i; 816 817 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 818 int32_t prod = a->s16[i] * b->s16[i]; 819 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 820 821 r->s16[i] = cvtswsh(t, &sat); 822 } 823 824 if (sat) { 825 set_vscr_sat(env); 826 } 827 } 828 829 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 830 ppc_avr_t *b, ppc_avr_t *c) 831 { 832 int sat = 0; 833 int i; 834 835 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 836 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 837 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 838 r->s16[i] = cvtswsh(t, &sat); 839 } 840 841 if (sat) { 842 set_vscr_sat(env); 843 } 844 } 845 846 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 847 { 848 int i; 849 850 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 851 int32_t prod = a->s16[i] * b->s16[i]; 852 r->s16[i] = (int16_t) (prod + c->s16[i]); 853 } 854 } 855 856 #define VMRG_DO(name, element, access, ofs) \ 857 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 858 { \ 859 ppc_avr_t result; \ 860 int i, half = ARRAY_SIZE(r->element) / 2; \ 861 \ 862 for (i = 0; i < half; i++) { \ 863 result.access(i * 2 + 0) = a->access(i + ofs); \ 864 result.access(i * 2 + 1) = b->access(i + ofs); \ 865 } \ 866 *r = result; \ 867 } 868 869 #define VMRG(suffix, element, access) \ 870 VMRG_DO(mrgl##suffix, element, access, half) \ 871 VMRG_DO(mrgh##suffix, element, access, 0) 872 VMRG(b, u8, VsrB) 873 VMRG(h, u16, VsrH) 874 VMRG(w, u32, VsrW) 875 #undef VMRG_DO 876 #undef VMRG 877 878 void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 879 { 880 int32_t prod[16]; 881 int i; 882 883 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 884 prod[i] = 
(int32_t)a->s8[i] * b->u8[i]; 885 } 886 887 VECTOR_FOR_INORDER_I(i, s32) { 888 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 889 prod[4 * i + 2] + prod[4 * i + 3]; 890 } 891 } 892 893 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 894 ppc_avr_t *b, ppc_avr_t *c) 895 { 896 int32_t prod[8]; 897 int i; 898 899 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 900 prod[i] = a->s16[i] * b->s16[i]; 901 } 902 903 VECTOR_FOR_INORDER_I(i, s32) { 904 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 905 } 906 } 907 908 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 909 ppc_avr_t *b, ppc_avr_t *c) 910 { 911 int32_t prod[8]; 912 int i; 913 int sat = 0; 914 915 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 916 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 917 } 918 919 VECTOR_FOR_INORDER_I(i, s32) { 920 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 921 922 r->u32[i] = cvtsdsw(t, &sat); 923 } 924 925 if (sat) { 926 set_vscr_sat(env); 927 } 928 } 929 930 void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 931 { 932 uint16_t prod[16]; 933 int i; 934 935 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 936 prod[i] = a->u8[i] * b->u8[i]; 937 } 938 939 VECTOR_FOR_INORDER_I(i, u32) { 940 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 941 prod[4 * i + 2] + prod[4 * i + 3]; 942 } 943 } 944 945 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 946 ppc_avr_t *b, ppc_avr_t *c) 947 { 948 uint32_t prod[8]; 949 int i; 950 951 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 952 prod[i] = a->u16[i] * b->u16[i]; 953 } 954 955 VECTOR_FOR_INORDER_I(i, u32) { 956 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 957 } 958 } 959 960 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 961 ppc_avr_t *b, ppc_avr_t *c) 962 { 963 uint32_t prod[8]; 964 int i; 965 int sat = 0; 966 967 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 968 prod[i] = a->u16[i] * b->u16[i]; 969 } 970 971 VECTOR_FOR_INORDER_I(i, s32) { 972 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 973 974 r->u32[i] = cvtuduw(t, &sat); 975 } 976 977 if (sat) { 978 set_vscr_sat(env); 979 } 980 } 981 982 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 983 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 984 { \ 985 int i; \ 986 \ 987 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 988 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 989 (cast)b->mul_access(i); \ 990 } \ 991 } 992 993 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 994 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 995 { \ 996 int i; \ 997 \ 998 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 999 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1000 (cast)b->mul_access(i + 1); \ 1001 } \ 1002 } 1003 1004 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1005 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \ 1006 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast) 1007 VMUL(SB, s8, VsrSB, VsrSH, int16_t) 1008 VMUL(SH, s16, VsrSH, VsrSW, int32_t) 1009 VMUL(SW, s32, VsrSW, VsrSD, int64_t) 1010 VMUL(UB, u8, VsrB, VsrH, uint16_t) 1011 VMUL(UH, u16, VsrH, VsrW, uint32_t) 1012 VMUL(UW, u32, VsrW, VsrD, uint64_t) 1013 #undef VMUL_DO_EVN 1014 #undef VMUL_DO_ODD 1015 #undef VMUL 1016 1017 void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv, 1018 target_ulong uim) 1019 { 1020 int i, idx; 1021 
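    /*
     * Each permute-control byte of pcv selects one output byte: bits 7:5
     * must match the uim immediate for the byte to be written at all, and
     * bits 4:0 index into the 32-byte concatenation of s0 (0..15) and
     * s1 (16..31). Output bytes that are not selected remain zero.
     */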
ppc_vsr_t tmp = { .u64 = {0, 0} }; 1022 1023 for (i = 0; i < ARRAY_SIZE(t->u8); i++) { 1024 if ((pcv->VsrB(i) >> 5) == uim) { 1025 idx = pcv->VsrB(i) & 0x1f; 1026 if (idx < ARRAY_SIZE(t->u8)) { 1027 tmp.VsrB(i) = s0->VsrB(idx); 1028 } else { 1029 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8)); 1030 } 1031 } 1032 } 1033 1034 *t = tmp; 1035 } 1036 1037 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1038 { 1039 ppc_avr_t result; 1040 int i; 1041 1042 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1043 int s = c->VsrB(i) & 0x1f; 1044 int index = s & 0xf; 1045 1046 if (s & 0x10) { 1047 result.VsrB(i) = b->VsrB(index); 1048 } else { 1049 result.VsrB(i) = a->VsrB(index); 1050 } 1051 } 1052 *r = result; 1053 } 1054 1055 void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1056 { 1057 ppc_avr_t result; 1058 int i; 1059 1060 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1061 int s = c->VsrB(i) & 0x1f; 1062 int index = 15 - (s & 0xf); 1063 1064 if (s & 0x10) { 1065 result.VsrB(i) = a->VsrB(index); 1066 } else { 1067 result.VsrB(i) = b->VsrB(index); 1068 } 1069 } 1070 *r = result; 1071 } 1072 1073 #define XXGENPCV_BE_EXP(NAME, SZ) \ 1074 void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ 1075 { \ 1076 ppc_vsr_t tmp; \ 1077 \ 1078 /* Initialize tmp with the result of an all-zeros mask */ \ 1079 tmp.VsrD(0) = 0x1011121314151617; \ 1080 tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \ 1081 \ 1082 /* Iterate over the most significant byte of each element */ \ 1083 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1084 if (b->VsrB(i) & 0x80) { \ 1085 /* Update each byte of the element */ \ 1086 for (int k = 0; k < SZ; k++) { \ 1087 tmp.VsrB(i + k) = j + k; \ 1088 } \ 1089 j += SZ; \ 1090 } \ 1091 } \ 1092 \ 1093 *t = tmp; \ 1094 } 1095 1096 #define XXGENPCV_BE_COMP(NAME, SZ) \ 1097 void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ 1098 { \ 1099 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ 1100 \ 1101 /* Iterate over the most significant byte of each element */ \ 1102 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1103 if (b->VsrB(i) & 0x80) { \ 1104 /* Update each byte of the element */ \ 1105 for (int k = 0; k < SZ; k++) { \ 1106 tmp.VsrB(j + k) = i + k; \ 1107 } \ 1108 j += SZ; \ 1109 } \ 1110 } \ 1111 \ 1112 *t = tmp; \ 1113 } 1114 1115 #define XXGENPCV_LE_EXP(NAME, SZ) \ 1116 void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ 1117 { \ 1118 ppc_vsr_t tmp; \ 1119 \ 1120 /* Initialize tmp with the result of an all-zeros mask */ \ 1121 tmp.VsrD(0) = 0x1F1E1D1C1B1A1918; \ 1122 tmp.VsrD(1) = 0x1716151413121110; \ 1123 \ 1124 /* Iterate over the most significant byte of each element */ \ 1125 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1126 /* Reverse indexing of "i" */ \ 1127 const int idx = ARRAY_SIZE(b->u8) - i - SZ; \ 1128 if (b->VsrB(idx) & 0x80) { \ 1129 /* Update each byte of the element */ \ 1130 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \ 1131 tmp.VsrB(idx + rk) = j + k; \ 1132 } \ 1133 j += SZ; \ 1134 } \ 1135 } \ 1136 \ 1137 *t = tmp; \ 1138 } 1139 1140 #define XXGENPCV_LE_COMP(NAME, SZ) \ 1141 void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ 1142 { \ 1143 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ 1144 \ 1145 /* Iterate over the most significant byte of each element */ \ 1146 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1147 if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) { \ 1148 /* Update each byte of the element */ \ 1149 for (int k = 0, rk = 
SZ - 1; k < SZ; k++, rk--) { \ 1150 /* Reverse indexing of "j" */ \ 1151 const int idx = ARRAY_SIZE(b->u8) - j - SZ; \ 1152 tmp.VsrB(idx + rk) = i + k; \ 1153 } \ 1154 j += SZ; \ 1155 } \ 1156 } \ 1157 \ 1158 *t = tmp; \ 1159 } 1160 1161 #define XXGENPCV(NAME, SZ) \ 1162 XXGENPCV_BE_EXP(NAME, SZ) \ 1163 XXGENPCV_BE_COMP(NAME, SZ) \ 1164 XXGENPCV_LE_EXP(NAME, SZ) \ 1165 XXGENPCV_LE_COMP(NAME, SZ) \ 1166 1167 XXGENPCV(XXGENPCVBM, 1) 1168 XXGENPCV(XXGENPCVHM, 2) 1169 XXGENPCV(XXGENPCVWM, 4) 1170 XXGENPCV(XXGENPCVDM, 8) 1171 1172 #undef XXGENPCV_BE_EXP 1173 #undef XXGENPCV_BE_COMP 1174 #undef XXGENPCV_LE_EXP 1175 #undef XXGENPCV_LE_COMP 1176 #undef XXGENPCV 1177 1178 #if HOST_BIG_ENDIAN 1179 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1180 #define VBPERMD_INDEX(i) (i) 1181 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1182 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) 1183 #else 1184 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1185 #define VBPERMD_INDEX(i) (1 - i) 1186 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1187 #define EXTRACT_BIT(avr, i, index) \ 1188 (extract64((avr)->u64[1 - i], 63 - index, 1)) 1189 #endif 1190 1191 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1192 { 1193 int i, j; 1194 ppc_avr_t result = { .u64 = { 0, 0 } }; 1195 VECTOR_FOR_INORDER_I(i, u64) { 1196 for (j = 0; j < 8; j++) { 1197 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1198 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1199 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1200 } 1201 } 1202 } 1203 *r = result; 1204 } 1205 1206 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1207 { 1208 int i; 1209 uint64_t perm = 0; 1210 1211 VECTOR_FOR_INORDER_I(i, u8) { 1212 int index = VBPERMQ_INDEX(b, i); 1213 1214 if (index < 128) { 1215 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1216 if (a->u64[VBPERMQ_DW(index)] & mask) { 1217 perm |= (0x8000 >> i); 1218 } 1219 } 1220 } 1221 1222 r->VsrD(0) = perm; 1223 r->VsrD(1) = 0; 1224 } 1225 1226 #undef VBPERMQ_INDEX 1227 #undef VBPERMQ_DW 1228 1229 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1230 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1231 { \ 1232 int i, j; \ 1233 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ 1234 \ 1235 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1236 prod[i] = 0; \ 1237 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1238 if (a->srcfld[i] & (1ull << j)) { \ 1239 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1240 } \ 1241 } \ 1242 } \ 1243 \ 1244 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1245 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ 1246 } \ 1247 } 1248 1249 PMSUM(vpmsumb, u8, u16, uint16_t) 1250 PMSUM(vpmsumh, u16, u32, uint32_t) 1251 PMSUM(vpmsumw, u32, u64, uint64_t) 1252 1253 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1254 { 1255 1256 #ifdef CONFIG_INT128 1257 int i, j; 1258 __uint128_t prod[2]; 1259 1260 VECTOR_FOR_INORDER_I(i, u64) { 1261 prod[i] = 0; 1262 for (j = 0; j < 64; j++) { 1263 if (a->u64[i] & (1ull << j)) { 1264 prod[i] ^= (((__uint128_t)b->u64[i]) << j); 1265 } 1266 } 1267 } 1268 1269 r->u128 = prod[0] ^ prod[1]; 1270 1271 #else 1272 int i, j; 1273 ppc_avr_t prod[2]; 1274 1275 VECTOR_FOR_INORDER_I(i, u64) { 1276 prod[i].VsrD(1) = prod[i].VsrD(0) = 0; 1277 for (j = 0; j < 64; j++) { 1278 if (a->u64[i] & (1ull << j)) { 1279 ppc_avr_t bshift; 1280 if (j == 0) { 1281 bshift.VsrD(0) = 0; 1282 bshift.VsrD(1) = b->u64[i]; 1283 } else { 1284 bshift.VsrD(0) = b->u64[i] >> (64 - j); 1285 bshift.VsrD(1) = b->u64[i] << j; 1286 } 1287 
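                /* XOR-accumulate the 128-bit partial product b->u64[i] << j */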
prod[i].VsrD(1) ^= bshift.VsrD(1); 1288 prod[i].VsrD(0) ^= bshift.VsrD(0); 1289 } 1290 } 1291 } 1292 1293 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1); 1294 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0); 1295 #endif 1296 } 1297 1298 1299 #if HOST_BIG_ENDIAN 1300 #define PKBIG 1 1301 #else 1302 #define PKBIG 0 1303 #endif 1304 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1305 { 1306 int i, j; 1307 ppc_avr_t result; 1308 #if HOST_BIG_ENDIAN 1309 const ppc_avr_t *x[2] = { a, b }; 1310 #else 1311 const ppc_avr_t *x[2] = { b, a }; 1312 #endif 1313 1314 VECTOR_FOR_INORDER_I(i, u64) { 1315 VECTOR_FOR_INORDER_I(j, u32) { 1316 uint32_t e = x[i]->u32[j]; 1317 1318 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1319 ((e >> 6) & 0x3e0) | 1320 ((e >> 3) & 0x1f)); 1321 } 1322 } 1323 *r = result; 1324 } 1325 1326 #define VPK(suffix, from, to, cvt, dosat) \ 1327 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1328 ppc_avr_t *a, ppc_avr_t *b) \ 1329 { \ 1330 int i; \ 1331 int sat = 0; \ 1332 ppc_avr_t result; \ 1333 ppc_avr_t *a0 = PKBIG ? a : b; \ 1334 ppc_avr_t *a1 = PKBIG ? b : a; \ 1335 \ 1336 VECTOR_FOR_INORDER_I(i, from) { \ 1337 result.to[i] = cvt(a0->from[i], &sat); \ 1338 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1339 } \ 1340 *r = result; \ 1341 if (dosat && sat) { \ 1342 set_vscr_sat(env); \ 1343 } \ 1344 } 1345 #define I(x, y) (x) 1346 VPK(shss, s16, s8, cvtshsb, 1) 1347 VPK(shus, s16, u8, cvtshub, 1) 1348 VPK(swss, s32, s16, cvtswsh, 1) 1349 VPK(swus, s32, u16, cvtswuh, 1) 1350 VPK(sdss, s64, s32, cvtsdsw, 1) 1351 VPK(sdus, s64, u32, cvtsduw, 1) 1352 VPK(uhus, u16, u8, cvtuhub, 1) 1353 VPK(uwus, u32, u16, cvtuwuh, 1) 1354 VPK(udus, u64, u32, cvtuduw, 1) 1355 VPK(uhum, u16, u8, I, 0) 1356 VPK(uwum, u32, u16, I, 0) 1357 VPK(udum, u64, u32, I, 0) 1358 #undef I 1359 #undef VPK 1360 #undef PKBIG 1361 1362 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1363 { 1364 int i; 1365 1366 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1367 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1368 } 1369 } 1370 1371 #define VRFI(suffix, rounding) \ 1372 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1373 ppc_avr_t *b) \ 1374 { \ 1375 int i; \ 1376 float_status s = env->vec_status; \ 1377 \ 1378 set_float_rounding_mode(rounding, &s); \ 1379 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1380 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1381 } \ 1382 } 1383 VRFI(n, float_round_nearest_even) 1384 VRFI(m, float_round_down) 1385 VRFI(p, float_round_up) 1386 VRFI(z, float_round_to_zero) 1387 #undef VRFI 1388 1389 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1390 { 1391 int i; 1392 1393 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1394 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1395 1396 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1397 } 1398 } 1399 1400 #define VRLMI(name, size, element, insert) \ 1401 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 1402 { \ 1403 int i; \ 1404 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1405 uint##size##_t src1 = a->element[i]; \ 1406 uint##size##_t src2 = b->element[i]; \ 1407 uint##size##_t src3 = r->element[i]; \ 1408 uint##size##_t begin, end, shift, mask, rot_val; \ 1409 \ 1410 shift = extract##size(src2, 0, 6); \ 1411 end = extract##size(src2, 8, 6); \ 1412 begin = extract##size(src2, 16, 6); \ 1413 rot_val = rol##size(src1, shift); \ 1414 mask = mask_u##size(begin, end); \ 1415 if (insert) { \ 1416 
            r->element[i] = (rot_val & mask) | (src3 & ~mask);          \
        } else {                                                        \
            r->element[i] = (rot_val & mask);                           \
        }                                                               \
    }                                                                   \
}

VRLMI(VRLDMI, 64, u64, 1);
VRLMI(VRLWMI, 32, u32, 1);
VRLMI(VRLDNM, 64, u64, 0);
VRLMI(VRLWNM, 32, u32, 0);

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
    }
}

#define VEXTU_X_DO(name, size, left)                            \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
{                                                               \
    int index = (a & 0xf) * 8;                                  \
    if (left) {                                                 \
        index = 128 - index - size;                             \
    }                                                           \
    return int128_getlo(int128_rshift(b->s128, index)) &        \
        MAKE_64BIT_MASK(0, size);                               \
}
VEXTU_X_DO(vextublx, 8, 1)
VEXTU_X_DO(vextuhlx, 16, 1)
VEXTU_X_DO(vextuwlx, 32, 1)
VEXTU_X_DO(vextubrx, 8, 0)
VEXTU_X_DO(vextuhrx, 16, 0)
VEXTU_X_DO(vextuwrx, 32, 0)
#undef VEXTU_X_DO

void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->VsrB(i) & 0x7;               /* extract shift value */
        bytes = (a->VsrB(i) << 8) +             /* extract adjacent bytes */
            (((i + 1) < size) ? a->VsrB(i + 1) : 0);
        r->VsrB(i) = (bytes << shift) >> 8;     /* shift and store result */
    }
}

void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /*
     * Use reverse order, as the destination and source registers can be the
     * same. Since the register is modified in place (saving a temporary),
     * processing in reverse order guarantees that a computed result is not
     * fed back into the computation.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->VsrB(i) & 0x7;               /* extract shift value */
        bytes = ((i ? 
a->VsrB(i - 1) : 0) << 8) + a->VsrB(i); 1491 /* extract adjacent bytes */ 1492 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */ 1493 } 1494 } 1495 1496 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1497 { 1498 int sh = shift & 0xf; 1499 int i; 1500 ppc_avr_t result; 1501 1502 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1503 int index = sh + i; 1504 if (index > 0xf) { 1505 result.VsrB(i) = b->VsrB(index - 0x10); 1506 } else { 1507 result.VsrB(i) = a->VsrB(index); 1508 } 1509 } 1510 *r = result; 1511 } 1512 1513 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1514 { 1515 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1516 1517 #if HOST_BIG_ENDIAN 1518 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1519 memset(&r->u8[16 - sh], 0, sh); 1520 #else 1521 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1522 memset(&r->u8[0], 0, sh); 1523 #endif 1524 } 1525 1526 #if HOST_BIG_ENDIAN 1527 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX]) 1528 #else 1529 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1) 1530 #endif 1531 1532 #define VINSX(SUFFIX, TYPE) \ 1533 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \ 1534 uint64_t val, target_ulong index) \ 1535 { \ 1536 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \ 1537 target_long idx = index; \ 1538 \ 1539 if (idx < 0 || idx > maxidx) { \ 1540 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \ 1541 qemu_log_mask(LOG_GUEST_ERROR, \ 1542 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \ 1543 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \ 1544 } else { \ 1545 TYPE src = val; \ 1546 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \ 1547 } \ 1548 } 1549 VINSX(B, uint8_t) 1550 VINSX(H, uint16_t) 1551 VINSX(W, uint32_t) 1552 VINSX(D, uint64_t) 1553 #undef ELEM_ADDR 1554 #undef VINSX 1555 #if HOST_BIG_ENDIAN 1556 #define VEXTDVLX(NAME, SIZE) \ 1557 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1558 target_ulong index) \ 1559 { \ 1560 const target_long idx = index; \ 1561 ppc_avr_t tmp[2] = { *a, *b }; \ 1562 memset(t, 0, sizeof(*t)); \ 1563 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1564 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \ 1565 } else { \ 1566 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1567 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1568 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \ 1569 } \ 1570 } 1571 #else 1572 #define VEXTDVLX(NAME, SIZE) \ 1573 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1574 target_ulong index) \ 1575 { \ 1576 const target_long idx = index; \ 1577 ppc_avr_t tmp[2] = { *b, *a }; \ 1578 memset(t, 0, sizeof(*t)); \ 1579 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1580 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \ 1581 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \ 1582 } else { \ 1583 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1584 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1585 env->nip, idx < 0 ? 
SIZE - idx : idx, 32 - SIZE); \ 1586 } \ 1587 } 1588 #endif 1589 VEXTDVLX(VEXTDUBVLX, 1) 1590 VEXTDVLX(VEXTDUHVLX, 2) 1591 VEXTDVLX(VEXTDUWVLX, 4) 1592 VEXTDVLX(VEXTDDVLX, 8) 1593 #undef VEXTDVLX 1594 #if HOST_BIG_ENDIAN 1595 #define VEXTRACT(suffix, element) \ 1596 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1597 { \ 1598 uint32_t es = sizeof(r->element[0]); \ 1599 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1600 memset(&r->u8[8], 0, 8); \ 1601 memset(&r->u8[0], 0, 8 - es); \ 1602 } 1603 #else 1604 #define VEXTRACT(suffix, element) \ 1605 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1606 { \ 1607 uint32_t es = sizeof(r->element[0]); \ 1608 uint32_t s = (16 - index) - es; \ 1609 memmove(&r->u8[8], &b->u8[s], es); \ 1610 memset(&r->u8[0], 0, 8); \ 1611 memset(&r->u8[8 + es], 0, 8 - es); \ 1612 } 1613 #endif 1614 VEXTRACT(ub, u8) 1615 VEXTRACT(uh, u16) 1616 VEXTRACT(uw, u32) 1617 VEXTRACT(d, u64) 1618 #undef VEXTRACT 1619 1620 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \ 1621 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \ 1622 { \ 1623 int i, idx, crf = 0; \ 1624 \ 1625 for (i = 0; i < NUM_ELEMS; i++) { \ 1626 idx = LEFT ? i : NUM_ELEMS - i - 1; \ 1627 if (b->Vsr##ELEM(idx)) { \ 1628 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \ 1629 } else { \ 1630 crf = 0b0010; \ 1631 break; \ 1632 } \ 1633 } \ 1634 \ 1635 for (; i < NUM_ELEMS; i++) { \ 1636 idx = LEFT ? i : NUM_ELEMS - i - 1; \ 1637 t->Vsr##ELEM(idx) = 0; \ 1638 } \ 1639 \ 1640 return crf; \ 1641 } 1642 VSTRI(VSTRIBL, B, 16, true) 1643 VSTRI(VSTRIBR, B, 16, false) 1644 VSTRI(VSTRIHL, H, 8, true) 1645 VSTRI(VSTRIHR, H, 8, false) 1646 #undef VSTRI 1647 1648 void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index) 1649 { 1650 ppc_vsr_t t = { }; 1651 size_t es = sizeof(uint32_t); 1652 uint32_t ext_index; 1653 int i; 1654 1655 ext_index = index; 1656 for (i = 0; i < es; i++, ext_index++) { 1657 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16); 1658 } 1659 1660 *xt = t; 1661 } 1662 1663 void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index) 1664 { 1665 ppc_vsr_t t = *xt; 1666 size_t es = sizeof(uint32_t); 1667 int ins_index, i = 0; 1668 1669 ins_index = index; 1670 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1671 t.VsrB(ins_index) = xb->VsrB(8 - es + i); 1672 } 1673 1674 *xt = t; 1675 } 1676 1677 void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c, 1678 uint32_t desc) 1679 { 1680 /* 1681 * Instead of processing imm bit-by-bit, we'll skip the computation of 1682 * conjunctions whose corresponding bit is unset. 1683 */ 1684 int bit, imm = simd_data(desc); 1685 Int128 conj, disj = int128_zero(); 1686 1687 /* Iterate over set bits from the least to the most significant bit */ 1688 while (imm) { 1689 /* 1690 * Get the next bit to be processed with ctz64. Invert the result of 1691 * ctz64 to match the indexing used by PowerISA. 
1692 */ 1693 bit = 7 - ctzl(imm); 1694 if (bit & 0x4) { 1695 conj = a->s128; 1696 } else { 1697 conj = int128_not(a->s128); 1698 } 1699 if (bit & 0x2) { 1700 conj = int128_and(conj, b->s128); 1701 } else { 1702 conj = int128_and(conj, int128_not(b->s128)); 1703 } 1704 if (bit & 0x1) { 1705 conj = int128_and(conj, c->s128); 1706 } else { 1707 conj = int128_and(conj, int128_not(c->s128)); 1708 } 1709 disj = int128_or(disj, conj); 1710 1711 /* Unset the least significant bit that is set */ 1712 imm &= imm - 1; 1713 } 1714 1715 t->s128 = disj; 1716 } 1717 1718 #define XXBLEND(name, sz) \ 1719 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1720 ppc_avr_t *c, uint32_t desc) \ 1721 { \ 1722 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \ 1723 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \ 1724 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \ 1725 } \ 1726 } 1727 XXBLEND(B, 8) 1728 XXBLEND(H, 16) 1729 XXBLEND(W, 32) 1730 XXBLEND(D, 64) 1731 #undef XXBLEND 1732 1733 #define VNEG(name, element) \ 1734 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1735 { \ 1736 int i; \ 1737 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1738 r->element[i] = -b->element[i]; \ 1739 } \ 1740 } 1741 VNEG(vnegw, s32) 1742 VNEG(vnegd, s64) 1743 #undef VNEG 1744 1745 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1746 { 1747 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1748 1749 #if HOST_BIG_ENDIAN 1750 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1751 memset(&r->u8[0], 0, sh); 1752 #else 1753 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1754 memset(&r->u8[16 - sh], 0, sh); 1755 #endif 1756 } 1757 1758 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1759 { 1760 int i; 1761 1762 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1763 r->u32[i] = a->u32[i] >= b->u32[i]; 1764 } 1765 } 1766 1767 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1768 { 1769 int64_t t; 1770 int i, upper; 1771 ppc_avr_t result; 1772 int sat = 0; 1773 1774 upper = ARRAY_SIZE(r->s32) - 1; 1775 t = (int64_t)b->VsrSW(upper); 1776 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1777 t += a->VsrSW(i); 1778 result.VsrSW(i) = 0; 1779 } 1780 result.VsrSW(upper) = cvtsdsw(t, &sat); 1781 *r = result; 1782 1783 if (sat) { 1784 set_vscr_sat(env); 1785 } 1786 } 1787 1788 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1789 { 1790 int i, j, upper; 1791 ppc_avr_t result; 1792 int sat = 0; 1793 1794 upper = 1; 1795 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1796 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 1797 1798 result.VsrD(i) = 0; 1799 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 1800 t += a->VsrSW(2 * i + j); 1801 } 1802 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 1803 } 1804 1805 *r = result; 1806 if (sat) { 1807 set_vscr_sat(env); 1808 } 1809 } 1810 1811 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1812 { 1813 int i, j; 1814 int sat = 0; 1815 1816 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1817 int64_t t = (int64_t)b->s32[i]; 1818 1819 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 1820 t += a->s8[4 * i + j]; 1821 } 1822 r->s32[i] = cvtsdsw(t, &sat); 1823 } 1824 1825 if (sat) { 1826 set_vscr_sat(env); 1827 } 1828 } 1829 1830 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1831 { 1832 int sat = 0; 1833 int i; 1834 1835 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1836 int64_t t = (int64_t)b->s32[i]; 1837 1838 t += a->s16[2 * i] + a->s16[2 * i + 1]; 1839 r->s32[i] = cvtsdsw(t, &sat); 1840 } 
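    /*
     * cvtsdsw saturated each 64-bit partial sum to the signed 32-bit range;
     * any saturation that occurred is made sticky in VSCR[SAT] below.
     */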
1841 1842 if (sat) { 1843 set_vscr_sat(env); 1844 } 1845 } 1846 1847 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1848 { 1849 int i, j; 1850 int sat = 0; 1851 1852 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1853 uint64_t t = (uint64_t)b->u32[i]; 1854 1855 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 1856 t += a->u8[4 * i + j]; 1857 } 1858 r->u32[i] = cvtuduw(t, &sat); 1859 } 1860 1861 if (sat) { 1862 set_vscr_sat(env); 1863 } 1864 } 1865 1866 #if HOST_BIG_ENDIAN 1867 #define UPKHI 1 1868 #define UPKLO 0 1869 #else 1870 #define UPKHI 0 1871 #define UPKLO 1 1872 #endif 1873 #define VUPKPX(suffix, hi) \ 1874 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1875 { \ 1876 int i; \ 1877 ppc_avr_t result; \ 1878 \ 1879 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 1880 uint16_t e = b->u16[hi ? i : i + 4]; \ 1881 uint8_t a = (e >> 15) ? 0xff : 0; \ 1882 uint8_t r = (e >> 10) & 0x1f; \ 1883 uint8_t g = (e >> 5) & 0x1f; \ 1884 uint8_t b = e & 0x1f; \ 1885 \ 1886 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 1887 } \ 1888 *r = result; \ 1889 } 1890 VUPKPX(lpx, UPKLO) 1891 VUPKPX(hpx, UPKHI) 1892 #undef VUPKPX 1893 1894 #define VUPK(suffix, unpacked, packee, hi) \ 1895 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1896 { \ 1897 int i; \ 1898 ppc_avr_t result; \ 1899 \ 1900 if (hi) { \ 1901 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 1902 result.unpacked[i] = b->packee[i]; \ 1903 } \ 1904 } else { \ 1905 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 1906 i++) { \ 1907 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 1908 } \ 1909 } \ 1910 *r = result; \ 1911 } 1912 VUPK(hsb, s16, s8, UPKHI) 1913 VUPK(hsh, s32, s16, UPKHI) 1914 VUPK(hsw, s64, s32, UPKHI) 1915 VUPK(lsb, s16, s8, UPKLO) 1916 VUPK(lsh, s32, s16, UPKLO) 1917 VUPK(lsw, s64, s32, UPKLO) 1918 #undef VUPK 1919 #undef UPKHI 1920 #undef UPKLO 1921 1922 #define VGENERIC_DO(name, element) \ 1923 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 1924 { \ 1925 int i; \ 1926 \ 1927 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1928 r->element[i] = name(b->element[i]); \ 1929 } \ 1930 } 1931 1932 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 1933 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 1934 1935 VGENERIC_DO(clzb, u8) 1936 VGENERIC_DO(clzh, u16) 1937 1938 #undef clzb 1939 #undef clzh 1940 1941 #define ctzb(v) ((v) ? ctz32(v) : 8) 1942 #define ctzh(v) ((v) ? 
ctz32(v) : 16) 1943 #define ctzw(v) ctz32((v)) 1944 #define ctzd(v) ctz64((v)) 1945 1946 VGENERIC_DO(ctzb, u8) 1947 VGENERIC_DO(ctzh, u16) 1948 VGENERIC_DO(ctzw, u32) 1949 VGENERIC_DO(ctzd, u64) 1950 1951 #undef ctzb 1952 #undef ctzh 1953 #undef ctzw 1954 #undef ctzd 1955 1956 #define popcntb(v) ctpop8(v) 1957 #define popcnth(v) ctpop16(v) 1958 #define popcntw(v) ctpop32(v) 1959 #define popcntd(v) ctpop64(v) 1960 1961 VGENERIC_DO(popcntb, u8) 1962 VGENERIC_DO(popcnth, u16) 1963 VGENERIC_DO(popcntw, u32) 1964 VGENERIC_DO(popcntd, u64) 1965 1966 #undef popcntb 1967 #undef popcnth 1968 #undef popcntw 1969 #undef popcntd 1970 1971 #undef VGENERIC_DO 1972 1973 #if HOST_BIG_ENDIAN 1974 #define QW_ONE { .u64 = { 0, 1 } } 1975 #else 1976 #define QW_ONE { .u64 = { 1, 0 } } 1977 #endif 1978 1979 #ifndef CONFIG_INT128 1980 1981 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) 1982 { 1983 t->u64[0] = ~a.u64[0]; 1984 t->u64[1] = ~a.u64[1]; 1985 } 1986 1987 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) 1988 { 1989 if (a.VsrD(0) < b.VsrD(0)) { 1990 return -1; 1991 } else if (a.VsrD(0) > b.VsrD(0)) { 1992 return 1; 1993 } else if (a.VsrD(1) < b.VsrD(1)) { 1994 return -1; 1995 } else if (a.VsrD(1) > b.VsrD(1)) { 1996 return 1; 1997 } else { 1998 return 0; 1999 } 2000 } 2001 2002 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2003 { 2004 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 2005 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 2006 (~a.VsrD(1) < b.VsrD(1)); 2007 } 2008 2009 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2010 { 2011 ppc_avr_t not_a; 2012 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 2013 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 2014 (~a.VsrD(1) < b.VsrD(1)); 2015 avr_qw_not(¬_a, a); 2016 return avr_qw_cmpu(not_a, b) < 0; 2017 } 2018 2019 #endif 2020 2021 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2022 { 2023 #ifdef CONFIG_INT128 2024 r->u128 = a->u128 + b->u128; 2025 #else 2026 avr_qw_add(r, *a, *b); 2027 #endif 2028 } 2029 2030 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2031 { 2032 #ifdef CONFIG_INT128 2033 r->u128 = a->u128 + b->u128 + (c->u128 & 1); 2034 #else 2035 2036 if (c->VsrD(1) & 1) { 2037 ppc_avr_t tmp; 2038 2039 tmp.VsrD(0) = 0; 2040 tmp.VsrD(1) = c->VsrD(1) & 1; 2041 avr_qw_add(&tmp, *a, tmp); 2042 avr_qw_add(r, tmp, *b); 2043 } else { 2044 avr_qw_add(r, *a, *b); 2045 } 2046 #endif 2047 } 2048 2049 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2050 { 2051 #ifdef CONFIG_INT128 2052 r->u128 = (~a->u128 < b->u128); 2053 #else 2054 ppc_avr_t not_a; 2055 2056 avr_qw_not(¬_a, *a); 2057 2058 r->VsrD(0) = 0; 2059 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0); 2060 #endif 2061 } 2062 2063 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2064 { 2065 #ifdef CONFIG_INT128 2066 int carry_out = (~a->u128 < b->u128); 2067 if (!carry_out && (c->u128 & 1)) { 2068 carry_out = ((a->u128 + b->u128 + 1) == 0) && 2069 ((a->u128 != 0) || (b->u128 != 0)); 2070 } 2071 r->u128 = carry_out; 2072 #else 2073 2074 int carry_in = c->VsrD(1) & 1; 2075 int carry_out = 0; 2076 ppc_avr_t tmp; 2077 2078 carry_out = avr_qw_addc(&tmp, *a, *b); 2079 2080 if (!carry_out && carry_in) { 2081 ppc_avr_t one = QW_ONE; 2082 carry_out = avr_qw_addc(&tmp, tmp, one); 2083 } 2084 r->VsrD(0) = 0; 2085 r->VsrD(1) = carry_out; 2086 #endif 2087 } 2088 2089 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2090 { 2091 #ifdef CONFIG_INT128 2092 r->u128 = a->u128 - b->u128; 2093 #else 2094 ppc_avr_t tmp; 2095 
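    /* Without __int128 support, compute a - b as a + ~b + 1 on 128 bits. */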
ppc_avr_t one = QW_ONE; 2096 2097 avr_qw_not(&tmp, *b); 2098 avr_qw_add(&tmp, *a, tmp); 2099 avr_qw_add(r, tmp, one); 2100 #endif 2101 } 2102 2103 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2104 { 2105 #ifdef CONFIG_INT128 2106 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); 2107 #else 2108 ppc_avr_t tmp, sum; 2109 2110 avr_qw_not(&tmp, *b); 2111 avr_qw_add(&sum, *a, tmp); 2112 2113 tmp.VsrD(0) = 0; 2114 tmp.VsrD(1) = c->VsrD(1) & 1; 2115 avr_qw_add(r, sum, tmp); 2116 #endif 2117 } 2118 2119 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2120 { 2121 #ifdef CONFIG_INT128 2122 r->u128 = (~a->u128 < ~b->u128) || 2123 (a->u128 + ~b->u128 == (__uint128_t)-1); 2124 #else 2125 int carry = (avr_qw_cmpu(*a, *b) > 0); 2126 if (!carry) { 2127 ppc_avr_t tmp; 2128 avr_qw_not(&tmp, *b); 2129 avr_qw_add(&tmp, *a, tmp); 2130 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull)); 2131 } 2132 r->VsrD(0) = 0; 2133 r->VsrD(1) = carry; 2134 #endif 2135 } 2136 2137 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2138 { 2139 #ifdef CONFIG_INT128 2140 r->u128 = 2141 (~a->u128 < ~b->u128) || 2142 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); 2143 #else 2144 int carry_in = c->VsrD(1) & 1; 2145 int carry_out = (avr_qw_cmpu(*a, *b) > 0); 2146 if (!carry_out && carry_in) { 2147 ppc_avr_t tmp; 2148 avr_qw_not(&tmp, *b); 2149 avr_qw_add(&tmp, *a, tmp); 2150 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull)); 2151 } 2152 2153 r->VsrD(0) = 0; 2154 r->VsrD(1) = carry_out; 2155 #endif 2156 } 2157 2158 #define BCD_PLUS_PREF_1 0xC 2159 #define BCD_PLUS_PREF_2 0xF 2160 #define BCD_PLUS_ALT_1 0xA 2161 #define BCD_NEG_PREF 0xD 2162 #define BCD_NEG_ALT 0xB 2163 #define BCD_PLUS_ALT_2 0xE 2164 #define NATIONAL_PLUS 0x2B 2165 #define NATIONAL_NEG 0x2D 2166 2167 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2168 2169 static int bcd_get_sgn(ppc_avr_t *bcd) 2170 { 2171 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) { 2172 case BCD_PLUS_PREF_1: 2173 case BCD_PLUS_PREF_2: 2174 case BCD_PLUS_ALT_1: 2175 case BCD_PLUS_ALT_2: 2176 { 2177 return 1; 2178 } 2179 2180 case BCD_NEG_PREF: 2181 case BCD_NEG_ALT: 2182 { 2183 return -1; 2184 } 2185 2186 default: 2187 { 2188 return 0; 2189 } 2190 } 2191 } 2192 2193 static int bcd_preferred_sgn(int sgn, int ps) 2194 { 2195 if (sgn >= 0) { 2196 return (ps == 0) ? 
static int bcd_preferred_sgn(int sgn, int ps)
{
    if (sgn >= 0) {
        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
    } else {
        return BCD_NEG_PREF;
    }
}

static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
{
    uint8_t result;
    if (n & 1) {
        result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
    } else {
        result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
    }

    if (unlikely(result > 9)) {
        *invalid = true;
    }
    return result;
}

static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
{
    if (n & 1) {
        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
        bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
    } else {
        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
        bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
    }
}

static bool bcd_is_valid(ppc_avr_t *bcd)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(bcd) == 0) {
        return false;
    }

    for (i = 1; i < 32; i++) {
        bcd_get_digit(bcd, i, &invalid);
        if (unlikely(invalid)) {
            return false;
        }
    }
    return true;
}

static int bcd_cmp_zero(ppc_avr_t *bcd)
{
    if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
        return CRF_EQ;
    } else {
        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
    }
}

static uint16_t get_national_digit(ppc_avr_t *reg, int n)
{
    return reg->VsrH(7 - n);
}

static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
{
    reg->VsrH(7 - n) = val;
}

static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;
    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}

static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
    return is_zero;
}

static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                        int *overflow)
{
    int carry = 0;
    int i;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
                        bcd_get_digit(b, i, invalid) + carry;
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
}

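/*
 * bcdadd.: add two signed packed decimals.  Magnitudes are added when the
 * signs match and subtracted otherwise; CR reports LT/GT/EQ, and SO is set
 * on invalid operands or decimal overflow.
 */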
uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{

    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    int zero = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? CRF_GT : CRF_LT;
        } else {
            int magnitude = bcd_cmp_mag(a, b);
            if (magnitude > 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
                bcd_sub_mag(&result, a, b, &invalid, &overflow);
                cr = (sgna > 0) ? CRF_GT : CRF_LT;
            } else if (magnitude < 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
                bcd_sub_mag(&result, b, a, &invalid, &overflow);
                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
            } else {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
                cr = CRF_EQ;
            }
        }
    }

    if (unlikely(invalid)) {
        result.VsrD(0) = result.VsrD(1) = -1;
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    } else if (zero) {
        cr |= CRF_EQ;
    }

    *r = result;

    return cr;
}

uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

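/*
 * Convert from zoned decimal: each of the 16 bytes holds one digit in its
 * low nibble, the zone nibble must be 0x3 (PS = 0) or 0xF (PS = 1), and the
 * sign is taken from the zone position of the low-order byte.
 */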
uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
        digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
            (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->VsrD(0) >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

/**
 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
 *
 * Returns:
 *  > 0 if ahi|alo > bhi|blo,
 *    0 if ahi|alo == bhi|blo,
 *  < 0 if ahi|alo < bhi|blo
 */
static inline int ucmp128(uint64_t alo, uint64_t ahi,
                          uint64_t blo, uint64_t bhi)
{
    return (ahi == bhi) ?
        (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
        (ahi > bhi ? 1 : -1);
}

uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr;
    uint64_t lo_value;
    uint64_t hi_value;
    uint64_t rem;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->VsrSD(0) < 0) {
        lo_value = -b->VsrSD(1);
        hi_value = ~b->VsrD(0) + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);

        cr = CRF_LT;
    } else {
        lo_value = b->VsrD(1);
        hi_value = b->VsrD(0);
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);

        if (hi_value == 0 && lo_value == 0) {
            cr = CRF_EQ;
        } else {
            cr = CRF_GT;
        }
    }

    /*
     * Check src limits: abs(src) <= 10^31 - 1
     *
     * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
     */
    if (ucmp128(lo_value, hi_value,
                0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
        cr |= CRF_SO;

        /*
         * According to the ISA, if src wouldn't fit in the destination
         * register, the result is undefined.
         * In that case, we leave r unchanged.
         */
    } else {
        rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);

        for (i = 1; i < 16; rem /= 10, i++) {
            bcd_put_digit(&ret, rem % 10, i);
        }

        for (; i < 32; lo_value /= 10, i++) {
            bcd_put_digit(&ret, lo_value % 10, i);
        }

        *r = ret;
    }

    return cr;
}

uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->VsrSD(1) = -lo_value;
        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
    } else {
        r->VsrSD(1) = lo_value;
        r->VsrSD(0) = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);

    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}

/*
 * Decimal shift helpers: the shift count is the signed byte a->VsrSB(7),
 * expressed in digits (nibbles).  Positive counts shift toward the more
 * significant digits, negative counts shift right; bcds. preserves the sign
 * nibble and bcdsr. rounds the result when shifting right.
 */
uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i = a->VsrSB(7);
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSB(7);
    if (i >= 32) {
        ox_flag = true;
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i <= -32) {
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    int i = a->VsrSB(7);
    ppc_avr_t bcd_one;

    bcd_one.VsrD(0) = 0;
    bcd_one.VsrD(1) = 0x10;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);

        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}

uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
    int i = a->VsrSH(3) + 1;
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i >= 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}

uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSH(3);
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    } else if (i == 0) {
        if (ret.VsrD(0) || ret.VsrD(1)) {
            ox_flag = CRF_SO;
        }
        ret.VsrD(0) = ret.VsrD(1) = 0;
    }

    *r = ret;
    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}

void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.VsrW(i) = b->VsrW(i) ^
            (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}

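/*
 * vcipherlast: final AES encryption round, i.e. SubBytes and ShiftRows
 * followed by AddRoundKey, with no MixColumns step.
 */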
void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
    }
    *r = result;
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07. The RTL is */
    /* incorrect and will be fixed in V2.07B. */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->VsrW(i) =
            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
            AES_imc[tmp.VsrB(4 * i + 3)][3];
    }
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
    }
    *r = result;
}

void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}

void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

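/*
 * Reverse the bit order within each byte of a 32-bit word.  brinc (below)
 * uses this to perform a bit-reversed increment of the masked low-order
 * address bits, as used for bit-reversed (FFT-style) addressing.
 */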
static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}