/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"
#include "tcg/tcg-gvec-desc.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    if (unlikely(rb == 0 || ra >= rb)) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divu128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    uint64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = 0;

    if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divs128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define
pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff)) 149 150 /* 151 * subtract 1 from each byte, and with inverse, check if MSB is set at each 152 * byte. 153 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80 154 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 155 */ 156 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 157 158 /* When you XOR the pattern and there is a match, that byte will be zero */ 159 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 160 161 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 162 { 163 return hasvalue(rb, ra) ? CRF_GT : 0; 164 } 165 166 #undef pattern 167 #undef haszero 168 #undef hasvalue 169 170 /* 171 * Return a random number. 172 */ 173 uint64_t helper_darn32(void) 174 { 175 Error *err = NULL; 176 uint32_t ret; 177 178 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 179 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 180 error_get_pretty(err)); 181 error_free(err); 182 return -1; 183 } 184 185 return ret; 186 } 187 188 uint64_t helper_darn64(void) 189 { 190 Error *err = NULL; 191 uint64_t ret; 192 193 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 194 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 195 error_get_pretty(err)); 196 error_free(err); 197 return -1; 198 } 199 200 return ret; 201 } 202 203 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 204 { 205 int i; 206 uint64_t ra = 0; 207 208 for (i = 0; i < 8; i++) { 209 int index = (rs >> (i * 8)) & 0xFF; 210 if (index < 64) { 211 if (rb & PPC_BIT(index)) { 212 ra |= 1 << i; 213 } 214 } 215 } 216 return ra; 217 } 218 219 #endif 220 221 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 222 { 223 target_ulong mask = 0xff; 224 target_ulong ra = 0; 225 int i; 226 227 for (i = 0; i < sizeof(target_ulong); i++) { 228 if ((rs & mask) == (rb & mask)) { 229 ra |= mask; 230 } 231 mask <<= 8; 232 } 233 return ra; 234 } 235 236 /* shift right arithmetic helper */ 237 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 238 target_ulong shift) 239 { 240 int32_t ret; 241 242 if (likely(!(shift & 0x20))) { 243 if (likely((uint32_t)shift != 0)) { 244 shift &= 0x1f; 245 ret = (int32_t)value >> shift; 246 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 247 env->ca32 = env->ca = 0; 248 } else { 249 env->ca32 = env->ca = 1; 250 } 251 } else { 252 ret = (int32_t)value; 253 env->ca32 = env->ca = 0; 254 } 255 } else { 256 ret = (int32_t)value >> 31; 257 env->ca32 = env->ca = (ret != 0); 258 } 259 return (target_long)ret; 260 } 261 262 #if defined(TARGET_PPC64) 263 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 264 target_ulong shift) 265 { 266 int64_t ret; 267 268 if (likely(!(shift & 0x40))) { 269 if (likely((uint64_t)shift != 0)) { 270 shift &= 0x3f; 271 ret = (int64_t)value >> shift; 272 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 273 env->ca32 = env->ca = 0; 274 } else { 275 env->ca32 = env->ca = 1; 276 } 277 } else { 278 ret = (int64_t)value; 279 env->ca32 = env->ca = 0; 280 } 281 } else { 282 ret = (int64_t)value >> 63; 283 env->ca32 = env->ca = (ret != 0); 284 } 285 return ret; 286 } 287 #endif 288 289 #if defined(TARGET_PPC64) 290 target_ulong helper_popcntb(target_ulong val) 291 { 292 /* Note that we don't fold past bytes */ 293 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 294 0x5555555555555555ULL); 295 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 296 0x3333333333333333ULL); 297 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 298 0x0f0f0f0f0f0f0f0fULL); 299 return val; 300 } 301 302 
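/*
 * Like helper_popcntb above, this uses the usual SWAR ("SIMD within a
 * register") folding: each step adds neighbouring bit groups of doubling
 * width in place, and the masks stop the partial sums from crossing the
 * element boundary, so the result holds one population count per element.
 * Worked example for a single byte, 0xb5 = 0b10110101:
 *   pair sums   -> 01 10 01 01
 *   nibble sums -> 0011 0010
 *   byte sum    -> 00000101 = 5 set bits
 */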
target_ulong helper_popcntw(target_ulong val) 303 { 304 /* Note that we don't fold past words. */ 305 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 306 0x5555555555555555ULL); 307 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 308 0x3333333333333333ULL); 309 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 310 0x0f0f0f0f0f0f0f0fULL); 311 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 312 0x00ff00ff00ff00ffULL); 313 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 314 0x0000ffff0000ffffULL); 315 return val; 316 } 317 #else 318 target_ulong helper_popcntb(target_ulong val) 319 { 320 /* Note that we don't fold past bytes */ 321 val = (val & 0x55555555) + ((val >> 1) & 0x55555555); 322 val = (val & 0x33333333) + ((val >> 2) & 0x33333333); 323 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); 324 return val; 325 } 326 #endif 327 328 uint64_t helper_CFUGED(uint64_t src, uint64_t mask) 329 { 330 /* 331 * Instead of processing the mask bit-by-bit from the most significant to 332 * the least significant bit, as described in PowerISA, we'll handle it in 333 * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use 334 * ctz or cto, we negate the mask at the end of the loop. 335 */ 336 target_ulong m, left = 0, right = 0; 337 unsigned int n, i = 64; 338 bool bit = false; /* tracks if we are processing zeros or ones */ 339 340 if (mask == 0 || mask == -1) { 341 return src; 342 } 343 344 /* Processes the mask in blocks, from LSB to MSB */ 345 while (i) { 346 /* Find how many bits we should take */ 347 n = ctz64(mask); 348 if (n > i) { 349 n = i; 350 } 351 352 /* 353 * Extracts 'n' trailing bits of src and put them on the leading 'n' 354 * bits of 'right' or 'left', pushing down the previously extracted 355 * values. 356 */ 357 m = (1ll << n) - 1; 358 if (bit) { 359 right = ror64(right | (src & m), n); 360 } else { 361 left = ror64(left | (src & m), n); 362 } 363 364 /* 365 * Discards the processed bits from 'src' and 'mask'. Note that we are 366 * removing 'n' trailing zeros from 'mask', but the logical shift will 367 * add 'n' leading zeros back, so the population count of 'mask' is kept 368 * the same. 369 */ 370 src >>= n; 371 mask >>= n; 372 i -= n; 373 bit = !bit; 374 mask = ~mask; 375 } 376 377 /* 378 * At the end, right was ror'ed ctpop(mask) times. To put it back in place, 379 * we'll shift it more 64-ctpop(mask) times. 
380 */ 381 if (bit) { 382 n = ctpop64(mask); 383 } else { 384 n = 64 - ctpop64(mask); 385 } 386 387 return left | (right >> n); 388 } 389 390 uint64_t helper_PDEPD(uint64_t src, uint64_t mask) 391 { 392 int i, o; 393 uint64_t result = 0; 394 395 if (mask == -1) { 396 return src; 397 } 398 399 for (i = 0; mask != 0; i++) { 400 o = ctz64(mask); 401 mask &= mask - 1; 402 result |= ((src >> i) & 1) << o; 403 } 404 405 return result; 406 } 407 408 uint64_t helper_PEXTD(uint64_t src, uint64_t mask) 409 { 410 int i, o; 411 uint64_t result = 0; 412 413 if (mask == -1) { 414 return src; 415 } 416 417 for (o = 0; mask != 0; o++) { 418 i = ctz64(mask); 419 mask &= mask - 1; 420 result |= ((src >> i) & 1) << o; 421 } 422 423 return result; 424 } 425 426 /*****************************************************************************/ 427 /* Altivec extension helpers */ 428 #if HOST_BIG_ENDIAN 429 #define VECTOR_FOR_INORDER_I(index, element) \ 430 for (index = 0; index < ARRAY_SIZE(r->element); index++) 431 #else 432 #define VECTOR_FOR_INORDER_I(index, element) \ 433 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--) 434 #endif 435 436 /* Saturating arithmetic helpers. */ 437 #define SATCVT(from, to, from_type, to_type, min, max) \ 438 static inline to_type cvt##from##to(from_type x, int *sat) \ 439 { \ 440 to_type r; \ 441 \ 442 if (x < (from_type)min) { \ 443 r = min; \ 444 *sat = 1; \ 445 } else if (x > (from_type)max) { \ 446 r = max; \ 447 *sat = 1; \ 448 } else { \ 449 r = x; \ 450 } \ 451 return r; \ 452 } 453 #define SATCVTU(from, to, from_type, to_type, min, max) \ 454 static inline to_type cvt##from##to(from_type x, int *sat) \ 455 { \ 456 to_type r; \ 457 \ 458 if (x > (from_type)max) { \ 459 r = max; \ 460 *sat = 1; \ 461 } else { \ 462 r = x; \ 463 } \ 464 return r; \ 465 } 466 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX) 467 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX) 468 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX) 469 470 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX) 471 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX) 472 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX) 473 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX) 474 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX) 475 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX) 476 #undef SATCVT 477 #undef SATCVTU 478 479 void helper_mtvscr(CPUPPCState *env, uint32_t vscr) 480 { 481 ppc_store_vscr(env, vscr); 482 } 483 484 uint32_t helper_mfvscr(CPUPPCState *env) 485 { 486 return ppc_get_vscr(env); 487 } 488 489 static inline void set_vscr_sat(CPUPPCState *env) 490 { 491 /* The choice of non-zero value is arbitrary. 
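     * Only "is any bit set" matters: the accumulated vscr_sat state is
     * reduced to the single VSCR[SAT] bit when the register is read back
     * (see helper_mfvscr above).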
*/ 492 env->vscr_sat.u32[0] = 1; 493 } 494 495 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 496 { 497 int i; 498 499 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 500 r->u32[i] = ~a->u32[i] < b->u32[i]; 501 } 502 } 503 504 /* vprtybw */ 505 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b) 506 { 507 int i; 508 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 509 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16); 510 res ^= res >> 8; 511 r->u32[i] = res & 1; 512 } 513 } 514 515 /* vprtybd */ 516 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b) 517 { 518 int i; 519 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 520 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32); 521 res ^= res >> 16; 522 res ^= res >> 8; 523 r->u64[i] = res & 1; 524 } 525 } 526 527 /* vprtybq */ 528 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b) 529 { 530 uint64_t res = b->u64[0] ^ b->u64[1]; 531 res ^= res >> 32; 532 res ^= res >> 16; 533 res ^= res >> 8; 534 r->VsrD(1) = res & 1; 535 r->VsrD(0) = 0; 536 } 537 538 #define VARITHFP(suffix, func) \ 539 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 540 ppc_avr_t *b) \ 541 { \ 542 int i; \ 543 \ 544 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 545 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 546 } \ 547 } 548 VARITHFP(addfp, float32_add) 549 VARITHFP(subfp, float32_sub) 550 VARITHFP(minfp, float32_min) 551 VARITHFP(maxfp, float32_max) 552 #undef VARITHFP 553 554 #define VARITHFPFMA(suffix, type) \ 555 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 556 ppc_avr_t *b, ppc_avr_t *c) \ 557 { \ 558 int i; \ 559 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 560 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 561 type, &env->vec_status); \ 562 } \ 563 } 564 VARITHFPFMA(maddfp, 0); 565 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 566 #undef VARITHFPFMA 567 568 #define VARITHSAT_CASE(type, op, cvt, element) \ 569 { \ 570 type result = (type)a->element[i] op (type)b->element[i]; \ 571 r->element[i] = cvt(result, &sat); \ 572 } 573 574 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 575 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 576 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 577 { \ 578 int sat = 0; \ 579 int i; \ 580 \ 581 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 582 VARITHSAT_CASE(optype, op, cvt, element); \ 583 } \ 584 if (sat) { \ 585 vscr_sat->u32[0] = 1; \ 586 } \ 587 } 588 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 589 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 590 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 591 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 592 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 593 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 594 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 595 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 596 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 597 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 598 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 599 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 600 #undef VARITHSAT_CASE 601 #undef VARITHSAT_DO 602 #undef VARITHSAT_SIGNED 603 #undef VARITHSAT_UNSIGNED 604 605 #define VAVG_DO(name, element, etype) \ 606 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 607 { \ 608 int i; \ 609 \ 610 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 611 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 612 r->element[i] = x >> 1; \ 613 } \ 614 } 615 616 #define VAVG(type, 
signed_element, signed_type, unsigned_element, \ 617 unsigned_type) \ 618 VAVG_DO(avgs##type, signed_element, signed_type) \ 619 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 620 VAVG(b, s8, int16_t, u8, uint16_t) 621 VAVG(h, s16, int32_t, u16, uint32_t) 622 VAVG(w, s32, int64_t, u32, uint64_t) 623 #undef VAVG_DO 624 #undef VAVG 625 626 #define VABSDU_DO(name, element) \ 627 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 628 { \ 629 int i; \ 630 \ 631 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 632 r->element[i] = (a->element[i] > b->element[i]) ? \ 633 (a->element[i] - b->element[i]) : \ 634 (b->element[i] - a->element[i]); \ 635 } \ 636 } 637 638 /* 639 * VABSDU - Vector absolute difference unsigned 640 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 641 * element - element type to access from vector 642 */ 643 #define VABSDU(type, element) \ 644 VABSDU_DO(absdu##type, element) 645 VABSDU(b, u8) 646 VABSDU(h, u16) 647 VABSDU(w, u32) 648 #undef VABSDU_DO 649 #undef VABSDU 650 651 #define VCF(suffix, cvt, element) \ 652 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 653 ppc_avr_t *b, uint32_t uim) \ 654 { \ 655 int i; \ 656 \ 657 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 658 float32 t = cvt(b->element[i], &env->vec_status); \ 659 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 660 } \ 661 } 662 VCF(ux, uint32_to_float32, u32) 663 VCF(sx, int32_to_float32, s32) 664 #undef VCF 665 666 #define VCMPNEZ(NAME, ELEM) \ 667 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \ 668 { \ 669 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \ 670 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \ 671 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \ 672 } \ 673 } 674 VCMPNEZ(VCMPNEZB, u8) 675 VCMPNEZ(VCMPNEZH, u16) 676 VCMPNEZ(VCMPNEZW, u32) 677 #undef VCMPNEZ 678 679 #define VCMPFP_DO(suffix, compare, order, record) \ 680 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 681 ppc_avr_t *a, ppc_avr_t *b) \ 682 { \ 683 uint32_t ones = (uint32_t)-1; \ 684 uint32_t all = ones; \ 685 uint32_t none = 0; \ 686 int i; \ 687 \ 688 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 689 uint32_t result; \ 690 FloatRelation rel = \ 691 float32_compare_quiet(a->f32[i], b->f32[i], \ 692 &env->vec_status); \ 693 if (rel == float_relation_unordered) { \ 694 result = 0; \ 695 } else if (rel compare order) { \ 696 result = ones; \ 697 } else { \ 698 result = 0; \ 699 } \ 700 r->u32[i] = result; \ 701 all &= result; \ 702 none |= result; \ 703 } \ 704 if (record) { \ 705 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 706 } \ 707 } 708 #define VCMPFP(suffix, compare, order) \ 709 VCMPFP_DO(suffix, compare, order, 0) \ 710 VCMPFP_DO(suffix##_dot, compare, order, 1) 711 VCMPFP(eqfp, ==, float_relation_equal) 712 VCMPFP(gefp, !=, float_relation_less) 713 VCMPFP(gtfp, ==, float_relation_greater) 714 #undef VCMPFP_DO 715 #undef VCMPFP 716 717 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 718 ppc_avr_t *a, ppc_avr_t *b, int record) 719 { 720 int i; 721 int all_in = 0; 722 723 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 724 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 725 &env->vec_status); 726 if (le_rel == float_relation_unordered) { 727 r->u32[i] = 0xc0000000; 728 all_in = 1; 729 } else { 730 float32 bneg = float32_chs(b->f32[i]); 731 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, 732 &env->vec_status); 733 int le = le_rel != float_relation_greater; 734 int ge = 
ge_rel != float_relation_less; 735 736 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 737 all_in |= (!le | !ge); 738 } 739 } 740 if (record) { 741 env->crf[6] = (all_in == 0) << 1; 742 } 743 } 744 745 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 746 { 747 vcmpbfp_internal(env, r, a, b, 0); 748 } 749 750 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 751 ppc_avr_t *b) 752 { 753 vcmpbfp_internal(env, r, a, b, 1); 754 } 755 756 #define VCT(suffix, satcvt, element) \ 757 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 758 ppc_avr_t *b, uint32_t uim) \ 759 { \ 760 int i; \ 761 int sat = 0; \ 762 float_status s = env->vec_status; \ 763 \ 764 set_float_rounding_mode(float_round_to_zero, &s); \ 765 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 766 if (float32_is_any_nan(b->f32[i])) { \ 767 r->element[i] = 0; \ 768 } else { \ 769 float64 t = float32_to_float64(b->f32[i], &s); \ 770 int64_t j; \ 771 \ 772 t = float64_scalbn(t, uim, &s); \ 773 j = float64_to_int64(t, &s); \ 774 r->element[i] = satcvt(j, &sat); \ 775 } \ 776 } \ 777 if (sat) { \ 778 set_vscr_sat(env); \ 779 } \ 780 } 781 VCT(uxs, cvtsduw, u32) 782 VCT(sxs, cvtsdsw, s32) 783 #undef VCT 784 785 typedef int64_t do_ger(uint32_t, uint32_t, uint32_t); 786 787 static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask) 788 { 789 int64_t psum = 0; 790 for (int i = 0; i < 8; i++, mask >>= 1) { 791 if (mask & 1) { 792 psum += sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4); 793 } 794 } 795 return psum; 796 } 797 798 static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask) 799 { 800 int64_t psum = 0; 801 for (int i = 0; i < 4; i++, mask >>= 1) { 802 if (mask & 1) { 803 psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8); 804 } 805 } 806 return psum; 807 } 808 809 static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask) 810 { 811 int64_t psum = 0; 812 for (int i = 0; i < 2; i++, mask >>= 1) { 813 if (mask & 1) { 814 psum += sextract32(a, 16 * i, 16) * sextract32(b, 16 * i, 16); 815 } 816 } 817 return psum; 818 } 819 820 static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at, 821 uint32_t mask, bool sat, bool acc, do_ger ger) 822 { 823 uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK), 824 xmsk = FIELD_EX32(mask, GER_MSK, XMSK), 825 ymsk = FIELD_EX32(mask, GER_MSK, YMSK); 826 uint8_t xmsk_bit, ymsk_bit; 827 int64_t psum; 828 int i, j; 829 for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) { 830 for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) { 831 if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) { 832 psum = ger(a->VsrW(i), b->VsrW(j), pmsk); 833 if (acc) { 834 psum += at[i].VsrSW(j); 835 } 836 if (sat && psum > INT32_MAX) { 837 set_vscr_sat(env); 838 at[i].VsrSW(j) = INT32_MAX; 839 } else if (sat && psum < INT32_MIN) { 840 set_vscr_sat(env); 841 at[i].VsrSW(j) = INT32_MIN; 842 } else { 843 at[i].VsrSW(j) = (int32_t) psum; 844 } 845 } else { 846 at[i].VsrSW(j) = 0; 847 } 848 } 849 } 850 } 851 852 QEMU_FLATTEN 853 void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 854 ppc_acc_t *at, uint32_t mask) 855 { 856 xviger(env, a, b, at, mask, false, false, ger_rank8); 857 } 858 859 QEMU_FLATTEN 860 void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 861 ppc_acc_t *at, uint32_t mask) 862 { 863 xviger(env, a, b, at, mask, false, true, ger_rank8); 864 } 865 866 QEMU_FLATTEN 867 void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 868 ppc_acc_t *at, uint32_t mask) 869 { 870 
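    /*
     * Rank-4 update: each enabled accumulator word receives the sum of four
     * products of a signed byte from VSR[a] and an unsigned byte from VSR[b]
     * (see ger_rank4), with PMSK selecting which of the four products
     * contribute; no accumulation and no saturation here.
     */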
xviger(env, a, b, at, mask, false, false, ger_rank4); 871 } 872 873 QEMU_FLATTEN 874 void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 875 ppc_acc_t *at, uint32_t mask) 876 { 877 xviger(env, a, b, at, mask, false, true, ger_rank4); 878 } 879 880 QEMU_FLATTEN 881 void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 882 ppc_acc_t *at, uint32_t mask) 883 { 884 xviger(env, a, b, at, mask, true, true, ger_rank4); 885 } 886 887 QEMU_FLATTEN 888 void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 889 ppc_acc_t *at, uint32_t mask) 890 { 891 xviger(env, a, b, at, mask, false, false, ger_rank2); 892 } 893 894 QEMU_FLATTEN 895 void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 896 ppc_acc_t *at, uint32_t mask) 897 { 898 xviger(env, a, b, at, mask, true, false, ger_rank2); 899 } 900 901 QEMU_FLATTEN 902 void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 903 ppc_acc_t *at, uint32_t mask) 904 { 905 xviger(env, a, b, at, mask, false, true, ger_rank2); 906 } 907 908 QEMU_FLATTEN 909 void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 910 ppc_acc_t *at, uint32_t mask) 911 { 912 xviger(env, a, b, at, mask, true, true, ger_rank2); 913 } 914 915 target_ulong helper_vclzlsbb(ppc_avr_t *r) 916 { 917 target_ulong count = 0; 918 int i; 919 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 920 if (r->VsrB(i) & 0x01) { 921 break; 922 } 923 count++; 924 } 925 return count; 926 } 927 928 target_ulong helper_vctzlsbb(ppc_avr_t *r) 929 { 930 target_ulong count = 0; 931 int i; 932 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 933 if (r->VsrB(i) & 0x01) { 934 break; 935 } 936 count++; 937 } 938 return count; 939 } 940 941 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 942 ppc_avr_t *b, ppc_avr_t *c) 943 { 944 int sat = 0; 945 int i; 946 947 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 948 int32_t prod = a->s16[i] * b->s16[i]; 949 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 950 951 r->s16[i] = cvtswsh(t, &sat); 952 } 953 954 if (sat) { 955 set_vscr_sat(env); 956 } 957 } 958 959 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 960 ppc_avr_t *b, ppc_avr_t *c) 961 { 962 int sat = 0; 963 int i; 964 965 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 966 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 967 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 968 r->s16[i] = cvtswsh(t, &sat); 969 } 970 971 if (sat) { 972 set_vscr_sat(env); 973 } 974 } 975 976 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 977 { 978 int i; 979 980 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 981 int32_t prod = a->s16[i] * b->s16[i]; 982 r->s16[i] = (int16_t) (prod + c->s16[i]); 983 } 984 } 985 986 #define VMRG_DO(name, element, access, ofs) \ 987 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 988 { \ 989 ppc_avr_t result; \ 990 int i, half = ARRAY_SIZE(r->element) / 2; \ 991 \ 992 for (i = 0; i < half; i++) { \ 993 result.access(i * 2 + 0) = a->access(i + ofs); \ 994 result.access(i * 2 + 1) = b->access(i + ofs); \ 995 } \ 996 *r = result; \ 997 } 998 999 #define VMRG(suffix, element, access) \ 1000 VMRG_DO(mrgl##suffix, element, access, half) \ 1001 VMRG_DO(mrgh##suffix, element, access, 0) 1002 VMRG(b, u8, VsrB) 1003 VMRG(h, u16, VsrH) 1004 VMRG(w, u32, VsrW) 1005 #undef VMRG_DO 1006 #undef VMRG 1007 1008 void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1009 { 1010 int32_t prod[16]; 1011 int i; 1012 1013 for (i = 0; i < ARRAY_SIZE(r->s8); 
i++) { 1014 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 1015 } 1016 1017 VECTOR_FOR_INORDER_I(i, s32) { 1018 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 1019 prod[4 * i + 2] + prod[4 * i + 3]; 1020 } 1021 } 1022 1023 void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1024 { 1025 int32_t prod[8]; 1026 int i; 1027 1028 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1029 prod[i] = a->s16[i] * b->s16[i]; 1030 } 1031 1032 VECTOR_FOR_INORDER_I(i, s32) { 1033 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1034 } 1035 } 1036 1037 void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1038 ppc_avr_t *b, ppc_avr_t *c) 1039 { 1040 int32_t prod[8]; 1041 int i; 1042 int sat = 0; 1043 1044 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1045 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1046 } 1047 1048 VECTOR_FOR_INORDER_I(i, s32) { 1049 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1050 1051 r->u32[i] = cvtsdsw(t, &sat); 1052 } 1053 1054 if (sat) { 1055 set_vscr_sat(env); 1056 } 1057 } 1058 1059 void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1060 { 1061 uint16_t prod[16]; 1062 int i; 1063 1064 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1065 prod[i] = a->u8[i] * b->u8[i]; 1066 } 1067 1068 VECTOR_FOR_INORDER_I(i, u32) { 1069 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1070 prod[4 * i + 2] + prod[4 * i + 3]; 1071 } 1072 } 1073 1074 void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1075 { 1076 uint32_t prod[8]; 1077 int i; 1078 1079 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1080 prod[i] = a->u16[i] * b->u16[i]; 1081 } 1082 1083 VECTOR_FOR_INORDER_I(i, u32) { 1084 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1085 } 1086 } 1087 1088 void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1089 ppc_avr_t *b, ppc_avr_t *c) 1090 { 1091 uint32_t prod[8]; 1092 int i; 1093 int sat = 0; 1094 1095 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1096 prod[i] = a->u16[i] * b->u16[i]; 1097 } 1098 1099 VECTOR_FOR_INORDER_I(i, s32) { 1100 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1101 1102 r->u32[i] = cvtuduw(t, &sat); 1103 } 1104 1105 if (sat) { 1106 set_vscr_sat(env); 1107 } 1108 } 1109 1110 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 1111 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1112 { \ 1113 int i; \ 1114 \ 1115 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1116 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 1117 (cast)b->mul_access(i); \ 1118 } \ 1119 } 1120 1121 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 1122 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1123 { \ 1124 int i; \ 1125 \ 1126 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1127 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1128 (cast)b->mul_access(i + 1); \ 1129 } \ 1130 } 1131 1132 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1133 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \ 1134 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast) 1135 VMUL(SB, s8, VsrSB, VsrSH, int16_t) 1136 VMUL(SH, s16, VsrSH, VsrSW, int32_t) 1137 VMUL(SW, s32, VsrSW, VsrSD, int64_t) 1138 VMUL(UB, u8, VsrB, VsrH, uint16_t) 1139 VMUL(UH, u16, VsrH, VsrW, uint32_t) 1140 VMUL(UW, u32, VsrW, VsrD, uint64_t) 1141 #undef VMUL_DO_EVN 1142 #undef VMUL_DO_ODD 1143 #undef VMUL 1144 1145 void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t 
*s0, ppc_vsr_t *s1, ppc_vsr_t *pcv, 1146 target_ulong uim) 1147 { 1148 int i, idx; 1149 ppc_vsr_t tmp = { .u64 = {0, 0} }; 1150 1151 for (i = 0; i < ARRAY_SIZE(t->u8); i++) { 1152 if ((pcv->VsrB(i) >> 5) == uim) { 1153 idx = pcv->VsrB(i) & 0x1f; 1154 if (idx < ARRAY_SIZE(t->u8)) { 1155 tmp.VsrB(i) = s0->VsrB(idx); 1156 } else { 1157 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8)); 1158 } 1159 } 1160 } 1161 1162 *t = tmp; 1163 } 1164 1165 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1166 { 1167 ppc_avr_t result; 1168 int i; 1169 1170 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1171 int s = c->VsrB(i) & 0x1f; 1172 int index = s & 0xf; 1173 1174 if (s & 0x10) { 1175 result.VsrB(i) = b->VsrB(index); 1176 } else { 1177 result.VsrB(i) = a->VsrB(index); 1178 } 1179 } 1180 *r = result; 1181 } 1182 1183 void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1184 { 1185 ppc_avr_t result; 1186 int i; 1187 1188 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1189 int s = c->VsrB(i) & 0x1f; 1190 int index = 15 - (s & 0xf); 1191 1192 if (s & 0x10) { 1193 result.VsrB(i) = a->VsrB(index); 1194 } else { 1195 result.VsrB(i) = b->VsrB(index); 1196 } 1197 } 1198 *r = result; 1199 } 1200 1201 #define XXGENPCV_BE_EXP(NAME, SZ) \ 1202 void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ 1203 { \ 1204 ppc_vsr_t tmp; \ 1205 \ 1206 /* Initialize tmp with the result of an all-zeros mask */ \ 1207 tmp.VsrD(0) = 0x1011121314151617; \ 1208 tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \ 1209 \ 1210 /* Iterate over the most significant byte of each element */ \ 1211 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1212 if (b->VsrB(i) & 0x80) { \ 1213 /* Update each byte of the element */ \ 1214 for (int k = 0; k < SZ; k++) { \ 1215 tmp.VsrB(i + k) = j + k; \ 1216 } \ 1217 j += SZ; \ 1218 } \ 1219 } \ 1220 \ 1221 *t = tmp; \ 1222 } 1223 1224 #define XXGENPCV_BE_COMP(NAME, SZ) \ 1225 void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ 1226 { \ 1227 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ 1228 \ 1229 /* Iterate over the most significant byte of each element */ \ 1230 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1231 if (b->VsrB(i) & 0x80) { \ 1232 /* Update each byte of the element */ \ 1233 for (int k = 0; k < SZ; k++) { \ 1234 tmp.VsrB(j + k) = i + k; \ 1235 } \ 1236 j += SZ; \ 1237 } \ 1238 } \ 1239 \ 1240 *t = tmp; \ 1241 } 1242 1243 #define XXGENPCV_LE_EXP(NAME, SZ) \ 1244 void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ 1245 { \ 1246 ppc_vsr_t tmp; \ 1247 \ 1248 /* Initialize tmp with the result of an all-zeros mask */ \ 1249 tmp.VsrD(0) = 0x1F1E1D1C1B1A1918; \ 1250 tmp.VsrD(1) = 0x1716151413121110; \ 1251 \ 1252 /* Iterate over the most significant byte of each element */ \ 1253 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1254 /* Reverse indexing of "i" */ \ 1255 const int idx = ARRAY_SIZE(b->u8) - i - SZ; \ 1256 if (b->VsrB(idx) & 0x80) { \ 1257 /* Update each byte of the element */ \ 1258 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \ 1259 tmp.VsrB(idx + rk) = j + k; \ 1260 } \ 1261 j += SZ; \ 1262 } \ 1263 } \ 1264 \ 1265 *t = tmp; \ 1266 } 1267 1268 #define XXGENPCV_LE_COMP(NAME, SZ) \ 1269 void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ 1270 { \ 1271 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ 1272 \ 1273 /* Iterate over the most significant byte of each element */ \ 1274 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1275 if (b->VsrB(ARRAY_SIZE(b->u8) - i 
- SZ) & 0x80) { \ 1276 /* Update each byte of the element */ \ 1277 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \ 1278 /* Reverse indexing of "j" */ \ 1279 const int idx = ARRAY_SIZE(b->u8) - j - SZ; \ 1280 tmp.VsrB(idx + rk) = i + k; \ 1281 } \ 1282 j += SZ; \ 1283 } \ 1284 } \ 1285 \ 1286 *t = tmp; \ 1287 } 1288 1289 #define XXGENPCV(NAME, SZ) \ 1290 XXGENPCV_BE_EXP(NAME, SZ) \ 1291 XXGENPCV_BE_COMP(NAME, SZ) \ 1292 XXGENPCV_LE_EXP(NAME, SZ) \ 1293 XXGENPCV_LE_COMP(NAME, SZ) \ 1294 1295 XXGENPCV(XXGENPCVBM, 1) 1296 XXGENPCV(XXGENPCVHM, 2) 1297 XXGENPCV(XXGENPCVWM, 4) 1298 XXGENPCV(XXGENPCVDM, 8) 1299 1300 #undef XXGENPCV_BE_EXP 1301 #undef XXGENPCV_BE_COMP 1302 #undef XXGENPCV_LE_EXP 1303 #undef XXGENPCV_LE_COMP 1304 #undef XXGENPCV 1305 1306 #if HOST_BIG_ENDIAN 1307 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1308 #define VBPERMD_INDEX(i) (i) 1309 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1310 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) 1311 #else 1312 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1313 #define VBPERMD_INDEX(i) (1 - i) 1314 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1315 #define EXTRACT_BIT(avr, i, index) \ 1316 (extract64((avr)->u64[1 - i], 63 - index, 1)) 1317 #endif 1318 1319 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1320 { 1321 int i, j; 1322 ppc_avr_t result = { .u64 = { 0, 0 } }; 1323 VECTOR_FOR_INORDER_I(i, u64) { 1324 for (j = 0; j < 8; j++) { 1325 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1326 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1327 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1328 } 1329 } 1330 } 1331 *r = result; 1332 } 1333 1334 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1335 { 1336 int i; 1337 uint64_t perm = 0; 1338 1339 VECTOR_FOR_INORDER_I(i, u8) { 1340 int index = VBPERMQ_INDEX(b, i); 1341 1342 if (index < 128) { 1343 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1344 if (a->u64[VBPERMQ_DW(index)] & mask) { 1345 perm |= (0x8000 >> i); 1346 } 1347 } 1348 } 1349 1350 r->VsrD(0) = perm; 1351 r->VsrD(1) = 0; 1352 } 1353 1354 #undef VBPERMQ_INDEX 1355 #undef VBPERMQ_DW 1356 1357 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1358 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1359 { \ 1360 int i, j; \ 1361 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ 1362 \ 1363 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1364 prod[i] = 0; \ 1365 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1366 if (a->srcfld[i] & (1ull << j)) { \ 1367 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1368 } \ 1369 } \ 1370 } \ 1371 \ 1372 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1373 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ 1374 } \ 1375 } 1376 1377 PMSUM(vpmsumb, u8, u16, uint16_t) 1378 PMSUM(vpmsumh, u16, u32, uint32_t) 1379 PMSUM(vpmsumw, u32, u64, uint64_t) 1380 1381 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1382 { 1383 1384 #ifdef CONFIG_INT128 1385 int i, j; 1386 __uint128_t prod[2]; 1387 1388 VECTOR_FOR_INORDER_I(i, u64) { 1389 prod[i] = 0; 1390 for (j = 0; j < 64; j++) { 1391 if (a->u64[i] & (1ull << j)) { 1392 prod[i] ^= (((__uint128_t)b->u64[i]) << j); 1393 } 1394 } 1395 } 1396 1397 r->u128 = prod[0] ^ prod[1]; 1398 1399 #else 1400 int i, j; 1401 ppc_avr_t prod[2]; 1402 1403 VECTOR_FOR_INORDER_I(i, u64) { 1404 prod[i].VsrD(1) = prod[i].VsrD(0) = 0; 1405 for (j = 0; j < 64; j++) { 1406 if (a->u64[i] & (1ull << j)) { 1407 ppc_avr_t bshift; 1408 if (j == 0) { 1409 bshift.VsrD(0) = 0; 1410 bshift.VsrD(1) = b->u64[i]; 1411 } else { 1412 
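                    /*
                     * Emulate a 128-bit left shift of b->u64[i] by j without
                     * __int128: the low doubleword is b << j and the high
                     * doubleword catches the bits shifted out, b >> (64 - j).
                     */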
bshift.VsrD(0) = b->u64[i] >> (64 - j); 1413 bshift.VsrD(1) = b->u64[i] << j; 1414 } 1415 prod[i].VsrD(1) ^= bshift.VsrD(1); 1416 prod[i].VsrD(0) ^= bshift.VsrD(0); 1417 } 1418 } 1419 } 1420 1421 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1); 1422 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0); 1423 #endif 1424 } 1425 1426 1427 #if HOST_BIG_ENDIAN 1428 #define PKBIG 1 1429 #else 1430 #define PKBIG 0 1431 #endif 1432 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1433 { 1434 int i, j; 1435 ppc_avr_t result; 1436 #if HOST_BIG_ENDIAN 1437 const ppc_avr_t *x[2] = { a, b }; 1438 #else 1439 const ppc_avr_t *x[2] = { b, a }; 1440 #endif 1441 1442 VECTOR_FOR_INORDER_I(i, u64) { 1443 VECTOR_FOR_INORDER_I(j, u32) { 1444 uint32_t e = x[i]->u32[j]; 1445 1446 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1447 ((e >> 6) & 0x3e0) | 1448 ((e >> 3) & 0x1f)); 1449 } 1450 } 1451 *r = result; 1452 } 1453 1454 #define VPK(suffix, from, to, cvt, dosat) \ 1455 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1456 ppc_avr_t *a, ppc_avr_t *b) \ 1457 { \ 1458 int i; \ 1459 int sat = 0; \ 1460 ppc_avr_t result; \ 1461 ppc_avr_t *a0 = PKBIG ? a : b; \ 1462 ppc_avr_t *a1 = PKBIG ? b : a; \ 1463 \ 1464 VECTOR_FOR_INORDER_I(i, from) { \ 1465 result.to[i] = cvt(a0->from[i], &sat); \ 1466 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1467 } \ 1468 *r = result; \ 1469 if (dosat && sat) { \ 1470 set_vscr_sat(env); \ 1471 } \ 1472 } 1473 #define I(x, y) (x) 1474 VPK(shss, s16, s8, cvtshsb, 1) 1475 VPK(shus, s16, u8, cvtshub, 1) 1476 VPK(swss, s32, s16, cvtswsh, 1) 1477 VPK(swus, s32, u16, cvtswuh, 1) 1478 VPK(sdss, s64, s32, cvtsdsw, 1) 1479 VPK(sdus, s64, u32, cvtsduw, 1) 1480 VPK(uhus, u16, u8, cvtuhub, 1) 1481 VPK(uwus, u32, u16, cvtuwuh, 1) 1482 VPK(udus, u64, u32, cvtuduw, 1) 1483 VPK(uhum, u16, u8, I, 0) 1484 VPK(uwum, u32, u16, I, 0) 1485 VPK(udum, u64, u32, I, 0) 1486 #undef I 1487 #undef VPK 1488 #undef PKBIG 1489 1490 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1491 { 1492 int i; 1493 1494 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1495 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1496 } 1497 } 1498 1499 #define VRFI(suffix, rounding) \ 1500 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1501 ppc_avr_t *b) \ 1502 { \ 1503 int i; \ 1504 float_status s = env->vec_status; \ 1505 \ 1506 set_float_rounding_mode(rounding, &s); \ 1507 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1508 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1509 } \ 1510 } 1511 VRFI(n, float_round_nearest_even) 1512 VRFI(m, float_round_down) 1513 VRFI(p, float_round_up) 1514 VRFI(z, float_round_to_zero) 1515 #undef VRFI 1516 1517 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1518 { 1519 int i; 1520 1521 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1522 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1523 1524 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1525 } 1526 } 1527 1528 #define VRLMI(name, size, element, insert) \ 1529 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 1530 { \ 1531 int i; \ 1532 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1533 uint##size##_t src1 = a->element[i]; \ 1534 uint##size##_t src2 = b->element[i]; \ 1535 uint##size##_t src3 = r->element[i]; \ 1536 uint##size##_t begin, end, shift, mask, rot_val; \ 1537 \ 1538 shift = extract##size(src2, 0, 6); \ 1539 end = extract##size(src2, 8, 6); \ 1540 begin = extract##size(src2, 16, 6); \ 1541 rot_val = 
rol##size(src1, shift); \ 1542 mask = mask_u##size(begin, end); \ 1543 if (insert) { \ 1544 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1545 } else { \ 1546 r->element[i] = (rot_val & mask); \ 1547 } \ 1548 } \ 1549 } 1550 1551 VRLMI(VRLDMI, 64, u64, 1); 1552 VRLMI(VRLWMI, 32, u32, 1); 1553 VRLMI(VRLDNM, 64, u64, 0); 1554 VRLMI(VRLWNM, 32, u32, 0); 1555 1556 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1557 { 1558 int i; 1559 1560 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1561 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status); 1562 } 1563 } 1564 1565 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1566 { 1567 int i; 1568 1569 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1570 r->f32[i] = float32_log2(b->f32[i], &env->vec_status); 1571 } 1572 } 1573 1574 #define VEXTU_X_DO(name, size, left) \ 1575 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1576 { \ 1577 int index = (a & 0xf) * 8; \ 1578 if (left) { \ 1579 index = 128 - index - size; \ 1580 } \ 1581 return int128_getlo(int128_rshift(b->s128, index)) & \ 1582 MAKE_64BIT_MASK(0, size); \ 1583 } 1584 VEXTU_X_DO(vextublx, 8, 1) 1585 VEXTU_X_DO(vextuhlx, 16, 1) 1586 VEXTU_X_DO(vextuwlx, 32, 1) 1587 VEXTU_X_DO(vextubrx, 8, 0) 1588 VEXTU_X_DO(vextuhrx, 16, 0) 1589 VEXTU_X_DO(vextuwrx, 32, 0) 1590 #undef VEXTU_X_DO 1591 1592 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1593 { 1594 int i; 1595 unsigned int shift, bytes, size; 1596 1597 size = ARRAY_SIZE(r->u8); 1598 for (i = 0; i < size; i++) { 1599 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1600 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */ 1601 (((i + 1) < size) ? a->VsrB(i + 1) : 0); 1602 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */ 1603 } 1604 } 1605 1606 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1607 { 1608 int i; 1609 unsigned int shift, bytes; 1610 1611 /* 1612 * Use reverse order, as destination and source register can be 1613 * same. Its being modified in place saving temporary, reverse 1614 * order will guarantee that computed result is not fed back. 1615 */ 1616 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1617 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1618 bytes = ((i ? 
a->VsrB(i - 1) : 0) << 8) + a->VsrB(i); 1619 /* extract adjacent bytes */ 1620 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */ 1621 } 1622 } 1623 1624 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1625 { 1626 int sh = shift & 0xf; 1627 int i; 1628 ppc_avr_t result; 1629 1630 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1631 int index = sh + i; 1632 if (index > 0xf) { 1633 result.VsrB(i) = b->VsrB(index - 0x10); 1634 } else { 1635 result.VsrB(i) = a->VsrB(index); 1636 } 1637 } 1638 *r = result; 1639 } 1640 1641 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1642 { 1643 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1644 1645 #if HOST_BIG_ENDIAN 1646 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1647 memset(&r->u8[16 - sh], 0, sh); 1648 #else 1649 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1650 memset(&r->u8[0], 0, sh); 1651 #endif 1652 } 1653 1654 #if HOST_BIG_ENDIAN 1655 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX]) 1656 #else 1657 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1) 1658 #endif 1659 1660 #define VINSX(SUFFIX, TYPE) \ 1661 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \ 1662 uint64_t val, target_ulong index) \ 1663 { \ 1664 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \ 1665 target_long idx = index; \ 1666 \ 1667 if (idx < 0 || idx > maxidx) { \ 1668 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \ 1669 qemu_log_mask(LOG_GUEST_ERROR, \ 1670 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \ 1671 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \ 1672 } else { \ 1673 TYPE src = val; \ 1674 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \ 1675 } \ 1676 } 1677 VINSX(B, uint8_t) 1678 VINSX(H, uint16_t) 1679 VINSX(W, uint32_t) 1680 VINSX(D, uint64_t) 1681 #undef ELEM_ADDR 1682 #undef VINSX 1683 #if HOST_BIG_ENDIAN 1684 #define VEXTDVLX(NAME, SIZE) \ 1685 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1686 target_ulong index) \ 1687 { \ 1688 const target_long idx = index; \ 1689 ppc_avr_t tmp[2] = { *a, *b }; \ 1690 memset(t, 0, sizeof(*t)); \ 1691 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1692 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \ 1693 } else { \ 1694 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1695 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1696 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \ 1697 } \ 1698 } 1699 #else 1700 #define VEXTDVLX(NAME, SIZE) \ 1701 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1702 target_ulong index) \ 1703 { \ 1704 const target_long idx = index; \ 1705 ppc_avr_t tmp[2] = { *b, *a }; \ 1706 memset(t, 0, sizeof(*t)); \ 1707 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1708 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \ 1709 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \ 1710 } else { \ 1711 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1712 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1713 env->nip, idx < 0 ? 
SIZE - idx : idx, 32 - SIZE); \ 1714 } \ 1715 } 1716 #endif 1717 VEXTDVLX(VEXTDUBVLX, 1) 1718 VEXTDVLX(VEXTDUHVLX, 2) 1719 VEXTDVLX(VEXTDUWVLX, 4) 1720 VEXTDVLX(VEXTDDVLX, 8) 1721 #undef VEXTDVLX 1722 #if HOST_BIG_ENDIAN 1723 #define VEXTRACT(suffix, element) \ 1724 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1725 { \ 1726 uint32_t es = sizeof(r->element[0]); \ 1727 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1728 memset(&r->u8[8], 0, 8); \ 1729 memset(&r->u8[0], 0, 8 - es); \ 1730 } 1731 #else 1732 #define VEXTRACT(suffix, element) \ 1733 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1734 { \ 1735 uint32_t es = sizeof(r->element[0]); \ 1736 uint32_t s = (16 - index) - es; \ 1737 memmove(&r->u8[8], &b->u8[s], es); \ 1738 memset(&r->u8[0], 0, 8); \ 1739 memset(&r->u8[8 + es], 0, 8 - es); \ 1740 } 1741 #endif 1742 VEXTRACT(ub, u8) 1743 VEXTRACT(uh, u16) 1744 VEXTRACT(uw, u32) 1745 VEXTRACT(d, u64) 1746 #undef VEXTRACT 1747 1748 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \ 1749 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \ 1750 { \ 1751 int i, idx, crf = 0; \ 1752 \ 1753 for (i = 0; i < NUM_ELEMS; i++) { \ 1754 idx = LEFT ? i : NUM_ELEMS - i - 1; \ 1755 if (b->Vsr##ELEM(idx)) { \ 1756 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \ 1757 } else { \ 1758 crf = 0b0010; \ 1759 break; \ 1760 } \ 1761 } \ 1762 \ 1763 for (; i < NUM_ELEMS; i++) { \ 1764 idx = LEFT ? i : NUM_ELEMS - i - 1; \ 1765 t->Vsr##ELEM(idx) = 0; \ 1766 } \ 1767 \ 1768 return crf; \ 1769 } 1770 VSTRI(VSTRIBL, B, 16, true) 1771 VSTRI(VSTRIBR, B, 16, false) 1772 VSTRI(VSTRIHL, H, 8, true) 1773 VSTRI(VSTRIHR, H, 8, false) 1774 #undef VSTRI 1775 1776 void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index) 1777 { 1778 ppc_vsr_t t = { }; 1779 size_t es = sizeof(uint32_t); 1780 uint32_t ext_index; 1781 int i; 1782 1783 ext_index = index; 1784 for (i = 0; i < es; i++, ext_index++) { 1785 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16); 1786 } 1787 1788 *xt = t; 1789 } 1790 1791 void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index) 1792 { 1793 ppc_vsr_t t = *xt; 1794 size_t es = sizeof(uint32_t); 1795 int ins_index, i = 0; 1796 1797 ins_index = index; 1798 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1799 t.VsrB(ins_index) = xb->VsrB(8 - es + i); 1800 } 1801 1802 *xt = t; 1803 } 1804 1805 void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c, 1806 uint32_t desc) 1807 { 1808 /* 1809 * Instead of processing imm bit-by-bit, we'll skip the computation of 1810 * conjunctions whose corresponding bit is unset. 1811 */ 1812 int bit, imm = simd_data(desc); 1813 Int128 conj, disj = int128_zero(); 1814 1815 /* Iterate over set bits from the least to the most significant bit */ 1816 while (imm) { 1817 /* 1818 * Get the next bit to be processed with ctz64. Invert the result of 1819 * ctz64 to match the indexing used by PowerISA. 
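         * The minterm index 'bit' then drives the conjunction built below:
         * bit 2 selects a vs. ~a, bit 1 selects b vs. ~b, and bit 0 selects
         * c vs. ~c.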
1820 */ 1821 bit = 7 - ctzl(imm); 1822 if (bit & 0x4) { 1823 conj = a->s128; 1824 } else { 1825 conj = int128_not(a->s128); 1826 } 1827 if (bit & 0x2) { 1828 conj = int128_and(conj, b->s128); 1829 } else { 1830 conj = int128_and(conj, int128_not(b->s128)); 1831 } 1832 if (bit & 0x1) { 1833 conj = int128_and(conj, c->s128); 1834 } else { 1835 conj = int128_and(conj, int128_not(c->s128)); 1836 } 1837 disj = int128_or(disj, conj); 1838 1839 /* Unset the least significant bit that is set */ 1840 imm &= imm - 1; 1841 } 1842 1843 t->s128 = disj; 1844 } 1845 1846 #define XXBLEND(name, sz) \ 1847 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1848 ppc_avr_t *c, uint32_t desc) \ 1849 { \ 1850 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \ 1851 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \ 1852 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \ 1853 } \ 1854 } 1855 XXBLEND(B, 8) 1856 XXBLEND(H, 16) 1857 XXBLEND(W, 32) 1858 XXBLEND(D, 64) 1859 #undef XXBLEND 1860 1861 #define VNEG(name, element) \ 1862 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1863 { \ 1864 int i; \ 1865 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1866 r->element[i] = -b->element[i]; \ 1867 } \ 1868 } 1869 VNEG(vnegw, s32) 1870 VNEG(vnegd, s64) 1871 #undef VNEG 1872 1873 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1874 { 1875 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1876 1877 #if HOST_BIG_ENDIAN 1878 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1879 memset(&r->u8[0], 0, sh); 1880 #else 1881 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1882 memset(&r->u8[16 - sh], 0, sh); 1883 #endif 1884 } 1885 1886 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1887 { 1888 int i; 1889 1890 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1891 r->u32[i] = a->u32[i] >= b->u32[i]; 1892 } 1893 } 1894 1895 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1896 { 1897 int64_t t; 1898 int i, upper; 1899 ppc_avr_t result; 1900 int sat = 0; 1901 1902 upper = ARRAY_SIZE(r->s32) - 1; 1903 t = (int64_t)b->VsrSW(upper); 1904 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1905 t += a->VsrSW(i); 1906 result.VsrSW(i) = 0; 1907 } 1908 result.VsrSW(upper) = cvtsdsw(t, &sat); 1909 *r = result; 1910 1911 if (sat) { 1912 set_vscr_sat(env); 1913 } 1914 } 1915 1916 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1917 { 1918 int i, j, upper; 1919 ppc_avr_t result; 1920 int sat = 0; 1921 1922 upper = 1; 1923 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1924 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 1925 1926 result.VsrD(i) = 0; 1927 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 1928 t += a->VsrSW(2 * i + j); 1929 } 1930 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 1931 } 1932 1933 *r = result; 1934 if (sat) { 1935 set_vscr_sat(env); 1936 } 1937 } 1938 1939 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1940 { 1941 int i, j; 1942 int sat = 0; 1943 1944 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1945 int64_t t = (int64_t)b->s32[i]; 1946 1947 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 1948 t += a->s8[4 * i + j]; 1949 } 1950 r->s32[i] = cvtsdsw(t, &sat); 1951 } 1952 1953 if (sat) { 1954 set_vscr_sat(env); 1955 } 1956 } 1957 1958 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1959 { 1960 int sat = 0; 1961 int i; 1962 1963 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1964 int64_t t = (int64_t)b->s32[i]; 1965 1966 t += a->s16[2 * i] + a->s16[2 * i + 1]; 1967 r->s32[i] = cvtsdsw(t, &sat); 1968 } 
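    /* cvtsdsw() sets 'sat' whenever a sum had to be clamped to int32 range;
     * make the saturation sticky in VSCR. */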
1969 1970 if (sat) { 1971 set_vscr_sat(env); 1972 } 1973 } 1974 1975 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1976 { 1977 int i, j; 1978 int sat = 0; 1979 1980 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1981 uint64_t t = (uint64_t)b->u32[i]; 1982 1983 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 1984 t += a->u8[4 * i + j]; 1985 } 1986 r->u32[i] = cvtuduw(t, &sat); 1987 } 1988 1989 if (sat) { 1990 set_vscr_sat(env); 1991 } 1992 } 1993 1994 #if HOST_BIG_ENDIAN 1995 #define UPKHI 1 1996 #define UPKLO 0 1997 #else 1998 #define UPKHI 0 1999 #define UPKLO 1 2000 #endif 2001 #define VUPKPX(suffix, hi) \ 2002 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2003 { \ 2004 int i; \ 2005 ppc_avr_t result; \ 2006 \ 2007 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 2008 uint16_t e = b->u16[hi ? i : i + 4]; \ 2009 uint8_t a = (e >> 15) ? 0xff : 0; \ 2010 uint8_t r = (e >> 10) & 0x1f; \ 2011 uint8_t g = (e >> 5) & 0x1f; \ 2012 uint8_t b = e & 0x1f; \ 2013 \ 2014 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 2015 } \ 2016 *r = result; \ 2017 } 2018 VUPKPX(lpx, UPKLO) 2019 VUPKPX(hpx, UPKHI) 2020 #undef VUPKPX 2021 2022 #define VUPK(suffix, unpacked, packee, hi) \ 2023 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2024 { \ 2025 int i; \ 2026 ppc_avr_t result; \ 2027 \ 2028 if (hi) { \ 2029 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 2030 result.unpacked[i] = b->packee[i]; \ 2031 } \ 2032 } else { \ 2033 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 2034 i++) { \ 2035 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 2036 } \ 2037 } \ 2038 *r = result; \ 2039 } 2040 VUPK(hsb, s16, s8, UPKHI) 2041 VUPK(hsh, s32, s16, UPKHI) 2042 VUPK(hsw, s64, s32, UPKHI) 2043 VUPK(lsb, s16, s8, UPKLO) 2044 VUPK(lsh, s32, s16, UPKLO) 2045 VUPK(lsw, s64, s32, UPKLO) 2046 #undef VUPK 2047 #undef UPKHI 2048 #undef UPKLO 2049 2050 #define VGENERIC_DO(name, element) \ 2051 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 2052 { \ 2053 int i; \ 2054 \ 2055 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 2056 r->element[i] = name(b->element[i]); \ 2057 } \ 2058 } 2059 2060 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 2061 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 2062 2063 VGENERIC_DO(clzb, u8) 2064 VGENERIC_DO(clzh, u16) 2065 2066 #undef clzb 2067 #undef clzh 2068 2069 #define ctzb(v) ((v) ? ctz32(v) : 8) 2070 #define ctzh(v) ((v) ? 
ctz32(v) : 16) 2071 #define ctzw(v) ctz32((v)) 2072 #define ctzd(v) ctz64((v)) 2073 2074 VGENERIC_DO(ctzb, u8) 2075 VGENERIC_DO(ctzh, u16) 2076 VGENERIC_DO(ctzw, u32) 2077 VGENERIC_DO(ctzd, u64) 2078 2079 #undef ctzb 2080 #undef ctzh 2081 #undef ctzw 2082 #undef ctzd 2083 2084 #define popcntb(v) ctpop8(v) 2085 #define popcnth(v) ctpop16(v) 2086 #define popcntw(v) ctpop32(v) 2087 #define popcntd(v) ctpop64(v) 2088 2089 VGENERIC_DO(popcntb, u8) 2090 VGENERIC_DO(popcnth, u16) 2091 VGENERIC_DO(popcntw, u32) 2092 VGENERIC_DO(popcntd, u64) 2093 2094 #undef popcntb 2095 #undef popcnth 2096 #undef popcntw 2097 #undef popcntd 2098 2099 #undef VGENERIC_DO 2100 2101 #if HOST_BIG_ENDIAN 2102 #define QW_ONE { .u64 = { 0, 1 } } 2103 #else 2104 #define QW_ONE { .u64 = { 1, 0 } } 2105 #endif 2106 2107 #ifndef CONFIG_INT128 2108 2109 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) 2110 { 2111 t->u64[0] = ~a.u64[0]; 2112 t->u64[1] = ~a.u64[1]; 2113 } 2114 2115 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) 2116 { 2117 if (a.VsrD(0) < b.VsrD(0)) { 2118 return -1; 2119 } else if (a.VsrD(0) > b.VsrD(0)) { 2120 return 1; 2121 } else if (a.VsrD(1) < b.VsrD(1)) { 2122 return -1; 2123 } else if (a.VsrD(1) > b.VsrD(1)) { 2124 return 1; 2125 } else { 2126 return 0; 2127 } 2128 } 2129 2130 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2131 { 2132 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 2133 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 2134 (~a.VsrD(1) < b.VsrD(1)); 2135 } 2136 2137 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2138 { 2139 ppc_avr_t not_a; 2140 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 2141 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 2142 (~a.VsrD(1) < b.VsrD(1)); 2143 avr_qw_not(¬_a, a); 2144 return avr_qw_cmpu(not_a, b) < 0; 2145 } 2146 2147 #endif 2148 2149 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2150 { 2151 #ifdef CONFIG_INT128 2152 r->u128 = a->u128 + b->u128; 2153 #else 2154 avr_qw_add(r, *a, *b); 2155 #endif 2156 } 2157 2158 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2159 { 2160 #ifdef CONFIG_INT128 2161 r->u128 = a->u128 + b->u128 + (c->u128 & 1); 2162 #else 2163 2164 if (c->VsrD(1) & 1) { 2165 ppc_avr_t tmp; 2166 2167 tmp.VsrD(0) = 0; 2168 tmp.VsrD(1) = c->VsrD(1) & 1; 2169 avr_qw_add(&tmp, *a, tmp); 2170 avr_qw_add(r, tmp, *b); 2171 } else { 2172 avr_qw_add(r, *a, *b); 2173 } 2174 #endif 2175 } 2176 2177 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2178 { 2179 #ifdef CONFIG_INT128 2180 r->u128 = (~a->u128 < b->u128); 2181 #else 2182 ppc_avr_t not_a; 2183 2184 avr_qw_not(¬_a, *a); 2185 2186 r->VsrD(0) = 0; 2187 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0); 2188 #endif 2189 } 2190 2191 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2192 { 2193 #ifdef CONFIG_INT128 2194 int carry_out = (~a->u128 < b->u128); 2195 if (!carry_out && (c->u128 & 1)) { 2196 carry_out = ((a->u128 + b->u128 + 1) == 0) && 2197 ((a->u128 != 0) || (b->u128 != 0)); 2198 } 2199 r->u128 = carry_out; 2200 #else 2201 2202 int carry_in = c->VsrD(1) & 1; 2203 int carry_out = 0; 2204 ppc_avr_t tmp; 2205 2206 carry_out = avr_qw_addc(&tmp, *a, *b); 2207 2208 if (!carry_out && carry_in) { 2209 ppc_avr_t one = QW_ONE; 2210 carry_out = avr_qw_addc(&tmp, tmp, one); 2211 } 2212 r->VsrD(0) = 0; 2213 r->VsrD(1) = carry_out; 2214 #endif 2215 } 2216 2217 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2218 { 2219 #ifdef CONFIG_INT128 2220 r->u128 = a->u128 - b->u128; 2221 #else 2222 ppc_avr_t tmp; 2223 
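    /* Without __int128, compute a - b as a + ~b + 1 (two's complement)
     * using the 128-bit add helpers. */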
ppc_avr_t one = QW_ONE; 2224 2225 avr_qw_not(&tmp, *b); 2226 avr_qw_add(&tmp, *a, tmp); 2227 avr_qw_add(r, tmp, one); 2228 #endif 2229 } 2230 2231 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2232 { 2233 #ifdef CONFIG_INT128 2234 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); 2235 #else 2236 ppc_avr_t tmp, sum; 2237 2238 avr_qw_not(&tmp, *b); 2239 avr_qw_add(&sum, *a, tmp); 2240 2241 tmp.VsrD(0) = 0; 2242 tmp.VsrD(1) = c->VsrD(1) & 1; 2243 avr_qw_add(r, sum, tmp); 2244 #endif 2245 } 2246 2247 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2248 { 2249 #ifdef CONFIG_INT128 2250 r->u128 = (~a->u128 < ~b->u128) || 2251 (a->u128 + ~b->u128 == (__uint128_t)-1); 2252 #else 2253 int carry = (avr_qw_cmpu(*a, *b) > 0); 2254 if (!carry) { 2255 ppc_avr_t tmp; 2256 avr_qw_not(&tmp, *b); 2257 avr_qw_add(&tmp, *a, tmp); 2258 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull)); 2259 } 2260 r->VsrD(0) = 0; 2261 r->VsrD(1) = carry; 2262 #endif 2263 } 2264 2265 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2266 { 2267 #ifdef CONFIG_INT128 2268 r->u128 = 2269 (~a->u128 < ~b->u128) || 2270 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); 2271 #else 2272 int carry_in = c->VsrD(1) & 1; 2273 int carry_out = (avr_qw_cmpu(*a, *b) > 0); 2274 if (!carry_out && carry_in) { 2275 ppc_avr_t tmp; 2276 avr_qw_not(&tmp, *b); 2277 avr_qw_add(&tmp, *a, tmp); 2278 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull)); 2279 } 2280 2281 r->VsrD(0) = 0; 2282 r->VsrD(1) = carry_out; 2283 #endif 2284 } 2285 2286 #define BCD_PLUS_PREF_1 0xC 2287 #define BCD_PLUS_PREF_2 0xF 2288 #define BCD_PLUS_ALT_1 0xA 2289 #define BCD_NEG_PREF 0xD 2290 #define BCD_NEG_ALT 0xB 2291 #define BCD_PLUS_ALT_2 0xE 2292 #define NATIONAL_PLUS 0x2B 2293 #define NATIONAL_NEG 0x2D 2294 2295 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2296 2297 static int bcd_get_sgn(ppc_avr_t *bcd) 2298 { 2299 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) { 2300 case BCD_PLUS_PREF_1: 2301 case BCD_PLUS_PREF_2: 2302 case BCD_PLUS_ALT_1: 2303 case BCD_PLUS_ALT_2: 2304 { 2305 return 1; 2306 } 2307 2308 case BCD_NEG_PREF: 2309 case BCD_NEG_ALT: 2310 { 2311 return -1; 2312 } 2313 2314 default: 2315 { 2316 return 0; 2317 } 2318 } 2319 } 2320 2321 static int bcd_preferred_sgn(int sgn, int ps) 2322 { 2323 if (sgn >= 0) { 2324 return (ps == 0) ? 
BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2325 } else { 2326 return BCD_NEG_PREF; 2327 } 2328 } 2329 2330 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2331 { 2332 uint8_t result; 2333 if (n & 1) { 2334 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4; 2335 } else { 2336 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF; 2337 } 2338 2339 if (unlikely(result > 9)) { 2340 *invalid = true; 2341 } 2342 return result; 2343 } 2344 2345 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2346 { 2347 if (n & 1) { 2348 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F; 2349 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4); 2350 } else { 2351 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0; 2352 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit; 2353 } 2354 } 2355 2356 static bool bcd_is_valid(ppc_avr_t *bcd) 2357 { 2358 int i; 2359 int invalid = 0; 2360 2361 if (bcd_get_sgn(bcd) == 0) { 2362 return false; 2363 } 2364 2365 for (i = 1; i < 32; i++) { 2366 bcd_get_digit(bcd, i, &invalid); 2367 if (unlikely(invalid)) { 2368 return false; 2369 } 2370 } 2371 return true; 2372 } 2373 2374 static int bcd_cmp_zero(ppc_avr_t *bcd) 2375 { 2376 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) { 2377 return CRF_EQ; 2378 } else { 2379 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT; 2380 } 2381 } 2382 2383 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2384 { 2385 return reg->VsrH(7 - n); 2386 } 2387 2388 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2389 { 2390 reg->VsrH(7 - n) = val; 2391 } 2392 2393 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2394 { 2395 int i; 2396 int invalid = 0; 2397 for (i = 31; i > 0; i--) { 2398 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2399 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2400 if (unlikely(invalid)) { 2401 return 0; /* doesn't matter */ 2402 } else if (dig_a > dig_b) { 2403 return 1; 2404 } else if (dig_a < dig_b) { 2405 return -1; 2406 } 2407 } 2408 2409 return 0; 2410 } 2411 2412 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2413 int *overflow) 2414 { 2415 int carry = 0; 2416 int i; 2417 int is_zero = 1; 2418 2419 for (i = 1; i <= 31; i++) { 2420 uint8_t digit = bcd_get_digit(a, i, invalid) + 2421 bcd_get_digit(b, i, invalid) + carry; 2422 is_zero &= (digit == 0); 2423 if (digit > 9) { 2424 carry = 1; 2425 digit -= 10; 2426 } else { 2427 carry = 0; 2428 } 2429 2430 bcd_put_digit(t, digit, i); 2431 } 2432 2433 *overflow = carry; 2434 return is_zero; 2435 } 2436 2437 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2438 int *overflow) 2439 { 2440 int carry = 0; 2441 int i; 2442 2443 for (i = 1; i <= 31; i++) { 2444 uint8_t digit = bcd_get_digit(a, i, invalid) - 2445 bcd_get_digit(b, i, invalid) + carry; 2446 if (digit & 0x80) { 2447 carry = -1; 2448 digit += 10; 2449 } else { 2450 carry = 0; 2451 } 2452 2453 bcd_put_digit(t, digit, i); 2454 } 2455 2456 *overflow = carry; 2457 } 2458 2459 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2460 { 2461 2462 int sgna = bcd_get_sgn(a); 2463 int sgnb = bcd_get_sgn(b); 2464 int invalid = (sgna == 0) || (sgnb == 0); 2465 int overflow = 0; 2466 int zero = 0; 2467 uint32_t cr = 0; 2468 ppc_avr_t result = { .u64 = { 0, 0 } }; 2469 2470 if (!invalid) { 2471 if (sgna == sgnb) { 2472 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2473 zero = bcd_add_mag(&result, a, b, &invalid, &overflow); 2474 cr = (sgna > 0) ? 
CRF_GT : CRF_LT; 2475 } else { 2476 int magnitude = bcd_cmp_mag(a, b); 2477 if (magnitude > 0) { 2478 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2479 bcd_sub_mag(&result, a, b, &invalid, &overflow); 2480 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2481 } else if (magnitude < 0) { 2482 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps); 2483 bcd_sub_mag(&result, b, a, &invalid, &overflow); 2484 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2485 } else { 2486 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps); 2487 cr = CRF_EQ; 2488 } 2489 } 2490 } 2491 2492 if (unlikely(invalid)) { 2493 result.VsrD(0) = result.VsrD(1) = -1; 2494 cr = CRF_SO; 2495 } else if (overflow) { 2496 cr |= CRF_SO; 2497 } else if (zero) { 2498 cr |= CRF_EQ; 2499 } 2500 2501 *r = result; 2502 2503 return cr; 2504 } 2505 2506 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2507 { 2508 ppc_avr_t bcopy = *b; 2509 int sgnb = bcd_get_sgn(b); 2510 if (sgnb < 0) { 2511 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2512 } else if (sgnb > 0) { 2513 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2514 } 2515 /* else invalid ... defer to bcdadd code for proper handling */ 2516 2517 return helper_bcdadd(r, a, &bcopy, ps); 2518 } 2519 2520 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2521 { 2522 int i; 2523 int cr = 0; 2524 uint16_t national = 0; 2525 uint16_t sgnb = get_national_digit(b, 0); 2526 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2527 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2528 2529 for (i = 1; i < 8; i++) { 2530 national = get_national_digit(b, i); 2531 if (unlikely(national < 0x30 || national > 0x39)) { 2532 invalid = 1; 2533 break; 2534 } 2535 2536 bcd_put_digit(&ret, national & 0xf, i); 2537 } 2538 2539 if (sgnb == NATIONAL_PLUS) { 2540 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2541 } else { 2542 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2543 } 2544 2545 cr = bcd_cmp_zero(&ret); 2546 2547 if (unlikely(invalid)) { 2548 cr = CRF_SO; 2549 } 2550 2551 *r = ret; 2552 2553 return cr; 2554 } 2555 2556 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2557 { 2558 int i; 2559 int cr = 0; 2560 int sgnb = bcd_get_sgn(b); 2561 int invalid = (sgnb == 0); 2562 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2563 2564 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0); 2565 2566 for (i = 1; i < 8; i++) { 2567 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2568 2569 if (unlikely(invalid)) { 2570 break; 2571 } 2572 } 2573 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2574 2575 cr = bcd_cmp_zero(b); 2576 2577 if (ox_flag) { 2578 cr |= CRF_SO; 2579 } 2580 2581 if (unlikely(invalid)) { 2582 cr = CRF_SO; 2583 } 2584 2585 *r = ret; 2586 2587 return cr; 2588 } 2589 2590 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2591 { 2592 int i; 2593 int cr = 0; 2594 int invalid = 0; 2595 int zone_digit = 0; 2596 int zone_lead = ps ? 0xF : 0x3; 2597 int digit = 0; 2598 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2599 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4; 2600 2601 if (unlikely((sgnb < 0xA) && ps)) { 2602 invalid = 1; 2603 } 2604 2605 for (i = 0; i < 16; i++) { 2606 zone_digit = i ? 
b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead; 2607 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF; 2608 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2609 invalid = 1; 2610 break; 2611 } 2612 2613 bcd_put_digit(&ret, digit, i + 1); 2614 } 2615 2616 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2617 (!ps && (sgnb & 0x4))) { 2618 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2619 } else { 2620 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2621 } 2622 2623 cr = bcd_cmp_zero(&ret); 2624 2625 if (unlikely(invalid)) { 2626 cr = CRF_SO; 2627 } 2628 2629 *r = ret; 2630 2631 return cr; 2632 } 2633 2634 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2635 { 2636 int i; 2637 int cr = 0; 2638 uint8_t digit = 0; 2639 int sgnb = bcd_get_sgn(b); 2640 int zone_lead = (ps) ? 0xF0 : 0x30; 2641 int invalid = (sgnb == 0); 2642 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2643 2644 int ox_flag = ((b->VsrD(0) >> 4) != 0); 2645 2646 for (i = 0; i < 16; i++) { 2647 digit = bcd_get_digit(b, i + 1, &invalid); 2648 2649 if (unlikely(invalid)) { 2650 break; 2651 } 2652 2653 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit; 2654 } 2655 2656 if (ps) { 2657 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2658 } else { 2659 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1); 2660 } 2661 2662 cr = bcd_cmp_zero(b); 2663 2664 if (ox_flag) { 2665 cr |= CRF_SO; 2666 } 2667 2668 if (unlikely(invalid)) { 2669 cr = CRF_SO; 2670 } 2671 2672 *r = ret; 2673 2674 return cr; 2675 } 2676 2677 /** 2678 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs 2679 * 2680 * Returns: 2681 * > 0 if ahi|alo > bhi|blo, 2682 * 0 if ahi|alo == bhi|blo, 2683 * < 0 if ahi|alo < bhi|blo 2684 */ 2685 static inline int ucmp128(uint64_t alo, uint64_t ahi, 2686 uint64_t blo, uint64_t bhi) 2687 { 2688 return (ahi == bhi) ? 2689 (alo > blo ? 1 : (alo == blo ? 0 : -1)) : 2690 (ahi > bhi ? 1 : -1); 2691 } 2692 2693 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2694 { 2695 int i; 2696 int cr; 2697 uint64_t lo_value; 2698 uint64_t hi_value; 2699 uint64_t rem; 2700 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2701 2702 if (b->VsrSD(0) < 0) { 2703 lo_value = -b->VsrSD(1); 2704 hi_value = ~b->VsrD(0) + !lo_value; 2705 bcd_put_digit(&ret, 0xD, 0); 2706 2707 cr = CRF_LT; 2708 } else { 2709 lo_value = b->VsrD(1); 2710 hi_value = b->VsrD(0); 2711 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); 2712 2713 if (hi_value == 0 && lo_value == 0) { 2714 cr = CRF_EQ; 2715 } else { 2716 cr = CRF_GT; 2717 } 2718 } 2719 2720 /* 2721 * Check src limits: abs(src) <= 10^31 - 1 2722 * 2723 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff 2724 */ 2725 if (ucmp128(lo_value, hi_value, 2726 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) { 2727 cr |= CRF_SO; 2728 2729 /* 2730 * According to the ISA, if src wouldn't fit in the destination 2731 * register, the result is undefined. 2732 * In that case, we leave r unchanged. 
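         * Otherwise (the else branch below), the 128-bit magnitude is
         * split by divu128() with divisor 10^15: the remainder supplies
         * BCD digits 1..15 and the quotient, which fits in 64 bits
         * because the value is below 10^31, supplies digits 16..31.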
2733 */ 2734 } else { 2735 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL); 2736 2737 for (i = 1; i < 16; rem /= 10, i++) { 2738 bcd_put_digit(&ret, rem % 10, i); 2739 } 2740 2741 for (; i < 32; lo_value /= 10, i++) { 2742 bcd_put_digit(&ret, lo_value % 10, i); 2743 } 2744 2745 *r = ret; 2746 } 2747 2748 return cr; 2749 } 2750 2751 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2752 { 2753 uint8_t i; 2754 int cr; 2755 uint64_t carry; 2756 uint64_t unused; 2757 uint64_t lo_value; 2758 uint64_t hi_value = 0; 2759 int sgnb = bcd_get_sgn(b); 2760 int invalid = (sgnb == 0); 2761 2762 lo_value = bcd_get_digit(b, 31, &invalid); 2763 for (i = 30; i > 0; i--) { 2764 mulu64(&lo_value, &carry, lo_value, 10ULL); 2765 mulu64(&hi_value, &unused, hi_value, 10ULL); 2766 lo_value += bcd_get_digit(b, i, &invalid); 2767 hi_value += carry; 2768 2769 if (unlikely(invalid)) { 2770 break; 2771 } 2772 } 2773 2774 if (sgnb == -1) { 2775 r->VsrSD(1) = -lo_value; 2776 r->VsrSD(0) = ~hi_value + !r->VsrSD(1); 2777 } else { 2778 r->VsrSD(1) = lo_value; 2779 r->VsrSD(0) = hi_value; 2780 } 2781 2782 cr = bcd_cmp_zero(b); 2783 2784 if (unlikely(invalid)) { 2785 cr = CRF_SO; 2786 } 2787 2788 return cr; 2789 } 2790 2791 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2792 { 2793 int i; 2794 int invalid = 0; 2795 2796 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 2797 return CRF_SO; 2798 } 2799 2800 *r = *a; 2801 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0); 2802 2803 for (i = 1; i < 32; i++) { 2804 bcd_get_digit(a, i, &invalid); 2805 bcd_get_digit(b, i, &invalid); 2806 if (unlikely(invalid)) { 2807 return CRF_SO; 2808 } 2809 } 2810 2811 return bcd_cmp_zero(r); 2812 } 2813 2814 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2815 { 2816 int sgnb = bcd_get_sgn(b); 2817 2818 *r = *b; 2819 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 2820 2821 if (bcd_is_valid(b) == false) { 2822 return CRF_SO; 2823 } 2824 2825 return bcd_cmp_zero(r); 2826 } 2827 2828 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2829 { 2830 int cr; 2831 int i = a->VsrSB(7); 2832 bool ox_flag = false; 2833 int sgnb = bcd_get_sgn(b); 2834 ppc_avr_t ret = *b; 2835 ret.VsrD(1) &= ~0xf; 2836 2837 if (bcd_is_valid(b) == false) { 2838 return CRF_SO; 2839 } 2840 2841 if (unlikely(i > 31)) { 2842 i = 31; 2843 } else if (unlikely(i < -31)) { 2844 i = -31; 2845 } 2846 2847 if (i > 0) { 2848 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2849 } else { 2850 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2851 } 2852 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2853 2854 *r = ret; 2855 2856 cr = bcd_cmp_zero(r); 2857 if (ox_flag) { 2858 cr |= CRF_SO; 2859 } 2860 2861 return cr; 2862 } 2863 2864 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2865 { 2866 int cr; 2867 int i; 2868 int invalid = 0; 2869 bool ox_flag = false; 2870 ppc_avr_t ret = *b; 2871 2872 for (i = 0; i < 32; i++) { 2873 bcd_get_digit(b, i, &invalid); 2874 2875 if (unlikely(invalid)) { 2876 return CRF_SO; 2877 } 2878 } 2879 2880 i = a->VsrSB(7); 2881 if (i >= 32) { 2882 ox_flag = true; 2883 ret.VsrD(1) = ret.VsrD(0) = 0; 2884 } else if (i <= -32) { 2885 ret.VsrD(1) = ret.VsrD(0) = 0; 2886 } else if (i > 0) { 2887 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2888 } else { 2889 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2890 } 2891 *r = ret; 2892 2893 cr = bcd_cmp_zero(r); 2894 if (ox_flag) { 2895 cr |= CRF_SO; 2896 } 2897 2898 return 
cr; 2899 } 2900 2901 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2902 { 2903 int cr; 2904 int unused = 0; 2905 int invalid = 0; 2906 bool ox_flag = false; 2907 int sgnb = bcd_get_sgn(b); 2908 ppc_avr_t ret = *b; 2909 ret.VsrD(1) &= ~0xf; 2910 2911 int i = a->VsrSB(7); 2912 ppc_avr_t bcd_one; 2913 2914 bcd_one.VsrD(0) = 0; 2915 bcd_one.VsrD(1) = 0x10; 2916 2917 if (bcd_is_valid(b) == false) { 2918 return CRF_SO; 2919 } 2920 2921 if (unlikely(i > 31)) { 2922 i = 31; 2923 } else if (unlikely(i < -31)) { 2924 i = -31; 2925 } 2926 2927 if (i > 0) { 2928 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2929 } else { 2930 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2931 2932 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 2933 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 2934 } 2935 } 2936 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2937 2938 cr = bcd_cmp_zero(&ret); 2939 if (ox_flag) { 2940 cr |= CRF_SO; 2941 } 2942 *r = ret; 2943 2944 return cr; 2945 } 2946 2947 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2948 { 2949 uint64_t mask; 2950 uint32_t ox_flag = 0; 2951 int i = a->VsrSH(3) + 1; 2952 ppc_avr_t ret = *b; 2953 2954 if (bcd_is_valid(b) == false) { 2955 return CRF_SO; 2956 } 2957 2958 if (i > 16 && i < 32) { 2959 mask = (uint64_t)-1 >> (128 - i * 4); 2960 if (ret.VsrD(0) & ~mask) { 2961 ox_flag = CRF_SO; 2962 } 2963 2964 ret.VsrD(0) &= mask; 2965 } else if (i >= 0 && i <= 16) { 2966 mask = (uint64_t)-1 >> (64 - i * 4); 2967 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 2968 ox_flag = CRF_SO; 2969 } 2970 2971 ret.VsrD(1) &= mask; 2972 ret.VsrD(0) = 0; 2973 } 2974 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 2975 *r = ret; 2976 2977 return bcd_cmp_zero(&ret) | ox_flag; 2978 } 2979 2980 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2981 { 2982 int i; 2983 uint64_t mask; 2984 uint32_t ox_flag = 0; 2985 int invalid = 0; 2986 ppc_avr_t ret = *b; 2987 2988 for (i = 0; i < 32; i++) { 2989 bcd_get_digit(b, i, &invalid); 2990 2991 if (unlikely(invalid)) { 2992 return CRF_SO; 2993 } 2994 } 2995 2996 i = a->VsrSH(3); 2997 if (i > 16 && i < 33) { 2998 mask = (uint64_t)-1 >> (128 - i * 4); 2999 if (ret.VsrD(0) & ~mask) { 3000 ox_flag = CRF_SO; 3001 } 3002 3003 ret.VsrD(0) &= mask; 3004 } else if (i > 0 && i <= 16) { 3005 mask = (uint64_t)-1 >> (64 - i * 4); 3006 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 3007 ox_flag = CRF_SO; 3008 } 3009 3010 ret.VsrD(1) &= mask; 3011 ret.VsrD(0) = 0; 3012 } else if (i == 0) { 3013 if (ret.VsrD(0) || ret.VsrD(1)) { 3014 ox_flag = CRF_SO; 3015 } 3016 ret.VsrD(0) = ret.VsrD(1) = 0; 3017 } 3018 3019 *r = ret; 3020 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) { 3021 return ox_flag | CRF_EQ; 3022 } 3023 3024 return ox_flag | CRF_GT; 3025 } 3026 3027 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 3028 { 3029 int i; 3030 VECTOR_FOR_INORDER_I(i, u8) { 3031 r->u8[i] = AES_sbox[a->u8[i]]; 3032 } 3033 } 3034 3035 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3036 { 3037 ppc_avr_t result; 3038 int i; 3039 3040 VECTOR_FOR_INORDER_I(i, u32) { 3041 result.VsrW(i) = b->VsrW(i) ^ 3042 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^ 3043 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^ 3044 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^ 3045 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]); 3046 } 3047 *r = result; 3048 } 3049 3050 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3051 { 3052 ppc_avr_t result; 3053 int i; 3054 3055 
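    /*
     * Final AES encryption round: SubBytes (AES_sbox) and ShiftRows
     * (AES_shifts) are applied to the state in a, then the round key in
     * b is XORed in; MixColumns is omitted in the last round.
     */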
VECTOR_FOR_INORDER_I(i, u8) { 3056 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]); 3057 } 3058 *r = result; 3059 } 3060 3061 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3062 { 3063 /* This differs from what is written in ISA V2.07. The RTL is */ 3064 /* incorrect and will be fixed in V2.07B. */ 3065 int i; 3066 ppc_avr_t tmp; 3067 3068 VECTOR_FOR_INORDER_I(i, u8) { 3069 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])]; 3070 } 3071 3072 VECTOR_FOR_INORDER_I(i, u32) { 3073 r->VsrW(i) = 3074 AES_imc[tmp.VsrB(4 * i + 0)][0] ^ 3075 AES_imc[tmp.VsrB(4 * i + 1)][1] ^ 3076 AES_imc[tmp.VsrB(4 * i + 2)][2] ^ 3077 AES_imc[tmp.VsrB(4 * i + 3)][3]; 3078 } 3079 } 3080 3081 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3082 { 3083 ppc_avr_t result; 3084 int i; 3085 3086 VECTOR_FOR_INORDER_I(i, u8) { 3087 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]); 3088 } 3089 *r = result; 3090 } 3091 3092 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3093 { 3094 int st = (st_six & 0x10) != 0; 3095 int six = st_six & 0xF; 3096 int i; 3097 3098 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 3099 if (st == 0) { 3100 if ((six & (0x8 >> i)) == 0) { 3101 r->VsrW(i) = ror32(a->VsrW(i), 7) ^ 3102 ror32(a->VsrW(i), 18) ^ 3103 (a->VsrW(i) >> 3); 3104 } else { /* six.bit[i] == 1 */ 3105 r->VsrW(i) = ror32(a->VsrW(i), 17) ^ 3106 ror32(a->VsrW(i), 19) ^ 3107 (a->VsrW(i) >> 10); 3108 } 3109 } else { /* st == 1 */ 3110 if ((six & (0x8 >> i)) == 0) { 3111 r->VsrW(i) = ror32(a->VsrW(i), 2) ^ 3112 ror32(a->VsrW(i), 13) ^ 3113 ror32(a->VsrW(i), 22); 3114 } else { /* six.bit[i] == 1 */ 3115 r->VsrW(i) = ror32(a->VsrW(i), 6) ^ 3116 ror32(a->VsrW(i), 11) ^ 3117 ror32(a->VsrW(i), 25); 3118 } 3119 } 3120 } 3121 } 3122 3123 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3124 { 3125 int st = (st_six & 0x10) != 0; 3126 int six = st_six & 0xF; 3127 int i; 3128 3129 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 3130 if (st == 0) { 3131 if ((six & (0x8 >> (2 * i))) == 0) { 3132 r->VsrD(i) = ror64(a->VsrD(i), 1) ^ 3133 ror64(a->VsrD(i), 8) ^ 3134 (a->VsrD(i) >> 7); 3135 } else { /* six.bit[2*i] == 1 */ 3136 r->VsrD(i) = ror64(a->VsrD(i), 19) ^ 3137 ror64(a->VsrD(i), 61) ^ 3138 (a->VsrD(i) >> 6); 3139 } 3140 } else { /* st == 1 */ 3141 if ((six & (0x8 >> (2 * i))) == 0) { 3142 r->VsrD(i) = ror64(a->VsrD(i), 28) ^ 3143 ror64(a->VsrD(i), 34) ^ 3144 ror64(a->VsrD(i), 39); 3145 } else { /* six.bit[2*i] == 1 */ 3146 r->VsrD(i) = ror64(a->VsrD(i), 14) ^ 3147 ror64(a->VsrD(i), 18) ^ 3148 ror64(a->VsrD(i), 41); 3149 } 3150 } 3151 } 3152 } 3153 3154 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 3155 { 3156 ppc_avr_t result; 3157 int i; 3158 3159 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 3160 int indexA = c->VsrB(i) >> 4; 3161 int indexB = c->VsrB(i) & 0xF; 3162 3163 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB); 3164 } 3165 *r = result; 3166 } 3167 3168 #undef VECTOR_FOR_INORDER_I 3169 3170 /*****************************************************************************/ 3171 /* SPE extension helpers */ 3172 /* Use a table to make this quicker */ 3173 static const uint8_t hbrev[16] = { 3174 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE, 3175 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF, 3176 }; 3177 3178 static inline uint8_t byte_reverse(uint8_t val) 3179 { 3180 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4); 3181 } 3182 3183 static inline uint32_t word_reverse(uint32_t val) 3184 { 3185 return 
byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) | 3186 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24); 3187 } 3188 3189 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */ 3190 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2) 3191 { 3192 uint32_t a, b, d, mask; 3193 3194 mask = UINT32_MAX >> (32 - MASKBITS); 3195 a = arg1 & mask; 3196 b = arg2 & mask; 3197 d = word_reverse(1 + word_reverse(a | ~b)); 3198 return (arg1 & ~mask) | (d & b); 3199 } 3200 3201 uint32_t helper_cntlsw32(uint32_t val) 3202 { 3203 if (val & 0x80000000) { 3204 return clz32(~val); 3205 } else { 3206 return clz32(val); 3207 } 3208 } 3209 3210 uint32_t helper_cntlzw32(uint32_t val) 3211 { 3212 return clz32(val); 3213 } 3214 3215 /* 440 specific */ 3216 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 3217 target_ulong low, uint32_t update_Rc) 3218 { 3219 target_ulong mask; 3220 int i; 3221 3222 i = 1; 3223 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3224 if ((high & mask) == 0) { 3225 if (update_Rc) { 3226 env->crf[0] = 0x4; 3227 } 3228 goto done; 3229 } 3230 i++; 3231 } 3232 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3233 if ((low & mask) == 0) { 3234 if (update_Rc) { 3235 env->crf[0] = 0x8; 3236 } 3237 goto done; 3238 } 3239 i++; 3240 } 3241 i = 8; 3242 if (update_Rc) { 3243 env->crf[0] = 0x2; 3244 } 3245 done: 3246 env->xer = (env->xer & ~0x7F) | i; 3247 if (update_Rc) { 3248 env->crf[0] |= xer_so; 3249 } 3250 return i; 3251 } 3252
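/*
 * Illustrative sketch only (kept out of the build with #if 0): a
 * hypothetical caller of helper_dlmzb(), which scans the 8 bytes of
 * high:low from the most significant byte down and returns the 1-based
 * position of the first zero byte, or 8 if none is found.
 */
#if 0
static void dlmzb_example(CPUPPCState *env)
{
    /* The zero byte is the 6th byte of 0x1122334455006677, so n == 6. */
    target_ulong n = helper_dlmzb(env, 0x11223344, 0x55006677, 0);

    /* The byte count is also written to the low 7 bits of env->xer. */
    (void)n;
}
#endif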