/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    if (unlikely(rb == 0 || ra >= rb)) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divu128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    uint64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = 0;

    if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divs128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) *
(~(target_ulong)0 / 0xff)) 148 149 /* 150 * subtract 1 from each byte, and with inverse, check if MSB is set at each 151 * byte. 152 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80 153 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 154 */ 155 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 156 157 /* When you XOR the pattern and there is a match, that byte will be zero */ 158 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 159 160 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 161 { 162 return hasvalue(rb, ra) ? CRF_GT : 0; 163 } 164 165 #undef pattern 166 #undef haszero 167 #undef hasvalue 168 169 /* 170 * Return a random number. 171 */ 172 uint64_t helper_darn32(void) 173 { 174 Error *err = NULL; 175 uint32_t ret; 176 177 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 178 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 179 error_get_pretty(err)); 180 error_free(err); 181 return -1; 182 } 183 184 return ret; 185 } 186 187 uint64_t helper_darn64(void) 188 { 189 Error *err = NULL; 190 uint64_t ret; 191 192 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 193 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 194 error_get_pretty(err)); 195 error_free(err); 196 return -1; 197 } 198 199 return ret; 200 } 201 202 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 203 { 204 int i; 205 uint64_t ra = 0; 206 207 for (i = 0; i < 8; i++) { 208 int index = (rs >> (i * 8)) & 0xFF; 209 if (index < 64) { 210 if (rb & PPC_BIT(index)) { 211 ra |= 1 << i; 212 } 213 } 214 } 215 return ra; 216 } 217 218 #endif 219 220 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 221 { 222 target_ulong mask = 0xff; 223 target_ulong ra = 0; 224 int i; 225 226 for (i = 0; i < sizeof(target_ulong); i++) { 227 if ((rs & mask) == (rb & mask)) { 228 ra |= mask; 229 } 230 mask <<= 8; 231 } 232 return ra; 233 } 234 235 /* shift right arithmetic helper */ 236 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 237 target_ulong shift) 238 { 239 int32_t ret; 240 241 if (likely(!(shift & 0x20))) { 242 if (likely((uint32_t)shift != 0)) { 243 shift &= 0x1f; 244 ret = (int32_t)value >> shift; 245 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 246 env->ca32 = env->ca = 0; 247 } else { 248 env->ca32 = env->ca = 1; 249 } 250 } else { 251 ret = (int32_t)value; 252 env->ca32 = env->ca = 0; 253 } 254 } else { 255 ret = (int32_t)value >> 31; 256 env->ca32 = env->ca = (ret != 0); 257 } 258 return (target_long)ret; 259 } 260 261 #if defined(TARGET_PPC64) 262 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 263 target_ulong shift) 264 { 265 int64_t ret; 266 267 if (likely(!(shift & 0x40))) { 268 if (likely((uint64_t)shift != 0)) { 269 shift &= 0x3f; 270 ret = (int64_t)value >> shift; 271 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 272 env->ca32 = env->ca = 0; 273 } else { 274 env->ca32 = env->ca = 1; 275 } 276 } else { 277 ret = (int64_t)value; 278 env->ca32 = env->ca = 0; 279 } 280 } else { 281 ret = (int64_t)value >> 63; 282 env->ca32 = env->ca = (ret != 0); 283 } 284 return ret; 285 } 286 #endif 287 288 #if defined(TARGET_PPC64) 289 target_ulong helper_popcntb(target_ulong val) 290 { 291 /* Note that we don't fold past bytes */ 292 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 293 0x5555555555555555ULL); 294 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 295 0x3333333333333333ULL); 296 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 297 0x0f0f0f0f0f0f0f0fULL); 298 return val; 299 } 300 301 target_ulong 
helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words. */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}
#endif

uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
{
    /*
     * Instead of processing the mask bit-by-bit from the most significant to
     * the least significant bit, as described in PowerISA, we'll handle it in
     * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision of
     * whether to use ctz or cto, we negate the mask at the end of the loop.
     */
    target_ulong m, left = 0, right = 0;
    unsigned int n, i = 64;
    bool bit = false; /* tracks whether we are processing zeros or ones */

    if (mask == 0 || mask == -1) {
        return src;
    }

    /* Process the mask in blocks, from LSB to MSB */
    while (i) {
        /* Find how many bits we should take */
        n = ctz64(mask);
        if (n > i) {
            n = i;
        }

        /*
         * Extract the 'n' trailing bits of src and put them on the leading
         * 'n' bits of 'right' or 'left', pushing down the previously
         * extracted values.
         */
        m = (1ll << n) - 1;
        if (bit) {
            right = ror64(right | (src & m), n);
        } else {
            left = ror64(left | (src & m), n);
        }

        /*
         * Discard the processed bits from 'src' and 'mask'. Note that we are
         * removing 'n' trailing zeros from 'mask', but the logical shift will
         * add 'n' leading zeros back, so the population count of 'mask' is
         * kept the same.
         */
        src >>= n;
        mask >>= n;
        i -= n;
        bit = !bit;
        mask = ~mask;
    }

    /*
     * At the end, 'right' has been rotated right by a total of ctpop(mask)
     * bits. To put it back in place, shift it right by the remaining
     * 64 - ctpop(mask) bits.
379 */ 380 if (bit) { 381 n = ctpop64(mask); 382 } else { 383 n = 64 - ctpop64(mask); 384 } 385 386 return left | (right >> n); 387 } 388 389 uint64_t helper_PDEPD(uint64_t src, uint64_t mask) 390 { 391 int i, o; 392 uint64_t result = 0; 393 394 if (mask == -1) { 395 return src; 396 } 397 398 for (i = 0; mask != 0; i++) { 399 o = ctz64(mask); 400 mask &= mask - 1; 401 result |= ((src >> i) & 1) << o; 402 } 403 404 return result; 405 } 406 407 uint64_t helper_PEXTD(uint64_t src, uint64_t mask) 408 { 409 int i, o; 410 uint64_t result = 0; 411 412 if (mask == -1) { 413 return src; 414 } 415 416 for (o = 0; mask != 0; o++) { 417 i = ctz64(mask); 418 mask &= mask - 1; 419 result |= ((src >> i) & 1) << o; 420 } 421 422 return result; 423 } 424 425 /*****************************************************************************/ 426 /* Altivec extension helpers */ 427 #if defined(HOST_WORDS_BIGENDIAN) 428 #define VECTOR_FOR_INORDER_I(index, element) \ 429 for (index = 0; index < ARRAY_SIZE(r->element); index++) 430 #else 431 #define VECTOR_FOR_INORDER_I(index, element) \ 432 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--) 433 #endif 434 435 /* Saturating arithmetic helpers. */ 436 #define SATCVT(from, to, from_type, to_type, min, max) \ 437 static inline to_type cvt##from##to(from_type x, int *sat) \ 438 { \ 439 to_type r; \ 440 \ 441 if (x < (from_type)min) { \ 442 r = min; \ 443 *sat = 1; \ 444 } else if (x > (from_type)max) { \ 445 r = max; \ 446 *sat = 1; \ 447 } else { \ 448 r = x; \ 449 } \ 450 return r; \ 451 } 452 #define SATCVTU(from, to, from_type, to_type, min, max) \ 453 static inline to_type cvt##from##to(from_type x, int *sat) \ 454 { \ 455 to_type r; \ 456 \ 457 if (x > (from_type)max) { \ 458 r = max; \ 459 *sat = 1; \ 460 } else { \ 461 r = x; \ 462 } \ 463 return r; \ 464 } 465 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX) 466 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX) 467 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX) 468 469 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX) 470 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX) 471 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX) 472 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX) 473 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX) 474 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX) 475 #undef SATCVT 476 #undef SATCVTU 477 478 void helper_mtvscr(CPUPPCState *env, uint32_t vscr) 479 { 480 ppc_store_vscr(env, vscr); 481 } 482 483 uint32_t helper_mfvscr(CPUPPCState *env) 484 { 485 return ppc_get_vscr(env); 486 } 487 488 static inline void set_vscr_sat(CPUPPCState *env) 489 { 490 /* The choice of non-zero value is arbitrary. 
*/ 491 env->vscr_sat.u32[0] = 1; 492 } 493 494 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 495 { 496 int i; 497 498 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 499 r->u32[i] = ~a->u32[i] < b->u32[i]; 500 } 501 } 502 503 /* vprtybw */ 504 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b) 505 { 506 int i; 507 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 508 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16); 509 res ^= res >> 8; 510 r->u32[i] = res & 1; 511 } 512 } 513 514 /* vprtybd */ 515 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b) 516 { 517 int i; 518 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 519 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32); 520 res ^= res >> 16; 521 res ^= res >> 8; 522 r->u64[i] = res & 1; 523 } 524 } 525 526 /* vprtybq */ 527 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b) 528 { 529 uint64_t res = b->u64[0] ^ b->u64[1]; 530 res ^= res >> 32; 531 res ^= res >> 16; 532 res ^= res >> 8; 533 r->VsrD(1) = res & 1; 534 r->VsrD(0) = 0; 535 } 536 537 #define VARITHFP(suffix, func) \ 538 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 539 ppc_avr_t *b) \ 540 { \ 541 int i; \ 542 \ 543 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 544 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 545 } \ 546 } 547 VARITHFP(addfp, float32_add) 548 VARITHFP(subfp, float32_sub) 549 VARITHFP(minfp, float32_min) 550 VARITHFP(maxfp, float32_max) 551 #undef VARITHFP 552 553 #define VARITHFPFMA(suffix, type) \ 554 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 555 ppc_avr_t *b, ppc_avr_t *c) \ 556 { \ 557 int i; \ 558 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 559 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 560 type, &env->vec_status); \ 561 } \ 562 } 563 VARITHFPFMA(maddfp, 0); 564 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 565 #undef VARITHFPFMA 566 567 #define VARITHSAT_CASE(type, op, cvt, element) \ 568 { \ 569 type result = (type)a->element[i] op (type)b->element[i]; \ 570 r->element[i] = cvt(result, &sat); \ 571 } 572 573 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 574 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 575 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 576 { \ 577 int sat = 0; \ 578 int i; \ 579 \ 580 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 581 VARITHSAT_CASE(optype, op, cvt, element); \ 582 } \ 583 if (sat) { \ 584 vscr_sat->u32[0] = 1; \ 585 } \ 586 } 587 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 588 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 589 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 590 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 591 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 592 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 593 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 594 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 595 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 596 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 597 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 598 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 599 #undef VARITHSAT_CASE 600 #undef VARITHSAT_DO 601 #undef VARITHSAT_SIGNED 602 #undef VARITHSAT_UNSIGNED 603 604 #define VAVG_DO(name, element, etype) \ 605 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 606 { \ 607 int i; \ 608 \ 609 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 610 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 611 r->element[i] = x >> 1; \ 612 } \ 613 } 614 615 #define VAVG(type, 
signed_element, signed_type, unsigned_element, \ 616 unsigned_type) \ 617 VAVG_DO(avgs##type, signed_element, signed_type) \ 618 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 619 VAVG(b, s8, int16_t, u8, uint16_t) 620 VAVG(h, s16, int32_t, u16, uint32_t) 621 VAVG(w, s32, int64_t, u32, uint64_t) 622 #undef VAVG_DO 623 #undef VAVG 624 625 #define VABSDU_DO(name, element) \ 626 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 627 { \ 628 int i; \ 629 \ 630 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 631 r->element[i] = (a->element[i] > b->element[i]) ? \ 632 (a->element[i] - b->element[i]) : \ 633 (b->element[i] - a->element[i]); \ 634 } \ 635 } 636 637 /* 638 * VABSDU - Vector absolute difference unsigned 639 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 640 * element - element type to access from vector 641 */ 642 #define VABSDU(type, element) \ 643 VABSDU_DO(absdu##type, element) 644 VABSDU(b, u8) 645 VABSDU(h, u16) 646 VABSDU(w, u32) 647 #undef VABSDU_DO 648 #undef VABSDU 649 650 #define VCF(suffix, cvt, element) \ 651 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 652 ppc_avr_t *b, uint32_t uim) \ 653 { \ 654 int i; \ 655 \ 656 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 657 float32 t = cvt(b->element[i], &env->vec_status); \ 658 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 659 } \ 660 } 661 VCF(ux, uint32_to_float32, u32) 662 VCF(sx, int32_to_float32, s32) 663 #undef VCF 664 665 #define VCMP_DO(suffix, compare, element, record) \ 666 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 667 ppc_avr_t *a, ppc_avr_t *b) \ 668 { \ 669 uint64_t ones = (uint64_t)-1; \ 670 uint64_t all = ones; \ 671 uint64_t none = 0; \ 672 int i; \ 673 \ 674 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 675 uint64_t result = (a->element[i] compare b->element[i] ? \ 676 ones : 0x0); \ 677 switch (sizeof(a->element[0])) { \ 678 case 8: \ 679 r->u64[i] = result; \ 680 break; \ 681 case 4: \ 682 r->u32[i] = result; \ 683 break; \ 684 case 2: \ 685 r->u16[i] = result; \ 686 break; \ 687 case 1: \ 688 r->u8[i] = result; \ 689 break; \ 690 } \ 691 all &= result; \ 692 none |= result; \ 693 } \ 694 if (record) { \ 695 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 696 } \ 697 } 698 #define VCMP(suffix, compare, element) \ 699 VCMP_DO(suffix, compare, element, 0) \ 700 VCMP_DO(suffix##_dot, compare, element, 1) 701 VCMP(equb, ==, u8) 702 VCMP(equh, ==, u16) 703 VCMP(equw, ==, u32) 704 VCMP(equd, ==, u64) 705 VCMP(gtub, >, u8) 706 VCMP(gtuh, >, u16) 707 VCMP(gtuw, >, u32) 708 VCMP(gtud, >, u64) 709 VCMP(gtsb, >, s8) 710 VCMP(gtsh, >, s16) 711 VCMP(gtsw, >, s32) 712 VCMP(gtsd, >, s64) 713 #undef VCMP_DO 714 #undef VCMP 715 716 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \ 717 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \ 718 ppc_avr_t *a, ppc_avr_t *b) \ 719 { \ 720 etype ones = (etype)-1; \ 721 etype all = ones; \ 722 etype result, none = 0; \ 723 int i; \ 724 \ 725 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 726 if (cmpzero) { \ 727 result = ((a->element[i] == 0) \ 728 || (b->element[i] == 0) \ 729 || (a->element[i] != b->element[i]) ? \ 730 ones : 0x0); \ 731 } else { \ 732 result = (a->element[i] != b->element[i]) ? 
ones : 0x0; \ 733 } \ 734 r->element[i] = result; \ 735 all &= result; \ 736 none |= result; \ 737 } \ 738 if (record) { \ 739 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 740 } \ 741 } 742 743 /* 744 * VCMPNEZ - Vector compare not equal to zero 745 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word) 746 * element - element type to access from vector 747 */ 748 #define VCMPNE(suffix, element, etype, cmpzero) \ 749 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \ 750 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1) 751 VCMPNE(zb, u8, uint8_t, 1) 752 VCMPNE(zh, u16, uint16_t, 1) 753 VCMPNE(zw, u32, uint32_t, 1) 754 VCMPNE(b, u8, uint8_t, 0) 755 VCMPNE(h, u16, uint16_t, 0) 756 VCMPNE(w, u32, uint32_t, 0) 757 #undef VCMPNE_DO 758 #undef VCMPNE 759 760 #define VCMPFP_DO(suffix, compare, order, record) \ 761 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 762 ppc_avr_t *a, ppc_avr_t *b) \ 763 { \ 764 uint32_t ones = (uint32_t)-1; \ 765 uint32_t all = ones; \ 766 uint32_t none = 0; \ 767 int i; \ 768 \ 769 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 770 uint32_t result; \ 771 FloatRelation rel = \ 772 float32_compare_quiet(a->f32[i], b->f32[i], \ 773 &env->vec_status); \ 774 if (rel == float_relation_unordered) { \ 775 result = 0; \ 776 } else if (rel compare order) { \ 777 result = ones; \ 778 } else { \ 779 result = 0; \ 780 } \ 781 r->u32[i] = result; \ 782 all &= result; \ 783 none |= result; \ 784 } \ 785 if (record) { \ 786 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 787 } \ 788 } 789 #define VCMPFP(suffix, compare, order) \ 790 VCMPFP_DO(suffix, compare, order, 0) \ 791 VCMPFP_DO(suffix##_dot, compare, order, 1) 792 VCMPFP(eqfp, ==, float_relation_equal) 793 VCMPFP(gefp, !=, float_relation_less) 794 VCMPFP(gtfp, ==, float_relation_greater) 795 #undef VCMPFP_DO 796 #undef VCMPFP 797 798 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 799 ppc_avr_t *a, ppc_avr_t *b, int record) 800 { 801 int i; 802 int all_in = 0; 803 804 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 805 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 806 &env->vec_status); 807 if (le_rel == float_relation_unordered) { 808 r->u32[i] = 0xc0000000; 809 all_in = 1; 810 } else { 811 float32 bneg = float32_chs(b->f32[i]); 812 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, 813 &env->vec_status); 814 int le = le_rel != float_relation_greater; 815 int ge = ge_rel != float_relation_less; 816 817 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 818 all_in |= (!le | !ge); 819 } 820 } 821 if (record) { 822 env->crf[6] = (all_in == 0) << 1; 823 } 824 } 825 826 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 827 { 828 vcmpbfp_internal(env, r, a, b, 0); 829 } 830 831 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 832 ppc_avr_t *b) 833 { 834 vcmpbfp_internal(env, r, a, b, 1); 835 } 836 837 #define VCT(suffix, satcvt, element) \ 838 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 839 ppc_avr_t *b, uint32_t uim) \ 840 { \ 841 int i; \ 842 int sat = 0; \ 843 float_status s = env->vec_status; \ 844 \ 845 set_float_rounding_mode(float_round_to_zero, &s); \ 846 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 847 if (float32_is_any_nan(b->f32[i])) { \ 848 r->element[i] = 0; \ 849 } else { \ 850 float64 t = float32_to_float64(b->f32[i], &s); \ 851 int64_t j; \ 852 \ 853 t = float64_scalbn(t, uim, &s); \ 854 j = float64_to_int64(t, &s); \ 855 r->element[i] = satcvt(j, &sat); \ 856 } \ 857 
} \ 858 if (sat) { \ 859 set_vscr_sat(env); \ 860 } \ 861 } 862 VCT(uxs, cvtsduw, u32) 863 VCT(sxs, cvtsdsw, s32) 864 #undef VCT 865 866 target_ulong helper_vclzlsbb(ppc_avr_t *r) 867 { 868 target_ulong count = 0; 869 int i; 870 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 871 if (r->VsrB(i) & 0x01) { 872 break; 873 } 874 count++; 875 } 876 return count; 877 } 878 879 target_ulong helper_vctzlsbb(ppc_avr_t *r) 880 { 881 target_ulong count = 0; 882 int i; 883 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 884 if (r->VsrB(i) & 0x01) { 885 break; 886 } 887 count++; 888 } 889 return count; 890 } 891 892 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 893 ppc_avr_t *b, ppc_avr_t *c) 894 { 895 int sat = 0; 896 int i; 897 898 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 899 int32_t prod = a->s16[i] * b->s16[i]; 900 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 901 902 r->s16[i] = cvtswsh(t, &sat); 903 } 904 905 if (sat) { 906 set_vscr_sat(env); 907 } 908 } 909 910 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 911 ppc_avr_t *b, ppc_avr_t *c) 912 { 913 int sat = 0; 914 int i; 915 916 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 917 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 918 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 919 r->s16[i] = cvtswsh(t, &sat); 920 } 921 922 if (sat) { 923 set_vscr_sat(env); 924 } 925 } 926 927 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 928 { 929 int i; 930 931 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 932 int32_t prod = a->s16[i] * b->s16[i]; 933 r->s16[i] = (int16_t) (prod + c->s16[i]); 934 } 935 } 936 937 #define VMRG_DO(name, element, access, ofs) \ 938 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 939 { \ 940 ppc_avr_t result; \ 941 int i, half = ARRAY_SIZE(r->element) / 2; \ 942 \ 943 for (i = 0; i < half; i++) { \ 944 result.access(i * 2 + 0) = a->access(i + ofs); \ 945 result.access(i * 2 + 1) = b->access(i + ofs); \ 946 } \ 947 *r = result; \ 948 } 949 950 #define VMRG(suffix, element, access) \ 951 VMRG_DO(mrgl##suffix, element, access, half) \ 952 VMRG_DO(mrgh##suffix, element, access, 0) 953 VMRG(b, u8, VsrB) 954 VMRG(h, u16, VsrH) 955 VMRG(w, u32, VsrW) 956 #undef VMRG_DO 957 #undef VMRG 958 959 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 960 ppc_avr_t *b, ppc_avr_t *c) 961 { 962 int32_t prod[16]; 963 int i; 964 965 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 966 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 967 } 968 969 VECTOR_FOR_INORDER_I(i, s32) { 970 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 971 prod[4 * i + 2] + prod[4 * i + 3]; 972 } 973 } 974 975 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 976 ppc_avr_t *b, ppc_avr_t *c) 977 { 978 int32_t prod[8]; 979 int i; 980 981 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 982 prod[i] = a->s16[i] * b->s16[i]; 983 } 984 985 VECTOR_FOR_INORDER_I(i, s32) { 986 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 987 } 988 } 989 990 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 991 ppc_avr_t *b, ppc_avr_t *c) 992 { 993 int32_t prod[8]; 994 int i; 995 int sat = 0; 996 997 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 998 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 999 } 1000 1001 VECTOR_FOR_INORDER_I(i, s32) { 1002 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1003 1004 r->u32[i] = cvtsdsw(t, &sat); 1005 } 1006 1007 if (sat) { 1008 set_vscr_sat(env); 1009 } 1010 } 1011 1012 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, 
ppc_avr_t *a, 1013 ppc_avr_t *b, ppc_avr_t *c) 1014 { 1015 uint16_t prod[16]; 1016 int i; 1017 1018 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1019 prod[i] = a->u8[i] * b->u8[i]; 1020 } 1021 1022 VECTOR_FOR_INORDER_I(i, u32) { 1023 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1024 prod[4 * i + 2] + prod[4 * i + 3]; 1025 } 1026 } 1027 1028 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1029 ppc_avr_t *b, ppc_avr_t *c) 1030 { 1031 uint32_t prod[8]; 1032 int i; 1033 1034 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1035 prod[i] = a->u16[i] * b->u16[i]; 1036 } 1037 1038 VECTOR_FOR_INORDER_I(i, u32) { 1039 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1040 } 1041 } 1042 1043 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1044 ppc_avr_t *b, ppc_avr_t *c) 1045 { 1046 uint32_t prod[8]; 1047 int i; 1048 int sat = 0; 1049 1050 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1051 prod[i] = a->u16[i] * b->u16[i]; 1052 } 1053 1054 VECTOR_FOR_INORDER_I(i, s32) { 1055 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1056 1057 r->u32[i] = cvtuduw(t, &sat); 1058 } 1059 1060 if (sat) { 1061 set_vscr_sat(env); 1062 } 1063 } 1064 1065 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 1066 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1067 { \ 1068 int i; \ 1069 \ 1070 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1071 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 1072 (cast)b->mul_access(i); \ 1073 } \ 1074 } 1075 1076 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 1077 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1078 { \ 1079 int i; \ 1080 \ 1081 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1082 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1083 (cast)b->mul_access(i + 1); \ 1084 } \ 1085 } 1086 1087 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1088 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \ 1089 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast) 1090 VMUL(sb, s8, VsrSB, VsrSH, int16_t) 1091 VMUL(sh, s16, VsrSH, VsrSW, int32_t) 1092 VMUL(sw, s32, VsrSW, VsrSD, int64_t) 1093 VMUL(ub, u8, VsrB, VsrH, uint16_t) 1094 VMUL(uh, u16, VsrH, VsrW, uint32_t) 1095 VMUL(uw, u32, VsrW, VsrD, uint64_t) 1096 #undef VMUL_DO_EVN 1097 #undef VMUL_DO_ODD 1098 #undef VMUL 1099 1100 void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1101 { 1102 int i; 1103 1104 for (i = 0; i < 4; i++) { 1105 r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32); 1106 } 1107 } 1108 1109 void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1110 { 1111 int i; 1112 1113 for (i = 0; i < 4; i++) { 1114 r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] * 1115 (uint64_t)b->u32[i]) >> 32); 1116 } 1117 } 1118 1119 void helper_vmulhsd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1120 { 1121 uint64_t discard; 1122 1123 muls64(&discard, &r->u64[0], a->s64[0], b->s64[0]); 1124 muls64(&discard, &r->u64[1], a->s64[1], b->s64[1]); 1125 } 1126 1127 void helper_vmulhud(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1128 { 1129 uint64_t discard; 1130 1131 mulu64(&discard, &r->u64[0], a->u64[0], b->u64[0]); 1132 mulu64(&discard, &r->u64[1], a->u64[1], b->u64[1]); 1133 } 1134 1135 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1136 ppc_avr_t *c) 1137 { 1138 ppc_avr_t result; 1139 int i; 1140 1141 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1142 int s = 
c->VsrB(i) & 0x1f; 1143 int index = s & 0xf; 1144 1145 if (s & 0x10) { 1146 result.VsrB(i) = b->VsrB(index); 1147 } else { 1148 result.VsrB(i) = a->VsrB(index); 1149 } 1150 } 1151 *r = result; 1152 } 1153 1154 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1155 ppc_avr_t *c) 1156 { 1157 ppc_avr_t result; 1158 int i; 1159 1160 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1161 int s = c->VsrB(i) & 0x1f; 1162 int index = 15 - (s & 0xf); 1163 1164 if (s & 0x10) { 1165 result.VsrB(i) = a->VsrB(index); 1166 } else { 1167 result.VsrB(i) = b->VsrB(index); 1168 } 1169 } 1170 *r = result; 1171 } 1172 1173 #if defined(HOST_WORDS_BIGENDIAN) 1174 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1175 #define VBPERMD_INDEX(i) (i) 1176 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1177 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) 1178 #else 1179 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1180 #define VBPERMD_INDEX(i) (1 - i) 1181 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1182 #define EXTRACT_BIT(avr, i, index) \ 1183 (extract64((avr)->u64[1 - i], 63 - index, 1)) 1184 #endif 1185 1186 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1187 { 1188 int i, j; 1189 ppc_avr_t result = { .u64 = { 0, 0 } }; 1190 VECTOR_FOR_INORDER_I(i, u64) { 1191 for (j = 0; j < 8; j++) { 1192 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1193 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1194 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1195 } 1196 } 1197 } 1198 *r = result; 1199 } 1200 1201 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1202 { 1203 int i; 1204 uint64_t perm = 0; 1205 1206 VECTOR_FOR_INORDER_I(i, u8) { 1207 int index = VBPERMQ_INDEX(b, i); 1208 1209 if (index < 128) { 1210 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1211 if (a->u64[VBPERMQ_DW(index)] & mask) { 1212 perm |= (0x8000 >> i); 1213 } 1214 } 1215 } 1216 1217 r->VsrD(0) = perm; 1218 r->VsrD(1) = 0; 1219 } 1220 1221 #undef VBPERMQ_INDEX 1222 #undef VBPERMQ_DW 1223 1224 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1225 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1226 { \ 1227 int i, j; \ 1228 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ 1229 \ 1230 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1231 prod[i] = 0; \ 1232 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1233 if (a->srcfld[i] & (1ull << j)) { \ 1234 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1235 } \ 1236 } \ 1237 } \ 1238 \ 1239 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1240 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ 1241 } \ 1242 } 1243 1244 PMSUM(vpmsumb, u8, u16, uint16_t) 1245 PMSUM(vpmsumh, u16, u32, uint32_t) 1246 PMSUM(vpmsumw, u32, u64, uint64_t) 1247 1248 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1249 { 1250 1251 #ifdef CONFIG_INT128 1252 int i, j; 1253 __uint128_t prod[2]; 1254 1255 VECTOR_FOR_INORDER_I(i, u64) { 1256 prod[i] = 0; 1257 for (j = 0; j < 64; j++) { 1258 if (a->u64[i] & (1ull << j)) { 1259 prod[i] ^= (((__uint128_t)b->u64[i]) << j); 1260 } 1261 } 1262 } 1263 1264 r->u128 = prod[0] ^ prod[1]; 1265 1266 #else 1267 int i, j; 1268 ppc_avr_t prod[2]; 1269 1270 VECTOR_FOR_INORDER_I(i, u64) { 1271 prod[i].VsrD(1) = prod[i].VsrD(0) = 0; 1272 for (j = 0; j < 64; j++) { 1273 if (a->u64[i] & (1ull << j)) { 1274 ppc_avr_t bshift; 1275 if (j == 0) { 1276 bshift.VsrD(0) = 0; 1277 bshift.VsrD(1) = b->u64[i]; 1278 } else { 1279 bshift.VsrD(0) = b->u64[i] >> (64 - j); 1280 bshift.VsrD(1) = b->u64[i] << j; 1281 } 1282 prod[i].VsrD(1) ^= 
bshift.VsrD(1); 1283 prod[i].VsrD(0) ^= bshift.VsrD(0); 1284 } 1285 } 1286 } 1287 1288 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1); 1289 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0); 1290 #endif 1291 } 1292 1293 1294 #if defined(HOST_WORDS_BIGENDIAN) 1295 #define PKBIG 1 1296 #else 1297 #define PKBIG 0 1298 #endif 1299 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1300 { 1301 int i, j; 1302 ppc_avr_t result; 1303 #if defined(HOST_WORDS_BIGENDIAN) 1304 const ppc_avr_t *x[2] = { a, b }; 1305 #else 1306 const ppc_avr_t *x[2] = { b, a }; 1307 #endif 1308 1309 VECTOR_FOR_INORDER_I(i, u64) { 1310 VECTOR_FOR_INORDER_I(j, u32) { 1311 uint32_t e = x[i]->u32[j]; 1312 1313 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1314 ((e >> 6) & 0x3e0) | 1315 ((e >> 3) & 0x1f)); 1316 } 1317 } 1318 *r = result; 1319 } 1320 1321 #define VPK(suffix, from, to, cvt, dosat) \ 1322 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1323 ppc_avr_t *a, ppc_avr_t *b) \ 1324 { \ 1325 int i; \ 1326 int sat = 0; \ 1327 ppc_avr_t result; \ 1328 ppc_avr_t *a0 = PKBIG ? a : b; \ 1329 ppc_avr_t *a1 = PKBIG ? b : a; \ 1330 \ 1331 VECTOR_FOR_INORDER_I(i, from) { \ 1332 result.to[i] = cvt(a0->from[i], &sat); \ 1333 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1334 } \ 1335 *r = result; \ 1336 if (dosat && sat) { \ 1337 set_vscr_sat(env); \ 1338 } \ 1339 } 1340 #define I(x, y) (x) 1341 VPK(shss, s16, s8, cvtshsb, 1) 1342 VPK(shus, s16, u8, cvtshub, 1) 1343 VPK(swss, s32, s16, cvtswsh, 1) 1344 VPK(swus, s32, u16, cvtswuh, 1) 1345 VPK(sdss, s64, s32, cvtsdsw, 1) 1346 VPK(sdus, s64, u32, cvtsduw, 1) 1347 VPK(uhus, u16, u8, cvtuhub, 1) 1348 VPK(uwus, u32, u16, cvtuwuh, 1) 1349 VPK(udus, u64, u32, cvtuduw, 1) 1350 VPK(uhum, u16, u8, I, 0) 1351 VPK(uwum, u32, u16, I, 0) 1352 VPK(udum, u64, u32, I, 0) 1353 #undef I 1354 #undef VPK 1355 #undef PKBIG 1356 1357 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1358 { 1359 int i; 1360 1361 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1362 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1363 } 1364 } 1365 1366 #define VRFI(suffix, rounding) \ 1367 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1368 ppc_avr_t *b) \ 1369 { \ 1370 int i; \ 1371 float_status s = env->vec_status; \ 1372 \ 1373 set_float_rounding_mode(rounding, &s); \ 1374 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1375 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1376 } \ 1377 } 1378 VRFI(n, float_round_nearest_even) 1379 VRFI(m, float_round_down) 1380 VRFI(p, float_round_up) 1381 VRFI(z, float_round_to_zero) 1382 #undef VRFI 1383 1384 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1385 { 1386 int i; 1387 1388 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1389 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1390 1391 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1392 } 1393 } 1394 1395 #define VRLMI(name, size, element, insert) \ 1396 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1397 { \ 1398 int i; \ 1399 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1400 uint##size##_t src1 = a->element[i]; \ 1401 uint##size##_t src2 = b->element[i]; \ 1402 uint##size##_t src3 = r->element[i]; \ 1403 uint##size##_t begin, end, shift, mask, rot_val; \ 1404 \ 1405 shift = extract##size(src2, 0, 6); \ 1406 end = extract##size(src2, 8, 6); \ 1407 begin = extract##size(src2, 16, 6); \ 1408 rot_val = rol##size(src1, shift); \ 1409 mask = mask_u##size(begin, end); \ 1410 if (insert) { \ 1411 
            r->element[i] = (rot_val & mask) | (src3 & ~mask);          \
        } else {                                                        \
            r->element[i] = (rot_val & mask);                           \
        }                                                               \
    }                                                                   \
}

VRLMI(vrldmi, 64, u64, 1);
VRLMI(vrlwmi, 32, u32, 1);
VRLMI(vrldnm, 64, u64, 0);
VRLMI(vrlwnm, 32, u32, 0);

void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                 ppc_avr_t *c)
{
    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
}

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
    }
}

#define VEXTU_X_DO(name, size, left)                            \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
{                                                               \
    int index = (a & 0xf) * 8;                                  \
    if (left) {                                                 \
        index = 128 - index - size;                             \
    }                                                           \
    return int128_getlo(int128_rshift(b->s128, index)) &        \
        MAKE_64BIT_MASK(0, size);                               \
}
VEXTU_X_DO(vextublx, 8, 1)
VEXTU_X_DO(vextuhlx, 16, 1)
VEXTU_X_DO(vextuwlx, 32, 1)
VEXTU_X_DO(vextubrx, 8, 0)
VEXTU_X_DO(vextuhrx, 16, 0)
VEXTU_X_DO(vextuwrx, 32, 0)
#undef VEXTU_X_DO

void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->VsrB(i) & 0x7;               /* extract shift value */
        bytes = (a->VsrB(i) << 8) +             /* extract adjacent bytes */
                (((i + 1) < size) ? a->VsrB(i + 1) : 0);
        r->VsrB(i) = (bytes << shift) >> 8;     /* shift and store result */
    }
}

void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /*
     * Use reverse order, as the destination and source registers can be
     * the same. Since the vector is modified in place (saving a temporary),
     * processing in reverse order guarantees that a computed result is not
     * fed back into a later computation.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->VsrB(i) & 0x7;               /* extract shift value */
        bytes = ((i ?
a->VsrB(i - 1) : 0) << 8) + a->VsrB(i); 1493 /* extract adjacent bytes */ 1494 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */ 1495 } 1496 } 1497 1498 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1499 { 1500 int sh = shift & 0xf; 1501 int i; 1502 ppc_avr_t result; 1503 1504 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1505 int index = sh + i; 1506 if (index > 0xf) { 1507 result.VsrB(i) = b->VsrB(index - 0x10); 1508 } else { 1509 result.VsrB(i) = a->VsrB(index); 1510 } 1511 } 1512 *r = result; 1513 } 1514 1515 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1516 { 1517 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1518 1519 #if defined(HOST_WORDS_BIGENDIAN) 1520 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1521 memset(&r->u8[16 - sh], 0, sh); 1522 #else 1523 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1524 memset(&r->u8[0], 0, sh); 1525 #endif 1526 } 1527 1528 #if defined(HOST_WORDS_BIGENDIAN) 1529 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX]) 1530 #else 1531 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1) 1532 #endif 1533 1534 #define VINSX(SUFFIX, TYPE) \ 1535 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \ 1536 uint64_t val, target_ulong index) \ 1537 { \ 1538 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \ 1539 target_long idx = index; \ 1540 \ 1541 if (idx < 0 || idx > maxidx) { \ 1542 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \ 1543 qemu_log_mask(LOG_GUEST_ERROR, \ 1544 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \ 1545 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \ 1546 } else { \ 1547 TYPE src = val; \ 1548 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \ 1549 } \ 1550 } 1551 VINSX(B, uint8_t) 1552 VINSX(H, uint16_t) 1553 VINSX(W, uint32_t) 1554 VINSX(D, uint64_t) 1555 #undef ELEM_ADDR 1556 #undef VINSX 1557 #if defined(HOST_WORDS_BIGENDIAN) 1558 #define VEXTDVLX(NAME, SIZE) \ 1559 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1560 target_ulong index) \ 1561 { \ 1562 const target_long idx = index; \ 1563 ppc_avr_t tmp[2] = { *a, *b }; \ 1564 memset(t, 0, sizeof(*t)); \ 1565 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1566 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \ 1567 } else { \ 1568 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1569 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1570 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \ 1571 } \ 1572 } 1573 #else 1574 #define VEXTDVLX(NAME, SIZE) \ 1575 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1576 target_ulong index) \ 1577 { \ 1578 const target_long idx = index; \ 1579 ppc_avr_t tmp[2] = { *b, *a }; \ 1580 memset(t, 0, sizeof(*t)); \ 1581 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1582 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \ 1583 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \ 1584 } else { \ 1585 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1586 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1587 env->nip, idx < 0 ? 
SIZE - idx : idx, 32 - SIZE); \ 1588 } \ 1589 } 1590 #endif 1591 VEXTDVLX(VEXTDUBVLX, 1) 1592 VEXTDVLX(VEXTDUHVLX, 2) 1593 VEXTDVLX(VEXTDUWVLX, 4) 1594 VEXTDVLX(VEXTDDVLX, 8) 1595 #undef VEXTDVLX 1596 #if defined(HOST_WORDS_BIGENDIAN) 1597 #define VEXTRACT(suffix, element) \ 1598 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1599 { \ 1600 uint32_t es = sizeof(r->element[0]); \ 1601 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1602 memset(&r->u8[8], 0, 8); \ 1603 memset(&r->u8[0], 0, 8 - es); \ 1604 } 1605 #else 1606 #define VEXTRACT(suffix, element) \ 1607 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1608 { \ 1609 uint32_t es = sizeof(r->element[0]); \ 1610 uint32_t s = (16 - index) - es; \ 1611 memmove(&r->u8[8], &b->u8[s], es); \ 1612 memset(&r->u8[0], 0, 8); \ 1613 memset(&r->u8[8 + es], 0, 8 - es); \ 1614 } 1615 #endif 1616 VEXTRACT(ub, u8) 1617 VEXTRACT(uh, u16) 1618 VEXTRACT(uw, u32) 1619 VEXTRACT(d, u64) 1620 #undef VEXTRACT 1621 1622 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt, 1623 ppc_vsr_t *xb, uint32_t index) 1624 { 1625 ppc_vsr_t t = { }; 1626 size_t es = sizeof(uint32_t); 1627 uint32_t ext_index; 1628 int i; 1629 1630 ext_index = index; 1631 for (i = 0; i < es; i++, ext_index++) { 1632 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16); 1633 } 1634 1635 *xt = t; 1636 } 1637 1638 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt, 1639 ppc_vsr_t *xb, uint32_t index) 1640 { 1641 ppc_vsr_t t = *xt; 1642 size_t es = sizeof(uint32_t); 1643 int ins_index, i = 0; 1644 1645 ins_index = index; 1646 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1647 t.VsrB(ins_index) = xb->VsrB(8 - es + i); 1648 } 1649 1650 *xt = t; 1651 } 1652 1653 #define XXBLEND(name, sz) \ 1654 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1655 ppc_avr_t *c, uint32_t desc) \ 1656 { \ 1657 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \ 1658 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? 
\ 1659 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \ 1660 } \ 1661 } 1662 XXBLEND(B, 8) 1663 XXBLEND(H, 16) 1664 XXBLEND(W, 32) 1665 XXBLEND(D, 64) 1666 #undef XXBLEND 1667 1668 #define VEXT_SIGNED(name, element, cast) \ 1669 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1670 { \ 1671 int i; \ 1672 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1673 r->element[i] = (cast)b->element[i]; \ 1674 } \ 1675 } 1676 VEXT_SIGNED(vextsb2w, s32, int8_t) 1677 VEXT_SIGNED(vextsb2d, s64, int8_t) 1678 VEXT_SIGNED(vextsh2w, s32, int16_t) 1679 VEXT_SIGNED(vextsh2d, s64, int16_t) 1680 VEXT_SIGNED(vextsw2d, s64, int32_t) 1681 #undef VEXT_SIGNED 1682 1683 #define VNEG(name, element) \ 1684 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1685 { \ 1686 int i; \ 1687 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1688 r->element[i] = -b->element[i]; \ 1689 } \ 1690 } 1691 VNEG(vnegw, s32) 1692 VNEG(vnegd, s64) 1693 #undef VNEG 1694 1695 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1696 { 1697 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1698 1699 #if defined(HOST_WORDS_BIGENDIAN) 1700 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1701 memset(&r->u8[0], 0, sh); 1702 #else 1703 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1704 memset(&r->u8[16 - sh], 0, sh); 1705 #endif 1706 } 1707 1708 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1709 { 1710 int i; 1711 1712 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1713 r->u32[i] = a->u32[i] >= b->u32[i]; 1714 } 1715 } 1716 1717 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1718 { 1719 int64_t t; 1720 int i, upper; 1721 ppc_avr_t result; 1722 int sat = 0; 1723 1724 upper = ARRAY_SIZE(r->s32) - 1; 1725 t = (int64_t)b->VsrSW(upper); 1726 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1727 t += a->VsrSW(i); 1728 result.VsrSW(i) = 0; 1729 } 1730 result.VsrSW(upper) = cvtsdsw(t, &sat); 1731 *r = result; 1732 1733 if (sat) { 1734 set_vscr_sat(env); 1735 } 1736 } 1737 1738 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1739 { 1740 int i, j, upper; 1741 ppc_avr_t result; 1742 int sat = 0; 1743 1744 upper = 1; 1745 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1746 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 1747 1748 result.VsrD(i) = 0; 1749 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 1750 t += a->VsrSW(2 * i + j); 1751 } 1752 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 1753 } 1754 1755 *r = result; 1756 if (sat) { 1757 set_vscr_sat(env); 1758 } 1759 } 1760 1761 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1762 { 1763 int i, j; 1764 int sat = 0; 1765 1766 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1767 int64_t t = (int64_t)b->s32[i]; 1768 1769 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 1770 t += a->s8[4 * i + j]; 1771 } 1772 r->s32[i] = cvtsdsw(t, &sat); 1773 } 1774 1775 if (sat) { 1776 set_vscr_sat(env); 1777 } 1778 } 1779 1780 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1781 { 1782 int sat = 0; 1783 int i; 1784 1785 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1786 int64_t t = (int64_t)b->s32[i]; 1787 1788 t += a->s16[2 * i] + a->s16[2 * i + 1]; 1789 r->s32[i] = cvtsdsw(t, &sat); 1790 } 1791 1792 if (sat) { 1793 set_vscr_sat(env); 1794 } 1795 } 1796 1797 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1798 { 1799 int i, j; 1800 int sat = 0; 1801 1802 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1803 uint64_t t = (uint64_t)b->u32[i]; 1804 1805 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 1806 t += a->u8[4 
* i + j]; 1807 } 1808 r->u32[i] = cvtuduw(t, &sat); 1809 } 1810 1811 if (sat) { 1812 set_vscr_sat(env); 1813 } 1814 } 1815 1816 #if defined(HOST_WORDS_BIGENDIAN) 1817 #define UPKHI 1 1818 #define UPKLO 0 1819 #else 1820 #define UPKHI 0 1821 #define UPKLO 1 1822 #endif 1823 #define VUPKPX(suffix, hi) \ 1824 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1825 { \ 1826 int i; \ 1827 ppc_avr_t result; \ 1828 \ 1829 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 1830 uint16_t e = b->u16[hi ? i : i + 4]; \ 1831 uint8_t a = (e >> 15) ? 0xff : 0; \ 1832 uint8_t r = (e >> 10) & 0x1f; \ 1833 uint8_t g = (e >> 5) & 0x1f; \ 1834 uint8_t b = e & 0x1f; \ 1835 \ 1836 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 1837 } \ 1838 *r = result; \ 1839 } 1840 VUPKPX(lpx, UPKLO) 1841 VUPKPX(hpx, UPKHI) 1842 #undef VUPKPX 1843 1844 #define VUPK(suffix, unpacked, packee, hi) \ 1845 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1846 { \ 1847 int i; \ 1848 ppc_avr_t result; \ 1849 \ 1850 if (hi) { \ 1851 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 1852 result.unpacked[i] = b->packee[i]; \ 1853 } \ 1854 } else { \ 1855 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 1856 i++) { \ 1857 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 1858 } \ 1859 } \ 1860 *r = result; \ 1861 } 1862 VUPK(hsb, s16, s8, UPKHI) 1863 VUPK(hsh, s32, s16, UPKHI) 1864 VUPK(hsw, s64, s32, UPKHI) 1865 VUPK(lsb, s16, s8, UPKLO) 1866 VUPK(lsh, s32, s16, UPKLO) 1867 VUPK(lsw, s64, s32, UPKLO) 1868 #undef VUPK 1869 #undef UPKHI 1870 #undef UPKLO 1871 1872 #define VGENERIC_DO(name, element) \ 1873 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 1874 { \ 1875 int i; \ 1876 \ 1877 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1878 r->element[i] = name(b->element[i]); \ 1879 } \ 1880 } 1881 1882 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 1883 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 1884 1885 VGENERIC_DO(clzb, u8) 1886 VGENERIC_DO(clzh, u16) 1887 1888 #undef clzb 1889 #undef clzh 1890 1891 #define ctzb(v) ((v) ? ctz32(v) : 8) 1892 #define ctzh(v) ((v) ? 
ctz32(v) : 16) 1893 #define ctzw(v) ctz32((v)) 1894 #define ctzd(v) ctz64((v)) 1895 1896 VGENERIC_DO(ctzb, u8) 1897 VGENERIC_DO(ctzh, u16) 1898 VGENERIC_DO(ctzw, u32) 1899 VGENERIC_DO(ctzd, u64) 1900 1901 #undef ctzb 1902 #undef ctzh 1903 #undef ctzw 1904 #undef ctzd 1905 1906 #define popcntb(v) ctpop8(v) 1907 #define popcnth(v) ctpop16(v) 1908 #define popcntw(v) ctpop32(v) 1909 #define popcntd(v) ctpop64(v) 1910 1911 VGENERIC_DO(popcntb, u8) 1912 VGENERIC_DO(popcnth, u16) 1913 VGENERIC_DO(popcntw, u32) 1914 VGENERIC_DO(popcntd, u64) 1915 1916 #undef popcntb 1917 #undef popcnth 1918 #undef popcntw 1919 #undef popcntd 1920 1921 #undef VGENERIC_DO 1922 1923 #if defined(HOST_WORDS_BIGENDIAN) 1924 #define QW_ONE { .u64 = { 0, 1 } } 1925 #else 1926 #define QW_ONE { .u64 = { 1, 0 } } 1927 #endif 1928 1929 #ifndef CONFIG_INT128 1930 1931 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) 1932 { 1933 t->u64[0] = ~a.u64[0]; 1934 t->u64[1] = ~a.u64[1]; 1935 } 1936 1937 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) 1938 { 1939 if (a.VsrD(0) < b.VsrD(0)) { 1940 return -1; 1941 } else if (a.VsrD(0) > b.VsrD(0)) { 1942 return 1; 1943 } else if (a.VsrD(1) < b.VsrD(1)) { 1944 return -1; 1945 } else if (a.VsrD(1) > b.VsrD(1)) { 1946 return 1; 1947 } else { 1948 return 0; 1949 } 1950 } 1951 1952 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 1953 { 1954 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 1955 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 1956 (~a.VsrD(1) < b.VsrD(1)); 1957 } 1958 1959 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 1960 { 1961 ppc_avr_t not_a; 1962 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 1963 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 1964 (~a.VsrD(1) < b.VsrD(1)); 1965 avr_qw_not(¬_a, a); 1966 return avr_qw_cmpu(not_a, b) < 0; 1967 } 1968 1969 #endif 1970 1971 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1972 { 1973 #ifdef CONFIG_INT128 1974 r->u128 = a->u128 + b->u128; 1975 #else 1976 avr_qw_add(r, *a, *b); 1977 #endif 1978 } 1979 1980 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1981 { 1982 #ifdef CONFIG_INT128 1983 r->u128 = a->u128 + b->u128 + (c->u128 & 1); 1984 #else 1985 1986 if (c->VsrD(1) & 1) { 1987 ppc_avr_t tmp; 1988 1989 tmp.VsrD(0) = 0; 1990 tmp.VsrD(1) = c->VsrD(1) & 1; 1991 avr_qw_add(&tmp, *a, tmp); 1992 avr_qw_add(r, tmp, *b); 1993 } else { 1994 avr_qw_add(r, *a, *b); 1995 } 1996 #endif 1997 } 1998 1999 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2000 { 2001 #ifdef CONFIG_INT128 2002 r->u128 = (~a->u128 < b->u128); 2003 #else 2004 ppc_avr_t not_a; 2005 2006 avr_qw_not(¬_a, *a); 2007 2008 r->VsrD(0) = 0; 2009 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0); 2010 #endif 2011 } 2012 2013 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2014 { 2015 #ifdef CONFIG_INT128 2016 int carry_out = (~a->u128 < b->u128); 2017 if (!carry_out && (c->u128 & 1)) { 2018 carry_out = ((a->u128 + b->u128 + 1) == 0) && 2019 ((a->u128 != 0) || (b->u128 != 0)); 2020 } 2021 r->u128 = carry_out; 2022 #else 2023 2024 int carry_in = c->VsrD(1) & 1; 2025 int carry_out = 0; 2026 ppc_avr_t tmp; 2027 2028 carry_out = avr_qw_addc(&tmp, *a, *b); 2029 2030 if (!carry_out && carry_in) { 2031 ppc_avr_t one = QW_ONE; 2032 carry_out = avr_qw_addc(&tmp, tmp, one); 2033 } 2034 r->VsrD(0) = 0; 2035 r->VsrD(1) = carry_out; 2036 #endif 2037 } 2038 2039 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2040 { 2041 #ifdef CONFIG_INT128 2042 r->u128 = a->u128 - b->u128; 2043 #else 2044 ppc_avr_t 
tmp; 2045 ppc_avr_t one = QW_ONE; 2046 2047 avr_qw_not(&tmp, *b); 2048 avr_qw_add(&tmp, *a, tmp); 2049 avr_qw_add(r, tmp, one); 2050 #endif 2051 } 2052 2053 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2054 { 2055 #ifdef CONFIG_INT128 2056 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); 2057 #else 2058 ppc_avr_t tmp, sum; 2059 2060 avr_qw_not(&tmp, *b); 2061 avr_qw_add(&sum, *a, tmp); 2062 2063 tmp.VsrD(0) = 0; 2064 tmp.VsrD(1) = c->VsrD(1) & 1; 2065 avr_qw_add(r, sum, tmp); 2066 #endif 2067 } 2068 2069 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2070 { 2071 #ifdef CONFIG_INT128 2072 r->u128 = (~a->u128 < ~b->u128) || 2073 (a->u128 + ~b->u128 == (__uint128_t)-1); 2074 #else 2075 int carry = (avr_qw_cmpu(*a, *b) > 0); 2076 if (!carry) { 2077 ppc_avr_t tmp; 2078 avr_qw_not(&tmp, *b); 2079 avr_qw_add(&tmp, *a, tmp); 2080 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull)); 2081 } 2082 r->VsrD(0) = 0; 2083 r->VsrD(1) = carry; 2084 #endif 2085 } 2086 2087 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2088 { 2089 #ifdef CONFIG_INT128 2090 r->u128 = 2091 (~a->u128 < ~b->u128) || 2092 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); 2093 #else 2094 int carry_in = c->VsrD(1) & 1; 2095 int carry_out = (avr_qw_cmpu(*a, *b) > 0); 2096 if (!carry_out && carry_in) { 2097 ppc_avr_t tmp; 2098 avr_qw_not(&tmp, *b); 2099 avr_qw_add(&tmp, *a, tmp); 2100 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull)); 2101 } 2102 2103 r->VsrD(0) = 0; 2104 r->VsrD(1) = carry_out; 2105 #endif 2106 } 2107 2108 #define BCD_PLUS_PREF_1 0xC 2109 #define BCD_PLUS_PREF_2 0xF 2110 #define BCD_PLUS_ALT_1 0xA 2111 #define BCD_NEG_PREF 0xD 2112 #define BCD_NEG_ALT 0xB 2113 #define BCD_PLUS_ALT_2 0xE 2114 #define NATIONAL_PLUS 0x2B 2115 #define NATIONAL_NEG 0x2D 2116 2117 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2118 2119 static int bcd_get_sgn(ppc_avr_t *bcd) 2120 { 2121 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) { 2122 case BCD_PLUS_PREF_1: 2123 case BCD_PLUS_PREF_2: 2124 case BCD_PLUS_ALT_1: 2125 case BCD_PLUS_ALT_2: 2126 { 2127 return 1; 2128 } 2129 2130 case BCD_NEG_PREF: 2131 case BCD_NEG_ALT: 2132 { 2133 return -1; 2134 } 2135 2136 default: 2137 { 2138 return 0; 2139 } 2140 } 2141 } 2142 2143 static int bcd_preferred_sgn(int sgn, int ps) 2144 { 2145 if (sgn >= 0) { 2146 return (ps == 0) ? 
BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2147 } else { 2148 return BCD_NEG_PREF; 2149 } 2150 } 2151 2152 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2153 { 2154 uint8_t result; 2155 if (n & 1) { 2156 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4; 2157 } else { 2158 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF; 2159 } 2160 2161 if (unlikely(result > 9)) { 2162 *invalid = true; 2163 } 2164 return result; 2165 } 2166 2167 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2168 { 2169 if (n & 1) { 2170 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F; 2171 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4); 2172 } else { 2173 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0; 2174 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit; 2175 } 2176 } 2177 2178 static bool bcd_is_valid(ppc_avr_t *bcd) 2179 { 2180 int i; 2181 int invalid = 0; 2182 2183 if (bcd_get_sgn(bcd) == 0) { 2184 return false; 2185 } 2186 2187 for (i = 1; i < 32; i++) { 2188 bcd_get_digit(bcd, i, &invalid); 2189 if (unlikely(invalid)) { 2190 return false; 2191 } 2192 } 2193 return true; 2194 } 2195 2196 static int bcd_cmp_zero(ppc_avr_t *bcd) 2197 { 2198 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) { 2199 return CRF_EQ; 2200 } else { 2201 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT; 2202 } 2203 } 2204 2205 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2206 { 2207 return reg->VsrH(7 - n); 2208 } 2209 2210 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2211 { 2212 reg->VsrH(7 - n) = val; 2213 } 2214 2215 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2216 { 2217 int i; 2218 int invalid = 0; 2219 for (i = 31; i > 0; i--) { 2220 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2221 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2222 if (unlikely(invalid)) { 2223 return 0; /* doesn't matter */ 2224 } else if (dig_a > dig_b) { 2225 return 1; 2226 } else if (dig_a < dig_b) { 2227 return -1; 2228 } 2229 } 2230 2231 return 0; 2232 } 2233 2234 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2235 int *overflow) 2236 { 2237 int carry = 0; 2238 int i; 2239 int is_zero = 1; 2240 2241 for (i = 1; i <= 31; i++) { 2242 uint8_t digit = bcd_get_digit(a, i, invalid) + 2243 bcd_get_digit(b, i, invalid) + carry; 2244 is_zero &= (digit == 0); 2245 if (digit > 9) { 2246 carry = 1; 2247 digit -= 10; 2248 } else { 2249 carry = 0; 2250 } 2251 2252 bcd_put_digit(t, digit, i); 2253 } 2254 2255 *overflow = carry; 2256 return is_zero; 2257 } 2258 2259 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2260 int *overflow) 2261 { 2262 int carry = 0; 2263 int i; 2264 2265 for (i = 1; i <= 31; i++) { 2266 uint8_t digit = bcd_get_digit(a, i, invalid) - 2267 bcd_get_digit(b, i, invalid) + carry; 2268 if (digit & 0x80) { 2269 carry = -1; 2270 digit += 10; 2271 } else { 2272 carry = 0; 2273 } 2274 2275 bcd_put_digit(t, digit, i); 2276 } 2277 2278 *overflow = carry; 2279 } 2280 2281 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2282 { 2283 2284 int sgna = bcd_get_sgn(a); 2285 int sgnb = bcd_get_sgn(b); 2286 int invalid = (sgna == 0) || (sgnb == 0); 2287 int overflow = 0; 2288 int zero = 0; 2289 uint32_t cr = 0; 2290 ppc_avr_t result = { .u64 = { 0, 0 } }; 2291 2292 if (!invalid) { 2293 if (sgna == sgnb) { 2294 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2295 zero = bcd_add_mag(&result, a, b, &invalid, &overflow); 2296 cr = (sgna > 0) ? 
CRF_GT : CRF_LT; 2297 } else { 2298 int magnitude = bcd_cmp_mag(a, b); 2299 if (magnitude > 0) { 2300 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2301 bcd_sub_mag(&result, a, b, &invalid, &overflow); 2302 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2303 } else if (magnitude < 0) { 2304 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps); 2305 bcd_sub_mag(&result, b, a, &invalid, &overflow); 2306 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2307 } else { 2308 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps); 2309 cr = CRF_EQ; 2310 } 2311 } 2312 } 2313 2314 if (unlikely(invalid)) { 2315 result.VsrD(0) = result.VsrD(1) = -1; 2316 cr = CRF_SO; 2317 } else if (overflow) { 2318 cr |= CRF_SO; 2319 } else if (zero) { 2320 cr |= CRF_EQ; 2321 } 2322 2323 *r = result; 2324 2325 return cr; 2326 } 2327 2328 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2329 { 2330 ppc_avr_t bcopy = *b; 2331 int sgnb = bcd_get_sgn(b); 2332 if (sgnb < 0) { 2333 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2334 } else if (sgnb > 0) { 2335 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2336 } 2337 /* else invalid ... defer to bcdadd code for proper handling */ 2338 2339 return helper_bcdadd(r, a, &bcopy, ps); 2340 } 2341 2342 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2343 { 2344 int i; 2345 int cr = 0; 2346 uint16_t national = 0; 2347 uint16_t sgnb = get_national_digit(b, 0); 2348 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2349 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2350 2351 for (i = 1; i < 8; i++) { 2352 national = get_national_digit(b, i); 2353 if (unlikely(national < 0x30 || national > 0x39)) { 2354 invalid = 1; 2355 break; 2356 } 2357 2358 bcd_put_digit(&ret, national & 0xf, i); 2359 } 2360 2361 if (sgnb == NATIONAL_PLUS) { 2362 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2363 } else { 2364 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2365 } 2366 2367 cr = bcd_cmp_zero(&ret); 2368 2369 if (unlikely(invalid)) { 2370 cr = CRF_SO; 2371 } 2372 2373 *r = ret; 2374 2375 return cr; 2376 } 2377 2378 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2379 { 2380 int i; 2381 int cr = 0; 2382 int sgnb = bcd_get_sgn(b); 2383 int invalid = (sgnb == 0); 2384 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2385 2386 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0); 2387 2388 for (i = 1; i < 8; i++) { 2389 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2390 2391 if (unlikely(invalid)) { 2392 break; 2393 } 2394 } 2395 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2396 2397 cr = bcd_cmp_zero(b); 2398 2399 if (ox_flag) { 2400 cr |= CRF_SO; 2401 } 2402 2403 if (unlikely(invalid)) { 2404 cr = CRF_SO; 2405 } 2406 2407 *r = ret; 2408 2409 return cr; 2410 } 2411 2412 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2413 { 2414 int i; 2415 int cr = 0; 2416 int invalid = 0; 2417 int zone_digit = 0; 2418 int zone_lead = ps ? 0xF : 0x3; 2419 int digit = 0; 2420 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2421 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4; 2422 2423 if (unlikely((sgnb < 0xA) && ps)) { 2424 invalid = 1; 2425 } 2426 2427 for (i = 0; i < 16; i++) { 2428 zone_digit = i ? 
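/* i == 0 is the sign byte: its zone nibble holds the sign, so substitute the expected lead */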
b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead; 2429 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF; 2430 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2431 invalid = 1; 2432 break; 2433 } 2434 2435 bcd_put_digit(&ret, digit, i + 1); 2436 } 2437 2438 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2439 (!ps && (sgnb & 0x4))) { 2440 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2441 } else { 2442 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2443 } 2444 2445 cr = bcd_cmp_zero(&ret); 2446 2447 if (unlikely(invalid)) { 2448 cr = CRF_SO; 2449 } 2450 2451 *r = ret; 2452 2453 return cr; 2454 } 2455 2456 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2457 { 2458 int i; 2459 int cr = 0; 2460 uint8_t digit = 0; 2461 int sgnb = bcd_get_sgn(b); 2462 int zone_lead = (ps) ? 0xF0 : 0x30; 2463 int invalid = (sgnb == 0); 2464 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2465 2466 int ox_flag = ((b->VsrD(0) >> 4) != 0); 2467 2468 for (i = 0; i < 16; i++) { 2469 digit = bcd_get_digit(b, i + 1, &invalid); 2470 2471 if (unlikely(invalid)) { 2472 break; 2473 } 2474 2475 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit; 2476 } 2477 2478 if (ps) { 2479 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2480 } else { 2481 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1); 2482 } 2483 2484 cr = bcd_cmp_zero(b); 2485 2486 if (ox_flag) { 2487 cr |= CRF_SO; 2488 } 2489 2490 if (unlikely(invalid)) { 2491 cr = CRF_SO; 2492 } 2493 2494 *r = ret; 2495 2496 return cr; 2497 } 2498 2499 /** 2500 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs 2501 * 2502 * Returns: 2503 * > 0 if ahi|alo > bhi|blo, 2504 * 0 if ahi|alo == bhi|blo, 2505 * < 0 if ahi|alo < bhi|blo 2506 */ 2507 static inline int ucmp128(uint64_t alo, uint64_t ahi, 2508 uint64_t blo, uint64_t bhi) 2509 { 2510 return (ahi == bhi) ? 2511 (alo > blo ? 1 : (alo == blo ? 0 : -1)) : 2512 (ahi > bhi ? 1 : -1); 2513 } 2514 2515 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2516 { 2517 int i; 2518 int cr; 2519 uint64_t lo_value; 2520 uint64_t hi_value; 2521 uint64_t rem; 2522 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2523 2524 if (b->VsrSD(0) < 0) { 2525 lo_value = -b->VsrSD(1); 2526 hi_value = ~b->VsrD(0) + !lo_value; 2527 bcd_put_digit(&ret, 0xD, 0); 2528 2529 cr = CRF_LT; 2530 } else { 2531 lo_value = b->VsrD(1); 2532 hi_value = b->VsrD(0); 2533 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); 2534 2535 if (hi_value == 0 && lo_value == 0) { 2536 cr = CRF_EQ; 2537 } else { 2538 cr = CRF_GT; 2539 } 2540 } 2541 2542 /* 2543 * Check src limits: abs(src) <= 10^31 - 1 2544 * 2545 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff 2546 */ 2547 if (ucmp128(lo_value, hi_value, 2548 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) { 2549 cr |= CRF_SO; 2550 2551 /* 2552 * According to the ISA, if src wouldn't fit in the destination 2553 * register, the result is undefined. 2554 * In that case, we leave r unchanged. 
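 * The CR bits computed above (including the CRF_SO just ORed in) are
 * still returned to the caller.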
2555 */ 2556 } else { 2557 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL); 2558 2559 for (i = 1; i < 16; rem /= 10, i++) { 2560 bcd_put_digit(&ret, rem % 10, i); 2561 } 2562 2563 for (; i < 32; lo_value /= 10, i++) { 2564 bcd_put_digit(&ret, lo_value % 10, i); 2565 } 2566 2567 *r = ret; 2568 } 2569 2570 return cr; 2571 } 2572 2573 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2574 { 2575 uint8_t i; 2576 int cr; 2577 uint64_t carry; 2578 uint64_t unused; 2579 uint64_t lo_value; 2580 uint64_t hi_value = 0; 2581 int sgnb = bcd_get_sgn(b); 2582 int invalid = (sgnb == 0); 2583 2584 lo_value = bcd_get_digit(b, 31, &invalid); 2585 for (i = 30; i > 0; i--) { 2586 mulu64(&lo_value, &carry, lo_value, 10ULL); 2587 mulu64(&hi_value, &unused, hi_value, 10ULL); 2588 lo_value += bcd_get_digit(b, i, &invalid); 2589 hi_value += carry; 2590 2591 if (unlikely(invalid)) { 2592 break; 2593 } 2594 } 2595 2596 if (sgnb == -1) { 2597 r->VsrSD(1) = -lo_value; 2598 r->VsrSD(0) = ~hi_value + !r->VsrSD(1); 2599 } else { 2600 r->VsrSD(1) = lo_value; 2601 r->VsrSD(0) = hi_value; 2602 } 2603 2604 cr = bcd_cmp_zero(b); 2605 2606 if (unlikely(invalid)) { 2607 cr = CRF_SO; 2608 } 2609 2610 return cr; 2611 } 2612 2613 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2614 { 2615 int i; 2616 int invalid = 0; 2617 2618 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 2619 return CRF_SO; 2620 } 2621 2622 *r = *a; 2623 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0); 2624 2625 for (i = 1; i < 32; i++) { 2626 bcd_get_digit(a, i, &invalid); 2627 bcd_get_digit(b, i, &invalid); 2628 if (unlikely(invalid)) { 2629 return CRF_SO; 2630 } 2631 } 2632 2633 return bcd_cmp_zero(r); 2634 } 2635 2636 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2637 { 2638 int sgnb = bcd_get_sgn(b); 2639 2640 *r = *b; 2641 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 2642 2643 if (bcd_is_valid(b) == false) { 2644 return CRF_SO; 2645 } 2646 2647 return bcd_cmp_zero(r); 2648 } 2649 2650 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2651 { 2652 int cr; 2653 int i = a->VsrSB(7); 2654 bool ox_flag = false; 2655 int sgnb = bcd_get_sgn(b); 2656 ppc_avr_t ret = *b; 2657 ret.VsrD(1) &= ~0xf; 2658 2659 if (bcd_is_valid(b) == false) { 2660 return CRF_SO; 2661 } 2662 2663 if (unlikely(i > 31)) { 2664 i = 31; 2665 } else if (unlikely(i < -31)) { 2666 i = -31; 2667 } 2668 2669 if (i > 0) { 2670 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2671 } else { 2672 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2673 } 2674 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2675 2676 *r = ret; 2677 2678 cr = bcd_cmp_zero(r); 2679 if (ox_flag) { 2680 cr |= CRF_SO; 2681 } 2682 2683 return cr; 2684 } 2685 2686 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2687 { 2688 int cr; 2689 int i; 2690 int invalid = 0; 2691 bool ox_flag = false; 2692 ppc_avr_t ret = *b; 2693 2694 for (i = 0; i < 32; i++) { 2695 bcd_get_digit(b, i, &invalid); 2696 2697 if (unlikely(invalid)) { 2698 return CRF_SO; 2699 } 2700 } 2701 2702 i = a->VsrSB(7); 2703 if (i >= 32) { 2704 ox_flag = true; 2705 ret.VsrD(1) = ret.VsrD(0) = 0; 2706 } else if (i <= -32) { 2707 ret.VsrD(1) = ret.VsrD(0) = 0; 2708 } else if (i > 0) { 2709 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2710 } else { 2711 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2712 } 2713 *r = ret; 2714 2715 cr = bcd_cmp_zero(r); 2716 if (ox_flag) { 2717 cr |= CRF_SO; 2718 } 2719 2720 return 
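/* EQ/GT/LT from bcd_cmp_zero above, plus SO when the shift set ox_flag */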
cr; 2721 } 2722 2723 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2724 { 2725 int cr; 2726 int unused = 0; 2727 int invalid = 0; 2728 bool ox_flag = false; 2729 int sgnb = bcd_get_sgn(b); 2730 ppc_avr_t ret = *b; 2731 ret.VsrD(1) &= ~0xf; 2732 2733 int i = a->VsrSB(7); 2734 ppc_avr_t bcd_one; 2735 2736 bcd_one.VsrD(0) = 0; 2737 bcd_one.VsrD(1) = 0x10; 2738 2739 if (bcd_is_valid(b) == false) { 2740 return CRF_SO; 2741 } 2742 2743 if (unlikely(i > 31)) { 2744 i = 31; 2745 } else if (unlikely(i < -31)) { 2746 i = -31; 2747 } 2748 2749 if (i > 0) { 2750 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2751 } else { 2752 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2753 2754 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 2755 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 2756 } 2757 } 2758 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2759 2760 cr = bcd_cmp_zero(&ret); 2761 if (ox_flag) { 2762 cr |= CRF_SO; 2763 } 2764 *r = ret; 2765 2766 return cr; 2767 } 2768 2769 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2770 { 2771 uint64_t mask; 2772 uint32_t ox_flag = 0; 2773 int i = a->VsrSH(3) + 1; 2774 ppc_avr_t ret = *b; 2775 2776 if (bcd_is_valid(b) == false) { 2777 return CRF_SO; 2778 } 2779 2780 if (i > 16 && i < 32) { 2781 mask = (uint64_t)-1 >> (128 - i * 4); 2782 if (ret.VsrD(0) & ~mask) { 2783 ox_flag = CRF_SO; 2784 } 2785 2786 ret.VsrD(0) &= mask; 2787 } else if (i >= 0 && i <= 16) { 2788 mask = (uint64_t)-1 >> (64 - i * 4); 2789 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 2790 ox_flag = CRF_SO; 2791 } 2792 2793 ret.VsrD(1) &= mask; 2794 ret.VsrD(0) = 0; 2795 } 2796 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 2797 *r = ret; 2798 2799 return bcd_cmp_zero(&ret) | ox_flag; 2800 } 2801 2802 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2803 { 2804 int i; 2805 uint64_t mask; 2806 uint32_t ox_flag = 0; 2807 int invalid = 0; 2808 ppc_avr_t ret = *b; 2809 2810 for (i = 0; i < 32; i++) { 2811 bcd_get_digit(b, i, &invalid); 2812 2813 if (unlikely(invalid)) { 2814 return CRF_SO; 2815 } 2816 } 2817 2818 i = a->VsrSH(3); 2819 if (i > 16 && i < 33) { 2820 mask = (uint64_t)-1 >> (128 - i * 4); 2821 if (ret.VsrD(0) & ~mask) { 2822 ox_flag = CRF_SO; 2823 } 2824 2825 ret.VsrD(0) &= mask; 2826 } else if (i > 0 && i <= 16) { 2827 mask = (uint64_t)-1 >> (64 - i * 4); 2828 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 2829 ox_flag = CRF_SO; 2830 } 2831 2832 ret.VsrD(1) &= mask; 2833 ret.VsrD(0) = 0; 2834 } else if (i == 0) { 2835 if (ret.VsrD(0) || ret.VsrD(1)) { 2836 ox_flag = CRF_SO; 2837 } 2838 ret.VsrD(0) = ret.VsrD(1) = 0; 2839 } 2840 2841 *r = ret; 2842 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) { 2843 return ox_flag | CRF_EQ; 2844 } 2845 2846 return ox_flag | CRF_GT; 2847 } 2848 2849 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 2850 { 2851 int i; 2852 VECTOR_FOR_INORDER_I(i, u8) { 2853 r->u8[i] = AES_sbox[a->u8[i]]; 2854 } 2855 } 2856 2857 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2858 { 2859 ppc_avr_t result; 2860 int i; 2861 2862 VECTOR_FOR_INORDER_I(i, u32) { 2863 result.VsrW(i) = b->VsrW(i) ^ 2864 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^ 2865 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^ 2866 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^ 2867 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]); 2868 } 2869 *r = result; 2870 } 2871 2872 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2873 { 2874 ppc_avr_t result; 2875 int i; 2876 2877 
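/* final AES round on a: ShiftRows then SubBytes, XORed with the round key in b (no MixColumns) */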
VECTOR_FOR_INORDER_I(i, u8) { 2878 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]); 2879 } 2880 *r = result; 2881 } 2882 2883 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2884 { 2885 /* This differs from what is written in ISA V2.07. The RTL is */ 2886 /* incorrect and will be fixed in V2.07B. */ 2887 int i; 2888 ppc_avr_t tmp; 2889 2890 VECTOR_FOR_INORDER_I(i, u8) { 2891 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])]; 2892 } 2893 2894 VECTOR_FOR_INORDER_I(i, u32) { 2895 r->VsrW(i) = 2896 AES_imc[tmp.VsrB(4 * i + 0)][0] ^ 2897 AES_imc[tmp.VsrB(4 * i + 1)][1] ^ 2898 AES_imc[tmp.VsrB(4 * i + 2)][2] ^ 2899 AES_imc[tmp.VsrB(4 * i + 3)][3]; 2900 } 2901 } 2902 2903 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2904 { 2905 ppc_avr_t result; 2906 int i; 2907 2908 VECTOR_FOR_INORDER_I(i, u8) { 2909 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]); 2910 } 2911 *r = result; 2912 } 2913 2914 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 2915 { 2916 int st = (st_six & 0x10) != 0; 2917 int six = st_six & 0xF; 2918 int i; 2919 2920 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2921 if (st == 0) { 2922 if ((six & (0x8 >> i)) == 0) { 2923 r->VsrW(i) = ror32(a->VsrW(i), 7) ^ 2924 ror32(a->VsrW(i), 18) ^ 2925 (a->VsrW(i) >> 3); 2926 } else { /* six.bit[i] == 1 */ 2927 r->VsrW(i) = ror32(a->VsrW(i), 17) ^ 2928 ror32(a->VsrW(i), 19) ^ 2929 (a->VsrW(i) >> 10); 2930 } 2931 } else { /* st == 1 */ 2932 if ((six & (0x8 >> i)) == 0) { 2933 r->VsrW(i) = ror32(a->VsrW(i), 2) ^ 2934 ror32(a->VsrW(i), 13) ^ 2935 ror32(a->VsrW(i), 22); 2936 } else { /* six.bit[i] == 1 */ 2937 r->VsrW(i) = ror32(a->VsrW(i), 6) ^ 2938 ror32(a->VsrW(i), 11) ^ 2939 ror32(a->VsrW(i), 25); 2940 } 2941 } 2942 } 2943 } 2944 2945 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 2946 { 2947 int st = (st_six & 0x10) != 0; 2948 int six = st_six & 0xF; 2949 int i; 2950 2951 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 2952 if (st == 0) { 2953 if ((six & (0x8 >> (2 * i))) == 0) { 2954 r->VsrD(i) = ror64(a->VsrD(i), 1) ^ 2955 ror64(a->VsrD(i), 8) ^ 2956 (a->VsrD(i) >> 7); 2957 } else { /* six.bit[2*i] == 1 */ 2958 r->VsrD(i) = ror64(a->VsrD(i), 19) ^ 2959 ror64(a->VsrD(i), 61) ^ 2960 (a->VsrD(i) >> 6); 2961 } 2962 } else { /* st == 1 */ 2963 if ((six & (0x8 >> (2 * i))) == 0) { 2964 r->VsrD(i) = ror64(a->VsrD(i), 28) ^ 2965 ror64(a->VsrD(i), 34) ^ 2966 ror64(a->VsrD(i), 39); 2967 } else { /* six.bit[2*i] == 1 */ 2968 r->VsrD(i) = ror64(a->VsrD(i), 14) ^ 2969 ror64(a->VsrD(i), 18) ^ 2970 ror64(a->VsrD(i), 41); 2971 } 2972 } 2973 } 2974 } 2975 2976 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2977 { 2978 ppc_avr_t result; 2979 int i; 2980 2981 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 2982 int indexA = c->VsrB(i) >> 4; 2983 int indexB = c->VsrB(i) & 0xF; 2984 2985 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB); 2986 } 2987 *r = result; 2988 } 2989 2990 #undef VECTOR_FOR_INORDER_I 2991 2992 /*****************************************************************************/ 2993 /* SPE extension helpers */ 2994 /* Use a table to make this quicker */ 2995 static const uint8_t hbrev[16] = { 2996 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE, 2997 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF, 2998 }; 2999 3000 static inline uint8_t byte_reverse(uint8_t val) 3001 { 3002 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4); 3003 } 3004 3005 static inline uint32_t word_reverse(uint32_t val) 3006 { 3007 return 
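/* reverse all 32 bits: bit-reverse each byte and swap the byte order */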
byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) | 3008 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24); 3009 } 3010 3011 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */ 3012 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2) 3013 { 3014 uint32_t a, b, d, mask; 3015 3016 mask = UINT32_MAX >> (32 - MASKBITS); 3017 a = arg1 & mask; 3018 b = arg2 & mask; 3019 d = word_reverse(1 + word_reverse(a | ~b)); 3020 return (arg1 & ~mask) | (d & b); 3021 } 3022 3023 uint32_t helper_cntlsw32(uint32_t val) 3024 { 3025 if (val & 0x80000000) { 3026 return clz32(~val); 3027 } else { 3028 return clz32(val); 3029 } 3030 } 3031 3032 uint32_t helper_cntlzw32(uint32_t val) 3033 { 3034 return clz32(val); 3035 } 3036 3037 /* 440 specific */ 3038 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 3039 target_ulong low, uint32_t update_Rc) 3040 { 3041 target_ulong mask; 3042 int i; 3043 3044 i = 1; 3045 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3046 if ((high & mask) == 0) { 3047 if (update_Rc) { 3048 env->crf[0] = 0x4; 3049 } 3050 goto done; 3051 } 3052 i++; 3053 } 3054 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3055 if ((low & mask) == 0) { 3056 if (update_Rc) { 3057 env->crf[0] = 0x8; 3058 } 3059 goto done; 3060 } 3061 i++; 3062 } 3063 i = 8; 3064 if (update_Rc) { 3065 env->crf[0] = 0x2; 3066 } 3067 done: 3068 env->xer = (env->xer & ~0x7F) | i; 3069 if (update_Rc) { 3070 env->crf[0] |= xer_so; 3071 } 3072 return i; 3073 } 3074
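/*
 * Illustrative sketch only: the function below is not referenced by any
 * helper above and its name is made up for this example.  It shows how a
 * signed packed-BCD operand such as +123 is laid out for the bcd*
 * helpers: digit 0 (the low nibble of byte BCD_DIG_BYTE(0)) holds the
 * sign code, and digit n (n >= 1) is packed two digits per byte towards
 * the most significant end.
 */
static G_GNUC_UNUSED void bcd_layout_example(void)
{
    ppc_avr_t v = { .u64 = { 0, 0 } };
    int invalid = 0;

    v.VsrB(15) = 0x3C; /* ones digit 3 in the high nibble, plus sign 0xC */
    v.VsrB(14) = 0x12; /* hundreds digit 1, tens digit 2 */

    g_assert(bcd_get_sgn(&v) == 1);                /* positive operand */
    g_assert(bcd_get_digit(&v, 1, &invalid) == 3); /* ones */
    g_assert(bcd_get_digit(&v, 2, &invalid) == 2); /* tens */
    g_assert(bcd_get_digit(&v, 3, &invalid) == 1); /* hundreds */
    g_assert(bcd_is_valid(&v));
    g_assert(!invalid);
}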