1 /* 2 * PowerPC integer and vector emulation helpers for QEMU. 3 * 4 * Copyright (c) 2003-2007 Jocelyn Mayer 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "cpu.h" 22 #include "internal.h" 23 #include "qemu/host-utils.h" 24 #include "qemu/main-loop.h" 25 #include "qemu/log.h" 26 #include "exec/helper-proto.h" 27 #include "crypto/aes.h" 28 #include "fpu/softfloat.h" 29 #include "qapi/error.h" 30 #include "qemu/guest-random.h" 31 32 #include "helper_regs.h" 33 /*****************************************************************************/ 34 /* Fixed point operations helpers */ 35 36 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov) 37 { 38 if (unlikely(ov)) { 39 env->so = env->ov = 1; 40 } else { 41 env->ov = 0; 42 } 43 } 44 45 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, 46 uint32_t oe) 47 { 48 uint64_t rt = 0; 49 int overflow = 0; 50 51 uint64_t dividend = (uint64_t)ra << 32; 52 uint64_t divisor = (uint32_t)rb; 53 54 if (unlikely(divisor == 0)) { 55 overflow = 1; 56 } else { 57 rt = dividend / divisor; 58 overflow = rt > UINT32_MAX; 59 } 60 61 if (unlikely(overflow)) { 62 rt = 0; /* Undefined */ 63 } 64 65 if (oe) { 66 helper_update_ov_legacy(env, overflow); 67 } 68 69 return (target_ulong)rt; 70 } 71 72 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, 73 uint32_t oe) 74 { 75 int64_t rt = 0; 76 int overflow = 0; 77 78 int64_t dividend = (int64_t)ra << 32; 79 int64_t divisor = (int64_t)((int32_t)rb); 80 81 if (unlikely((divisor == 0) || 82 ((divisor == -1ull) && (dividend == INT64_MIN)))) { 83 overflow = 1; 84 } else { 85 rt = dividend / divisor; 86 overflow = rt != (int32_t)rt; 87 } 88 89 if (unlikely(overflow)) { 90 rt = 0; /* Undefined */ 91 } 92 93 if (oe) { 94 helper_update_ov_legacy(env, overflow); 95 } 96 97 return (target_ulong)rt; 98 } 99 100 #if defined(TARGET_PPC64) 101 102 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) 103 { 104 uint64_t rt = 0; 105 int overflow = 0; 106 107 if (unlikely(rb == 0 || ra >= rb)) { 108 overflow = 1; 109 rt = 0; /* Undefined */ 110 } else { 111 divu128(&rt, &ra, rb); 112 } 113 114 if (oe) { 115 helper_update_ov_legacy(env, overflow); 116 } 117 118 return rt; 119 } 120 121 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) 122 { 123 uint64_t rt = 0; 124 int64_t ra = (int64_t)rau; 125 int64_t rb = (int64_t)rbu; 126 int overflow = 0; 127 128 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) { 129 overflow = 1; 130 rt = 0; /* Undefined */ 131 } else { 132 divs128(&rt, &ra, rb); 133 } 134 135 if (oe) { 136 helper_update_ov_legacy(env, overflow); 137 } 138 139 return rt; 140 } 141 142 #endif 143 144 145 #if defined(TARGET_PPC64) 146 /* if x = 0xab, returns 0xababababababababa */ 147 #define pattern(x) (((x) & 0xff) * 
(~(target_ulong)0 / 0xff)) 148 149 /* 150 * subtract 1 from each byte, and with inverse, check if MSB is set at each 151 * byte. 152 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80 153 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 154 */ 155 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 156 157 /* When you XOR the pattern and there is a match, that byte will be zero */ 158 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 159 160 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 161 { 162 return hasvalue(rb, ra) ? CRF_GT : 0; 163 } 164 165 #undef pattern 166 #undef haszero 167 #undef hasvalue 168 169 /* 170 * Return a random number. 171 */ 172 uint64_t helper_darn32(void) 173 { 174 Error *err = NULL; 175 uint32_t ret; 176 177 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 178 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 179 error_get_pretty(err)); 180 error_free(err); 181 return -1; 182 } 183 184 return ret; 185 } 186 187 uint64_t helper_darn64(void) 188 { 189 Error *err = NULL; 190 uint64_t ret; 191 192 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 193 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 194 error_get_pretty(err)); 195 error_free(err); 196 return -1; 197 } 198 199 return ret; 200 } 201 202 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 203 { 204 int i; 205 uint64_t ra = 0; 206 207 for (i = 0; i < 8; i++) { 208 int index = (rs >> (i * 8)) & 0xFF; 209 if (index < 64) { 210 if (rb & PPC_BIT(index)) { 211 ra |= 1 << i; 212 } 213 } 214 } 215 return ra; 216 } 217 218 #endif 219 220 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 221 { 222 target_ulong mask = 0xff; 223 target_ulong ra = 0; 224 int i; 225 226 for (i = 0; i < sizeof(target_ulong); i++) { 227 if ((rs & mask) == (rb & mask)) { 228 ra |= mask; 229 } 230 mask <<= 8; 231 } 232 return ra; 233 } 234 235 /* shift right arithmetic helper */ 236 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 237 target_ulong shift) 238 { 239 int32_t ret; 240 241 if (likely(!(shift & 0x20))) { 242 if (likely((uint32_t)shift != 0)) { 243 shift &= 0x1f; 244 ret = (int32_t)value >> shift; 245 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 246 env->ca32 = env->ca = 0; 247 } else { 248 env->ca32 = env->ca = 1; 249 } 250 } else { 251 ret = (int32_t)value; 252 env->ca32 = env->ca = 0; 253 } 254 } else { 255 ret = (int32_t)value >> 31; 256 env->ca32 = env->ca = (ret != 0); 257 } 258 return (target_long)ret; 259 } 260 261 #if defined(TARGET_PPC64) 262 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 263 target_ulong shift) 264 { 265 int64_t ret; 266 267 if (likely(!(shift & 0x40))) { 268 if (likely((uint64_t)shift != 0)) { 269 shift &= 0x3f; 270 ret = (int64_t)value >> shift; 271 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 272 env->ca32 = env->ca = 0; 273 } else { 274 env->ca32 = env->ca = 1; 275 } 276 } else { 277 ret = (int64_t)value; 278 env->ca32 = env->ca = 0; 279 } 280 } else { 281 ret = (int64_t)value >> 63; 282 env->ca32 = env->ca = (ret != 0); 283 } 284 return ret; 285 } 286 #endif 287 288 #if defined(TARGET_PPC64) 289 target_ulong helper_popcntb(target_ulong val) 290 { 291 /* Note that we don't fold past bytes */ 292 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 293 0x5555555555555555ULL); 294 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 295 0x3333333333333333ULL); 296 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 297 0x0f0f0f0f0f0f0f0fULL); 298 return val; 299 } 300 301 target_ulong 
helper_popcntw(target_ulong val) 302 { 303 /* Note that we don't fold past words. */ 304 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 305 0x5555555555555555ULL); 306 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 307 0x3333333333333333ULL); 308 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 309 0x0f0f0f0f0f0f0f0fULL); 310 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 311 0x00ff00ff00ff00ffULL); 312 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 313 0x0000ffff0000ffffULL); 314 return val; 315 } 316 #else 317 target_ulong helper_popcntb(target_ulong val) 318 { 319 /* Note that we don't fold past bytes */ 320 val = (val & 0x55555555) + ((val >> 1) & 0x55555555); 321 val = (val & 0x33333333) + ((val >> 2) & 0x33333333); 322 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); 323 return val; 324 } 325 #endif 326 327 uint64_t helper_CFUGED(uint64_t src, uint64_t mask) 328 { 329 /* 330 * Instead of processing the mask bit-by-bit from the most significant to 331 * the least significant bit, as described in PowerISA, we'll handle it in 332 * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use 333 * ctz or cto, we negate the mask at the end of the loop. 334 */ 335 target_ulong m, left = 0, right = 0; 336 unsigned int n, i = 64; 337 bool bit = false; /* tracks if we are processing zeros or ones */ 338 339 if (mask == 0 || mask == -1) { 340 return src; 341 } 342 343 /* Processes the mask in blocks, from LSB to MSB */ 344 while (i) { 345 /* Find how many bits we should take */ 346 n = ctz64(mask); 347 if (n > i) { 348 n = i; 349 } 350 351 /* 352 * Extracts 'n' trailing bits of src and put them on the leading 'n' 353 * bits of 'right' or 'left', pushing down the previously extracted 354 * values. 355 */ 356 m = (1ll << n) - 1; 357 if (bit) { 358 right = ror64(right | (src & m), n); 359 } else { 360 left = ror64(left | (src & m), n); 361 } 362 363 /* 364 * Discards the processed bits from 'src' and 'mask'. Note that we are 365 * removing 'n' trailing zeros from 'mask', but the logical shift will 366 * add 'n' leading zeros back, so the population count of 'mask' is kept 367 * the same. 368 */ 369 src >>= n; 370 mask >>= n; 371 i -= n; 372 bit = !bit; 373 mask = ~mask; 374 } 375 376 /* 377 * At the end, right was ror'ed ctpop(mask) times. To put it back in place, 378 * we'll shift it more 64-ctpop(mask) times. 
379 */ 380 if (bit) { 381 n = ctpop64(mask); 382 } else { 383 n = 64 - ctpop64(mask); 384 } 385 386 return left | (right >> n); 387 } 388 389 uint64_t helper_PDEPD(uint64_t src, uint64_t mask) 390 { 391 int i, o; 392 uint64_t result = 0; 393 394 if (mask == -1) { 395 return src; 396 } 397 398 for (i = 0; mask != 0; i++) { 399 o = ctz64(mask); 400 mask &= mask - 1; 401 result |= ((src >> i) & 1) << o; 402 } 403 404 return result; 405 } 406 407 uint64_t helper_PEXTD(uint64_t src, uint64_t mask) 408 { 409 int i, o; 410 uint64_t result = 0; 411 412 if (mask == -1) { 413 return src; 414 } 415 416 for (o = 0; mask != 0; o++) { 417 i = ctz64(mask); 418 mask &= mask - 1; 419 result |= ((src >> i) & 1) << o; 420 } 421 422 return result; 423 } 424 425 /*****************************************************************************/ 426 /* Altivec extension helpers */ 427 #if defined(HOST_WORDS_BIGENDIAN) 428 #define VECTOR_FOR_INORDER_I(index, element) \ 429 for (index = 0; index < ARRAY_SIZE(r->element); index++) 430 #else 431 #define VECTOR_FOR_INORDER_I(index, element) \ 432 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--) 433 #endif 434 435 /* Saturating arithmetic helpers. */ 436 #define SATCVT(from, to, from_type, to_type, min, max) \ 437 static inline to_type cvt##from##to(from_type x, int *sat) \ 438 { \ 439 to_type r; \ 440 \ 441 if (x < (from_type)min) { \ 442 r = min; \ 443 *sat = 1; \ 444 } else if (x > (from_type)max) { \ 445 r = max; \ 446 *sat = 1; \ 447 } else { \ 448 r = x; \ 449 } \ 450 return r; \ 451 } 452 #define SATCVTU(from, to, from_type, to_type, min, max) \ 453 static inline to_type cvt##from##to(from_type x, int *sat) \ 454 { \ 455 to_type r; \ 456 \ 457 if (x > (from_type)max) { \ 458 r = max; \ 459 *sat = 1; \ 460 } else { \ 461 r = x; \ 462 } \ 463 return r; \ 464 } 465 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX) 466 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX) 467 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX) 468 469 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX) 470 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX) 471 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX) 472 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX) 473 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX) 474 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX) 475 #undef SATCVT 476 #undef SATCVTU 477 478 void helper_mtvscr(CPUPPCState *env, uint32_t vscr) 479 { 480 ppc_store_vscr(env, vscr); 481 } 482 483 uint32_t helper_mfvscr(CPUPPCState *env) 484 { 485 return ppc_get_vscr(env); 486 } 487 488 static inline void set_vscr_sat(CPUPPCState *env) 489 { 490 /* The choice of non-zero value is arbitrary. 
*/ 491 env->vscr_sat.u32[0] = 1; 492 } 493 494 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 495 { 496 int i; 497 498 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 499 r->u32[i] = ~a->u32[i] < b->u32[i]; 500 } 501 } 502 503 /* vprtybw */ 504 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b) 505 { 506 int i; 507 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 508 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16); 509 res ^= res >> 8; 510 r->u32[i] = res & 1; 511 } 512 } 513 514 /* vprtybd */ 515 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b) 516 { 517 int i; 518 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 519 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32); 520 res ^= res >> 16; 521 res ^= res >> 8; 522 r->u64[i] = res & 1; 523 } 524 } 525 526 /* vprtybq */ 527 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b) 528 { 529 uint64_t res = b->u64[0] ^ b->u64[1]; 530 res ^= res >> 32; 531 res ^= res >> 16; 532 res ^= res >> 8; 533 r->VsrD(1) = res & 1; 534 r->VsrD(0) = 0; 535 } 536 537 #define VARITHFP(suffix, func) \ 538 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 539 ppc_avr_t *b) \ 540 { \ 541 int i; \ 542 \ 543 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 544 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 545 } \ 546 } 547 VARITHFP(addfp, float32_add) 548 VARITHFP(subfp, float32_sub) 549 VARITHFP(minfp, float32_min) 550 VARITHFP(maxfp, float32_max) 551 #undef VARITHFP 552 553 #define VARITHFPFMA(suffix, type) \ 554 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 555 ppc_avr_t *b, ppc_avr_t *c) \ 556 { \ 557 int i; \ 558 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 559 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 560 type, &env->vec_status); \ 561 } \ 562 } 563 VARITHFPFMA(maddfp, 0); 564 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 565 #undef VARITHFPFMA 566 567 #define VARITHSAT_CASE(type, op, cvt, element) \ 568 { \ 569 type result = (type)a->element[i] op (type)b->element[i]; \ 570 r->element[i] = cvt(result, &sat); \ 571 } 572 573 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 574 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 575 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 576 { \ 577 int sat = 0; \ 578 int i; \ 579 \ 580 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 581 VARITHSAT_CASE(optype, op, cvt, element); \ 582 } \ 583 if (sat) { \ 584 vscr_sat->u32[0] = 1; \ 585 } \ 586 } 587 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 588 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 589 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 590 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 591 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 592 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 593 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 594 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 595 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 596 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 597 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 598 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 599 #undef VARITHSAT_CASE 600 #undef VARITHSAT_DO 601 #undef VARITHSAT_SIGNED 602 #undef VARITHSAT_UNSIGNED 603 604 #define VAVG_DO(name, element, etype) \ 605 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 606 { \ 607 int i; \ 608 \ 609 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 610 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 611 r->element[i] = x >> 1; \ 612 } \ 613 } 614 615 #define VAVG(type, 
signed_element, signed_type, unsigned_element, \ 616 unsigned_type) \ 617 VAVG_DO(avgs##type, signed_element, signed_type) \ 618 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 619 VAVG(b, s8, int16_t, u8, uint16_t) 620 VAVG(h, s16, int32_t, u16, uint32_t) 621 VAVG(w, s32, int64_t, u32, uint64_t) 622 #undef VAVG_DO 623 #undef VAVG 624 625 #define VABSDU_DO(name, element) \ 626 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 627 { \ 628 int i; \ 629 \ 630 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 631 r->element[i] = (a->element[i] > b->element[i]) ? \ 632 (a->element[i] - b->element[i]) : \ 633 (b->element[i] - a->element[i]); \ 634 } \ 635 } 636 637 /* 638 * VABSDU - Vector absolute difference unsigned 639 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 640 * element - element type to access from vector 641 */ 642 #define VABSDU(type, element) \ 643 VABSDU_DO(absdu##type, element) 644 VABSDU(b, u8) 645 VABSDU(h, u16) 646 VABSDU(w, u32) 647 #undef VABSDU_DO 648 #undef VABSDU 649 650 #define VCF(suffix, cvt, element) \ 651 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 652 ppc_avr_t *b, uint32_t uim) \ 653 { \ 654 int i; \ 655 \ 656 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 657 float32 t = cvt(b->element[i], &env->vec_status); \ 658 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 659 } \ 660 } 661 VCF(ux, uint32_to_float32, u32) 662 VCF(sx, int32_to_float32, s32) 663 #undef VCF 664 665 #define VCMPNEZ(NAME, ELEM) \ 666 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \ 667 { \ 668 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \ 669 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \ 670 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \ 671 } \ 672 } 673 VCMPNEZ(VCMPNEZB, u8) 674 VCMPNEZ(VCMPNEZH, u16) 675 VCMPNEZ(VCMPNEZW, u32) 676 #undef VCMPNEZ 677 678 #define VCMPFP_DO(suffix, compare, order, record) \ 679 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 680 ppc_avr_t *a, ppc_avr_t *b) \ 681 { \ 682 uint32_t ones = (uint32_t)-1; \ 683 uint32_t all = ones; \ 684 uint32_t none = 0; \ 685 int i; \ 686 \ 687 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 688 uint32_t result; \ 689 FloatRelation rel = \ 690 float32_compare_quiet(a->f32[i], b->f32[i], \ 691 &env->vec_status); \ 692 if (rel == float_relation_unordered) { \ 693 result = 0; \ 694 } else if (rel compare order) { \ 695 result = ones; \ 696 } else { \ 697 result = 0; \ 698 } \ 699 r->u32[i] = result; \ 700 all &= result; \ 701 none |= result; \ 702 } \ 703 if (record) { \ 704 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 705 } \ 706 } 707 #define VCMPFP(suffix, compare, order) \ 708 VCMPFP_DO(suffix, compare, order, 0) \ 709 VCMPFP_DO(suffix##_dot, compare, order, 1) 710 VCMPFP(eqfp, ==, float_relation_equal) 711 VCMPFP(gefp, !=, float_relation_less) 712 VCMPFP(gtfp, ==, float_relation_greater) 713 #undef VCMPFP_DO 714 #undef VCMPFP 715 716 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 717 ppc_avr_t *a, ppc_avr_t *b, int record) 718 { 719 int i; 720 int all_in = 0; 721 722 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 723 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 724 &env->vec_status); 725 if (le_rel == float_relation_unordered) { 726 r->u32[i] = 0xc0000000; 727 all_in = 1; 728 } else { 729 float32 bneg = float32_chs(b->f32[i]); 730 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, 731 &env->vec_status); 732 int le = le_rel != float_relation_greater; 733 int ge = 
ge_rel != float_relation_less; 734 735 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 736 all_in |= (!le | !ge); 737 } 738 } 739 if (record) { 740 env->crf[6] = (all_in == 0) << 1; 741 } 742 } 743 744 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 745 { 746 vcmpbfp_internal(env, r, a, b, 0); 747 } 748 749 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 750 ppc_avr_t *b) 751 { 752 vcmpbfp_internal(env, r, a, b, 1); 753 } 754 755 #define VCT(suffix, satcvt, element) \ 756 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 757 ppc_avr_t *b, uint32_t uim) \ 758 { \ 759 int i; \ 760 int sat = 0; \ 761 float_status s = env->vec_status; \ 762 \ 763 set_float_rounding_mode(float_round_to_zero, &s); \ 764 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 765 if (float32_is_any_nan(b->f32[i])) { \ 766 r->element[i] = 0; \ 767 } else { \ 768 float64 t = float32_to_float64(b->f32[i], &s); \ 769 int64_t j; \ 770 \ 771 t = float64_scalbn(t, uim, &s); \ 772 j = float64_to_int64(t, &s); \ 773 r->element[i] = satcvt(j, &sat); \ 774 } \ 775 } \ 776 if (sat) { \ 777 set_vscr_sat(env); \ 778 } \ 779 } 780 VCT(uxs, cvtsduw, u32) 781 VCT(sxs, cvtsdsw, s32) 782 #undef VCT 783 784 target_ulong helper_vclzlsbb(ppc_avr_t *r) 785 { 786 target_ulong count = 0; 787 int i; 788 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 789 if (r->VsrB(i) & 0x01) { 790 break; 791 } 792 count++; 793 } 794 return count; 795 } 796 797 target_ulong helper_vctzlsbb(ppc_avr_t *r) 798 { 799 target_ulong count = 0; 800 int i; 801 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 802 if (r->VsrB(i) & 0x01) { 803 break; 804 } 805 count++; 806 } 807 return count; 808 } 809 810 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 811 ppc_avr_t *b, ppc_avr_t *c) 812 { 813 int sat = 0; 814 int i; 815 816 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 817 int32_t prod = a->s16[i] * b->s16[i]; 818 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 819 820 r->s16[i] = cvtswsh(t, &sat); 821 } 822 823 if (sat) { 824 set_vscr_sat(env); 825 } 826 } 827 828 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 829 ppc_avr_t *b, ppc_avr_t *c) 830 { 831 int sat = 0; 832 int i; 833 834 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 835 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 836 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 837 r->s16[i] = cvtswsh(t, &sat); 838 } 839 840 if (sat) { 841 set_vscr_sat(env); 842 } 843 } 844 845 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 846 { 847 int i; 848 849 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 850 int32_t prod = a->s16[i] * b->s16[i]; 851 r->s16[i] = (int16_t) (prod + c->s16[i]); 852 } 853 } 854 855 #define VMRG_DO(name, element, access, ofs) \ 856 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 857 { \ 858 ppc_avr_t result; \ 859 int i, half = ARRAY_SIZE(r->element) / 2; \ 860 \ 861 for (i = 0; i < half; i++) { \ 862 result.access(i * 2 + 0) = a->access(i + ofs); \ 863 result.access(i * 2 + 1) = b->access(i + ofs); \ 864 } \ 865 *r = result; \ 866 } 867 868 #define VMRG(suffix, element, access) \ 869 VMRG_DO(mrgl##suffix, element, access, half) \ 870 VMRG_DO(mrgh##suffix, element, access, 0) 871 VMRG(b, u8, VsrB) 872 VMRG(h, u16, VsrH) 873 VMRG(w, u32, VsrW) 874 #undef VMRG_DO 875 #undef VMRG 876 877 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 878 ppc_avr_t *b, ppc_avr_t *c) 879 { 880 int32_t prod[16]; 881 int i; 882 883 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 884 
prod[i] = (int32_t)a->s8[i] * b->u8[i]; 885 } 886 887 VECTOR_FOR_INORDER_I(i, s32) { 888 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 889 prod[4 * i + 2] + prod[4 * i + 3]; 890 } 891 } 892 893 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 894 ppc_avr_t *b, ppc_avr_t *c) 895 { 896 int32_t prod[8]; 897 int i; 898 899 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 900 prod[i] = a->s16[i] * b->s16[i]; 901 } 902 903 VECTOR_FOR_INORDER_I(i, s32) { 904 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 905 } 906 } 907 908 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 909 ppc_avr_t *b, ppc_avr_t *c) 910 { 911 int32_t prod[8]; 912 int i; 913 int sat = 0; 914 915 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 916 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 917 } 918 919 VECTOR_FOR_INORDER_I(i, s32) { 920 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 921 922 r->u32[i] = cvtsdsw(t, &sat); 923 } 924 925 if (sat) { 926 set_vscr_sat(env); 927 } 928 } 929 930 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 931 ppc_avr_t *b, ppc_avr_t *c) 932 { 933 uint16_t prod[16]; 934 int i; 935 936 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 937 prod[i] = a->u8[i] * b->u8[i]; 938 } 939 940 VECTOR_FOR_INORDER_I(i, u32) { 941 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 942 prod[4 * i + 2] + prod[4 * i + 3]; 943 } 944 } 945 946 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 947 ppc_avr_t *b, ppc_avr_t *c) 948 { 949 uint32_t prod[8]; 950 int i; 951 952 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 953 prod[i] = a->u16[i] * b->u16[i]; 954 } 955 956 VECTOR_FOR_INORDER_I(i, u32) { 957 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 958 } 959 } 960 961 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 962 ppc_avr_t *b, ppc_avr_t *c) 963 { 964 uint32_t prod[8]; 965 int i; 966 int sat = 0; 967 968 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 969 prod[i] = a->u16[i] * b->u16[i]; 970 } 971 972 VECTOR_FOR_INORDER_I(i, s32) { 973 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 974 975 r->u32[i] = cvtuduw(t, &sat); 976 } 977 978 if (sat) { 979 set_vscr_sat(env); 980 } 981 } 982 983 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 984 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 985 { \ 986 int i; \ 987 \ 988 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 989 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 990 (cast)b->mul_access(i); \ 991 } \ 992 } 993 994 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 995 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 996 { \ 997 int i; \ 998 \ 999 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1000 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1001 (cast)b->mul_access(i + 1); \ 1002 } \ 1003 } 1004 1005 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1006 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \ 1007 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast) 1008 VMUL(SB, s8, VsrSB, VsrSH, int16_t) 1009 VMUL(SH, s16, VsrSH, VsrSW, int32_t) 1010 VMUL(SW, s32, VsrSW, VsrSD, int64_t) 1011 VMUL(UB, u8, VsrB, VsrH, uint16_t) 1012 VMUL(UH, u16, VsrH, VsrW, uint32_t) 1013 VMUL(UW, u32, VsrW, VsrD, uint64_t) 1014 #undef VMUL_DO_EVN 1015 #undef VMUL_DO_ODD 1016 #undef VMUL 1017 1018 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1019 ppc_avr_t *c) 1020 { 
1021 ppc_avr_t result; 1022 int i; 1023 1024 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1025 int s = c->VsrB(i) & 0x1f; 1026 int index = s & 0xf; 1027 1028 if (s & 0x10) { 1029 result.VsrB(i) = b->VsrB(index); 1030 } else { 1031 result.VsrB(i) = a->VsrB(index); 1032 } 1033 } 1034 *r = result; 1035 } 1036 1037 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1038 ppc_avr_t *c) 1039 { 1040 ppc_avr_t result; 1041 int i; 1042 1043 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1044 int s = c->VsrB(i) & 0x1f; 1045 int index = 15 - (s & 0xf); 1046 1047 if (s & 0x10) { 1048 result.VsrB(i) = a->VsrB(index); 1049 } else { 1050 result.VsrB(i) = b->VsrB(index); 1051 } 1052 } 1053 *r = result; 1054 } 1055 1056 #if defined(HOST_WORDS_BIGENDIAN) 1057 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1058 #define VBPERMD_INDEX(i) (i) 1059 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1060 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) 1061 #else 1062 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1063 #define VBPERMD_INDEX(i) (1 - i) 1064 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1065 #define EXTRACT_BIT(avr, i, index) \ 1066 (extract64((avr)->u64[1 - i], 63 - index, 1)) 1067 #endif 1068 1069 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1070 { 1071 int i, j; 1072 ppc_avr_t result = { .u64 = { 0, 0 } }; 1073 VECTOR_FOR_INORDER_I(i, u64) { 1074 for (j = 0; j < 8; j++) { 1075 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1076 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1077 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1078 } 1079 } 1080 } 1081 *r = result; 1082 } 1083 1084 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1085 { 1086 int i; 1087 uint64_t perm = 0; 1088 1089 VECTOR_FOR_INORDER_I(i, u8) { 1090 int index = VBPERMQ_INDEX(b, i); 1091 1092 if (index < 128) { 1093 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1094 if (a->u64[VBPERMQ_DW(index)] & mask) { 1095 perm |= (0x8000 >> i); 1096 } 1097 } 1098 } 1099 1100 r->VsrD(0) = perm; 1101 r->VsrD(1) = 0; 1102 } 1103 1104 #undef VBPERMQ_INDEX 1105 #undef VBPERMQ_DW 1106 1107 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1108 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1109 { \ 1110 int i, j; \ 1111 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ 1112 \ 1113 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1114 prod[i] = 0; \ 1115 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1116 if (a->srcfld[i] & (1ull << j)) { \ 1117 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1118 } \ 1119 } \ 1120 } \ 1121 \ 1122 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1123 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ 1124 } \ 1125 } 1126 1127 PMSUM(vpmsumb, u8, u16, uint16_t) 1128 PMSUM(vpmsumh, u16, u32, uint32_t) 1129 PMSUM(vpmsumw, u32, u64, uint64_t) 1130 1131 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1132 { 1133 1134 #ifdef CONFIG_INT128 1135 int i, j; 1136 __uint128_t prod[2]; 1137 1138 VECTOR_FOR_INORDER_I(i, u64) { 1139 prod[i] = 0; 1140 for (j = 0; j < 64; j++) { 1141 if (a->u64[i] & (1ull << j)) { 1142 prod[i] ^= (((__uint128_t)b->u64[i]) << j); 1143 } 1144 } 1145 } 1146 1147 r->u128 = prod[0] ^ prod[1]; 1148 1149 #else 1150 int i, j; 1151 ppc_avr_t prod[2]; 1152 1153 VECTOR_FOR_INORDER_I(i, u64) { 1154 prod[i].VsrD(1) = prod[i].VsrD(0) = 0; 1155 for (j = 0; j < 64; j++) { 1156 if (a->u64[i] & (1ull << j)) { 1157 ppc_avr_t bshift; 1158 if (j == 0) { 1159 bshift.VsrD(0) = 0; 1160 bshift.VsrD(1) = b->u64[i]; 1161 } else { 1162 bshift.VsrD(0) = 
b->u64[i] >> (64 - j); 1163 bshift.VsrD(1) = b->u64[i] << j; 1164 } 1165 prod[i].VsrD(1) ^= bshift.VsrD(1); 1166 prod[i].VsrD(0) ^= bshift.VsrD(0); 1167 } 1168 } 1169 } 1170 1171 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1); 1172 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0); 1173 #endif 1174 } 1175 1176 1177 #if defined(HOST_WORDS_BIGENDIAN) 1178 #define PKBIG 1 1179 #else 1180 #define PKBIG 0 1181 #endif 1182 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1183 { 1184 int i, j; 1185 ppc_avr_t result; 1186 #if defined(HOST_WORDS_BIGENDIAN) 1187 const ppc_avr_t *x[2] = { a, b }; 1188 #else 1189 const ppc_avr_t *x[2] = { b, a }; 1190 #endif 1191 1192 VECTOR_FOR_INORDER_I(i, u64) { 1193 VECTOR_FOR_INORDER_I(j, u32) { 1194 uint32_t e = x[i]->u32[j]; 1195 1196 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1197 ((e >> 6) & 0x3e0) | 1198 ((e >> 3) & 0x1f)); 1199 } 1200 } 1201 *r = result; 1202 } 1203 1204 #define VPK(suffix, from, to, cvt, dosat) \ 1205 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1206 ppc_avr_t *a, ppc_avr_t *b) \ 1207 { \ 1208 int i; \ 1209 int sat = 0; \ 1210 ppc_avr_t result; \ 1211 ppc_avr_t *a0 = PKBIG ? a : b; \ 1212 ppc_avr_t *a1 = PKBIG ? b : a; \ 1213 \ 1214 VECTOR_FOR_INORDER_I(i, from) { \ 1215 result.to[i] = cvt(a0->from[i], &sat); \ 1216 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1217 } \ 1218 *r = result; \ 1219 if (dosat && sat) { \ 1220 set_vscr_sat(env); \ 1221 } \ 1222 } 1223 #define I(x, y) (x) 1224 VPK(shss, s16, s8, cvtshsb, 1) 1225 VPK(shus, s16, u8, cvtshub, 1) 1226 VPK(swss, s32, s16, cvtswsh, 1) 1227 VPK(swus, s32, u16, cvtswuh, 1) 1228 VPK(sdss, s64, s32, cvtsdsw, 1) 1229 VPK(sdus, s64, u32, cvtsduw, 1) 1230 VPK(uhus, u16, u8, cvtuhub, 1) 1231 VPK(uwus, u32, u16, cvtuwuh, 1) 1232 VPK(udus, u64, u32, cvtuduw, 1) 1233 VPK(uhum, u16, u8, I, 0) 1234 VPK(uwum, u32, u16, I, 0) 1235 VPK(udum, u64, u32, I, 0) 1236 #undef I 1237 #undef VPK 1238 #undef PKBIG 1239 1240 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1241 { 1242 int i; 1243 1244 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1245 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1246 } 1247 } 1248 1249 #define VRFI(suffix, rounding) \ 1250 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1251 ppc_avr_t *b) \ 1252 { \ 1253 int i; \ 1254 float_status s = env->vec_status; \ 1255 \ 1256 set_float_rounding_mode(rounding, &s); \ 1257 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1258 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1259 } \ 1260 } 1261 VRFI(n, float_round_nearest_even) 1262 VRFI(m, float_round_down) 1263 VRFI(p, float_round_up) 1264 VRFI(z, float_round_to_zero) 1265 #undef VRFI 1266 1267 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1268 { 1269 int i; 1270 1271 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1272 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1273 1274 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1275 } 1276 } 1277 1278 #define VRLMI(name, size, element, insert) \ 1279 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 1280 { \ 1281 int i; \ 1282 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1283 uint##size##_t src1 = a->element[i]; \ 1284 uint##size##_t src2 = b->element[i]; \ 1285 uint##size##_t src3 = r->element[i]; \ 1286 uint##size##_t begin, end, shift, mask, rot_val; \ 1287 \ 1288 shift = extract##size(src2, 0, 6); \ 1289 end = extract##size(src2, 8, 6); \ 1290 begin = extract##size(src2, 16, 6); \ 1291 
rot_val = rol##size(src1, shift); \ 1292 mask = mask_u##size(begin, end); \ 1293 if (insert) { \ 1294 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1295 } else { \ 1296 r->element[i] = (rot_val & mask); \ 1297 } \ 1298 } \ 1299 } 1300 1301 VRLMI(VRLDMI, 64, u64, 1); 1302 VRLMI(VRLWMI, 32, u32, 1); 1303 VRLMI(VRLDNM, 64, u64, 0); 1304 VRLMI(VRLWNM, 32, u32, 0); 1305 1306 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1307 ppc_avr_t *c) 1308 { 1309 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]); 1310 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]); 1311 } 1312 1313 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1314 { 1315 int i; 1316 1317 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1318 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status); 1319 } 1320 } 1321 1322 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1323 { 1324 int i; 1325 1326 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1327 r->f32[i] = float32_log2(b->f32[i], &env->vec_status); 1328 } 1329 } 1330 1331 #define VEXTU_X_DO(name, size, left) \ 1332 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1333 { \ 1334 int index = (a & 0xf) * 8; \ 1335 if (left) { \ 1336 index = 128 - index - size; \ 1337 } \ 1338 return int128_getlo(int128_rshift(b->s128, index)) & \ 1339 MAKE_64BIT_MASK(0, size); \ 1340 } 1341 VEXTU_X_DO(vextublx, 8, 1) 1342 VEXTU_X_DO(vextuhlx, 16, 1) 1343 VEXTU_X_DO(vextuwlx, 32, 1) 1344 VEXTU_X_DO(vextubrx, 8, 0) 1345 VEXTU_X_DO(vextuhrx, 16, 0) 1346 VEXTU_X_DO(vextuwrx, 32, 0) 1347 #undef VEXTU_X_DO 1348 1349 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1350 { 1351 int i; 1352 unsigned int shift, bytes, size; 1353 1354 size = ARRAY_SIZE(r->u8); 1355 for (i = 0; i < size; i++) { 1356 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1357 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */ 1358 (((i + 1) < size) ? a->VsrB(i + 1) : 0); 1359 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */ 1360 } 1361 } 1362 1363 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1364 { 1365 int i; 1366 unsigned int shift, bytes; 1367 1368 /* 1369 * Use reverse order, as destination and source register can be 1370 * same. Its being modified in place saving temporary, reverse 1371 * order will guarantee that computed result is not fed back. 1372 */ 1373 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1374 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1375 bytes = ((i ? 
a->VsrB(i - 1) : 0) << 8) + a->VsrB(i); 1376 /* extract adjacent bytes */ 1377 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */ 1378 } 1379 } 1380 1381 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1382 { 1383 int sh = shift & 0xf; 1384 int i; 1385 ppc_avr_t result; 1386 1387 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1388 int index = sh + i; 1389 if (index > 0xf) { 1390 result.VsrB(i) = b->VsrB(index - 0x10); 1391 } else { 1392 result.VsrB(i) = a->VsrB(index); 1393 } 1394 } 1395 *r = result; 1396 } 1397 1398 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1399 { 1400 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1401 1402 #if defined(HOST_WORDS_BIGENDIAN) 1403 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1404 memset(&r->u8[16 - sh], 0, sh); 1405 #else 1406 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1407 memset(&r->u8[0], 0, sh); 1408 #endif 1409 } 1410 1411 #if defined(HOST_WORDS_BIGENDIAN) 1412 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX]) 1413 #else 1414 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1) 1415 #endif 1416 1417 #define VINSX(SUFFIX, TYPE) \ 1418 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \ 1419 uint64_t val, target_ulong index) \ 1420 { \ 1421 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \ 1422 target_long idx = index; \ 1423 \ 1424 if (idx < 0 || idx > maxidx) { \ 1425 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \ 1426 qemu_log_mask(LOG_GUEST_ERROR, \ 1427 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \ 1428 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \ 1429 } else { \ 1430 TYPE src = val; \ 1431 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \ 1432 } \ 1433 } 1434 VINSX(B, uint8_t) 1435 VINSX(H, uint16_t) 1436 VINSX(W, uint32_t) 1437 VINSX(D, uint64_t) 1438 #undef ELEM_ADDR 1439 #undef VINSX 1440 #if defined(HOST_WORDS_BIGENDIAN) 1441 #define VEXTDVLX(NAME, SIZE) \ 1442 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1443 target_ulong index) \ 1444 { \ 1445 const target_long idx = index; \ 1446 ppc_avr_t tmp[2] = { *a, *b }; \ 1447 memset(t, 0, sizeof(*t)); \ 1448 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1449 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \ 1450 } else { \ 1451 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1452 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1453 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \ 1454 } \ 1455 } 1456 #else 1457 #define VEXTDVLX(NAME, SIZE) \ 1458 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1459 target_ulong index) \ 1460 { \ 1461 const target_long idx = index; \ 1462 ppc_avr_t tmp[2] = { *b, *a }; \ 1463 memset(t, 0, sizeof(*t)); \ 1464 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1465 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \ 1466 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \ 1467 } else { \ 1468 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1469 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1470 env->nip, idx < 0 ? 
SIZE - idx : idx, 32 - SIZE); \ 1471 } \ 1472 } 1473 #endif 1474 VEXTDVLX(VEXTDUBVLX, 1) 1475 VEXTDVLX(VEXTDUHVLX, 2) 1476 VEXTDVLX(VEXTDUWVLX, 4) 1477 VEXTDVLX(VEXTDDVLX, 8) 1478 #undef VEXTDVLX 1479 #if defined(HOST_WORDS_BIGENDIAN) 1480 #define VEXTRACT(suffix, element) \ 1481 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1482 { \ 1483 uint32_t es = sizeof(r->element[0]); \ 1484 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1485 memset(&r->u8[8], 0, 8); \ 1486 memset(&r->u8[0], 0, 8 - es); \ 1487 } 1488 #else 1489 #define VEXTRACT(suffix, element) \ 1490 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1491 { \ 1492 uint32_t es = sizeof(r->element[0]); \ 1493 uint32_t s = (16 - index) - es; \ 1494 memmove(&r->u8[8], &b->u8[s], es); \ 1495 memset(&r->u8[0], 0, 8); \ 1496 memset(&r->u8[8 + es], 0, 8 - es); \ 1497 } 1498 #endif 1499 VEXTRACT(ub, u8) 1500 VEXTRACT(uh, u16) 1501 VEXTRACT(uw, u32) 1502 VEXTRACT(d, u64) 1503 #undef VEXTRACT 1504 1505 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \ 1506 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \ 1507 { \ 1508 int i, idx, crf = 0; \ 1509 \ 1510 for (i = 0; i < NUM_ELEMS; i++) { \ 1511 idx = LEFT ? i : NUM_ELEMS - i - 1; \ 1512 if (b->Vsr##ELEM(idx)) { \ 1513 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \ 1514 } else { \ 1515 crf = 0b0010; \ 1516 break; \ 1517 } \ 1518 } \ 1519 \ 1520 for (; i < NUM_ELEMS; i++) { \ 1521 idx = LEFT ? i : NUM_ELEMS - i - 1; \ 1522 t->Vsr##ELEM(idx) = 0; \ 1523 } \ 1524 \ 1525 return crf; \ 1526 } 1527 VSTRI(VSTRIBL, B, 16, true) 1528 VSTRI(VSTRIBR, B, 16, false) 1529 VSTRI(VSTRIHL, H, 8, true) 1530 VSTRI(VSTRIHR, H, 8, false) 1531 #undef VSTRI 1532 1533 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt, 1534 ppc_vsr_t *xb, uint32_t index) 1535 { 1536 ppc_vsr_t t = { }; 1537 size_t es = sizeof(uint32_t); 1538 uint32_t ext_index; 1539 int i; 1540 1541 ext_index = index; 1542 for (i = 0; i < es; i++, ext_index++) { 1543 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16); 1544 } 1545 1546 *xt = t; 1547 } 1548 1549 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt, 1550 ppc_vsr_t *xb, uint32_t index) 1551 { 1552 ppc_vsr_t t = *xt; 1553 size_t es = sizeof(uint32_t); 1554 int ins_index, i = 0; 1555 1556 ins_index = index; 1557 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1558 t.VsrB(ins_index) = xb->VsrB(8 - es + i); 1559 } 1560 1561 *xt = t; 1562 } 1563 1564 #define XXBLEND(name, sz) \ 1565 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1566 ppc_avr_t *c, uint32_t desc) \ 1567 { \ 1568 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \ 1569 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? 
\ 1570 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \ 1571 } \ 1572 } 1573 XXBLEND(B, 8) 1574 XXBLEND(H, 16) 1575 XXBLEND(W, 32) 1576 XXBLEND(D, 64) 1577 #undef XXBLEND 1578 1579 #define VNEG(name, element) \ 1580 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1581 { \ 1582 int i; \ 1583 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1584 r->element[i] = -b->element[i]; \ 1585 } \ 1586 } 1587 VNEG(vnegw, s32) 1588 VNEG(vnegd, s64) 1589 #undef VNEG 1590 1591 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1592 { 1593 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1594 1595 #if defined(HOST_WORDS_BIGENDIAN) 1596 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1597 memset(&r->u8[0], 0, sh); 1598 #else 1599 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1600 memset(&r->u8[16 - sh], 0, sh); 1601 #endif 1602 } 1603 1604 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1605 { 1606 int i; 1607 1608 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1609 r->u32[i] = a->u32[i] >= b->u32[i]; 1610 } 1611 } 1612 1613 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1614 { 1615 int64_t t; 1616 int i, upper; 1617 ppc_avr_t result; 1618 int sat = 0; 1619 1620 upper = ARRAY_SIZE(r->s32) - 1; 1621 t = (int64_t)b->VsrSW(upper); 1622 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1623 t += a->VsrSW(i); 1624 result.VsrSW(i) = 0; 1625 } 1626 result.VsrSW(upper) = cvtsdsw(t, &sat); 1627 *r = result; 1628 1629 if (sat) { 1630 set_vscr_sat(env); 1631 } 1632 } 1633 1634 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1635 { 1636 int i, j, upper; 1637 ppc_avr_t result; 1638 int sat = 0; 1639 1640 upper = 1; 1641 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1642 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 1643 1644 result.VsrD(i) = 0; 1645 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 1646 t += a->VsrSW(2 * i + j); 1647 } 1648 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 1649 } 1650 1651 *r = result; 1652 if (sat) { 1653 set_vscr_sat(env); 1654 } 1655 } 1656 1657 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1658 { 1659 int i, j; 1660 int sat = 0; 1661 1662 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1663 int64_t t = (int64_t)b->s32[i]; 1664 1665 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 1666 t += a->s8[4 * i + j]; 1667 } 1668 r->s32[i] = cvtsdsw(t, &sat); 1669 } 1670 1671 if (sat) { 1672 set_vscr_sat(env); 1673 } 1674 } 1675 1676 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1677 { 1678 int sat = 0; 1679 int i; 1680 1681 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1682 int64_t t = (int64_t)b->s32[i]; 1683 1684 t += a->s16[2 * i] + a->s16[2 * i + 1]; 1685 r->s32[i] = cvtsdsw(t, &sat); 1686 } 1687 1688 if (sat) { 1689 set_vscr_sat(env); 1690 } 1691 } 1692 1693 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1694 { 1695 int i, j; 1696 int sat = 0; 1697 1698 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1699 uint64_t t = (uint64_t)b->u32[i]; 1700 1701 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 1702 t += a->u8[4 * i + j]; 1703 } 1704 r->u32[i] = cvtuduw(t, &sat); 1705 } 1706 1707 if (sat) { 1708 set_vscr_sat(env); 1709 } 1710 } 1711 1712 #if defined(HOST_WORDS_BIGENDIAN) 1713 #define UPKHI 1 1714 #define UPKLO 0 1715 #else 1716 #define UPKHI 0 1717 #define UPKLO 1 1718 #endif 1719 #define VUPKPX(suffix, hi) \ 1720 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1721 { \ 1722 int i; \ 1723 ppc_avr_t result; \ 1724 \ 1725 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 
1726 uint16_t e = b->u16[hi ? i : i + 4]; \ 1727 uint8_t a = (e >> 15) ? 0xff : 0; \ 1728 uint8_t r = (e >> 10) & 0x1f; \ 1729 uint8_t g = (e >> 5) & 0x1f; \ 1730 uint8_t b = e & 0x1f; \ 1731 \ 1732 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 1733 } \ 1734 *r = result; \ 1735 } 1736 VUPKPX(lpx, UPKLO) 1737 VUPKPX(hpx, UPKHI) 1738 #undef VUPKPX 1739 1740 #define VUPK(suffix, unpacked, packee, hi) \ 1741 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1742 { \ 1743 int i; \ 1744 ppc_avr_t result; \ 1745 \ 1746 if (hi) { \ 1747 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 1748 result.unpacked[i] = b->packee[i]; \ 1749 } \ 1750 } else { \ 1751 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 1752 i++) { \ 1753 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 1754 } \ 1755 } \ 1756 *r = result; \ 1757 } 1758 VUPK(hsb, s16, s8, UPKHI) 1759 VUPK(hsh, s32, s16, UPKHI) 1760 VUPK(hsw, s64, s32, UPKHI) 1761 VUPK(lsb, s16, s8, UPKLO) 1762 VUPK(lsh, s32, s16, UPKLO) 1763 VUPK(lsw, s64, s32, UPKLO) 1764 #undef VUPK 1765 #undef UPKHI 1766 #undef UPKLO 1767 1768 #define VGENERIC_DO(name, element) \ 1769 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 1770 { \ 1771 int i; \ 1772 \ 1773 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1774 r->element[i] = name(b->element[i]); \ 1775 } \ 1776 } 1777 1778 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 1779 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 1780 1781 VGENERIC_DO(clzb, u8) 1782 VGENERIC_DO(clzh, u16) 1783 1784 #undef clzb 1785 #undef clzh 1786 1787 #define ctzb(v) ((v) ? ctz32(v) : 8) 1788 #define ctzh(v) ((v) ? ctz32(v) : 16) 1789 #define ctzw(v) ctz32((v)) 1790 #define ctzd(v) ctz64((v)) 1791 1792 VGENERIC_DO(ctzb, u8) 1793 VGENERIC_DO(ctzh, u16) 1794 VGENERIC_DO(ctzw, u32) 1795 VGENERIC_DO(ctzd, u64) 1796 1797 #undef ctzb 1798 #undef ctzh 1799 #undef ctzw 1800 #undef ctzd 1801 1802 #define popcntb(v) ctpop8(v) 1803 #define popcnth(v) ctpop16(v) 1804 #define popcntw(v) ctpop32(v) 1805 #define popcntd(v) ctpop64(v) 1806 1807 VGENERIC_DO(popcntb, u8) 1808 VGENERIC_DO(popcnth, u16) 1809 VGENERIC_DO(popcntw, u32) 1810 VGENERIC_DO(popcntd, u64) 1811 1812 #undef popcntb 1813 #undef popcnth 1814 #undef popcntw 1815 #undef popcntd 1816 1817 #undef VGENERIC_DO 1818 1819 #if defined(HOST_WORDS_BIGENDIAN) 1820 #define QW_ONE { .u64 = { 0, 1 } } 1821 #else 1822 #define QW_ONE { .u64 = { 1, 0 } } 1823 #endif 1824 1825 #ifndef CONFIG_INT128 1826 1827 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) 1828 { 1829 t->u64[0] = ~a.u64[0]; 1830 t->u64[1] = ~a.u64[1]; 1831 } 1832 1833 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) 1834 { 1835 if (a.VsrD(0) < b.VsrD(0)) { 1836 return -1; 1837 } else if (a.VsrD(0) > b.VsrD(0)) { 1838 return 1; 1839 } else if (a.VsrD(1) < b.VsrD(1)) { 1840 return -1; 1841 } else if (a.VsrD(1) > b.VsrD(1)) { 1842 return 1; 1843 } else { 1844 return 0; 1845 } 1846 } 1847 1848 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 1849 { 1850 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 1851 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 1852 (~a.VsrD(1) < b.VsrD(1)); 1853 } 1854 1855 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 1856 { 1857 ppc_avr_t not_a; 1858 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 1859 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 1860 (~a.VsrD(1) < b.VsrD(1)); 1861 avr_qw_not(¬_a, a); 1862 return avr_qw_cmpu(not_a, b) < 0; 1863 } 1864 1865 #endif 1866 1867 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1868 { 1869 #ifdef 
CONFIG_INT128 1870 r->u128 = a->u128 + b->u128; 1871 #else 1872 avr_qw_add(r, *a, *b); 1873 #endif 1874 } 1875 1876 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1877 { 1878 #ifdef CONFIG_INT128 1879 r->u128 = a->u128 + b->u128 + (c->u128 & 1); 1880 #else 1881 1882 if (c->VsrD(1) & 1) { 1883 ppc_avr_t tmp; 1884 1885 tmp.VsrD(0) = 0; 1886 tmp.VsrD(1) = c->VsrD(1) & 1; 1887 avr_qw_add(&tmp, *a, tmp); 1888 avr_qw_add(r, tmp, *b); 1889 } else { 1890 avr_qw_add(r, *a, *b); 1891 } 1892 #endif 1893 } 1894 1895 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1896 { 1897 #ifdef CONFIG_INT128 1898 r->u128 = (~a->u128 < b->u128); 1899 #else 1900 ppc_avr_t not_a; 1901 1902 avr_qw_not(¬_a, *a); 1903 1904 r->VsrD(0) = 0; 1905 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0); 1906 #endif 1907 } 1908 1909 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1910 { 1911 #ifdef CONFIG_INT128 1912 int carry_out = (~a->u128 < b->u128); 1913 if (!carry_out && (c->u128 & 1)) { 1914 carry_out = ((a->u128 + b->u128 + 1) == 0) && 1915 ((a->u128 != 0) || (b->u128 != 0)); 1916 } 1917 r->u128 = carry_out; 1918 #else 1919 1920 int carry_in = c->VsrD(1) & 1; 1921 int carry_out = 0; 1922 ppc_avr_t tmp; 1923 1924 carry_out = avr_qw_addc(&tmp, *a, *b); 1925 1926 if (!carry_out && carry_in) { 1927 ppc_avr_t one = QW_ONE; 1928 carry_out = avr_qw_addc(&tmp, tmp, one); 1929 } 1930 r->VsrD(0) = 0; 1931 r->VsrD(1) = carry_out; 1932 #endif 1933 } 1934 1935 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1936 { 1937 #ifdef CONFIG_INT128 1938 r->u128 = a->u128 - b->u128; 1939 #else 1940 ppc_avr_t tmp; 1941 ppc_avr_t one = QW_ONE; 1942 1943 avr_qw_not(&tmp, *b); 1944 avr_qw_add(&tmp, *a, tmp); 1945 avr_qw_add(r, tmp, one); 1946 #endif 1947 } 1948 1949 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1950 { 1951 #ifdef CONFIG_INT128 1952 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); 1953 #else 1954 ppc_avr_t tmp, sum; 1955 1956 avr_qw_not(&tmp, *b); 1957 avr_qw_add(&sum, *a, tmp); 1958 1959 tmp.VsrD(0) = 0; 1960 tmp.VsrD(1) = c->VsrD(1) & 1; 1961 avr_qw_add(r, sum, tmp); 1962 #endif 1963 } 1964 1965 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1966 { 1967 #ifdef CONFIG_INT128 1968 r->u128 = (~a->u128 < ~b->u128) || 1969 (a->u128 + ~b->u128 == (__uint128_t)-1); 1970 #else 1971 int carry = (avr_qw_cmpu(*a, *b) > 0); 1972 if (!carry) { 1973 ppc_avr_t tmp; 1974 avr_qw_not(&tmp, *b); 1975 avr_qw_add(&tmp, *a, tmp); 1976 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull)); 1977 } 1978 r->VsrD(0) = 0; 1979 r->VsrD(1) = carry; 1980 #endif 1981 } 1982 1983 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1984 { 1985 #ifdef CONFIG_INT128 1986 r->u128 = 1987 (~a->u128 < ~b->u128) || 1988 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); 1989 #else 1990 int carry_in = c->VsrD(1) & 1; 1991 int carry_out = (avr_qw_cmpu(*a, *b) > 0); 1992 if (!carry_out && carry_in) { 1993 ppc_avr_t tmp; 1994 avr_qw_not(&tmp, *b); 1995 avr_qw_add(&tmp, *a, tmp); 1996 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull)); 1997 } 1998 1999 r->VsrD(0) = 0; 2000 r->VsrD(1) = carry_out; 2001 #endif 2002 } 2003 2004 #define BCD_PLUS_PREF_1 0xC 2005 #define BCD_PLUS_PREF_2 0xF 2006 #define BCD_PLUS_ALT_1 0xA 2007 #define BCD_NEG_PREF 0xD 2008 #define BCD_NEG_ALT 0xB 2009 #define BCD_PLUS_ALT_2 0xE 2010 #define NATIONAL_PLUS 0x2B 2011 #define NATIONAL_NEG 0x2D 2012 2013 #define BCD_DIG_BYTE(n) (15 
- ((n) / 2)) 2014 2015 static int bcd_get_sgn(ppc_avr_t *bcd) 2016 { 2017 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) { 2018 case BCD_PLUS_PREF_1: 2019 case BCD_PLUS_PREF_2: 2020 case BCD_PLUS_ALT_1: 2021 case BCD_PLUS_ALT_2: 2022 { 2023 return 1; 2024 } 2025 2026 case BCD_NEG_PREF: 2027 case BCD_NEG_ALT: 2028 { 2029 return -1; 2030 } 2031 2032 default: 2033 { 2034 return 0; 2035 } 2036 } 2037 } 2038 2039 static int bcd_preferred_sgn(int sgn, int ps) 2040 { 2041 if (sgn >= 0) { 2042 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2043 } else { 2044 return BCD_NEG_PREF; 2045 } 2046 } 2047 2048 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2049 { 2050 uint8_t result; 2051 if (n & 1) { 2052 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4; 2053 } else { 2054 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF; 2055 } 2056 2057 if (unlikely(result > 9)) { 2058 *invalid = true; 2059 } 2060 return result; 2061 } 2062 2063 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2064 { 2065 if (n & 1) { 2066 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F; 2067 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4); 2068 } else { 2069 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0; 2070 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit; 2071 } 2072 } 2073 2074 static bool bcd_is_valid(ppc_avr_t *bcd) 2075 { 2076 int i; 2077 int invalid = 0; 2078 2079 if (bcd_get_sgn(bcd) == 0) { 2080 return false; 2081 } 2082 2083 for (i = 1; i < 32; i++) { 2084 bcd_get_digit(bcd, i, &invalid); 2085 if (unlikely(invalid)) { 2086 return false; 2087 } 2088 } 2089 return true; 2090 } 2091 2092 static int bcd_cmp_zero(ppc_avr_t *bcd) 2093 { 2094 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) { 2095 return CRF_EQ; 2096 } else { 2097 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT; 2098 } 2099 } 2100 2101 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2102 { 2103 return reg->VsrH(7 - n); 2104 } 2105 2106 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2107 { 2108 reg->VsrH(7 - n) = val; 2109 } 2110 2111 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2112 { 2113 int i; 2114 int invalid = 0; 2115 for (i = 31; i > 0; i--) { 2116 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2117 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2118 if (unlikely(invalid)) { 2119 return 0; /* doesn't matter */ 2120 } else if (dig_a > dig_b) { 2121 return 1; 2122 } else if (dig_a < dig_b) { 2123 return -1; 2124 } 2125 } 2126 2127 return 0; 2128 } 2129 2130 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2131 int *overflow) 2132 { 2133 int carry = 0; 2134 int i; 2135 int is_zero = 1; 2136 2137 for (i = 1; i <= 31; i++) { 2138 uint8_t digit = bcd_get_digit(a, i, invalid) + 2139 bcd_get_digit(b, i, invalid) + carry; 2140 is_zero &= (digit == 0); 2141 if (digit > 9) { 2142 carry = 1; 2143 digit -= 10; 2144 } else { 2145 carry = 0; 2146 } 2147 2148 bcd_put_digit(t, digit, i); 2149 } 2150 2151 *overflow = carry; 2152 return is_zero; 2153 } 2154 2155 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2156 int *overflow) 2157 { 2158 int carry = 0; 2159 int i; 2160 2161 for (i = 1; i <= 31; i++) { 2162 uint8_t digit = bcd_get_digit(a, i, invalid) - 2163 bcd_get_digit(b, i, invalid) + carry; 2164 if (digit & 0x80) { 2165 carry = -1; 2166 digit += 10; 2167 } else { 2168 carry = 0; 2169 } 2170 2171 bcd_put_digit(t, digit, i); 2172 } 2173 2174 *overflow = carry; 2175 } 2176 2177 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2178 { 2179 2180 int sgna = 
bcd_get_sgn(a); 2181 int sgnb = bcd_get_sgn(b); 2182 int invalid = (sgna == 0) || (sgnb == 0); 2183 int overflow = 0; 2184 int zero = 0; 2185 uint32_t cr = 0; 2186 ppc_avr_t result = { .u64 = { 0, 0 } }; 2187 2188 if (!invalid) { 2189 if (sgna == sgnb) { 2190 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2191 zero = bcd_add_mag(&result, a, b, &invalid, &overflow); 2192 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2193 } else { 2194 int magnitude = bcd_cmp_mag(a, b); 2195 if (magnitude > 0) { 2196 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2197 bcd_sub_mag(&result, a, b, &invalid, &overflow); 2198 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2199 } else if (magnitude < 0) { 2200 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps); 2201 bcd_sub_mag(&result, b, a, &invalid, &overflow); 2202 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2203 } else { 2204 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps); 2205 cr = CRF_EQ; 2206 } 2207 } 2208 } 2209 2210 if (unlikely(invalid)) { 2211 result.VsrD(0) = result.VsrD(1) = -1; 2212 cr = CRF_SO; 2213 } else if (overflow) { 2214 cr |= CRF_SO; 2215 } else if (zero) { 2216 cr |= CRF_EQ; 2217 } 2218 2219 *r = result; 2220 2221 return cr; 2222 } 2223 2224 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2225 { 2226 ppc_avr_t bcopy = *b; 2227 int sgnb = bcd_get_sgn(b); 2228 if (sgnb < 0) { 2229 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2230 } else if (sgnb > 0) { 2231 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2232 } 2233 /* else invalid ... defer to bcdadd code for proper handling */ 2234 2235 return helper_bcdadd(r, a, &bcopy, ps); 2236 } 2237 2238 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2239 { 2240 int i; 2241 int cr = 0; 2242 uint16_t national = 0; 2243 uint16_t sgnb = get_national_digit(b, 0); 2244 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2245 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2246 2247 for (i = 1; i < 8; i++) { 2248 national = get_national_digit(b, i); 2249 if (unlikely(national < 0x30 || national > 0x39)) { 2250 invalid = 1; 2251 break; 2252 } 2253 2254 bcd_put_digit(&ret, national & 0xf, i); 2255 } 2256 2257 if (sgnb == NATIONAL_PLUS) { 2258 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2259 } else { 2260 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2261 } 2262 2263 cr = bcd_cmp_zero(&ret); 2264 2265 if (unlikely(invalid)) { 2266 cr = CRF_SO; 2267 } 2268 2269 *r = ret; 2270 2271 return cr; 2272 } 2273 2274 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2275 { 2276 int i; 2277 int cr = 0; 2278 int sgnb = bcd_get_sgn(b); 2279 int invalid = (sgnb == 0); 2280 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2281 2282 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0); 2283 2284 for (i = 1; i < 8; i++) { 2285 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2286 2287 if (unlikely(invalid)) { 2288 break; 2289 } 2290 } 2291 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2292 2293 cr = bcd_cmp_zero(b); 2294 2295 if (ox_flag) { 2296 cr |= CRF_SO; 2297 } 2298 2299 if (unlikely(invalid)) { 2300 cr = CRF_SO; 2301 } 2302 2303 *r = ret; 2304 2305 return cr; 2306 } 2307 2308 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2309 { 2310 int i; 2311 int cr = 0; 2312 int invalid = 0; 2313 int zone_digit = 0; 2314 int zone_lead = ps ? 
uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
        digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
            (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->VsrD(0) >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

/**
 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
 *
 * Returns:
 *  > 0 if ahi|alo > bhi|blo,
 *    0 if ahi|alo == bhi|blo,
 *  < 0 if ahi|alo < bhi|blo
 */
static inline int ucmp128(uint64_t alo, uint64_t ahi,
                          uint64_t blo, uint64_t bhi)
{
    return (ahi == bhi) ?
        (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
        (ahi > bhi ? 1 : -1);
}
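/*
 * bcdcfsq converts a signed quadword integer into 31 BCD digits.  One
 * explanatory note on the digit extraction below: after dividing |src| by
 * 10^15, the remainder has at most 15 decimal digits and the quotient at
 * most 16 (|src| <= 10^31 - 1, so the quotient fits in a uint64_t), which
 * is why digits 1..15 are peeled off the remainder and digits 16..31 off
 * the quotient using plain 64-bit arithmetic.
 */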
uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr;
    uint64_t lo_value;
    uint64_t hi_value;
    uint64_t rem;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->VsrSD(0) < 0) {
        lo_value = -b->VsrSD(1);
        hi_value = ~b->VsrD(0) + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);

        cr = CRF_LT;
    } else {
        lo_value = b->VsrD(1);
        hi_value = b->VsrD(0);
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);

        if (hi_value == 0 && lo_value == 0) {
            cr = CRF_EQ;
        } else {
            cr = CRF_GT;
        }
    }

    /*
     * Check src limits: abs(src) <= 10^31 - 1
     *
     * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
     */
    if (ucmp128(lo_value, hi_value,
                0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
        cr |= CRF_SO;

        /*
         * According to the ISA, if src wouldn't fit in the destination
         * register, the result is undefined.
         * In that case, we leave r unchanged.
         */
    } else {
        rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);

        for (i = 1; i < 16; rem /= 10, i++) {
            bcd_put_digit(&ret, rem % 10, i);
        }

        for (; i < 32; lo_value /= 10, i++) {
            bcd_put_digit(&ret, lo_value % 10, i);
        }

        *r = ret;
    }

    return cr;
}

uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->VsrSD(1) = -lo_value;
        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
    } else {
        r->VsrSD(1) = lo_value;
        r->VsrSD(0) = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);

    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i = a->VsrSB(7);
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSB(7);
    if (i >= 32) {
        ox_flag = true;
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i <= -32) {
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}
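/*
 * bcdsr is the rounding variant of the shift helpers above.  Sketch of the
 * rounding step as implemented below: on a right shift the most significant
 * digit shifted out lands in digit position 0, so when that digit is 5 or
 * more the constant bcd_one (a single BCD digit 1 in position 1, i.e. the
 * 128-bit value 0x10) is added to the shifted magnitude before the
 * preferred sign nibble is written back.
 */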
uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    int i = a->VsrSB(7);
    ppc_avr_t bcd_one;

    bcd_one.VsrD(0) = 0;
    bcd_one.VsrD(1) = 0x10;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);

        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}

uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
    int i = a->VsrSH(3) + 1;
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i >= 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}

uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSH(3);
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    } else if (i == 0) {
        if (ret.VsrD(0) || ret.VsrD(1)) {
            ox_flag = CRF_SO;
        }
        ret.VsrD(0) = ret.VsrD(1) = 0;
    }

    *r = ret;
    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}

void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}
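/*
 * vcipher below performs one AES encryption round in the usual T-table
 * formulation (an explanatory sketch, assuming the standard meaning of the
 * crypto/aes.h tables): AES_shifts applies ShiftRows to the byte indices,
 * the AES_Te0..AES_Te3 lookups combine SubBytes with MixColumns for each
 * column, and the XOR with b is AddRoundKey.  vcipherlast drops MixColumns
 * and uses AES_sbox directly.
 */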
void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.VsrW(i) = b->VsrW(i) ^
            (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}

void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
    }
    *r = result;
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /*
     * This differs from what is written in ISA V2.07.  The RTL is
     * incorrect and will be fixed in V2.07B.
     */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->VsrW(i) =
            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
            AES_imc[tmp.VsrB(4 * i + 3)][3];
    }
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
    }
    *r = result;
}

void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}

void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
        (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}
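/*
 * brinc computes a bit-reversed increment, typically used to generate
 * FFT-style addresses.  Sketch of the trick below: the masked low bits of
 * arg1 are bit-reversed with word_reverse(), incremented, and reversed
 * back; OR-ing with ~b before the increment lets the carry ripple straight
 * through the bit positions that arg2's mask does not select, and the
 * final "& b" clears them again.
 */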
#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}
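/*
 * Worked example for helper_dlmzb above (derived from the code, not quoted
 * from the 440 documentation): with high = 0x41424300 ("ABC\0") the first
 * zero byte is the fourth byte of the eight-byte string, so the helper
 * returns 4, writes 4 into the low seven bits of XER and, when update_Rc is
 * set, reports 0x4 (plus the SO bit) in CR0 because the zero byte was found
 * in the high word.
 */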