1 /* 2 * PowerPC integer and vector emulation helpers for QEMU. 3 * 4 * Copyright (c) 2003-2007 Jocelyn Mayer 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "cpu.h" 22 #include "internal.h" 23 #include "qemu/host-utils.h" 24 #include "qemu/main-loop.h" 25 #include "qemu/log.h" 26 #include "exec/helper-proto.h" 27 #include "crypto/aes.h" 28 #include "fpu/softfloat.h" 29 #include "qapi/error.h" 30 #include "qemu/guest-random.h" 31 32 #include "helper_regs.h" 33 /*****************************************************************************/ 34 /* Fixed point operations helpers */ 35 36 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov) 37 { 38 if (unlikely(ov)) { 39 env->so = env->ov = 1; 40 } else { 41 env->ov = 0; 42 } 43 } 44 45 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, 46 uint32_t oe) 47 { 48 uint64_t rt = 0; 49 int overflow = 0; 50 51 uint64_t dividend = (uint64_t)ra << 32; 52 uint64_t divisor = (uint32_t)rb; 53 54 if (unlikely(divisor == 0)) { 55 overflow = 1; 56 } else { 57 rt = dividend / divisor; 58 overflow = rt > UINT32_MAX; 59 } 60 61 if (unlikely(overflow)) { 62 rt = 0; /* Undefined */ 63 } 64 65 if (oe) { 66 helper_update_ov_legacy(env, overflow); 67 } 68 69 return (target_ulong)rt; 70 } 71 72 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, 73 uint32_t oe) 74 { 75 int64_t rt = 0; 76 int overflow = 0; 77 78 int64_t dividend = (int64_t)ra << 32; 79 int64_t divisor = (int64_t)((int32_t)rb); 80 81 if (unlikely((divisor == 0) || 82 ((divisor == -1ull) && (dividend == INT64_MIN)))) { 83 overflow = 1; 84 } else { 85 rt = dividend / divisor; 86 overflow = rt != (int32_t)rt; 87 } 88 89 if (unlikely(overflow)) { 90 rt = 0; /* Undefined */ 91 } 92 93 if (oe) { 94 helper_update_ov_legacy(env, overflow); 95 } 96 97 return (target_ulong)rt; 98 } 99 100 #if defined(TARGET_PPC64) 101 102 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) 103 { 104 uint64_t rt = 0; 105 int overflow = 0; 106 107 if (unlikely(rb == 0 || ra >= rb)) { 108 overflow = 1; 109 rt = 0; /* Undefined */ 110 } else { 111 divu128(&rt, &ra, rb); 112 } 113 114 if (oe) { 115 helper_update_ov_legacy(env, overflow); 116 } 117 118 return rt; 119 } 120 121 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) 122 { 123 uint64_t rt = 0; 124 int64_t ra = (int64_t)rau; 125 int64_t rb = (int64_t)rbu; 126 int overflow = 0; 127 128 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) { 129 overflow = 1; 130 rt = 0; /* Undefined */ 131 } else { 132 divs128(&rt, &ra, rb); 133 } 134 135 if (oe) { 136 helper_update_ov_legacy(env, overflow); 137 } 138 139 return rt; 140 } 141 142 #endif 143 144 145 #if defined(TARGET_PPC64) 146 /* if x = 0xab, returns 0xababababababababa */ 147 #define pattern(x) (((x) & 0xff) * 
(~(target_ulong)0 / 0xff)) 148 149 /* 150 * subtract 1 from each byte, and with inverse, check if MSB is set at each 151 * byte. 152 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80 153 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 154 */ 155 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 156 157 /* When you XOR the pattern and there is a match, that byte will be zero */ 158 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 159 160 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 161 { 162 return hasvalue(rb, ra) ? CRF_GT : 0; 163 } 164 165 #undef pattern 166 #undef haszero 167 #undef hasvalue 168 169 /* 170 * Return a random number. 171 */ 172 uint64_t helper_darn32(void) 173 { 174 Error *err = NULL; 175 uint32_t ret; 176 177 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 178 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 179 error_get_pretty(err)); 180 error_free(err); 181 return -1; 182 } 183 184 return ret; 185 } 186 187 uint64_t helper_darn64(void) 188 { 189 Error *err = NULL; 190 uint64_t ret; 191 192 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 193 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 194 error_get_pretty(err)); 195 error_free(err); 196 return -1; 197 } 198 199 return ret; 200 } 201 202 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 203 { 204 int i; 205 uint64_t ra = 0; 206 207 for (i = 0; i < 8; i++) { 208 int index = (rs >> (i * 8)) & 0xFF; 209 if (index < 64) { 210 if (rb & PPC_BIT(index)) { 211 ra |= 1 << i; 212 } 213 } 214 } 215 return ra; 216 } 217 218 #endif 219 220 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 221 { 222 target_ulong mask = 0xff; 223 target_ulong ra = 0; 224 int i; 225 226 for (i = 0; i < sizeof(target_ulong); i++) { 227 if ((rs & mask) == (rb & mask)) { 228 ra |= mask; 229 } 230 mask <<= 8; 231 } 232 return ra; 233 } 234 235 /* shift right arithmetic helper */ 236 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 237 target_ulong shift) 238 { 239 int32_t ret; 240 241 if (likely(!(shift & 0x20))) { 242 if (likely((uint32_t)shift != 0)) { 243 shift &= 0x1f; 244 ret = (int32_t)value >> shift; 245 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 246 env->ca32 = env->ca = 0; 247 } else { 248 env->ca32 = env->ca = 1; 249 } 250 } else { 251 ret = (int32_t)value; 252 env->ca32 = env->ca = 0; 253 } 254 } else { 255 ret = (int32_t)value >> 31; 256 env->ca32 = env->ca = (ret != 0); 257 } 258 return (target_long)ret; 259 } 260 261 #if defined(TARGET_PPC64) 262 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 263 target_ulong shift) 264 { 265 int64_t ret; 266 267 if (likely(!(shift & 0x40))) { 268 if (likely((uint64_t)shift != 0)) { 269 shift &= 0x3f; 270 ret = (int64_t)value >> shift; 271 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 272 env->ca32 = env->ca = 0; 273 } else { 274 env->ca32 = env->ca = 1; 275 } 276 } else { 277 ret = (int64_t)value; 278 env->ca32 = env->ca = 0; 279 } 280 } else { 281 ret = (int64_t)value >> 63; 282 env->ca32 = env->ca = (ret != 0); 283 } 284 return ret; 285 } 286 #endif 287 288 #if defined(TARGET_PPC64) 289 target_ulong helper_popcntb(target_ulong val) 290 { 291 /* Note that we don't fold past bytes */ 292 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 293 0x5555555555555555ULL); 294 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 295 0x3333333333333333ULL); 296 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 297 0x0f0f0f0f0f0f0f0fULL); 298 return val; 299 } 300 301 target_ulong 
helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words. */
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >>  8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
    val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
    return val;
}
#endif

uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
{
    /*
     * Instead of processing the mask bit-by-bit from the most significant to
     * the least significant bit, as described in PowerISA, we'll handle it in
     * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
     * ctz or cto, we negate the mask at the end of the loop.
     */
    target_ulong m, left = 0, right = 0;
    unsigned int n, i = 64;
    bool bit = false;    /* tracks if we are processing zeros or ones */

    if (mask == 0 || mask == -1) {
        return src;
    }

    /* Processes the mask in blocks, from LSB to MSB */
    while (i) {
        /* Find how many bits we should take */
        n = ctz64(mask);
        if (n > i) {
            n = i;
        }

        /*
         * Extracts 'n' trailing bits of src and puts them on the leading 'n'
         * bits of 'right' or 'left', pushing down the previously extracted
         * values.
         */
        m = (1ll << n) - 1;
        if (bit) {
            right = ror64(right | (src & m), n);
        } else {
            left = ror64(left | (src & m), n);
        }

        /*
         * Discards the processed bits from 'src' and 'mask'. Note that we are
         * removing 'n' trailing zeros from 'mask', but the logical shift will
         * add 'n' leading zeros back, so the population count of 'mask' is
         * kept the same.
         */
        src >>= n;
        mask >>= n;
        i -= n;
        bit = !bit;
        mask = ~mask;
    }

    /*
     * At the end, 'right' was rotated right a total of ctpop(mask) times. To
     * put it back in place, we shift it right 64 - ctpop(mask) more times.
     */
    if (bit) {
        n = ctpop64(mask);
    } else {
        n = 64 - ctpop64(mask);
    }

    return left | (right >> n);
}
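
/*
 * Illustrative note (values chosen arbitrarily, not from the ISA text): the
 * centrifuge operation above gathers the bits of 'src' selected by 1-bits of
 * 'mask' into the low-order end of the result, and the bits selected by
 * 0-bits into the high-order end, each group keeping its original order.
 * For example, with
 *     src  = 0x0123456789ABCDEF
 *     mask = 0xF0F0F0F0F0F0F0F0
 * the expected result is 0x13579BDF02468ACE, i.e. the equivalent of
 *     (pext(src, ~mask) << ctpop(mask)) | pext(src, mask)
 * in terms of the parallel bit extract helper below.
 */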

uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
{
    int i, o;
    uint64_t result = 0;

    if (mask == -1) {
        return src;
    }

    for (i = 0; mask != 0; i++) {
        o = ctz64(mask);
        mask &= mask - 1;
        result |= ((src >> i) & 1) << o;
    }

    return result;
}

uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
{
    int i, o;
    uint64_t result = 0;

    if (mask == -1) {
        return src;
    }

    for (o = 0; mask != 0; o++) {
        i = ctz64(mask);
        mask &= mask - 1;
        result |= ((src >> i) & 1) << o;
    }

    return result;
}
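
/*
 * Illustrative note (values chosen arbitrarily): the two helpers above are
 * parallel bit deposit/extract operations.  With
 *     mask = 0x000000000000FF00
 * helper_PEXTD(0x0000000000123456, mask) gathers the source bits that sit
 * under the 1-bits of mask into the low-order bits, giving 0x34, while
 * helper_PDEPD(0x0000000000000034, mask) scatters the low-order source bits
 * back under the 1-bits of mask, giving 0x3400.
 */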
*/ 491 env->vscr_sat.u32[0] = 1; 492 } 493 494 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 495 { 496 int i; 497 498 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 499 r->u32[i] = ~a->u32[i] < b->u32[i]; 500 } 501 } 502 503 /* vprtybw */ 504 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b) 505 { 506 int i; 507 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 508 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16); 509 res ^= res >> 8; 510 r->u32[i] = res & 1; 511 } 512 } 513 514 /* vprtybd */ 515 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b) 516 { 517 int i; 518 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 519 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32); 520 res ^= res >> 16; 521 res ^= res >> 8; 522 r->u64[i] = res & 1; 523 } 524 } 525 526 /* vprtybq */ 527 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b) 528 { 529 uint64_t res = b->u64[0] ^ b->u64[1]; 530 res ^= res >> 32; 531 res ^= res >> 16; 532 res ^= res >> 8; 533 r->VsrD(1) = res & 1; 534 r->VsrD(0) = 0; 535 } 536 537 #define VARITHFP(suffix, func) \ 538 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 539 ppc_avr_t *b) \ 540 { \ 541 int i; \ 542 \ 543 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 544 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 545 } \ 546 } 547 VARITHFP(addfp, float32_add) 548 VARITHFP(subfp, float32_sub) 549 VARITHFP(minfp, float32_min) 550 VARITHFP(maxfp, float32_max) 551 #undef VARITHFP 552 553 #define VARITHFPFMA(suffix, type) \ 554 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 555 ppc_avr_t *b, ppc_avr_t *c) \ 556 { \ 557 int i; \ 558 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 559 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 560 type, &env->vec_status); \ 561 } \ 562 } 563 VARITHFPFMA(maddfp, 0); 564 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 565 #undef VARITHFPFMA 566 567 #define VARITHSAT_CASE(type, op, cvt, element) \ 568 { \ 569 type result = (type)a->element[i] op (type)b->element[i]; \ 570 r->element[i] = cvt(result, &sat); \ 571 } 572 573 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 574 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 575 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 576 { \ 577 int sat = 0; \ 578 int i; \ 579 \ 580 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 581 VARITHSAT_CASE(optype, op, cvt, element); \ 582 } \ 583 if (sat) { \ 584 vscr_sat->u32[0] = 1; \ 585 } \ 586 } 587 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 588 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 589 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 590 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 591 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 592 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 593 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 594 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 595 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 596 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 597 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 598 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 599 #undef VARITHSAT_CASE 600 #undef VARITHSAT_DO 601 #undef VARITHSAT_SIGNED 602 #undef VARITHSAT_UNSIGNED 603 604 #define VAVG_DO(name, element, etype) \ 605 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 606 { \ 607 int i; \ 608 \ 609 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 610 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 611 r->element[i] = x >> 1; \ 612 } \ 613 } 614 615 #define VAVG(type, 
signed_element, signed_type, unsigned_element, \ 616 unsigned_type) \ 617 VAVG_DO(avgs##type, signed_element, signed_type) \ 618 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 619 VAVG(b, s8, int16_t, u8, uint16_t) 620 VAVG(h, s16, int32_t, u16, uint32_t) 621 VAVG(w, s32, int64_t, u32, uint64_t) 622 #undef VAVG_DO 623 #undef VAVG 624 625 #define VABSDU_DO(name, element) \ 626 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 627 { \ 628 int i; \ 629 \ 630 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 631 r->element[i] = (a->element[i] > b->element[i]) ? \ 632 (a->element[i] - b->element[i]) : \ 633 (b->element[i] - a->element[i]); \ 634 } \ 635 } 636 637 /* 638 * VABSDU - Vector absolute difference unsigned 639 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 640 * element - element type to access from vector 641 */ 642 #define VABSDU(type, element) \ 643 VABSDU_DO(absdu##type, element) 644 VABSDU(b, u8) 645 VABSDU(h, u16) 646 VABSDU(w, u32) 647 #undef VABSDU_DO 648 #undef VABSDU 649 650 #define VCF(suffix, cvt, element) \ 651 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 652 ppc_avr_t *b, uint32_t uim) \ 653 { \ 654 int i; \ 655 \ 656 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 657 float32 t = cvt(b->element[i], &env->vec_status); \ 658 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 659 } \ 660 } 661 VCF(ux, uint32_to_float32, u32) 662 VCF(sx, int32_to_float32, s32) 663 #undef VCF 664 665 #define VCMPNEZ(NAME, ELEM) \ 666 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \ 667 { \ 668 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \ 669 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \ 670 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \ 671 } \ 672 } 673 VCMPNEZ(VCMPNEZB, u8) 674 VCMPNEZ(VCMPNEZH, u16) 675 VCMPNEZ(VCMPNEZW, u32) 676 #undef VCMPNEZ 677 678 #define VCMPFP_DO(suffix, compare, order, record) \ 679 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 680 ppc_avr_t *a, ppc_avr_t *b) \ 681 { \ 682 uint32_t ones = (uint32_t)-1; \ 683 uint32_t all = ones; \ 684 uint32_t none = 0; \ 685 int i; \ 686 \ 687 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 688 uint32_t result; \ 689 FloatRelation rel = \ 690 float32_compare_quiet(a->f32[i], b->f32[i], \ 691 &env->vec_status); \ 692 if (rel == float_relation_unordered) { \ 693 result = 0; \ 694 } else if (rel compare order) { \ 695 result = ones; \ 696 } else { \ 697 result = 0; \ 698 } \ 699 r->u32[i] = result; \ 700 all &= result; \ 701 none |= result; \ 702 } \ 703 if (record) { \ 704 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 705 } \ 706 } 707 #define VCMPFP(suffix, compare, order) \ 708 VCMPFP_DO(suffix, compare, order, 0) \ 709 VCMPFP_DO(suffix##_dot, compare, order, 1) 710 VCMPFP(eqfp, ==, float_relation_equal) 711 VCMPFP(gefp, !=, float_relation_less) 712 VCMPFP(gtfp, ==, float_relation_greater) 713 #undef VCMPFP_DO 714 #undef VCMPFP 715 716 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 717 ppc_avr_t *a, ppc_avr_t *b, int record) 718 { 719 int i; 720 int all_in = 0; 721 722 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 723 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 724 &env->vec_status); 725 if (le_rel == float_relation_unordered) { 726 r->u32[i] = 0xc0000000; 727 all_in = 1; 728 } else { 729 float32 bneg = float32_chs(b->f32[i]); 730 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, 731 &env->vec_status); 732 int le = le_rel != float_relation_greater; 733 int ge = 
ge_rel != float_relation_less; 734 735 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 736 all_in |= (!le | !ge); 737 } 738 } 739 if (record) { 740 env->crf[6] = (all_in == 0) << 1; 741 } 742 } 743 744 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 745 { 746 vcmpbfp_internal(env, r, a, b, 0); 747 } 748 749 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 750 ppc_avr_t *b) 751 { 752 vcmpbfp_internal(env, r, a, b, 1); 753 } 754 755 #define VCT(suffix, satcvt, element) \ 756 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 757 ppc_avr_t *b, uint32_t uim) \ 758 { \ 759 int i; \ 760 int sat = 0; \ 761 float_status s = env->vec_status; \ 762 \ 763 set_float_rounding_mode(float_round_to_zero, &s); \ 764 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 765 if (float32_is_any_nan(b->f32[i])) { \ 766 r->element[i] = 0; \ 767 } else { \ 768 float64 t = float32_to_float64(b->f32[i], &s); \ 769 int64_t j; \ 770 \ 771 t = float64_scalbn(t, uim, &s); \ 772 j = float64_to_int64(t, &s); \ 773 r->element[i] = satcvt(j, &sat); \ 774 } \ 775 } \ 776 if (sat) { \ 777 set_vscr_sat(env); \ 778 } \ 779 } 780 VCT(uxs, cvtsduw, u32) 781 VCT(sxs, cvtsdsw, s32) 782 #undef VCT 783 784 target_ulong helper_vclzlsbb(ppc_avr_t *r) 785 { 786 target_ulong count = 0; 787 int i; 788 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 789 if (r->VsrB(i) & 0x01) { 790 break; 791 } 792 count++; 793 } 794 return count; 795 } 796 797 target_ulong helper_vctzlsbb(ppc_avr_t *r) 798 { 799 target_ulong count = 0; 800 int i; 801 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 802 if (r->VsrB(i) & 0x01) { 803 break; 804 } 805 count++; 806 } 807 return count; 808 } 809 810 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 811 ppc_avr_t *b, ppc_avr_t *c) 812 { 813 int sat = 0; 814 int i; 815 816 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 817 int32_t prod = a->s16[i] * b->s16[i]; 818 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 819 820 r->s16[i] = cvtswsh(t, &sat); 821 } 822 823 if (sat) { 824 set_vscr_sat(env); 825 } 826 } 827 828 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 829 ppc_avr_t *b, ppc_avr_t *c) 830 { 831 int sat = 0; 832 int i; 833 834 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 835 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 836 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 837 r->s16[i] = cvtswsh(t, &sat); 838 } 839 840 if (sat) { 841 set_vscr_sat(env); 842 } 843 } 844 845 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 846 { 847 int i; 848 849 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 850 int32_t prod = a->s16[i] * b->s16[i]; 851 r->s16[i] = (int16_t) (prod + c->s16[i]); 852 } 853 } 854 855 #define VMRG_DO(name, element, access, ofs) \ 856 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 857 { \ 858 ppc_avr_t result; \ 859 int i, half = ARRAY_SIZE(r->element) / 2; \ 860 \ 861 for (i = 0; i < half; i++) { \ 862 result.access(i * 2 + 0) = a->access(i + ofs); \ 863 result.access(i * 2 + 1) = b->access(i + ofs); \ 864 } \ 865 *r = result; \ 866 } 867 868 #define VMRG(suffix, element, access) \ 869 VMRG_DO(mrgl##suffix, element, access, half) \ 870 VMRG_DO(mrgh##suffix, element, access, 0) 871 VMRG(b, u8, VsrB) 872 VMRG(h, u16, VsrH) 873 VMRG(w, u32, VsrW) 874 #undef VMRG_DO 875 #undef VMRG 876 877 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 878 ppc_avr_t *b, ppc_avr_t *c) 879 { 880 int32_t prod[16]; 881 int i; 882 883 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 884 
prod[i] = (int32_t)a->s8[i] * b->u8[i]; 885 } 886 887 VECTOR_FOR_INORDER_I(i, s32) { 888 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 889 prod[4 * i + 2] + prod[4 * i + 3]; 890 } 891 } 892 893 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 894 ppc_avr_t *b, ppc_avr_t *c) 895 { 896 int32_t prod[8]; 897 int i; 898 899 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 900 prod[i] = a->s16[i] * b->s16[i]; 901 } 902 903 VECTOR_FOR_INORDER_I(i, s32) { 904 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 905 } 906 } 907 908 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 909 ppc_avr_t *b, ppc_avr_t *c) 910 { 911 int32_t prod[8]; 912 int i; 913 int sat = 0; 914 915 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 916 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 917 } 918 919 VECTOR_FOR_INORDER_I(i, s32) { 920 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 921 922 r->u32[i] = cvtsdsw(t, &sat); 923 } 924 925 if (sat) { 926 set_vscr_sat(env); 927 } 928 } 929 930 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 931 ppc_avr_t *b, ppc_avr_t *c) 932 { 933 uint16_t prod[16]; 934 int i; 935 936 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 937 prod[i] = a->u8[i] * b->u8[i]; 938 } 939 940 VECTOR_FOR_INORDER_I(i, u32) { 941 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 942 prod[4 * i + 2] + prod[4 * i + 3]; 943 } 944 } 945 946 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 947 ppc_avr_t *b, ppc_avr_t *c) 948 { 949 uint32_t prod[8]; 950 int i; 951 952 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 953 prod[i] = a->u16[i] * b->u16[i]; 954 } 955 956 VECTOR_FOR_INORDER_I(i, u32) { 957 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 958 } 959 } 960 961 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 962 ppc_avr_t *b, ppc_avr_t *c) 963 { 964 uint32_t prod[8]; 965 int i; 966 int sat = 0; 967 968 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 969 prod[i] = a->u16[i] * b->u16[i]; 970 } 971 972 VECTOR_FOR_INORDER_I(i, s32) { 973 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 974 975 r->u32[i] = cvtuduw(t, &sat); 976 } 977 978 if (sat) { 979 set_vscr_sat(env); 980 } 981 } 982 983 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 984 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 985 { \ 986 int i; \ 987 \ 988 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 989 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 990 (cast)b->mul_access(i); \ 991 } \ 992 } 993 994 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 995 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 996 { \ 997 int i; \ 998 \ 999 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1000 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1001 (cast)b->mul_access(i + 1); \ 1002 } \ 1003 } 1004 1005 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1006 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \ 1007 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast) 1008 VMUL(SB, s8, VsrSB, VsrSH, int16_t) 1009 VMUL(SH, s16, VsrSH, VsrSW, int32_t) 1010 VMUL(SW, s32, VsrSW, VsrSD, int64_t) 1011 VMUL(UB, u8, VsrB, VsrH, uint16_t) 1012 VMUL(UH, u16, VsrH, VsrW, uint32_t) 1013 VMUL(UW, u32, VsrW, VsrD, uint64_t) 1014 #undef VMUL_DO_EVN 1015 #undef VMUL_DO_ODD 1016 #undef VMUL 1017 1018 void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv, 1019 target_ulong uim) 
1020 { 1021 int i, idx; 1022 ppc_vsr_t tmp = { .u64 = {0, 0} }; 1023 1024 for (i = 0; i < ARRAY_SIZE(t->u8); i++) { 1025 if ((pcv->VsrB(i) >> 5) == uim) { 1026 idx = pcv->VsrB(i) & 0x1f; 1027 if (idx < ARRAY_SIZE(t->u8)) { 1028 tmp.VsrB(i) = s0->VsrB(idx); 1029 } else { 1030 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8)); 1031 } 1032 } 1033 } 1034 1035 *t = tmp; 1036 } 1037 1038 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1039 { 1040 ppc_avr_t result; 1041 int i; 1042 1043 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1044 int s = c->VsrB(i) & 0x1f; 1045 int index = s & 0xf; 1046 1047 if (s & 0x10) { 1048 result.VsrB(i) = b->VsrB(index); 1049 } else { 1050 result.VsrB(i) = a->VsrB(index); 1051 } 1052 } 1053 *r = result; 1054 } 1055 1056 void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1057 { 1058 ppc_avr_t result; 1059 int i; 1060 1061 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1062 int s = c->VsrB(i) & 0x1f; 1063 int index = 15 - (s & 0xf); 1064 1065 if (s & 0x10) { 1066 result.VsrB(i) = a->VsrB(index); 1067 } else { 1068 result.VsrB(i) = b->VsrB(index); 1069 } 1070 } 1071 *r = result; 1072 } 1073 1074 #if defined(HOST_WORDS_BIGENDIAN) 1075 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1076 #define VBPERMD_INDEX(i) (i) 1077 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1078 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) 1079 #else 1080 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1081 #define VBPERMD_INDEX(i) (1 - i) 1082 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1083 #define EXTRACT_BIT(avr, i, index) \ 1084 (extract64((avr)->u64[1 - i], 63 - index, 1)) 1085 #endif 1086 1087 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1088 { 1089 int i, j; 1090 ppc_avr_t result = { .u64 = { 0, 0 } }; 1091 VECTOR_FOR_INORDER_I(i, u64) { 1092 for (j = 0; j < 8; j++) { 1093 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1094 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1095 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1096 } 1097 } 1098 } 1099 *r = result; 1100 } 1101 1102 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1103 { 1104 int i; 1105 uint64_t perm = 0; 1106 1107 VECTOR_FOR_INORDER_I(i, u8) { 1108 int index = VBPERMQ_INDEX(b, i); 1109 1110 if (index < 128) { 1111 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1112 if (a->u64[VBPERMQ_DW(index)] & mask) { 1113 perm |= (0x8000 >> i); 1114 } 1115 } 1116 } 1117 1118 r->VsrD(0) = perm; 1119 r->VsrD(1) = 0; 1120 } 1121 1122 #undef VBPERMQ_INDEX 1123 #undef VBPERMQ_DW 1124 1125 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1126 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1127 { \ 1128 int i, j; \ 1129 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ 1130 \ 1131 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1132 prod[i] = 0; \ 1133 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1134 if (a->srcfld[i] & (1ull << j)) { \ 1135 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1136 } \ 1137 } \ 1138 } \ 1139 \ 1140 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1141 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ 1142 } \ 1143 } 1144 1145 PMSUM(vpmsumb, u8, u16, uint16_t) 1146 PMSUM(vpmsumh, u16, u32, uint32_t) 1147 PMSUM(vpmsumw, u32, u64, uint64_t) 1148 1149 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1150 { 1151 1152 #ifdef CONFIG_INT128 1153 int i, j; 1154 __uint128_t prod[2]; 1155 1156 VECTOR_FOR_INORDER_I(i, u64) { 1157 prod[i] = 0; 1158 for (j = 0; j < 64; j++) { 1159 if (a->u64[i] & (1ull << j)) { 1160 
prod[i] ^= (((__uint128_t)b->u64[i]) << j); 1161 } 1162 } 1163 } 1164 1165 r->u128 = prod[0] ^ prod[1]; 1166 1167 #else 1168 int i, j; 1169 ppc_avr_t prod[2]; 1170 1171 VECTOR_FOR_INORDER_I(i, u64) { 1172 prod[i].VsrD(1) = prod[i].VsrD(0) = 0; 1173 for (j = 0; j < 64; j++) { 1174 if (a->u64[i] & (1ull << j)) { 1175 ppc_avr_t bshift; 1176 if (j == 0) { 1177 bshift.VsrD(0) = 0; 1178 bshift.VsrD(1) = b->u64[i]; 1179 } else { 1180 bshift.VsrD(0) = b->u64[i] >> (64 - j); 1181 bshift.VsrD(1) = b->u64[i] << j; 1182 } 1183 prod[i].VsrD(1) ^= bshift.VsrD(1); 1184 prod[i].VsrD(0) ^= bshift.VsrD(0); 1185 } 1186 } 1187 } 1188 1189 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1); 1190 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0); 1191 #endif 1192 } 1193 1194 1195 #if defined(HOST_WORDS_BIGENDIAN) 1196 #define PKBIG 1 1197 #else 1198 #define PKBIG 0 1199 #endif 1200 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1201 { 1202 int i, j; 1203 ppc_avr_t result; 1204 #if defined(HOST_WORDS_BIGENDIAN) 1205 const ppc_avr_t *x[2] = { a, b }; 1206 #else 1207 const ppc_avr_t *x[2] = { b, a }; 1208 #endif 1209 1210 VECTOR_FOR_INORDER_I(i, u64) { 1211 VECTOR_FOR_INORDER_I(j, u32) { 1212 uint32_t e = x[i]->u32[j]; 1213 1214 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1215 ((e >> 6) & 0x3e0) | 1216 ((e >> 3) & 0x1f)); 1217 } 1218 } 1219 *r = result; 1220 } 1221 1222 #define VPK(suffix, from, to, cvt, dosat) \ 1223 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1224 ppc_avr_t *a, ppc_avr_t *b) \ 1225 { \ 1226 int i; \ 1227 int sat = 0; \ 1228 ppc_avr_t result; \ 1229 ppc_avr_t *a0 = PKBIG ? a : b; \ 1230 ppc_avr_t *a1 = PKBIG ? b : a; \ 1231 \ 1232 VECTOR_FOR_INORDER_I(i, from) { \ 1233 result.to[i] = cvt(a0->from[i], &sat); \ 1234 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1235 } \ 1236 *r = result; \ 1237 if (dosat && sat) { \ 1238 set_vscr_sat(env); \ 1239 } \ 1240 } 1241 #define I(x, y) (x) 1242 VPK(shss, s16, s8, cvtshsb, 1) 1243 VPK(shus, s16, u8, cvtshub, 1) 1244 VPK(swss, s32, s16, cvtswsh, 1) 1245 VPK(swus, s32, u16, cvtswuh, 1) 1246 VPK(sdss, s64, s32, cvtsdsw, 1) 1247 VPK(sdus, s64, u32, cvtsduw, 1) 1248 VPK(uhus, u16, u8, cvtuhub, 1) 1249 VPK(uwus, u32, u16, cvtuwuh, 1) 1250 VPK(udus, u64, u32, cvtuduw, 1) 1251 VPK(uhum, u16, u8, I, 0) 1252 VPK(uwum, u32, u16, I, 0) 1253 VPK(udum, u64, u32, I, 0) 1254 #undef I 1255 #undef VPK 1256 #undef PKBIG 1257 1258 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1259 { 1260 int i; 1261 1262 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1263 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1264 } 1265 } 1266 1267 #define VRFI(suffix, rounding) \ 1268 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1269 ppc_avr_t *b) \ 1270 { \ 1271 int i; \ 1272 float_status s = env->vec_status; \ 1273 \ 1274 set_float_rounding_mode(rounding, &s); \ 1275 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1276 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1277 } \ 1278 } 1279 VRFI(n, float_round_nearest_even) 1280 VRFI(m, float_round_down) 1281 VRFI(p, float_round_up) 1282 VRFI(z, float_round_to_zero) 1283 #undef VRFI 1284 1285 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1286 { 1287 int i; 1288 1289 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1290 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1291 1292 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1293 } 1294 } 1295 1296 #define VRLMI(name, size, element, insert) \ 1297 void helper_##name(ppc_avr_t *r, 
                   ppc_avr_t *a, ppc_avr_t *b, uint32_t desc)           \
{                                                                       \
    int i;                                                              \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        uint##size##_t src1 = a->element[i];                            \
        uint##size##_t src2 = b->element[i];                            \
        uint##size##_t src3 = r->element[i];                            \
        uint##size##_t begin, end, shift, mask, rot_val;                \
                                                                        \
        shift = extract##size(src2, 0, 6);                              \
        end   = extract##size(src2, 8, 6);                              \
        begin = extract##size(src2, 16, 6);                             \
        rot_val = rol##size(src1, shift);                               \
        mask = mask_u##size(begin, end);                                \
        if (insert) {                                                   \
            r->element[i] = (rot_val & mask) | (src3 & ~mask);          \
        } else {                                                        \
            r->element[i] = (rot_val & mask);                           \
        }                                                               \
    }                                                                   \
}

VRLMI(VRLDMI, 64, u64, 1);
VRLMI(VRLWMI, 32, u32, 1);
VRLMI(VRLDNM, 64, u64, 0);
VRLMI(VRLWNM, 32, u32, 0);

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
    }
}

#define VEXTU_X_DO(name, size, left)                            \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
{                                                               \
    int index = (a & 0xf) * 8;                                  \
    if (left) {                                                 \
        index = 128 - index - size;                             \
    }                                                           \
    return int128_getlo(int128_rshift(b->s128, index)) &        \
        MAKE_64BIT_MASK(0, size);                               \
}
VEXTU_X_DO(vextublx,  8, 1)
VEXTU_X_DO(vextuhlx, 16, 1)
VEXTU_X_DO(vextuwlx, 32, 1)
VEXTU_X_DO(vextubrx,  8, 0)
VEXTU_X_DO(vextuhrx, 16, 0)
VEXTU_X_DO(vextuwrx, 32, 0)
#undef VEXTU_X_DO

void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->VsrB(i) & 0x7;               /* extract shift value */
        bytes = (a->VsrB(i) << 8) +             /* extract adjacent bytes */
                (((i + 1) < size) ? a->VsrB(i + 1) : 0);
        r->VsrB(i) = (bytes << shift) >> 8;     /* shift and store result */
    }
}
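
/*
 * Illustrative note (values chosen arbitrarily): in vslv above, each result
 * byte is the corresponding source byte shifted left, with the vacated bits
 * filled from the next lower-order byte.  E.g. with a->VsrB(0) = 0x12,
 * a->VsrB(1) = 0xAB and a shift count of 4 in b->VsrB(0), byte 0 of the
 * result is ((0x12AB << 4) >> 8) & 0xFF = 0x2A.  vsrv below is the mirror
 * image, shifting right and pulling bits in from the next higher-order byte.
 */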

void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /*
     * Use reverse order, as the destination and source registers can be
     * the same.  Because the result is written in place (saving a
     * temporary), walking in reverse order guarantees that a byte already
     * computed is never fed back into the computation.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->VsrB(i) & 0x7;               /* extract shift value */
        bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
                                                /* extract adjacent bytes */
        r->VsrB(i) = (bytes >> shift) & 0xFF;   /* shift and store result */
    }
}

void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.VsrB(i) = b->VsrB(index - 0x10);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

#if defined(HOST_WORDS_BIGENDIAN)
#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
#else
#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
#endif

#define VINSX(SUFFIX, TYPE) \
void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t,       \
                                         uint64_t val, target_ulong index)     \
{                                                                              \
    const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE);                       \
    target_long idx = index;                                                   \
                                                                               \
    if (idx < 0 || idx > maxidx) {                                             \
        idx = idx < 0 ? sizeof(TYPE) - idx : idx;                              \
        qemu_log_mask(LOG_GUEST_ERROR,                                         \
            "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx   \
            ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx);         \
    } else {                                                                   \
        TYPE src = val;                                                        \
        memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE));           \
    }                                                                          \
}
VINSX(B, uint8_t)
VINSX(H, uint16_t)
VINSX(W, uint32_t)
VINSX(D, uint64_t)
#undef ELEM_ADDR
#undef VINSX
#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTDVLX(NAME, SIZE) \
void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
                   target_ulong index)                                         \
{                                                                              \
    const target_long idx = index;                                             \
    ppc_avr_t tmp[2] = { *a, *b };                                             \
    memset(t, 0, sizeof(*t));                                                  \
    if (idx >= 0 && idx + SIZE <= sizeof(tmp)) {                               \
        memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
    } else {                                                                   \
        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x"  \
                      TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n",         \
                      env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE);        \
    }                                                                          \
}
#else
#define VEXTDVLX(NAME, SIZE) \
void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
                   target_ulong index)                                         \
{                                                                              \
    const target_long idx = index;                                             \
    ppc_avr_t tmp[2] = { *b, *a };                                             \
    memset(t, 0, sizeof(*t));                                                  \
    if (idx >= 0 && idx + SIZE <= sizeof(tmp)) {                               \
        memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2],                                  \
               (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE);                  \
    } else {                                                                   \
        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x"  \
                      TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n",         \
                      env->nip, idx < 0 ?
SIZE - idx : idx, 32 - SIZE); \ 1482 } \ 1483 } 1484 #endif 1485 VEXTDVLX(VEXTDUBVLX, 1) 1486 VEXTDVLX(VEXTDUHVLX, 2) 1487 VEXTDVLX(VEXTDUWVLX, 4) 1488 VEXTDVLX(VEXTDDVLX, 8) 1489 #undef VEXTDVLX 1490 #if defined(HOST_WORDS_BIGENDIAN) 1491 #define VEXTRACT(suffix, element) \ 1492 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1493 { \ 1494 uint32_t es = sizeof(r->element[0]); \ 1495 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1496 memset(&r->u8[8], 0, 8); \ 1497 memset(&r->u8[0], 0, 8 - es); \ 1498 } 1499 #else 1500 #define VEXTRACT(suffix, element) \ 1501 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1502 { \ 1503 uint32_t es = sizeof(r->element[0]); \ 1504 uint32_t s = (16 - index) - es; \ 1505 memmove(&r->u8[8], &b->u8[s], es); \ 1506 memset(&r->u8[0], 0, 8); \ 1507 memset(&r->u8[8 + es], 0, 8 - es); \ 1508 } 1509 #endif 1510 VEXTRACT(ub, u8) 1511 VEXTRACT(uh, u16) 1512 VEXTRACT(uw, u32) 1513 VEXTRACT(d, u64) 1514 #undef VEXTRACT 1515 1516 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \ 1517 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \ 1518 { \ 1519 int i, idx, crf = 0; \ 1520 \ 1521 for (i = 0; i < NUM_ELEMS; i++) { \ 1522 idx = LEFT ? i : NUM_ELEMS - i - 1; \ 1523 if (b->Vsr##ELEM(idx)) { \ 1524 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \ 1525 } else { \ 1526 crf = 0b0010; \ 1527 break; \ 1528 } \ 1529 } \ 1530 \ 1531 for (; i < NUM_ELEMS; i++) { \ 1532 idx = LEFT ? i : NUM_ELEMS - i - 1; \ 1533 t->Vsr##ELEM(idx) = 0; \ 1534 } \ 1535 \ 1536 return crf; \ 1537 } 1538 VSTRI(VSTRIBL, B, 16, true) 1539 VSTRI(VSTRIBR, B, 16, false) 1540 VSTRI(VSTRIHL, H, 8, true) 1541 VSTRI(VSTRIHR, H, 8, false) 1542 #undef VSTRI 1543 1544 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt, 1545 ppc_vsr_t *xb, uint32_t index) 1546 { 1547 ppc_vsr_t t = { }; 1548 size_t es = sizeof(uint32_t); 1549 uint32_t ext_index; 1550 int i; 1551 1552 ext_index = index; 1553 for (i = 0; i < es; i++, ext_index++) { 1554 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16); 1555 } 1556 1557 *xt = t; 1558 } 1559 1560 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt, 1561 ppc_vsr_t *xb, uint32_t index) 1562 { 1563 ppc_vsr_t t = *xt; 1564 size_t es = sizeof(uint32_t); 1565 int ins_index, i = 0; 1566 1567 ins_index = index; 1568 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1569 t.VsrB(ins_index) = xb->VsrB(8 - es + i); 1570 } 1571 1572 *xt = t; 1573 } 1574 1575 #define XXBLEND(name, sz) \ 1576 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1577 ppc_avr_t *c, uint32_t desc) \ 1578 { \ 1579 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \ 1580 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? 
\ 1581 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \ 1582 } \ 1583 } 1584 XXBLEND(B, 8) 1585 XXBLEND(H, 16) 1586 XXBLEND(W, 32) 1587 XXBLEND(D, 64) 1588 #undef XXBLEND 1589 1590 #define VNEG(name, element) \ 1591 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1592 { \ 1593 int i; \ 1594 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1595 r->element[i] = -b->element[i]; \ 1596 } \ 1597 } 1598 VNEG(vnegw, s32) 1599 VNEG(vnegd, s64) 1600 #undef VNEG 1601 1602 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1603 { 1604 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1605 1606 #if defined(HOST_WORDS_BIGENDIAN) 1607 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1608 memset(&r->u8[0], 0, sh); 1609 #else 1610 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1611 memset(&r->u8[16 - sh], 0, sh); 1612 #endif 1613 } 1614 1615 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1616 { 1617 int i; 1618 1619 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1620 r->u32[i] = a->u32[i] >= b->u32[i]; 1621 } 1622 } 1623 1624 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1625 { 1626 int64_t t; 1627 int i, upper; 1628 ppc_avr_t result; 1629 int sat = 0; 1630 1631 upper = ARRAY_SIZE(r->s32) - 1; 1632 t = (int64_t)b->VsrSW(upper); 1633 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1634 t += a->VsrSW(i); 1635 result.VsrSW(i) = 0; 1636 } 1637 result.VsrSW(upper) = cvtsdsw(t, &sat); 1638 *r = result; 1639 1640 if (sat) { 1641 set_vscr_sat(env); 1642 } 1643 } 1644 1645 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1646 { 1647 int i, j, upper; 1648 ppc_avr_t result; 1649 int sat = 0; 1650 1651 upper = 1; 1652 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1653 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 1654 1655 result.VsrD(i) = 0; 1656 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 1657 t += a->VsrSW(2 * i + j); 1658 } 1659 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 1660 } 1661 1662 *r = result; 1663 if (sat) { 1664 set_vscr_sat(env); 1665 } 1666 } 1667 1668 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1669 { 1670 int i, j; 1671 int sat = 0; 1672 1673 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1674 int64_t t = (int64_t)b->s32[i]; 1675 1676 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 1677 t += a->s8[4 * i + j]; 1678 } 1679 r->s32[i] = cvtsdsw(t, &sat); 1680 } 1681 1682 if (sat) { 1683 set_vscr_sat(env); 1684 } 1685 } 1686 1687 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1688 { 1689 int sat = 0; 1690 int i; 1691 1692 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1693 int64_t t = (int64_t)b->s32[i]; 1694 1695 t += a->s16[2 * i] + a->s16[2 * i + 1]; 1696 r->s32[i] = cvtsdsw(t, &sat); 1697 } 1698 1699 if (sat) { 1700 set_vscr_sat(env); 1701 } 1702 } 1703 1704 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1705 { 1706 int i, j; 1707 int sat = 0; 1708 1709 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1710 uint64_t t = (uint64_t)b->u32[i]; 1711 1712 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 1713 t += a->u8[4 * i + j]; 1714 } 1715 r->u32[i] = cvtuduw(t, &sat); 1716 } 1717 1718 if (sat) { 1719 set_vscr_sat(env); 1720 } 1721 } 1722 1723 #if defined(HOST_WORDS_BIGENDIAN) 1724 #define UPKHI 1 1725 #define UPKLO 0 1726 #else 1727 #define UPKHI 0 1728 #define UPKLO 1 1729 #endif 1730 #define VUPKPX(suffix, hi) \ 1731 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1732 { \ 1733 int i; \ 1734 ppc_avr_t result; \ 1735 \ 1736 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 
1737 uint16_t e = b->u16[hi ? i : i + 4]; \ 1738 uint8_t a = (e >> 15) ? 0xff : 0; \ 1739 uint8_t r = (e >> 10) & 0x1f; \ 1740 uint8_t g = (e >> 5) & 0x1f; \ 1741 uint8_t b = e & 0x1f; \ 1742 \ 1743 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 1744 } \ 1745 *r = result; \ 1746 } 1747 VUPKPX(lpx, UPKLO) 1748 VUPKPX(hpx, UPKHI) 1749 #undef VUPKPX 1750 1751 #define VUPK(suffix, unpacked, packee, hi) \ 1752 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1753 { \ 1754 int i; \ 1755 ppc_avr_t result; \ 1756 \ 1757 if (hi) { \ 1758 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 1759 result.unpacked[i] = b->packee[i]; \ 1760 } \ 1761 } else { \ 1762 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 1763 i++) { \ 1764 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 1765 } \ 1766 } \ 1767 *r = result; \ 1768 } 1769 VUPK(hsb, s16, s8, UPKHI) 1770 VUPK(hsh, s32, s16, UPKHI) 1771 VUPK(hsw, s64, s32, UPKHI) 1772 VUPK(lsb, s16, s8, UPKLO) 1773 VUPK(lsh, s32, s16, UPKLO) 1774 VUPK(lsw, s64, s32, UPKLO) 1775 #undef VUPK 1776 #undef UPKHI 1777 #undef UPKLO 1778 1779 #define VGENERIC_DO(name, element) \ 1780 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 1781 { \ 1782 int i; \ 1783 \ 1784 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1785 r->element[i] = name(b->element[i]); \ 1786 } \ 1787 } 1788 1789 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 1790 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 1791 1792 VGENERIC_DO(clzb, u8) 1793 VGENERIC_DO(clzh, u16) 1794 1795 #undef clzb 1796 #undef clzh 1797 1798 #define ctzb(v) ((v) ? ctz32(v) : 8) 1799 #define ctzh(v) ((v) ? ctz32(v) : 16) 1800 #define ctzw(v) ctz32((v)) 1801 #define ctzd(v) ctz64((v)) 1802 1803 VGENERIC_DO(ctzb, u8) 1804 VGENERIC_DO(ctzh, u16) 1805 VGENERIC_DO(ctzw, u32) 1806 VGENERIC_DO(ctzd, u64) 1807 1808 #undef ctzb 1809 #undef ctzh 1810 #undef ctzw 1811 #undef ctzd 1812 1813 #define popcntb(v) ctpop8(v) 1814 #define popcnth(v) ctpop16(v) 1815 #define popcntw(v) ctpop32(v) 1816 #define popcntd(v) ctpop64(v) 1817 1818 VGENERIC_DO(popcntb, u8) 1819 VGENERIC_DO(popcnth, u16) 1820 VGENERIC_DO(popcntw, u32) 1821 VGENERIC_DO(popcntd, u64) 1822 1823 #undef popcntb 1824 #undef popcnth 1825 #undef popcntw 1826 #undef popcntd 1827 1828 #undef VGENERIC_DO 1829 1830 #if defined(HOST_WORDS_BIGENDIAN) 1831 #define QW_ONE { .u64 = { 0, 1 } } 1832 #else 1833 #define QW_ONE { .u64 = { 1, 0 } } 1834 #endif 1835 1836 #ifndef CONFIG_INT128 1837 1838 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) 1839 { 1840 t->u64[0] = ~a.u64[0]; 1841 t->u64[1] = ~a.u64[1]; 1842 } 1843 1844 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) 1845 { 1846 if (a.VsrD(0) < b.VsrD(0)) { 1847 return -1; 1848 } else if (a.VsrD(0) > b.VsrD(0)) { 1849 return 1; 1850 } else if (a.VsrD(1) < b.VsrD(1)) { 1851 return -1; 1852 } else if (a.VsrD(1) > b.VsrD(1)) { 1853 return 1; 1854 } else { 1855 return 0; 1856 } 1857 } 1858 1859 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 1860 { 1861 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 1862 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 1863 (~a.VsrD(1) < b.VsrD(1)); 1864 } 1865 1866 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 1867 { 1868 ppc_avr_t not_a; 1869 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 1870 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 1871 (~a.VsrD(1) < b.VsrD(1)); 1872 avr_qw_not(¬_a, a); 1873 return avr_qw_cmpu(not_a, b) < 0; 1874 } 1875 1876 #endif 1877 1878 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1879 { 1880 #ifdef 
CONFIG_INT128 1881 r->u128 = a->u128 + b->u128; 1882 #else 1883 avr_qw_add(r, *a, *b); 1884 #endif 1885 } 1886 1887 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1888 { 1889 #ifdef CONFIG_INT128 1890 r->u128 = a->u128 + b->u128 + (c->u128 & 1); 1891 #else 1892 1893 if (c->VsrD(1) & 1) { 1894 ppc_avr_t tmp; 1895 1896 tmp.VsrD(0) = 0; 1897 tmp.VsrD(1) = c->VsrD(1) & 1; 1898 avr_qw_add(&tmp, *a, tmp); 1899 avr_qw_add(r, tmp, *b); 1900 } else { 1901 avr_qw_add(r, *a, *b); 1902 } 1903 #endif 1904 } 1905 1906 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1907 { 1908 #ifdef CONFIG_INT128 1909 r->u128 = (~a->u128 < b->u128); 1910 #else 1911 ppc_avr_t not_a; 1912 1913 avr_qw_not(¬_a, *a); 1914 1915 r->VsrD(0) = 0; 1916 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0); 1917 #endif 1918 } 1919 1920 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1921 { 1922 #ifdef CONFIG_INT128 1923 int carry_out = (~a->u128 < b->u128); 1924 if (!carry_out && (c->u128 & 1)) { 1925 carry_out = ((a->u128 + b->u128 + 1) == 0) && 1926 ((a->u128 != 0) || (b->u128 != 0)); 1927 } 1928 r->u128 = carry_out; 1929 #else 1930 1931 int carry_in = c->VsrD(1) & 1; 1932 int carry_out = 0; 1933 ppc_avr_t tmp; 1934 1935 carry_out = avr_qw_addc(&tmp, *a, *b); 1936 1937 if (!carry_out && carry_in) { 1938 ppc_avr_t one = QW_ONE; 1939 carry_out = avr_qw_addc(&tmp, tmp, one); 1940 } 1941 r->VsrD(0) = 0; 1942 r->VsrD(1) = carry_out; 1943 #endif 1944 } 1945 1946 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1947 { 1948 #ifdef CONFIG_INT128 1949 r->u128 = a->u128 - b->u128; 1950 #else 1951 ppc_avr_t tmp; 1952 ppc_avr_t one = QW_ONE; 1953 1954 avr_qw_not(&tmp, *b); 1955 avr_qw_add(&tmp, *a, tmp); 1956 avr_qw_add(r, tmp, one); 1957 #endif 1958 } 1959 1960 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1961 { 1962 #ifdef CONFIG_INT128 1963 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); 1964 #else 1965 ppc_avr_t tmp, sum; 1966 1967 avr_qw_not(&tmp, *b); 1968 avr_qw_add(&sum, *a, tmp); 1969 1970 tmp.VsrD(0) = 0; 1971 tmp.VsrD(1) = c->VsrD(1) & 1; 1972 avr_qw_add(r, sum, tmp); 1973 #endif 1974 } 1975 1976 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1977 { 1978 #ifdef CONFIG_INT128 1979 r->u128 = (~a->u128 < ~b->u128) || 1980 (a->u128 + ~b->u128 == (__uint128_t)-1); 1981 #else 1982 int carry = (avr_qw_cmpu(*a, *b) > 0); 1983 if (!carry) { 1984 ppc_avr_t tmp; 1985 avr_qw_not(&tmp, *b); 1986 avr_qw_add(&tmp, *a, tmp); 1987 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull)); 1988 } 1989 r->VsrD(0) = 0; 1990 r->VsrD(1) = carry; 1991 #endif 1992 } 1993 1994 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1995 { 1996 #ifdef CONFIG_INT128 1997 r->u128 = 1998 (~a->u128 < ~b->u128) || 1999 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); 2000 #else 2001 int carry_in = c->VsrD(1) & 1; 2002 int carry_out = (avr_qw_cmpu(*a, *b) > 0); 2003 if (!carry_out && carry_in) { 2004 ppc_avr_t tmp; 2005 avr_qw_not(&tmp, *b); 2006 avr_qw_add(&tmp, *a, tmp); 2007 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull)); 2008 } 2009 2010 r->VsrD(0) = 0; 2011 r->VsrD(1) = carry_out; 2012 #endif 2013 } 2014 2015 #define BCD_PLUS_PREF_1 0xC 2016 #define BCD_PLUS_PREF_2 0xF 2017 #define BCD_PLUS_ALT_1 0xA 2018 #define BCD_NEG_PREF 0xD 2019 #define BCD_NEG_ALT 0xB 2020 #define BCD_PLUS_ALT_2 0xE 2021 #define NATIONAL_PLUS 0x2B 2022 #define NATIONAL_NEG 0x2D 2023 2024 #define BCD_DIG_BYTE(n) (15 
- ((n) / 2)) 2025 2026 static int bcd_get_sgn(ppc_avr_t *bcd) 2027 { 2028 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) { 2029 case BCD_PLUS_PREF_1: 2030 case BCD_PLUS_PREF_2: 2031 case BCD_PLUS_ALT_1: 2032 case BCD_PLUS_ALT_2: 2033 { 2034 return 1; 2035 } 2036 2037 case BCD_NEG_PREF: 2038 case BCD_NEG_ALT: 2039 { 2040 return -1; 2041 } 2042 2043 default: 2044 { 2045 return 0; 2046 } 2047 } 2048 } 2049 2050 static int bcd_preferred_sgn(int sgn, int ps) 2051 { 2052 if (sgn >= 0) { 2053 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2054 } else { 2055 return BCD_NEG_PREF; 2056 } 2057 } 2058 2059 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2060 { 2061 uint8_t result; 2062 if (n & 1) { 2063 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4; 2064 } else { 2065 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF; 2066 } 2067 2068 if (unlikely(result > 9)) { 2069 *invalid = true; 2070 } 2071 return result; 2072 } 2073 2074 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2075 { 2076 if (n & 1) { 2077 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F; 2078 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4); 2079 } else { 2080 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0; 2081 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit; 2082 } 2083 } 2084 2085 static bool bcd_is_valid(ppc_avr_t *bcd) 2086 { 2087 int i; 2088 int invalid = 0; 2089 2090 if (bcd_get_sgn(bcd) == 0) { 2091 return false; 2092 } 2093 2094 for (i = 1; i < 32; i++) { 2095 bcd_get_digit(bcd, i, &invalid); 2096 if (unlikely(invalid)) { 2097 return false; 2098 } 2099 } 2100 return true; 2101 } 2102 2103 static int bcd_cmp_zero(ppc_avr_t *bcd) 2104 { 2105 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) { 2106 return CRF_EQ; 2107 } else { 2108 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT; 2109 } 2110 } 2111 2112 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2113 { 2114 return reg->VsrH(7 - n); 2115 } 2116 2117 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2118 { 2119 reg->VsrH(7 - n) = val; 2120 } 2121 2122 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2123 { 2124 int i; 2125 int invalid = 0; 2126 for (i = 31; i > 0; i--) { 2127 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2128 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2129 if (unlikely(invalid)) { 2130 return 0; /* doesn't matter */ 2131 } else if (dig_a > dig_b) { 2132 return 1; 2133 } else if (dig_a < dig_b) { 2134 return -1; 2135 } 2136 } 2137 2138 return 0; 2139 } 2140 2141 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2142 int *overflow) 2143 { 2144 int carry = 0; 2145 int i; 2146 int is_zero = 1; 2147 2148 for (i = 1; i <= 31; i++) { 2149 uint8_t digit = bcd_get_digit(a, i, invalid) + 2150 bcd_get_digit(b, i, invalid) + carry; 2151 is_zero &= (digit == 0); 2152 if (digit > 9) { 2153 carry = 1; 2154 digit -= 10; 2155 } else { 2156 carry = 0; 2157 } 2158 2159 bcd_put_digit(t, digit, i); 2160 } 2161 2162 *overflow = carry; 2163 return is_zero; 2164 } 2165 2166 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2167 int *overflow) 2168 { 2169 int carry = 0; 2170 int i; 2171 2172 for (i = 1; i <= 31; i++) { 2173 uint8_t digit = bcd_get_digit(a, i, invalid) - 2174 bcd_get_digit(b, i, invalid) + carry; 2175 if (digit & 0x80) { 2176 carry = -1; 2177 digit += 10; 2178 } else { 2179 carry = 0; 2180 } 2181 2182 bcd_put_digit(t, digit, i); 2183 } 2184 2185 *overflow = carry; 2186 } 2187 2188 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2189 { 2190 2191 int sgna = 
bcd_get_sgn(a); 2192 int sgnb = bcd_get_sgn(b); 2193 int invalid = (sgna == 0) || (sgnb == 0); 2194 int overflow = 0; 2195 int zero = 0; 2196 uint32_t cr = 0; 2197 ppc_avr_t result = { .u64 = { 0, 0 } }; 2198 2199 if (!invalid) { 2200 if (sgna == sgnb) { 2201 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2202 zero = bcd_add_mag(&result, a, b, &invalid, &overflow); 2203 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2204 } else { 2205 int magnitude = bcd_cmp_mag(a, b); 2206 if (magnitude > 0) { 2207 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2208 bcd_sub_mag(&result, a, b, &invalid, &overflow); 2209 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2210 } else if (magnitude < 0) { 2211 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps); 2212 bcd_sub_mag(&result, b, a, &invalid, &overflow); 2213 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2214 } else { 2215 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps); 2216 cr = CRF_EQ; 2217 } 2218 } 2219 } 2220 2221 if (unlikely(invalid)) { 2222 result.VsrD(0) = result.VsrD(1) = -1; 2223 cr = CRF_SO; 2224 } else if (overflow) { 2225 cr |= CRF_SO; 2226 } else if (zero) { 2227 cr |= CRF_EQ; 2228 } 2229 2230 *r = result; 2231 2232 return cr; 2233 } 2234 2235 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2236 { 2237 ppc_avr_t bcopy = *b; 2238 int sgnb = bcd_get_sgn(b); 2239 if (sgnb < 0) { 2240 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2241 } else if (sgnb > 0) { 2242 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2243 } 2244 /* else invalid ... defer to bcdadd code for proper handling */ 2245 2246 return helper_bcdadd(r, a, &bcopy, ps); 2247 } 2248 2249 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2250 { 2251 int i; 2252 int cr = 0; 2253 uint16_t national = 0; 2254 uint16_t sgnb = get_national_digit(b, 0); 2255 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2256 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2257 2258 for (i = 1; i < 8; i++) { 2259 national = get_national_digit(b, i); 2260 if (unlikely(national < 0x30 || national > 0x39)) { 2261 invalid = 1; 2262 break; 2263 } 2264 2265 bcd_put_digit(&ret, national & 0xf, i); 2266 } 2267 2268 if (sgnb == NATIONAL_PLUS) { 2269 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2270 } else { 2271 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2272 } 2273 2274 cr = bcd_cmp_zero(&ret); 2275 2276 if (unlikely(invalid)) { 2277 cr = CRF_SO; 2278 } 2279 2280 *r = ret; 2281 2282 return cr; 2283 } 2284 2285 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2286 { 2287 int i; 2288 int cr = 0; 2289 int sgnb = bcd_get_sgn(b); 2290 int invalid = (sgnb == 0); 2291 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2292 2293 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0); 2294 2295 for (i = 1; i < 8; i++) { 2296 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2297 2298 if (unlikely(invalid)) { 2299 break; 2300 } 2301 } 2302 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2303 2304 cr = bcd_cmp_zero(b); 2305 2306 if (ox_flag) { 2307 cr |= CRF_SO; 2308 } 2309 2310 if (unlikely(invalid)) { 2311 cr = CRF_SO; 2312 } 2313 2314 *r = ret; 2315 2316 return cr; 2317 } 2318 2319 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2320 { 2321 int i; 2322 int cr = 0; 2323 int invalid = 0; 2324 int zone_digit = 0; 2325 int zone_lead = ps ? 
0xF : 0x3; 2326 int digit = 0; 2327 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2328 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4; 2329 2330 if (unlikely((sgnb < 0xA) && ps)) { 2331 invalid = 1; 2332 } 2333 2334 for (i = 0; i < 16; i++) { 2335 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead; 2336 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF; 2337 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2338 invalid = 1; 2339 break; 2340 } 2341 2342 bcd_put_digit(&ret, digit, i + 1); 2343 } 2344 2345 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2346 (!ps && (sgnb & 0x4))) { 2347 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2348 } else { 2349 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2350 } 2351 2352 cr = bcd_cmp_zero(&ret); 2353 2354 if (unlikely(invalid)) { 2355 cr = CRF_SO; 2356 } 2357 2358 *r = ret; 2359 2360 return cr; 2361 } 2362 2363 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2364 { 2365 int i; 2366 int cr = 0; 2367 uint8_t digit = 0; 2368 int sgnb = bcd_get_sgn(b); 2369 int zone_lead = (ps) ? 0xF0 : 0x30; 2370 int invalid = (sgnb == 0); 2371 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2372 2373 int ox_flag = ((b->VsrD(0) >> 4) != 0); 2374 2375 for (i = 0; i < 16; i++) { 2376 digit = bcd_get_digit(b, i + 1, &invalid); 2377 2378 if (unlikely(invalid)) { 2379 break; 2380 } 2381 2382 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit; 2383 } 2384 2385 if (ps) { 2386 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2387 } else { 2388 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1); 2389 } 2390 2391 cr = bcd_cmp_zero(b); 2392 2393 if (ox_flag) { 2394 cr |= CRF_SO; 2395 } 2396 2397 if (unlikely(invalid)) { 2398 cr = CRF_SO; 2399 } 2400 2401 *r = ret; 2402 2403 return cr; 2404 } 2405 2406 /** 2407 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs 2408 * 2409 * Returns: 2410 * > 0 if ahi|alo > bhi|blo, 2411 * 0 if ahi|alo == bhi|blo, 2412 * < 0 if ahi|alo < bhi|blo 2413 */ 2414 static inline int ucmp128(uint64_t alo, uint64_t ahi, 2415 uint64_t blo, uint64_t bhi) 2416 { 2417 return (ahi == bhi) ? 2418 (alo > blo ? 1 : (alo == blo ? 0 : -1)) : 2419 (ahi > bhi ? 1 : -1); 2420 } 2421 2422 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2423 { 2424 int i; 2425 int cr; 2426 uint64_t lo_value; 2427 uint64_t hi_value; 2428 uint64_t rem; 2429 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2430 2431 if (b->VsrSD(0) < 0) { 2432 lo_value = -b->VsrSD(1); 2433 hi_value = ~b->VsrD(0) + !lo_value; 2434 bcd_put_digit(&ret, 0xD, 0); 2435 2436 cr = CRF_LT; 2437 } else { 2438 lo_value = b->VsrD(1); 2439 hi_value = b->VsrD(0); 2440 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); 2441 2442 if (hi_value == 0 && lo_value == 0) { 2443 cr = CRF_EQ; 2444 } else { 2445 cr = CRF_GT; 2446 } 2447 } 2448 2449 /* 2450 * Check src limits: abs(src) <= 10^31 - 1 2451 * 2452 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff 2453 */ 2454 if (ucmp128(lo_value, hi_value, 2455 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) { 2456 cr |= CRF_SO; 2457 2458 /* 2459 * According to the ISA, if src wouldn't fit in the destination 2460 * register, the result is undefined. 2461 * In that case, we leave r unchanged. 
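         * (Only cr, with CRF_SO set, is reported back to the caller.)
         *
         * The 10^31 - 1 bound used above follows from the packed BCD
         * layout handled by bcd_put_digit(): the 128-bit register holds
         * 32 four-bit nibbles, one of which is the sign code, leaving
         * 31 decimal digits of magnitude.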
2462 */ 2463 } else { 2464 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL); 2465 2466 for (i = 1; i < 16; rem /= 10, i++) { 2467 bcd_put_digit(&ret, rem % 10, i); 2468 } 2469 2470 for (; i < 32; lo_value /= 10, i++) { 2471 bcd_put_digit(&ret, lo_value % 10, i); 2472 } 2473 2474 *r = ret; 2475 } 2476 2477 return cr; 2478 } 2479 2480 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2481 { 2482 uint8_t i; 2483 int cr; 2484 uint64_t carry; 2485 uint64_t unused; 2486 uint64_t lo_value; 2487 uint64_t hi_value = 0; 2488 int sgnb = bcd_get_sgn(b); 2489 int invalid = (sgnb == 0); 2490 2491 lo_value = bcd_get_digit(b, 31, &invalid); 2492 for (i = 30; i > 0; i--) { 2493 mulu64(&lo_value, &carry, lo_value, 10ULL); 2494 mulu64(&hi_value, &unused, hi_value, 10ULL); 2495 lo_value += bcd_get_digit(b, i, &invalid); 2496 hi_value += carry; 2497 2498 if (unlikely(invalid)) { 2499 break; 2500 } 2501 } 2502 2503 if (sgnb == -1) { 2504 r->VsrSD(1) = -lo_value; 2505 r->VsrSD(0) = ~hi_value + !r->VsrSD(1); 2506 } else { 2507 r->VsrSD(1) = lo_value; 2508 r->VsrSD(0) = hi_value; 2509 } 2510 2511 cr = bcd_cmp_zero(b); 2512 2513 if (unlikely(invalid)) { 2514 cr = CRF_SO; 2515 } 2516 2517 return cr; 2518 } 2519 2520 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2521 { 2522 int i; 2523 int invalid = 0; 2524 2525 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 2526 return CRF_SO; 2527 } 2528 2529 *r = *a; 2530 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0); 2531 2532 for (i = 1; i < 32; i++) { 2533 bcd_get_digit(a, i, &invalid); 2534 bcd_get_digit(b, i, &invalid); 2535 if (unlikely(invalid)) { 2536 return CRF_SO; 2537 } 2538 } 2539 2540 return bcd_cmp_zero(r); 2541 } 2542 2543 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2544 { 2545 int sgnb = bcd_get_sgn(b); 2546 2547 *r = *b; 2548 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 2549 2550 if (bcd_is_valid(b) == false) { 2551 return CRF_SO; 2552 } 2553 2554 return bcd_cmp_zero(r); 2555 } 2556 2557 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2558 { 2559 int cr; 2560 int i = a->VsrSB(7); 2561 bool ox_flag = false; 2562 int sgnb = bcd_get_sgn(b); 2563 ppc_avr_t ret = *b; 2564 ret.VsrD(1) &= ~0xf; 2565 2566 if (bcd_is_valid(b) == false) { 2567 return CRF_SO; 2568 } 2569 2570 if (unlikely(i > 31)) { 2571 i = 31; 2572 } else if (unlikely(i < -31)) { 2573 i = -31; 2574 } 2575 2576 if (i > 0) { 2577 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2578 } else { 2579 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2580 } 2581 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2582 2583 *r = ret; 2584 2585 cr = bcd_cmp_zero(r); 2586 if (ox_flag) { 2587 cr |= CRF_SO; 2588 } 2589 2590 return cr; 2591 } 2592 2593 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2594 { 2595 int cr; 2596 int i; 2597 int invalid = 0; 2598 bool ox_flag = false; 2599 ppc_avr_t ret = *b; 2600 2601 for (i = 0; i < 32; i++) { 2602 bcd_get_digit(b, i, &invalid); 2603 2604 if (unlikely(invalid)) { 2605 return CRF_SO; 2606 } 2607 } 2608 2609 i = a->VsrSB(7); 2610 if (i >= 32) { 2611 ox_flag = true; 2612 ret.VsrD(1) = ret.VsrD(0) = 0; 2613 } else if (i <= -32) { 2614 ret.VsrD(1) = ret.VsrD(0) = 0; 2615 } else if (i > 0) { 2616 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2617 } else { 2618 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2619 } 2620 *r = ret; 2621 2622 cr = bcd_cmp_zero(r); 2623 if (ox_flag) { 2624 cr |= CRF_SO; 2625 } 2626 2627 return 
cr; 2628 } 2629 2630 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2631 { 2632 int cr; 2633 int unused = 0; 2634 int invalid = 0; 2635 bool ox_flag = false; 2636 int sgnb = bcd_get_sgn(b); 2637 ppc_avr_t ret = *b; 2638 ret.VsrD(1) &= ~0xf; 2639 2640 int i = a->VsrSB(7); 2641 ppc_avr_t bcd_one; 2642 2643 bcd_one.VsrD(0) = 0; 2644 bcd_one.VsrD(1) = 0x10; 2645 2646 if (bcd_is_valid(b) == false) { 2647 return CRF_SO; 2648 } 2649 2650 if (unlikely(i > 31)) { 2651 i = 31; 2652 } else if (unlikely(i < -31)) { 2653 i = -31; 2654 } 2655 2656 if (i > 0) { 2657 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2658 } else { 2659 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2660 2661 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 2662 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 2663 } 2664 } 2665 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2666 2667 cr = bcd_cmp_zero(&ret); 2668 if (ox_flag) { 2669 cr |= CRF_SO; 2670 } 2671 *r = ret; 2672 2673 return cr; 2674 } 2675 2676 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2677 { 2678 uint64_t mask; 2679 uint32_t ox_flag = 0; 2680 int i = a->VsrSH(3) + 1; 2681 ppc_avr_t ret = *b; 2682 2683 if (bcd_is_valid(b) == false) { 2684 return CRF_SO; 2685 } 2686 2687 if (i > 16 && i < 32) { 2688 mask = (uint64_t)-1 >> (128 - i * 4); 2689 if (ret.VsrD(0) & ~mask) { 2690 ox_flag = CRF_SO; 2691 } 2692 2693 ret.VsrD(0) &= mask; 2694 } else if (i >= 0 && i <= 16) { 2695 mask = (uint64_t)-1 >> (64 - i * 4); 2696 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 2697 ox_flag = CRF_SO; 2698 } 2699 2700 ret.VsrD(1) &= mask; 2701 ret.VsrD(0) = 0; 2702 } 2703 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 2704 *r = ret; 2705 2706 return bcd_cmp_zero(&ret) | ox_flag; 2707 } 2708 2709 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2710 { 2711 int i; 2712 uint64_t mask; 2713 uint32_t ox_flag = 0; 2714 int invalid = 0; 2715 ppc_avr_t ret = *b; 2716 2717 for (i = 0; i < 32; i++) { 2718 bcd_get_digit(b, i, &invalid); 2719 2720 if (unlikely(invalid)) { 2721 return CRF_SO; 2722 } 2723 } 2724 2725 i = a->VsrSH(3); 2726 if (i > 16 && i < 33) { 2727 mask = (uint64_t)-1 >> (128 - i * 4); 2728 if (ret.VsrD(0) & ~mask) { 2729 ox_flag = CRF_SO; 2730 } 2731 2732 ret.VsrD(0) &= mask; 2733 } else if (i > 0 && i <= 16) { 2734 mask = (uint64_t)-1 >> (64 - i * 4); 2735 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 2736 ox_flag = CRF_SO; 2737 } 2738 2739 ret.VsrD(1) &= mask; 2740 ret.VsrD(0) = 0; 2741 } else if (i == 0) { 2742 if (ret.VsrD(0) || ret.VsrD(1)) { 2743 ox_flag = CRF_SO; 2744 } 2745 ret.VsrD(0) = ret.VsrD(1) = 0; 2746 } 2747 2748 *r = ret; 2749 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) { 2750 return ox_flag | CRF_EQ; 2751 } 2752 2753 return ox_flag | CRF_GT; 2754 } 2755 2756 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 2757 { 2758 int i; 2759 VECTOR_FOR_INORDER_I(i, u8) { 2760 r->u8[i] = AES_sbox[a->u8[i]]; 2761 } 2762 } 2763 2764 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2765 { 2766 ppc_avr_t result; 2767 int i; 2768 2769 VECTOR_FOR_INORDER_I(i, u32) { 2770 result.VsrW(i) = b->VsrW(i) ^ 2771 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^ 2772 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^ 2773 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^ 2774 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]); 2775 } 2776 *r = result; 2777 } 2778 2779 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2780 { 2781 ppc_avr_t result; 2782 int i; 2783 2784 
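    /*
     * AES final encryption round: SubBytes and ShiftRows (the plain
     * AES_sbox indexed through AES_shifts), then AddRoundKey by XORing
     * with b.  Unlike helper_vcipher above, there is no MixColumns step,
     * which is why the combined AES_Te* tables are not used here.
     */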
VECTOR_FOR_INORDER_I(i, u8) { 2785 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]); 2786 } 2787 *r = result; 2788 } 2789 2790 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2791 { 2792 /* This differs from what is written in ISA V2.07. The RTL is */ 2793 /* incorrect and will be fixed in V2.07B. */ 2794 int i; 2795 ppc_avr_t tmp; 2796 2797 VECTOR_FOR_INORDER_I(i, u8) { 2798 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])]; 2799 } 2800 2801 VECTOR_FOR_INORDER_I(i, u32) { 2802 r->VsrW(i) = 2803 AES_imc[tmp.VsrB(4 * i + 0)][0] ^ 2804 AES_imc[tmp.VsrB(4 * i + 1)][1] ^ 2805 AES_imc[tmp.VsrB(4 * i + 2)][2] ^ 2806 AES_imc[tmp.VsrB(4 * i + 3)][3]; 2807 } 2808 } 2809 2810 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2811 { 2812 ppc_avr_t result; 2813 int i; 2814 2815 VECTOR_FOR_INORDER_I(i, u8) { 2816 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]); 2817 } 2818 *r = result; 2819 } 2820 2821 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 2822 { 2823 int st = (st_six & 0x10) != 0; 2824 int six = st_six & 0xF; 2825 int i; 2826 2827 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2828 if (st == 0) { 2829 if ((six & (0x8 >> i)) == 0) { 2830 r->VsrW(i) = ror32(a->VsrW(i), 7) ^ 2831 ror32(a->VsrW(i), 18) ^ 2832 (a->VsrW(i) >> 3); 2833 } else { /* six.bit[i] == 1 */ 2834 r->VsrW(i) = ror32(a->VsrW(i), 17) ^ 2835 ror32(a->VsrW(i), 19) ^ 2836 (a->VsrW(i) >> 10); 2837 } 2838 } else { /* st == 1 */ 2839 if ((six & (0x8 >> i)) == 0) { 2840 r->VsrW(i) = ror32(a->VsrW(i), 2) ^ 2841 ror32(a->VsrW(i), 13) ^ 2842 ror32(a->VsrW(i), 22); 2843 } else { /* six.bit[i] == 1 */ 2844 r->VsrW(i) = ror32(a->VsrW(i), 6) ^ 2845 ror32(a->VsrW(i), 11) ^ 2846 ror32(a->VsrW(i), 25); 2847 } 2848 } 2849 } 2850 } 2851 2852 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 2853 { 2854 int st = (st_six & 0x10) != 0; 2855 int six = st_six & 0xF; 2856 int i; 2857 2858 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 2859 if (st == 0) { 2860 if ((six & (0x8 >> (2 * i))) == 0) { 2861 r->VsrD(i) = ror64(a->VsrD(i), 1) ^ 2862 ror64(a->VsrD(i), 8) ^ 2863 (a->VsrD(i) >> 7); 2864 } else { /* six.bit[2*i] == 1 */ 2865 r->VsrD(i) = ror64(a->VsrD(i), 19) ^ 2866 ror64(a->VsrD(i), 61) ^ 2867 (a->VsrD(i) >> 6); 2868 } 2869 } else { /* st == 1 */ 2870 if ((six & (0x8 >> (2 * i))) == 0) { 2871 r->VsrD(i) = ror64(a->VsrD(i), 28) ^ 2872 ror64(a->VsrD(i), 34) ^ 2873 ror64(a->VsrD(i), 39); 2874 } else { /* six.bit[2*i] == 1 */ 2875 r->VsrD(i) = ror64(a->VsrD(i), 14) ^ 2876 ror64(a->VsrD(i), 18) ^ 2877 ror64(a->VsrD(i), 41); 2878 } 2879 } 2880 } 2881 } 2882 2883 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2884 { 2885 ppc_avr_t result; 2886 int i; 2887 2888 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 2889 int indexA = c->VsrB(i) >> 4; 2890 int indexB = c->VsrB(i) & 0xF; 2891 2892 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB); 2893 } 2894 *r = result; 2895 } 2896 2897 #undef VECTOR_FOR_INORDER_I 2898 2899 /*****************************************************************************/ 2900 /* SPE extension helpers */ 2901 /* Use a table to make this quicker */ 2902 static const uint8_t hbrev[16] = { 2903 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE, 2904 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF, 2905 }; 2906 2907 static inline uint8_t byte_reverse(uint8_t val) 2908 { 2909 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4); 2910 } 2911 2912 static inline uint32_t word_reverse(uint32_t val) 2913 { 2914 return 
byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) | 2915 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24); 2916 } 2917 2918 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */ 2919 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2) 2920 { 2921 uint32_t a, b, d, mask; 2922 2923 mask = UINT32_MAX >> (32 - MASKBITS); 2924 a = arg1 & mask; 2925 b = arg2 & mask; 2926 d = word_reverse(1 + word_reverse(a | ~b)); 2927 return (arg1 & ~mask) | (d & b); 2928 } 2929 2930 uint32_t helper_cntlsw32(uint32_t val) 2931 { 2932 if (val & 0x80000000) { 2933 return clz32(~val); 2934 } else { 2935 return clz32(val); 2936 } 2937 } 2938 2939 uint32_t helper_cntlzw32(uint32_t val) 2940 { 2941 return clz32(val); 2942 } 2943 2944 /* 440 specific */ 2945 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 2946 target_ulong low, uint32_t update_Rc) 2947 { 2948 target_ulong mask; 2949 int i; 2950 2951 i = 1; 2952 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 2953 if ((high & mask) == 0) { 2954 if (update_Rc) { 2955 env->crf[0] = 0x4; 2956 } 2957 goto done; 2958 } 2959 i++; 2960 } 2961 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 2962 if ((low & mask) == 0) { 2963 if (update_Rc) { 2964 env->crf[0] = 0x8; 2965 } 2966 goto done; 2967 } 2968 i++; 2969 } 2970 i = 8; 2971 if (update_Rc) { 2972 env->crf[0] = 0x2; 2973 } 2974 done: 2975 env->xer = (env->xer & ~0x7F) | i; 2976 if (update_Rc) { 2977 env->crf[0] |= xer_so; 2978 } 2979 return i; 2980 } 2981
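/*
 * Worked trace of helper_dlmzb() above (an illustrative example, not an
 * additional statement of the 440 architecture): with high = 0x11223344
 * and low = 0x55006677, all four bytes of 'high' are non-zero, so the
 * first loop falls through with i == 5; the second byte of 'low' is zero,
 * so the second loop exits with i == 6.  The low seven bits of XER then
 * receive 6, and when update_Rc is set CR0 is set to 0x8 ORed with SO.
 */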