/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "exec/exec-all.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/* subtract 1 from each byte, and with inverse, check if MSB is set at each
 * byte.
 * i.e.
((0x00 - 0x01) & ~(0x00)) & 0x80 151 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 152 */ 153 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 154 155 /* When you XOR the pattern and there is a match, that byte will be zero */ 156 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 157 158 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 159 { 160 return hasvalue(rb, ra) ? CRF_GT : 0; 161 } 162 163 #undef pattern 164 #undef haszero 165 #undef hasvalue 166 167 /* Return invalid random number. 168 * 169 * FIXME: Add rng backend or other mechanism to get cryptographically suitable 170 * random number 171 */ 172 target_ulong helper_darn32(void) 173 { 174 return -1; 175 } 176 177 target_ulong helper_darn64(void) 178 { 179 return -1; 180 } 181 182 #endif 183 184 #if defined(TARGET_PPC64) 185 186 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 187 { 188 int i; 189 uint64_t ra = 0; 190 191 for (i = 0; i < 8; i++) { 192 int index = (rs >> (i*8)) & 0xFF; 193 if (index < 64) { 194 if (rb & (1ull << (63-index))) { 195 ra |= 1 << i; 196 } 197 } 198 } 199 return ra; 200 } 201 202 #endif 203 204 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 205 { 206 target_ulong mask = 0xff; 207 target_ulong ra = 0; 208 int i; 209 210 for (i = 0; i < sizeof(target_ulong); i++) { 211 if ((rs & mask) == (rb & mask)) { 212 ra |= mask; 213 } 214 mask <<= 8; 215 } 216 return ra; 217 } 218 219 /* shift right arithmetic helper */ 220 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 221 target_ulong shift) 222 { 223 int32_t ret; 224 225 if (likely(!(shift & 0x20))) { 226 if (likely((uint32_t)shift != 0)) { 227 shift &= 0x1f; 228 ret = (int32_t)value >> shift; 229 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 230 env->ca = 0; 231 } else { 232 env->ca = 1; 233 } 234 } else { 235 ret = (int32_t)value; 236 env->ca = 0; 237 } 238 } else { 239 ret = (int32_t)value >> 31; 240 env->ca = (ret != 0); 241 } 242 return (target_long)ret; 243 } 244 245 #if defined(TARGET_PPC64) 246 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 247 target_ulong shift) 248 { 249 int64_t ret; 250 251 if (likely(!(shift & 0x40))) { 252 if (likely((uint64_t)shift != 0)) { 253 shift &= 0x3f; 254 ret = (int64_t)value >> shift; 255 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 256 env->ca = 0; 257 } else { 258 env->ca = 1; 259 } 260 } else { 261 ret = (int64_t)value; 262 env->ca = 0; 263 } 264 } else { 265 ret = (int64_t)value >> 63; 266 env->ca = (ret != 0); 267 } 268 return ret; 269 } 270 #endif 271 272 #if defined(TARGET_PPC64) 273 target_ulong helper_popcntb(target_ulong val) 274 { 275 /* Note that we don't fold past bytes */ 276 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 277 0x5555555555555555ULL); 278 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 279 0x3333333333333333ULL); 280 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 281 0x0f0f0f0f0f0f0f0fULL); 282 return val; 283 } 284 285 target_ulong helper_popcntw(target_ulong val) 286 { 287 /* Note that we don't fold past words. 
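     * e.g. val = 0x00000003000000ffULL becomes 0x0000000200000008
     * (two set bits counted in the high word, eight in the low word);
     * the two per-word counts are never added together.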
*/ 288 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 289 0x5555555555555555ULL); 290 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 291 0x3333333333333333ULL); 292 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 293 0x0f0f0f0f0f0f0f0fULL); 294 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 295 0x00ff00ff00ff00ffULL); 296 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 297 0x0000ffff0000ffffULL); 298 return val; 299 } 300 #else 301 target_ulong helper_popcntb(target_ulong val) 302 { 303 /* Note that we don't fold past bytes */ 304 val = (val & 0x55555555) + ((val >> 1) & 0x55555555); 305 val = (val & 0x33333333) + ((val >> 2) & 0x33333333); 306 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); 307 return val; 308 } 309 #endif 310 311 /*****************************************************************************/ 312 /* PowerPC 601 specific instructions (POWER bridge) */ 313 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2) 314 { 315 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 316 317 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 318 (int32_t)arg2 == 0) { 319 env->spr[SPR_MQ] = 0; 320 return INT32_MIN; 321 } else { 322 env->spr[SPR_MQ] = tmp % arg2; 323 return tmp / (int32_t)arg2; 324 } 325 } 326 327 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1, 328 target_ulong arg2) 329 { 330 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 331 332 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 333 (int32_t)arg2 == 0) { 334 env->so = env->ov = 1; 335 env->spr[SPR_MQ] = 0; 336 return INT32_MIN; 337 } else { 338 env->spr[SPR_MQ] = tmp % arg2; 339 tmp /= (int32_t)arg2; 340 if ((int32_t)tmp != tmp) { 341 env->so = env->ov = 1; 342 } else { 343 env->ov = 0; 344 } 345 return tmp; 346 } 347 } 348 349 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1, 350 target_ulong arg2) 351 { 352 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 353 (int32_t)arg2 == 0) { 354 env->spr[SPR_MQ] = 0; 355 return INT32_MIN; 356 } else { 357 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 358 return (int32_t)arg1 / (int32_t)arg2; 359 } 360 } 361 362 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1, 363 target_ulong arg2) 364 { 365 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 366 (int32_t)arg2 == 0) { 367 env->so = env->ov = 1; 368 env->spr[SPR_MQ] = 0; 369 return INT32_MIN; 370 } else { 371 env->ov = 0; 372 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 373 return (int32_t)arg1 / (int32_t)arg2; 374 } 375 } 376 377 /*****************************************************************************/ 378 /* 602 specific instructions */ 379 /* mfrom is the most crazy instruction ever seen, imho ! */ 380 /* Real implementation uses a ROM table. 
Do the same */ 381 /* Extremely decomposed: 382 * -arg / 256 383 * return 256 * log10(10 + 1.0) + 0.5 384 */ 385 #if !defined(CONFIG_USER_ONLY) 386 target_ulong helper_602_mfrom(target_ulong arg) 387 { 388 if (likely(arg < 602)) { 389 #include "mfrom_table.c" 390 return mfrom_ROM_table[arg]; 391 } else { 392 return 0; 393 } 394 } 395 #endif 396 397 /*****************************************************************************/ 398 /* Altivec extension helpers */ 399 #if defined(HOST_WORDS_BIGENDIAN) 400 #define HI_IDX 0 401 #define LO_IDX 1 402 #define AVRB(i) u8[i] 403 #define AVRW(i) u32[i] 404 #else 405 #define HI_IDX 1 406 #define LO_IDX 0 407 #define AVRB(i) u8[15-(i)] 408 #define AVRW(i) u32[3-(i)] 409 #endif 410 411 #if defined(HOST_WORDS_BIGENDIAN) 412 #define VECTOR_FOR_INORDER_I(index, element) \ 413 for (index = 0; index < ARRAY_SIZE(r->element); index++) 414 #else 415 #define VECTOR_FOR_INORDER_I(index, element) \ 416 for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--) 417 #endif 418 419 /* Saturating arithmetic helpers. */ 420 #define SATCVT(from, to, from_type, to_type, min, max) \ 421 static inline to_type cvt##from##to(from_type x, int *sat) \ 422 { \ 423 to_type r; \ 424 \ 425 if (x < (from_type)min) { \ 426 r = min; \ 427 *sat = 1; \ 428 } else if (x > (from_type)max) { \ 429 r = max; \ 430 *sat = 1; \ 431 } else { \ 432 r = x; \ 433 } \ 434 return r; \ 435 } 436 #define SATCVTU(from, to, from_type, to_type, min, max) \ 437 static inline to_type cvt##from##to(from_type x, int *sat) \ 438 { \ 439 to_type r; \ 440 \ 441 if (x > (from_type)max) { \ 442 r = max; \ 443 *sat = 1; \ 444 } else { \ 445 r = x; \ 446 } \ 447 return r; \ 448 } 449 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX) 450 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX) 451 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX) 452 453 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX) 454 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX) 455 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX) 456 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX) 457 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX) 458 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX) 459 #undef SATCVT 460 #undef SATCVTU 461 462 void helper_lvsl(ppc_avr_t *r, target_ulong sh) 463 { 464 int i, j = (sh & 0xf); 465 466 VECTOR_FOR_INORDER_I(i, u8) { 467 r->u8[i] = j++; 468 } 469 } 470 471 void helper_lvsr(ppc_avr_t *r, target_ulong sh) 472 { 473 int i, j = 0x10 - (sh & 0xf); 474 475 VECTOR_FOR_INORDER_I(i, u8) { 476 r->u8[i] = j++; 477 } 478 } 479 480 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r) 481 { 482 #if defined(HOST_WORDS_BIGENDIAN) 483 env->vscr = r->u32[3]; 484 #else 485 env->vscr = r->u32[0]; 486 #endif 487 set_flush_to_zero(vscr_nj, &env->vec_status); 488 } 489 490 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 491 { 492 int i; 493 494 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 495 r->u32[i] = ~a->u32[i] < b->u32[i]; 496 } 497 } 498 499 /* vprtybw */ 500 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b) 501 { 502 int i; 503 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 504 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16); 505 res ^= res >> 8; 506 r->u32[i] = res & 1; 507 } 508 } 509 510 /* vprtybd */ 511 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b) 512 { 513 int i; 514 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 515 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32); 516 res ^= res >> 16; 517 res ^= res >> 8; 518 r->u64[i] = res & 1; 519 } 520 } 521 522 /* vprtybq */ 523 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t 
*b) 524 { 525 uint64_t res = b->u64[0] ^ b->u64[1]; 526 res ^= res >> 32; 527 res ^= res >> 16; 528 res ^= res >> 8; 529 r->u64[LO_IDX] = res & 1; 530 r->u64[HI_IDX] = 0; 531 } 532 533 #define VARITH_DO(name, op, element) \ 534 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 535 { \ 536 int i; \ 537 \ 538 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 539 r->element[i] = a->element[i] op b->element[i]; \ 540 } \ 541 } 542 #define VARITH(suffix, element) \ 543 VARITH_DO(add##suffix, +, element) \ 544 VARITH_DO(sub##suffix, -, element) 545 VARITH(ubm, u8) 546 VARITH(uhm, u16) 547 VARITH(uwm, u32) 548 VARITH(udm, u64) 549 VARITH_DO(muluwm, *, u32) 550 #undef VARITH_DO 551 #undef VARITH 552 553 #define VARITHFP(suffix, func) \ 554 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 555 ppc_avr_t *b) \ 556 { \ 557 int i; \ 558 \ 559 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ 560 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \ 561 } \ 562 } 563 VARITHFP(addfp, float32_add) 564 VARITHFP(subfp, float32_sub) 565 VARITHFP(minfp, float32_min) 566 VARITHFP(maxfp, float32_max) 567 #undef VARITHFP 568 569 #define VARITHFPFMA(suffix, type) \ 570 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 571 ppc_avr_t *b, ppc_avr_t *c) \ 572 { \ 573 int i; \ 574 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ 575 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \ 576 type, &env->vec_status); \ 577 } \ 578 } 579 VARITHFPFMA(maddfp, 0); 580 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 581 #undef VARITHFPFMA 582 583 #define VARITHSAT_CASE(type, op, cvt, element) \ 584 { \ 585 type result = (type)a->element[i] op (type)b->element[i]; \ 586 r->element[i] = cvt(result, &sat); \ 587 } 588 589 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 590 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 591 ppc_avr_t *b) \ 592 { \ 593 int sat = 0; \ 594 int i; \ 595 \ 596 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 597 switch (sizeof(r->element[0])) { \ 598 case 1: \ 599 VARITHSAT_CASE(optype, op, cvt, element); \ 600 break; \ 601 case 2: \ 602 VARITHSAT_CASE(optype, op, cvt, element); \ 603 break; \ 604 case 4: \ 605 VARITHSAT_CASE(optype, op, cvt, element); \ 606 break; \ 607 } \ 608 } \ 609 if (sat) { \ 610 env->vscr |= (1 << VSCR_SAT); \ 611 } \ 612 } 613 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 614 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 615 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 616 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 617 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 618 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 619 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 620 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 621 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 622 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 623 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 624 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 625 #undef VARITHSAT_CASE 626 #undef VARITHSAT_DO 627 #undef VARITHSAT_SIGNED 628 #undef VARITHSAT_UNSIGNED 629 630 #define VAVG_DO(name, element, etype) \ 631 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 632 { \ 633 int i; \ 634 \ 635 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 636 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 637 r->element[i] = x >> 1; \ 638 } \ 639 } 640 641 #define VAVG(type, signed_element, signed_type, unsigned_element, \ 642 unsigned_type) \ 643 
VAVG_DO(avgs##type, signed_element, signed_type) \ 644 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 645 VAVG(b, s8, int16_t, u8, uint16_t) 646 VAVG(h, s16, int32_t, u16, uint32_t) 647 VAVG(w, s32, int64_t, u32, uint64_t) 648 #undef VAVG_DO 649 #undef VAVG 650 651 #define VABSDU_DO(name, element) \ 652 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 653 { \ 654 int i; \ 655 \ 656 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 657 r->element[i] = (a->element[i] > b->element[i]) ? \ 658 (a->element[i] - b->element[i]) : \ 659 (b->element[i] - a->element[i]); \ 660 } \ 661 } 662 663 /* VABSDU - Vector absolute difference unsigned 664 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 665 * element - element type to access from vector 666 */ 667 #define VABSDU(type, element) \ 668 VABSDU_DO(absdu##type, element) 669 VABSDU(b, u8) 670 VABSDU(h, u16) 671 VABSDU(w, u32) 672 #undef VABSDU_DO 673 #undef VABSDU 674 675 #define VCF(suffix, cvt, element) \ 676 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 677 ppc_avr_t *b, uint32_t uim) \ 678 { \ 679 int i; \ 680 \ 681 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ 682 float32 t = cvt(b->element[i], &env->vec_status); \ 683 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \ 684 } \ 685 } 686 VCF(ux, uint32_to_float32, u32) 687 VCF(sx, int32_to_float32, s32) 688 #undef VCF 689 690 #define VCMP_DO(suffix, compare, element, record) \ 691 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 692 ppc_avr_t *a, ppc_avr_t *b) \ 693 { \ 694 uint64_t ones = (uint64_t)-1; \ 695 uint64_t all = ones; \ 696 uint64_t none = 0; \ 697 int i; \ 698 \ 699 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 700 uint64_t result = (a->element[i] compare b->element[i] ? \ 701 ones : 0x0); \ 702 switch (sizeof(a->element[0])) { \ 703 case 8: \ 704 r->u64[i] = result; \ 705 break; \ 706 case 4: \ 707 r->u32[i] = result; \ 708 break; \ 709 case 2: \ 710 r->u16[i] = result; \ 711 break; \ 712 case 1: \ 713 r->u8[i] = result; \ 714 break; \ 715 } \ 716 all &= result; \ 717 none |= result; \ 718 } \ 719 if (record) { \ 720 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 721 } \ 722 } 723 #define VCMP(suffix, compare, element) \ 724 VCMP_DO(suffix, compare, element, 0) \ 725 VCMP_DO(suffix##_dot, compare, element, 1) 726 VCMP(equb, ==, u8) 727 VCMP(equh, ==, u16) 728 VCMP(equw, ==, u32) 729 VCMP(equd, ==, u64) 730 VCMP(gtub, >, u8) 731 VCMP(gtuh, >, u16) 732 VCMP(gtuw, >, u32) 733 VCMP(gtud, >, u64) 734 VCMP(gtsb, >, s8) 735 VCMP(gtsh, >, s16) 736 VCMP(gtsw, >, s32) 737 VCMP(gtsd, >, s64) 738 #undef VCMP_DO 739 #undef VCMP 740 741 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \ 742 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \ 743 ppc_avr_t *a, ppc_avr_t *b) \ 744 { \ 745 etype ones = (etype)-1; \ 746 etype all = ones; \ 747 etype result, none = 0; \ 748 int i; \ 749 \ 750 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 751 if (cmpzero) { \ 752 result = ((a->element[i] == 0) \ 753 || (b->element[i] == 0) \ 754 || (a->element[i] != b->element[i]) ? \ 755 ones : 0x0); \ 756 } else { \ 757 result = (a->element[i] != b->element[i]) ? 
ones : 0x0; \ 758 } \ 759 r->element[i] = result; \ 760 all &= result; \ 761 none |= result; \ 762 } \ 763 if (record) { \ 764 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 765 } \ 766 } 767 768 /* VCMPNEZ - Vector compare not equal to zero 769 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word) 770 * element - element type to access from vector 771 */ 772 #define VCMPNE(suffix, element, etype, cmpzero) \ 773 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \ 774 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1) 775 VCMPNE(zb, u8, uint8_t, 1) 776 VCMPNE(zh, u16, uint16_t, 1) 777 VCMPNE(zw, u32, uint32_t, 1) 778 VCMPNE(b, u8, uint8_t, 0) 779 VCMPNE(h, u16, uint16_t, 0) 780 VCMPNE(w, u32, uint32_t, 0) 781 #undef VCMPNE_DO 782 #undef VCMPNE 783 784 #define VCMPFP_DO(suffix, compare, order, record) \ 785 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 786 ppc_avr_t *a, ppc_avr_t *b) \ 787 { \ 788 uint32_t ones = (uint32_t)-1; \ 789 uint32_t all = ones; \ 790 uint32_t none = 0; \ 791 int i; \ 792 \ 793 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ 794 uint32_t result; \ 795 int rel = float32_compare_quiet(a->f[i], b->f[i], \ 796 &env->vec_status); \ 797 if (rel == float_relation_unordered) { \ 798 result = 0; \ 799 } else if (rel compare order) { \ 800 result = ones; \ 801 } else { \ 802 result = 0; \ 803 } \ 804 r->u32[i] = result; \ 805 all &= result; \ 806 none |= result; \ 807 } \ 808 if (record) { \ 809 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 810 } \ 811 } 812 #define VCMPFP(suffix, compare, order) \ 813 VCMPFP_DO(suffix, compare, order, 0) \ 814 VCMPFP_DO(suffix##_dot, compare, order, 1) 815 VCMPFP(eqfp, ==, float_relation_equal) 816 VCMPFP(gefp, !=, float_relation_less) 817 VCMPFP(gtfp, ==, float_relation_greater) 818 #undef VCMPFP_DO 819 #undef VCMPFP 820 821 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 822 ppc_avr_t *a, ppc_avr_t *b, int record) 823 { 824 int i; 825 int all_in = 0; 826 827 for (i = 0; i < ARRAY_SIZE(r->f); i++) { 828 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status); 829 if (le_rel == float_relation_unordered) { 830 r->u32[i] = 0xc0000000; 831 all_in = 1; 832 } else { 833 float32 bneg = float32_chs(b->f[i]); 834 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status); 835 int le = le_rel != float_relation_greater; 836 int ge = ge_rel != float_relation_less; 837 838 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 839 all_in |= (!le | !ge); 840 } 841 } 842 if (record) { 843 env->crf[6] = (all_in == 0) << 1; 844 } 845 } 846 847 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 848 { 849 vcmpbfp_internal(env, r, a, b, 0); 850 } 851 852 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 853 ppc_avr_t *b) 854 { 855 vcmpbfp_internal(env, r, a, b, 1); 856 } 857 858 #define VCT(suffix, satcvt, element) \ 859 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 860 ppc_avr_t *b, uint32_t uim) \ 861 { \ 862 int i; \ 863 int sat = 0; \ 864 float_status s = env->vec_status; \ 865 \ 866 set_float_rounding_mode(float_round_to_zero, &s); \ 867 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ 868 if (float32_is_any_nan(b->f[i])) { \ 869 r->element[i] = 0; \ 870 } else { \ 871 float64 t = float32_to_float64(b->f[i], &s); \ 872 int64_t j; \ 873 \ 874 t = float64_scalbn(t, uim, &s); \ 875 j = float64_to_int64(t, &s); \ 876 r->element[i] = satcvt(j, &sat); \ 877 } \ 878 } \ 879 if (sat) { \ 880 env->vscr |= (1 << VSCR_SAT); \ 881 } \ 882 } 
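/*
 * Illustrative scalar sketch of what a single lane of the VCT() helpers
 * computes (here for the unsigned case used by vctuxs): scale the float
 * by 2^uim, convert towards zero to a 64-bit integer, then saturate to
 * the destination width.  The function name below is made up purely for
 * illustration; the real per-lane work is generated by the VCT macro.
 */
static inline uint32_t vct_uxs_lane_example(CPUPPCState *env, float32 f,
                                            uint32_t uim, int *sat)
{
    float_status s = env->vec_status;

    set_float_rounding_mode(float_round_to_zero, &s);
    if (float32_is_any_nan(f)) {
        return 0;
    } else {
        float64 t = float64_scalbn(float32_to_float64(f, &s), uim, &s);

        return cvtsduw(float64_to_int64(t, &s), sat);
    }
}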
883 VCT(uxs, cvtsduw, u32) 884 VCT(sxs, cvtsdsw, s32) 885 #undef VCT 886 887 target_ulong helper_vclzlsbb(ppc_avr_t *r) 888 { 889 target_ulong count = 0; 890 int i; 891 VECTOR_FOR_INORDER_I(i, u8) { 892 if (r->u8[i] & 0x01) { 893 break; 894 } 895 count++; 896 } 897 return count; 898 } 899 900 target_ulong helper_vctzlsbb(ppc_avr_t *r) 901 { 902 target_ulong count = 0; 903 int i; 904 #if defined(HOST_WORDS_BIGENDIAN) 905 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 906 #else 907 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 908 #endif 909 if (r->u8[i] & 0x01) { 910 break; 911 } 912 count++; 913 } 914 return count; 915 } 916 917 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 918 ppc_avr_t *b, ppc_avr_t *c) 919 { 920 int sat = 0; 921 int i; 922 923 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 924 int32_t prod = a->s16[i] * b->s16[i]; 925 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 926 927 r->s16[i] = cvtswsh(t, &sat); 928 } 929 930 if (sat) { 931 env->vscr |= (1 << VSCR_SAT); 932 } 933 } 934 935 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 936 ppc_avr_t *b, ppc_avr_t *c) 937 { 938 int sat = 0; 939 int i; 940 941 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 942 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 943 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 944 r->s16[i] = cvtswsh(t, &sat); 945 } 946 947 if (sat) { 948 env->vscr |= (1 << VSCR_SAT); 949 } 950 } 951 952 #define VMINMAX_DO(name, compare, element) \ 953 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 954 { \ 955 int i; \ 956 \ 957 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 958 if (a->element[i] compare b->element[i]) { \ 959 r->element[i] = b->element[i]; \ 960 } else { \ 961 r->element[i] = a->element[i]; \ 962 } \ 963 } \ 964 } 965 #define VMINMAX(suffix, element) \ 966 VMINMAX_DO(min##suffix, >, element) \ 967 VMINMAX_DO(max##suffix, <, element) 968 VMINMAX(sb, s8) 969 VMINMAX(sh, s16) 970 VMINMAX(sw, s32) 971 VMINMAX(sd, s64) 972 VMINMAX(ub, u8) 973 VMINMAX(uh, u16) 974 VMINMAX(uw, u32) 975 VMINMAX(ud, u64) 976 #undef VMINMAX_DO 977 #undef VMINMAX 978 979 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 980 { 981 int i; 982 983 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 984 int32_t prod = a->s16[i] * b->s16[i]; 985 r->s16[i] = (int16_t) (prod + c->s16[i]); 986 } 987 } 988 989 #define VMRG_DO(name, element, highp) \ 990 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 991 { \ 992 ppc_avr_t result; \ 993 int i; \ 994 size_t n_elems = ARRAY_SIZE(r->element); \ 995 \ 996 for (i = 0; i < n_elems / 2; i++) { \ 997 if (highp) { \ 998 result.element[i*2+HI_IDX] = a->element[i]; \ 999 result.element[i*2+LO_IDX] = b->element[i]; \ 1000 } else { \ 1001 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \ 1002 b->element[n_elems - i - 1]; \ 1003 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \ 1004 a->element[n_elems - i - 1]; \ 1005 } \ 1006 } \ 1007 *r = result; \ 1008 } 1009 #if defined(HOST_WORDS_BIGENDIAN) 1010 #define MRGHI 0 1011 #define MRGLO 1 1012 #else 1013 #define MRGHI 1 1014 #define MRGLO 0 1015 #endif 1016 #define VMRG(suffix, element) \ 1017 VMRG_DO(mrgl##suffix, element, MRGHI) \ 1018 VMRG_DO(mrgh##suffix, element, MRGLO) 1019 VMRG(b, u8) 1020 VMRG(h, u16) 1021 VMRG(w, u32) 1022 #undef VMRG_DO 1023 #undef VMRG 1024 #undef MRGHI 1025 #undef MRGLO 1026 1027 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1028 ppc_avr_t *b, ppc_avr_t *c) 1029 { 1030 int32_t prod[16]; 1031 int i; 1032 1033 for (i 
= 0; i < ARRAY_SIZE(r->s8); i++) { 1034 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 1035 } 1036 1037 VECTOR_FOR_INORDER_I(i, s32) { 1038 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 1039 prod[4 * i + 2] + prod[4 * i + 3]; 1040 } 1041 } 1042 1043 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1044 ppc_avr_t *b, ppc_avr_t *c) 1045 { 1046 int32_t prod[8]; 1047 int i; 1048 1049 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1050 prod[i] = a->s16[i] * b->s16[i]; 1051 } 1052 1053 VECTOR_FOR_INORDER_I(i, s32) { 1054 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1055 } 1056 } 1057 1058 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1059 ppc_avr_t *b, ppc_avr_t *c) 1060 { 1061 int32_t prod[8]; 1062 int i; 1063 int sat = 0; 1064 1065 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1066 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1067 } 1068 1069 VECTOR_FOR_INORDER_I(i, s32) { 1070 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1071 1072 r->u32[i] = cvtsdsw(t, &sat); 1073 } 1074 1075 if (sat) { 1076 env->vscr |= (1 << VSCR_SAT); 1077 } 1078 } 1079 1080 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1081 ppc_avr_t *b, ppc_avr_t *c) 1082 { 1083 uint16_t prod[16]; 1084 int i; 1085 1086 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1087 prod[i] = a->u8[i] * b->u8[i]; 1088 } 1089 1090 VECTOR_FOR_INORDER_I(i, u32) { 1091 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1092 prod[4 * i + 2] + prod[4 * i + 3]; 1093 } 1094 } 1095 1096 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1097 ppc_avr_t *b, ppc_avr_t *c) 1098 { 1099 uint32_t prod[8]; 1100 int i; 1101 1102 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1103 prod[i] = a->u16[i] * b->u16[i]; 1104 } 1105 1106 VECTOR_FOR_INORDER_I(i, u32) { 1107 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1108 } 1109 } 1110 1111 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1112 ppc_avr_t *b, ppc_avr_t *c) 1113 { 1114 uint32_t prod[8]; 1115 int i; 1116 int sat = 0; 1117 1118 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1119 prod[i] = a->u16[i] * b->u16[i]; 1120 } 1121 1122 VECTOR_FOR_INORDER_I(i, s32) { 1123 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1124 1125 r->u32[i] = cvtuduw(t, &sat); 1126 } 1127 1128 if (sat) { 1129 env->vscr |= (1 << VSCR_SAT); 1130 } 1131 } 1132 1133 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \ 1134 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1135 { \ 1136 int i; \ 1137 \ 1138 VECTOR_FOR_INORDER_I(i, prod_element) { \ 1139 if (evenp) { \ 1140 r->prod_element[i] = \ 1141 (cast)a->mul_element[i * 2 + HI_IDX] * \ 1142 (cast)b->mul_element[i * 2 + HI_IDX]; \ 1143 } else { \ 1144 r->prod_element[i] = \ 1145 (cast)a->mul_element[i * 2 + LO_IDX] * \ 1146 (cast)b->mul_element[i * 2 + LO_IDX]; \ 1147 } \ 1148 } \ 1149 } 1150 #define VMUL(suffix, mul_element, prod_element, cast) \ 1151 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \ 1152 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0) 1153 VMUL(sb, s8, s16, int16_t) 1154 VMUL(sh, s16, s32, int32_t) 1155 VMUL(sw, s32, s64, int64_t) 1156 VMUL(ub, u8, u16, uint16_t) 1157 VMUL(uh, u16, u32, uint32_t) 1158 VMUL(uw, u32, u64, uint64_t) 1159 #undef VMUL_DO 1160 #undef VMUL 1161 1162 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1163 ppc_avr_t *c) 1164 { 1165 ppc_avr_t result; 1166 int i; 1167 1168 VECTOR_FOR_INORDER_I(i, u8) { 1169 int s = c->u8[i] & 0x1f; 1170 #if 
defined(HOST_WORDS_BIGENDIAN) 1171 int index = s & 0xf; 1172 #else 1173 int index = 15 - (s & 0xf); 1174 #endif 1175 1176 if (s & 0x10) { 1177 result.u8[i] = b->u8[index]; 1178 } else { 1179 result.u8[i] = a->u8[index]; 1180 } 1181 } 1182 *r = result; 1183 } 1184 1185 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1186 ppc_avr_t *c) 1187 { 1188 ppc_avr_t result; 1189 int i; 1190 1191 VECTOR_FOR_INORDER_I(i, u8) { 1192 int s = c->u8[i] & 0x1f; 1193 #if defined(HOST_WORDS_BIGENDIAN) 1194 int index = 15 - (s & 0xf); 1195 #else 1196 int index = s & 0xf; 1197 #endif 1198 1199 if (s & 0x10) { 1200 result.u8[i] = a->u8[index]; 1201 } else { 1202 result.u8[i] = b->u8[index]; 1203 } 1204 } 1205 *r = result; 1206 } 1207 1208 #if defined(HOST_WORDS_BIGENDIAN) 1209 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1210 #define VBPERMD_INDEX(i) (i) 1211 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1212 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) 1213 #else 1214 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)]) 1215 #define VBPERMD_INDEX(i) (1 - i) 1216 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1217 #define EXTRACT_BIT(avr, i, index) \ 1218 (extract64((avr)->u64[1 - i], 63 - index, 1)) 1219 #endif 1220 1221 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1222 { 1223 int i, j; 1224 ppc_avr_t result = { .u64 = { 0, 0 } }; 1225 VECTOR_FOR_INORDER_I(i, u64) { 1226 for (j = 0; j < 8; j++) { 1227 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1228 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1229 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1230 } 1231 } 1232 } 1233 *r = result; 1234 } 1235 1236 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1237 { 1238 int i; 1239 uint64_t perm = 0; 1240 1241 VECTOR_FOR_INORDER_I(i, u8) { 1242 int index = VBPERMQ_INDEX(b, i); 1243 1244 if (index < 128) { 1245 uint64_t mask = (1ull << (63-(index & 0x3F))); 1246 if (a->u64[VBPERMQ_DW(index)] & mask) { 1247 perm |= (0x8000 >> i); 1248 } 1249 } 1250 } 1251 1252 r->u64[HI_IDX] = perm; 1253 r->u64[LO_IDX] = 0; 1254 } 1255 1256 #undef VBPERMQ_INDEX 1257 #undef VBPERMQ_DW 1258 1259 static const uint64_t VGBBD_MASKS[256] = { 1260 0x0000000000000000ull, /* 00 */ 1261 0x0000000000000080ull, /* 01 */ 1262 0x0000000000008000ull, /* 02 */ 1263 0x0000000000008080ull, /* 03 */ 1264 0x0000000000800000ull, /* 04 */ 1265 0x0000000000800080ull, /* 05 */ 1266 0x0000000000808000ull, /* 06 */ 1267 0x0000000000808080ull, /* 07 */ 1268 0x0000000080000000ull, /* 08 */ 1269 0x0000000080000080ull, /* 09 */ 1270 0x0000000080008000ull, /* 0A */ 1271 0x0000000080008080ull, /* 0B */ 1272 0x0000000080800000ull, /* 0C */ 1273 0x0000000080800080ull, /* 0D */ 1274 0x0000000080808000ull, /* 0E */ 1275 0x0000000080808080ull, /* 0F */ 1276 0x0000008000000000ull, /* 10 */ 1277 0x0000008000000080ull, /* 11 */ 1278 0x0000008000008000ull, /* 12 */ 1279 0x0000008000008080ull, /* 13 */ 1280 0x0000008000800000ull, /* 14 */ 1281 0x0000008000800080ull, /* 15 */ 1282 0x0000008000808000ull, /* 16 */ 1283 0x0000008000808080ull, /* 17 */ 1284 0x0000008080000000ull, /* 18 */ 1285 0x0000008080000080ull, /* 19 */ 1286 0x0000008080008000ull, /* 1A */ 1287 0x0000008080008080ull, /* 1B */ 1288 0x0000008080800000ull, /* 1C */ 1289 0x0000008080800080ull, /* 1D */ 1290 0x0000008080808000ull, /* 1E */ 1291 0x0000008080808080ull, /* 1F */ 1292 0x0000800000000000ull, /* 20 */ 1293 0x0000800000000080ull, /* 21 */ 1294 0x0000800000008000ull, /* 22 */ 1295 0x0000800000008080ull, /* 23 */ 1296 
0x0000800000800000ull, /* 24 */ 1297 0x0000800000800080ull, /* 25 */ 1298 0x0000800000808000ull, /* 26 */ 1299 0x0000800000808080ull, /* 27 */ 1300 0x0000800080000000ull, /* 28 */ 1301 0x0000800080000080ull, /* 29 */ 1302 0x0000800080008000ull, /* 2A */ 1303 0x0000800080008080ull, /* 2B */ 1304 0x0000800080800000ull, /* 2C */ 1305 0x0000800080800080ull, /* 2D */ 1306 0x0000800080808000ull, /* 2E */ 1307 0x0000800080808080ull, /* 2F */ 1308 0x0000808000000000ull, /* 30 */ 1309 0x0000808000000080ull, /* 31 */ 1310 0x0000808000008000ull, /* 32 */ 1311 0x0000808000008080ull, /* 33 */ 1312 0x0000808000800000ull, /* 34 */ 1313 0x0000808000800080ull, /* 35 */ 1314 0x0000808000808000ull, /* 36 */ 1315 0x0000808000808080ull, /* 37 */ 1316 0x0000808080000000ull, /* 38 */ 1317 0x0000808080000080ull, /* 39 */ 1318 0x0000808080008000ull, /* 3A */ 1319 0x0000808080008080ull, /* 3B */ 1320 0x0000808080800000ull, /* 3C */ 1321 0x0000808080800080ull, /* 3D */ 1322 0x0000808080808000ull, /* 3E */ 1323 0x0000808080808080ull, /* 3F */ 1324 0x0080000000000000ull, /* 40 */ 1325 0x0080000000000080ull, /* 41 */ 1326 0x0080000000008000ull, /* 42 */ 1327 0x0080000000008080ull, /* 43 */ 1328 0x0080000000800000ull, /* 44 */ 1329 0x0080000000800080ull, /* 45 */ 1330 0x0080000000808000ull, /* 46 */ 1331 0x0080000000808080ull, /* 47 */ 1332 0x0080000080000000ull, /* 48 */ 1333 0x0080000080000080ull, /* 49 */ 1334 0x0080000080008000ull, /* 4A */ 1335 0x0080000080008080ull, /* 4B */ 1336 0x0080000080800000ull, /* 4C */ 1337 0x0080000080800080ull, /* 4D */ 1338 0x0080000080808000ull, /* 4E */ 1339 0x0080000080808080ull, /* 4F */ 1340 0x0080008000000000ull, /* 50 */ 1341 0x0080008000000080ull, /* 51 */ 1342 0x0080008000008000ull, /* 52 */ 1343 0x0080008000008080ull, /* 53 */ 1344 0x0080008000800000ull, /* 54 */ 1345 0x0080008000800080ull, /* 55 */ 1346 0x0080008000808000ull, /* 56 */ 1347 0x0080008000808080ull, /* 57 */ 1348 0x0080008080000000ull, /* 58 */ 1349 0x0080008080000080ull, /* 59 */ 1350 0x0080008080008000ull, /* 5A */ 1351 0x0080008080008080ull, /* 5B */ 1352 0x0080008080800000ull, /* 5C */ 1353 0x0080008080800080ull, /* 5D */ 1354 0x0080008080808000ull, /* 5E */ 1355 0x0080008080808080ull, /* 5F */ 1356 0x0080800000000000ull, /* 60 */ 1357 0x0080800000000080ull, /* 61 */ 1358 0x0080800000008000ull, /* 62 */ 1359 0x0080800000008080ull, /* 63 */ 1360 0x0080800000800000ull, /* 64 */ 1361 0x0080800000800080ull, /* 65 */ 1362 0x0080800000808000ull, /* 66 */ 1363 0x0080800000808080ull, /* 67 */ 1364 0x0080800080000000ull, /* 68 */ 1365 0x0080800080000080ull, /* 69 */ 1366 0x0080800080008000ull, /* 6A */ 1367 0x0080800080008080ull, /* 6B */ 1368 0x0080800080800000ull, /* 6C */ 1369 0x0080800080800080ull, /* 6D */ 1370 0x0080800080808000ull, /* 6E */ 1371 0x0080800080808080ull, /* 6F */ 1372 0x0080808000000000ull, /* 70 */ 1373 0x0080808000000080ull, /* 71 */ 1374 0x0080808000008000ull, /* 72 */ 1375 0x0080808000008080ull, /* 73 */ 1376 0x0080808000800000ull, /* 74 */ 1377 0x0080808000800080ull, /* 75 */ 1378 0x0080808000808000ull, /* 76 */ 1379 0x0080808000808080ull, /* 77 */ 1380 0x0080808080000000ull, /* 78 */ 1381 0x0080808080000080ull, /* 79 */ 1382 0x0080808080008000ull, /* 7A */ 1383 0x0080808080008080ull, /* 7B */ 1384 0x0080808080800000ull, /* 7C */ 1385 0x0080808080800080ull, /* 7D */ 1386 0x0080808080808000ull, /* 7E */ 1387 0x0080808080808080ull, /* 7F */ 1388 0x8000000000000000ull, /* 80 */ 1389 0x8000000000000080ull, /* 81 */ 1390 0x8000000000008000ull, /* 82 */ 1391 0x8000000000008080ull, /* 83 */ 1392 
0x8000000000800000ull, /* 84 */ 1393 0x8000000000800080ull, /* 85 */ 1394 0x8000000000808000ull, /* 86 */ 1395 0x8000000000808080ull, /* 87 */ 1396 0x8000000080000000ull, /* 88 */ 1397 0x8000000080000080ull, /* 89 */ 1398 0x8000000080008000ull, /* 8A */ 1399 0x8000000080008080ull, /* 8B */ 1400 0x8000000080800000ull, /* 8C */ 1401 0x8000000080800080ull, /* 8D */ 1402 0x8000000080808000ull, /* 8E */ 1403 0x8000000080808080ull, /* 8F */ 1404 0x8000008000000000ull, /* 90 */ 1405 0x8000008000000080ull, /* 91 */ 1406 0x8000008000008000ull, /* 92 */ 1407 0x8000008000008080ull, /* 93 */ 1408 0x8000008000800000ull, /* 94 */ 1409 0x8000008000800080ull, /* 95 */ 1410 0x8000008000808000ull, /* 96 */ 1411 0x8000008000808080ull, /* 97 */ 1412 0x8000008080000000ull, /* 98 */ 1413 0x8000008080000080ull, /* 99 */ 1414 0x8000008080008000ull, /* 9A */ 1415 0x8000008080008080ull, /* 9B */ 1416 0x8000008080800000ull, /* 9C */ 1417 0x8000008080800080ull, /* 9D */ 1418 0x8000008080808000ull, /* 9E */ 1419 0x8000008080808080ull, /* 9F */ 1420 0x8000800000000000ull, /* A0 */ 1421 0x8000800000000080ull, /* A1 */ 1422 0x8000800000008000ull, /* A2 */ 1423 0x8000800000008080ull, /* A3 */ 1424 0x8000800000800000ull, /* A4 */ 1425 0x8000800000800080ull, /* A5 */ 1426 0x8000800000808000ull, /* A6 */ 1427 0x8000800000808080ull, /* A7 */ 1428 0x8000800080000000ull, /* A8 */ 1429 0x8000800080000080ull, /* A9 */ 1430 0x8000800080008000ull, /* AA */ 1431 0x8000800080008080ull, /* AB */ 1432 0x8000800080800000ull, /* AC */ 1433 0x8000800080800080ull, /* AD */ 1434 0x8000800080808000ull, /* AE */ 1435 0x8000800080808080ull, /* AF */ 1436 0x8000808000000000ull, /* B0 */ 1437 0x8000808000000080ull, /* B1 */ 1438 0x8000808000008000ull, /* B2 */ 1439 0x8000808000008080ull, /* B3 */ 1440 0x8000808000800000ull, /* B4 */ 1441 0x8000808000800080ull, /* B5 */ 1442 0x8000808000808000ull, /* B6 */ 1443 0x8000808000808080ull, /* B7 */ 1444 0x8000808080000000ull, /* B8 */ 1445 0x8000808080000080ull, /* B9 */ 1446 0x8000808080008000ull, /* BA */ 1447 0x8000808080008080ull, /* BB */ 1448 0x8000808080800000ull, /* BC */ 1449 0x8000808080800080ull, /* BD */ 1450 0x8000808080808000ull, /* BE */ 1451 0x8000808080808080ull, /* BF */ 1452 0x8080000000000000ull, /* C0 */ 1453 0x8080000000000080ull, /* C1 */ 1454 0x8080000000008000ull, /* C2 */ 1455 0x8080000000008080ull, /* C3 */ 1456 0x8080000000800000ull, /* C4 */ 1457 0x8080000000800080ull, /* C5 */ 1458 0x8080000000808000ull, /* C6 */ 1459 0x8080000000808080ull, /* C7 */ 1460 0x8080000080000000ull, /* C8 */ 1461 0x8080000080000080ull, /* C9 */ 1462 0x8080000080008000ull, /* CA */ 1463 0x8080000080008080ull, /* CB */ 1464 0x8080000080800000ull, /* CC */ 1465 0x8080000080800080ull, /* CD */ 1466 0x8080000080808000ull, /* CE */ 1467 0x8080000080808080ull, /* CF */ 1468 0x8080008000000000ull, /* D0 */ 1469 0x8080008000000080ull, /* D1 */ 1470 0x8080008000008000ull, /* D2 */ 1471 0x8080008000008080ull, /* D3 */ 1472 0x8080008000800000ull, /* D4 */ 1473 0x8080008000800080ull, /* D5 */ 1474 0x8080008000808000ull, /* D6 */ 1475 0x8080008000808080ull, /* D7 */ 1476 0x8080008080000000ull, /* D8 */ 1477 0x8080008080000080ull, /* D9 */ 1478 0x8080008080008000ull, /* DA */ 1479 0x8080008080008080ull, /* DB */ 1480 0x8080008080800000ull, /* DC */ 1481 0x8080008080800080ull, /* DD */ 1482 0x8080008080808000ull, /* DE */ 1483 0x8080008080808080ull, /* DF */ 1484 0x8080800000000000ull, /* E0 */ 1485 0x8080800000000080ull, /* E1 */ 1486 0x8080800000008000ull, /* E2 */ 1487 0x8080800000008080ull, /* E3 */ 1488 
0x8080800000800000ull, /* E4 */ 1489 0x8080800000800080ull, /* E5 */ 1490 0x8080800000808000ull, /* E6 */ 1491 0x8080800000808080ull, /* E7 */ 1492 0x8080800080000000ull, /* E8 */ 1493 0x8080800080000080ull, /* E9 */ 1494 0x8080800080008000ull, /* EA */ 1495 0x8080800080008080ull, /* EB */ 1496 0x8080800080800000ull, /* EC */ 1497 0x8080800080800080ull, /* ED */ 1498 0x8080800080808000ull, /* EE */ 1499 0x8080800080808080ull, /* EF */ 1500 0x8080808000000000ull, /* F0 */ 1501 0x8080808000000080ull, /* F1 */ 1502 0x8080808000008000ull, /* F2 */ 1503 0x8080808000008080ull, /* F3 */ 1504 0x8080808000800000ull, /* F4 */ 1505 0x8080808000800080ull, /* F5 */ 1506 0x8080808000808000ull, /* F6 */ 1507 0x8080808000808080ull, /* F7 */ 1508 0x8080808080000000ull, /* F8 */ 1509 0x8080808080000080ull, /* F9 */ 1510 0x8080808080008000ull, /* FA */ 1511 0x8080808080008080ull, /* FB */ 1512 0x8080808080800000ull, /* FC */ 1513 0x8080808080800080ull, /* FD */ 1514 0x8080808080808000ull, /* FE */ 1515 0x8080808080808080ull, /* FF */ 1516 }; 1517 1518 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b) 1519 { 1520 int i; 1521 uint64_t t[2] = { 0, 0 }; 1522 1523 VECTOR_FOR_INORDER_I(i, u8) { 1524 #if defined(HOST_WORDS_BIGENDIAN) 1525 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7); 1526 #else 1527 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7)); 1528 #endif 1529 } 1530 1531 r->u64[0] = t[0]; 1532 r->u64[1] = t[1]; 1533 } 1534 1535 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1536 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1537 { \ 1538 int i, j; \ 1539 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \ 1540 \ 1541 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1542 prod[i] = 0; \ 1543 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1544 if (a->srcfld[i] & (1ull<<j)) { \ 1545 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1546 } \ 1547 } \ 1548 } \ 1549 \ 1550 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1551 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \ 1552 } \ 1553 } 1554 1555 PMSUM(vpmsumb, u8, u16, uint16_t) 1556 PMSUM(vpmsumh, u16, u32, uint32_t) 1557 PMSUM(vpmsumw, u32, u64, uint64_t) 1558 1559 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1560 { 1561 1562 #ifdef CONFIG_INT128 1563 int i, j; 1564 __uint128_t prod[2]; 1565 1566 VECTOR_FOR_INORDER_I(i, u64) { 1567 prod[i] = 0; 1568 for (j = 0; j < 64; j++) { 1569 if (a->u64[i] & (1ull<<j)) { 1570 prod[i] ^= (((__uint128_t)b->u64[i]) << j); 1571 } 1572 } 1573 } 1574 1575 r->u128 = prod[0] ^ prod[1]; 1576 1577 #else 1578 int i, j; 1579 ppc_avr_t prod[2]; 1580 1581 VECTOR_FOR_INORDER_I(i, u64) { 1582 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0; 1583 for (j = 0; j < 64; j++) { 1584 if (a->u64[i] & (1ull<<j)) { 1585 ppc_avr_t bshift; 1586 if (j == 0) { 1587 bshift.u64[HI_IDX] = 0; 1588 bshift.u64[LO_IDX] = b->u64[i]; 1589 } else { 1590 bshift.u64[HI_IDX] = b->u64[i] >> (64-j); 1591 bshift.u64[LO_IDX] = b->u64[i] << j; 1592 } 1593 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX]; 1594 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX]; 1595 } 1596 } 1597 } 1598 1599 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX]; 1600 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX]; 1601 #endif 1602 } 1603 1604 1605 #if defined(HOST_WORDS_BIGENDIAN) 1606 #define PKBIG 1 1607 #else 1608 #define PKBIG 0 1609 #endif 1610 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1611 { 1612 int i, j; 1613 ppc_avr_t result; 1614 #if defined(HOST_WORDS_BIGENDIAN) 1615 const ppc_avr_t *x[2] = { a, b }; 1616 #else 1617 const ppc_avr_t *x[2] = { b, a }; 1618 #endif 1619 
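    /*
     * Each 32-bit source pixel is reduced to a 1/5/5/5 16-bit pixel:
     * the least significant bit of the top byte and the five most
     * significant bits of each of the three colour bytes are kept,
     * i.e. ((e >> 9) & 0xfc00) | ((e >> 6) & 0x3e0) | ((e >> 3) & 0x1f),
     * which is what the loop below computes for all eight halfword lanes.
     */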
1620 VECTOR_FOR_INORDER_I(i, u64) { 1621 VECTOR_FOR_INORDER_I(j, u32) { 1622 uint32_t e = x[i]->u32[j]; 1623 1624 result.u16[4*i+j] = (((e >> 9) & 0xfc00) | 1625 ((e >> 6) & 0x3e0) | 1626 ((e >> 3) & 0x1f)); 1627 } 1628 } 1629 *r = result; 1630 } 1631 1632 #define VPK(suffix, from, to, cvt, dosat) \ 1633 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1634 ppc_avr_t *a, ppc_avr_t *b) \ 1635 { \ 1636 int i; \ 1637 int sat = 0; \ 1638 ppc_avr_t result; \ 1639 ppc_avr_t *a0 = PKBIG ? a : b; \ 1640 ppc_avr_t *a1 = PKBIG ? b : a; \ 1641 \ 1642 VECTOR_FOR_INORDER_I(i, from) { \ 1643 result.to[i] = cvt(a0->from[i], &sat); \ 1644 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \ 1645 } \ 1646 *r = result; \ 1647 if (dosat && sat) { \ 1648 env->vscr |= (1 << VSCR_SAT); \ 1649 } \ 1650 } 1651 #define I(x, y) (x) 1652 VPK(shss, s16, s8, cvtshsb, 1) 1653 VPK(shus, s16, u8, cvtshub, 1) 1654 VPK(swss, s32, s16, cvtswsh, 1) 1655 VPK(swus, s32, u16, cvtswuh, 1) 1656 VPK(sdss, s64, s32, cvtsdsw, 1) 1657 VPK(sdus, s64, u32, cvtsduw, 1) 1658 VPK(uhus, u16, u8, cvtuhub, 1) 1659 VPK(uwus, u32, u16, cvtuwuh, 1) 1660 VPK(udus, u64, u32, cvtuduw, 1) 1661 VPK(uhum, u16, u8, I, 0) 1662 VPK(uwum, u32, u16, I, 0) 1663 VPK(udum, u64, u32, I, 0) 1664 #undef I 1665 #undef VPK 1666 #undef PKBIG 1667 1668 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1669 { 1670 int i; 1671 1672 for (i = 0; i < ARRAY_SIZE(r->f); i++) { 1673 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status); 1674 } 1675 } 1676 1677 #define VRFI(suffix, rounding) \ 1678 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1679 ppc_avr_t *b) \ 1680 { \ 1681 int i; \ 1682 float_status s = env->vec_status; \ 1683 \ 1684 set_float_rounding_mode(rounding, &s); \ 1685 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ 1686 r->f[i] = float32_round_to_int (b->f[i], &s); \ 1687 } \ 1688 } 1689 VRFI(n, float_round_nearest_even) 1690 VRFI(m, float_round_down) 1691 VRFI(p, float_round_up) 1692 VRFI(z, float_round_to_zero) 1693 #undef VRFI 1694 1695 #define VROTATE(suffix, element, mask) \ 1696 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1697 { \ 1698 int i; \ 1699 \ 1700 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1701 unsigned int shift = b->element[i] & mask; \ 1702 r->element[i] = (a->element[i] << shift) | \ 1703 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \ 1704 } \ 1705 } 1706 VROTATE(b, u8, 0x7) 1707 VROTATE(h, u16, 0xF) 1708 VROTATE(w, u32, 0x1F) 1709 VROTATE(d, u64, 0x3F) 1710 #undef VROTATE 1711 1712 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1713 { 1714 int i; 1715 1716 for (i = 0; i < ARRAY_SIZE(r->f); i++) { 1717 float32 t = float32_sqrt(b->f[i], &env->vec_status); 1718 1719 r->f[i] = float32_div(float32_one, t, &env->vec_status); 1720 } 1721 } 1722 1723 #define VRLMI(name, size, element, insert) \ 1724 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1725 { \ 1726 int i; \ 1727 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1728 uint##size##_t src1 = a->element[i]; \ 1729 uint##size##_t src2 = b->element[i]; \ 1730 uint##size##_t src3 = r->element[i]; \ 1731 uint##size##_t begin, end, shift, mask, rot_val; \ 1732 \ 1733 shift = extract##size(src2, 0, 6); \ 1734 end = extract##size(src2, 8, 6); \ 1735 begin = extract##size(src2, 16, 6); \ 1736 rot_val = rol##size(src1, shift); \ 1737 mask = mask_u##size(begin, end); \ 1738 if (insert) { \ 1739 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1740 } else { \ 1741 
r->element[i] = (rot_val & mask); \ 1742 } \ 1743 } \ 1744 } 1745 1746 VRLMI(vrldmi, 64, u64, 1); 1747 VRLMI(vrlwmi, 32, u32, 1); 1748 VRLMI(vrldnm, 64, u64, 0); 1749 VRLMI(vrlwnm, 32, u32, 0); 1750 1751 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1752 ppc_avr_t *c) 1753 { 1754 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]); 1755 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]); 1756 } 1757 1758 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1759 { 1760 int i; 1761 1762 for (i = 0; i < ARRAY_SIZE(r->f); i++) { 1763 r->f[i] = float32_exp2(b->f[i], &env->vec_status); 1764 } 1765 } 1766 1767 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1768 { 1769 int i; 1770 1771 for (i = 0; i < ARRAY_SIZE(r->f); i++) { 1772 r->f[i] = float32_log2(b->f[i], &env->vec_status); 1773 } 1774 } 1775 1776 #if defined(HOST_WORDS_BIGENDIAN) 1777 #define VEXTU_X_DO(name, size, left) \ 1778 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1779 { \ 1780 int index; \ 1781 if (left) { \ 1782 index = (a & 0xf) * 8; \ 1783 } else { \ 1784 index = ((15 - (a & 0xf) + 1) * 8) - size; \ 1785 } \ 1786 return int128_getlo(int128_rshift(b->s128, index)) & \ 1787 MAKE_64BIT_MASK(0, size); \ 1788 } 1789 #else 1790 #define VEXTU_X_DO(name, size, left) \ 1791 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1792 { \ 1793 int index; \ 1794 if (left) { \ 1795 index = ((15 - (a & 0xf) + 1) * 8) - size; \ 1796 } else { \ 1797 index = (a & 0xf) * 8; \ 1798 } \ 1799 return int128_getlo(int128_rshift(b->s128, index)) & \ 1800 MAKE_64BIT_MASK(0, size); \ 1801 } 1802 #endif 1803 1804 VEXTU_X_DO(vextublx, 8, 1) 1805 VEXTU_X_DO(vextuhlx, 16, 1) 1806 VEXTU_X_DO(vextuwlx, 32, 1) 1807 VEXTU_X_DO(vextubrx, 8, 0) 1808 VEXTU_X_DO(vextuhrx, 16, 0) 1809 VEXTU_X_DO(vextuwrx, 32, 0) 1810 #undef VEXTU_X_DO 1811 1812 /* The specification says that the results are undefined if all of the 1813 * shift counts are not identical. We check to make sure that they are 1814 * to conform to what real hardware appears to do. 
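 * (If the low three bits of the sixteen shift-count bytes disagree, the
 * helpers below leave the destination register unmodified instead of
 * performing a shift.)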
*/ 1815 #define VSHIFT(suffix, leftp) \ 1816 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1817 { \ 1818 int shift = b->u8[LO_IDX*15] & 0x7; \ 1819 int doit = 1; \ 1820 int i; \ 1821 \ 1822 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \ 1823 doit = doit && ((b->u8[i] & 0x7) == shift); \ 1824 } \ 1825 if (doit) { \ 1826 if (shift == 0) { \ 1827 *r = *a; \ 1828 } else if (leftp) { \ 1829 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \ 1830 \ 1831 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \ 1832 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \ 1833 } else { \ 1834 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \ 1835 \ 1836 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \ 1837 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \ 1838 } \ 1839 } \ 1840 } 1841 VSHIFT(l, 1) 1842 VSHIFT(r, 0) 1843 #undef VSHIFT 1844 1845 #define VSL(suffix, element, mask) \ 1846 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1847 { \ 1848 int i; \ 1849 \ 1850 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1851 unsigned int shift = b->element[i] & mask; \ 1852 \ 1853 r->element[i] = a->element[i] << shift; \ 1854 } \ 1855 } 1856 VSL(b, u8, 0x7) 1857 VSL(h, u16, 0x0F) 1858 VSL(w, u32, 0x1F) 1859 VSL(d, u64, 0x3F) 1860 #undef VSL 1861 1862 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1863 { 1864 int i; 1865 unsigned int shift, bytes, size; 1866 1867 size = ARRAY_SIZE(r->u8); 1868 for (i = 0; i < size; i++) { 1869 shift = b->u8[i] & 0x7; /* extract shift value */ 1870 bytes = (a->u8[i] << 8) + /* extract adjacent bytes */ 1871 (((i + 1) < size) ? a->u8[i + 1] : 0); 1872 r->u8[i] = (bytes << shift) >> 8; /* shift and store result */ 1873 } 1874 } 1875 1876 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1877 { 1878 int i; 1879 unsigned int shift, bytes; 1880 1881 /* Use reverse order, as destination and source register can be same. Its 1882 * being modified in place saving temporary, reverse order will guarantee 1883 * that computed result is not fed back. 1884 */ 1885 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1886 shift = b->u8[i] & 0x7; /* extract shift value */ 1887 bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i]; 1888 /* extract adjacent bytes */ 1889 r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */ 1890 } 1891 } 1892 1893 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1894 { 1895 int sh = shift & 0xf; 1896 int i; 1897 ppc_avr_t result; 1898 1899 #if defined(HOST_WORDS_BIGENDIAN) 1900 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1901 int index = sh + i; 1902 if (index > 0xf) { 1903 result.u8[i] = b->u8[index - 0x10]; 1904 } else { 1905 result.u8[i] = a->u8[index]; 1906 } 1907 } 1908 #else 1909 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1910 int index = (16 - sh) + i; 1911 if (index > 0xf) { 1912 result.u8[i] = a->u8[index - 0x10]; 1913 } else { 1914 result.u8[i] = b->u8[index]; 1915 } 1916 } 1917 #endif 1918 *r = result; 1919 } 1920 1921 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1922 { 1923 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf; 1924 1925 #if defined(HOST_WORDS_BIGENDIAN) 1926 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1927 memset(&r->u8[16-sh], 0, sh); 1928 #else 1929 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1930 memset(&r->u8[0], 0, sh); 1931 #endif 1932 } 1933 1934 /* Experimental testing shows that hardware masks the immediate. 
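 * (_SPLAT_MASKED() below therefore reduces the UIM field modulo the
 * element count: modulo 16 for vspltb, 8 for vsplth and 4 for vspltw.)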
*/ 1935 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1)) 1936 #if defined(HOST_WORDS_BIGENDIAN) 1937 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element) 1938 #else 1939 #define SPLAT_ELEMENT(element) \ 1940 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element)) 1941 #endif 1942 #define VSPLT(suffix, element) \ 1943 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \ 1944 { \ 1945 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \ 1946 int i; \ 1947 \ 1948 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1949 r->element[i] = s; \ 1950 } \ 1951 } 1952 VSPLT(b, u8) 1953 VSPLT(h, u16) 1954 VSPLT(w, u32) 1955 #undef VSPLT 1956 #undef SPLAT_ELEMENT 1957 #undef _SPLAT_MASKED 1958 #if defined(HOST_WORDS_BIGENDIAN) 1959 #define VINSERT(suffix, element) \ 1960 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1961 { \ 1962 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element)], \ 1963 sizeof(r->element[0])); \ 1964 } 1965 #else 1966 #define VINSERT(suffix, element) \ 1967 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1968 { \ 1969 uint32_t d = (16 - index) - sizeof(r->element[0]); \ 1970 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \ 1971 } 1972 #endif 1973 VINSERT(b, u8) 1974 VINSERT(h, u16) 1975 VINSERT(w, u32) 1976 VINSERT(d, u64) 1977 #undef VINSERT 1978 #if defined(HOST_WORDS_BIGENDIAN) 1979 #define VEXTRACT(suffix, element) \ 1980 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1981 { \ 1982 uint32_t es = sizeof(r->element[0]); \ 1983 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1984 memset(&r->u8[8], 0, 8); \ 1985 memset(&r->u8[0], 0, 8 - es); \ 1986 } 1987 #else 1988 #define VEXTRACT(suffix, element) \ 1989 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1990 { \ 1991 uint32_t es = sizeof(r->element[0]); \ 1992 uint32_t s = (16 - index) - es; \ 1993 memmove(&r->u8[8], &b->u8[s], es); \ 1994 memset(&r->u8[0], 0, 8); \ 1995 memset(&r->u8[8 + es], 0, 8 - es); \ 1996 } 1997 #endif 1998 VEXTRACT(ub, u8) 1999 VEXTRACT(uh, u16) 2000 VEXTRACT(uw, u32) 2001 VEXTRACT(d, u64) 2002 #undef VEXTRACT 2003 2004 void helper_xxextractuw(CPUPPCState *env, target_ulong xtn, 2005 target_ulong xbn, uint32_t index) 2006 { 2007 ppc_vsr_t xt, xb; 2008 size_t es = sizeof(uint32_t); 2009 uint32_t ext_index; 2010 int i; 2011 2012 getVSR(xbn, &xb, env); 2013 memset(&xt, 0, sizeof(xt)); 2014 2015 #if defined(HOST_WORDS_BIGENDIAN) 2016 ext_index = index; 2017 for (i = 0; i < es; i++, ext_index++) { 2018 xt.u8[8 - es + i] = xb.u8[ext_index % 16]; 2019 } 2020 #else 2021 ext_index = 15 - index; 2022 for (i = es - 1; i >= 0; i--, ext_index--) { 2023 xt.u8[8 + i] = xb.u8[ext_index % 16]; 2024 } 2025 #endif 2026 2027 putVSR(xtn, &xt, env); 2028 } 2029 2030 void helper_xxinsertw(CPUPPCState *env, target_ulong xtn, 2031 target_ulong xbn, uint32_t index) 2032 { 2033 ppc_vsr_t xt, xb; 2034 size_t es = sizeof(uint32_t); 2035 int ins_index, i = 0; 2036 2037 getVSR(xbn, &xb, env); 2038 getVSR(xtn, &xt, env); 2039 2040 #if defined(HOST_WORDS_BIGENDIAN) 2041 ins_index = index; 2042 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 2043 xt.u8[ins_index] = xb.u8[8 - es + i]; 2044 } 2045 #else 2046 ins_index = 15 - index; 2047 for (i = es - 1; i >= 0 && ins_index >= 0; i--, ins_index--) { 2048 xt.u8[ins_index] = xb.u8[8 + i]; 2049 } 2050 #endif 2051 2052 putVSR(xtn, &xt, env); 2053 } 2054 2055 #define VEXT_SIGNED(name, element, mask, cast, recast) \ 2056 void 
helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 2057 { \ 2058 int i; \ 2059 VECTOR_FOR_INORDER_I(i, element) { \ 2060 r->element[i] = (recast)((cast)(b->element[i] & mask)); \ 2061 } \ 2062 } 2063 VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t) 2064 VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t) 2065 VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t) 2066 VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t) 2067 VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t) 2068 #undef VEXT_SIGNED 2069 2070 #define VNEG(name, element) \ 2071 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 2072 { \ 2073 int i; \ 2074 VECTOR_FOR_INORDER_I(i, element) { \ 2075 r->element[i] = -b->element[i]; \ 2076 } \ 2077 } 2078 VNEG(vnegw, s32) 2079 VNEG(vnegd, s64) 2080 #undef VNEG 2081 2082 #define VSPLTI(suffix, element, splat_type) \ 2083 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \ 2084 { \ 2085 splat_type x = (int8_t)(splat << 3) >> 3; \ 2086 int i; \ 2087 \ 2088 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 2089 r->element[i] = x; \ 2090 } \ 2091 } 2092 VSPLTI(b, s8, int8_t) 2093 VSPLTI(h, s16, int16_t) 2094 VSPLTI(w, s32, int32_t) 2095 #undef VSPLTI 2096 2097 #define VSR(suffix, element, mask) \ 2098 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 2099 { \ 2100 int i; \ 2101 \ 2102 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 2103 unsigned int shift = b->element[i] & mask; \ 2104 r->element[i] = a->element[i] >> shift; \ 2105 } \ 2106 } 2107 VSR(ab, s8, 0x7) 2108 VSR(ah, s16, 0xF) 2109 VSR(aw, s32, 0x1F) 2110 VSR(ad, s64, 0x3F) 2111 VSR(b, u8, 0x7) 2112 VSR(h, u16, 0xF) 2113 VSR(w, u32, 0x1F) 2114 VSR(d, u64, 0x3F) 2115 #undef VSR 2116 2117 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2118 { 2119 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf; 2120 2121 #if defined(HOST_WORDS_BIGENDIAN) 2122 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 2123 memset(&r->u8[0], 0, sh); 2124 #else 2125 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 2126 memset(&r->u8[16 - sh], 0, sh); 2127 #endif 2128 } 2129 2130 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2131 { 2132 int i; 2133 2134 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2135 r->u32[i] = a->u32[i] >= b->u32[i]; 2136 } 2137 } 2138 2139 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2140 { 2141 int64_t t; 2142 int i, upper; 2143 ppc_avr_t result; 2144 int sat = 0; 2145 2146 #if defined(HOST_WORDS_BIGENDIAN) 2147 upper = ARRAY_SIZE(r->s32)-1; 2148 #else 2149 upper = 0; 2150 #endif 2151 t = (int64_t)b->s32[upper]; 2152 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2153 t += a->s32[i]; 2154 result.s32[i] = 0; 2155 } 2156 result.s32[upper] = cvtsdsw(t, &sat); 2157 *r = result; 2158 2159 if (sat) { 2160 env->vscr |= (1 << VSCR_SAT); 2161 } 2162 } 2163 2164 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2165 { 2166 int i, j, upper; 2167 ppc_avr_t result; 2168 int sat = 0; 2169 2170 #if defined(HOST_WORDS_BIGENDIAN) 2171 upper = 1; 2172 #else 2173 upper = 0; 2174 #endif 2175 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 2176 int64_t t = (int64_t)b->s32[upper + i * 2]; 2177 2178 result.u64[i] = 0; 2179 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 2180 t += a->s32[2 * i + j]; 2181 } 2182 result.s32[upper + i * 2] = cvtsdsw(t, &sat); 2183 } 2184 2185 *r = result; 2186 if (sat) { 2187 env->vscr |= (1 << VSCR_SAT); 2188 } 2189 } 2190 2191 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2192 { 2193 int i, j; 2194 
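/* vsum4sbs: for each 32-bit element, add the four signed bytes of a to the
 * signed word of b and saturate the sum to 32 bits (VSCR[SAT] is set below
 * when saturation occurs). */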
int sat = 0; 2195 2196 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2197 int64_t t = (int64_t)b->s32[i]; 2198 2199 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 2200 t += a->s8[4 * i + j]; 2201 } 2202 r->s32[i] = cvtsdsw(t, &sat); 2203 } 2204 2205 if (sat) { 2206 env->vscr |= (1 << VSCR_SAT); 2207 } 2208 } 2209 2210 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2211 { 2212 int sat = 0; 2213 int i; 2214 2215 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2216 int64_t t = (int64_t)b->s32[i]; 2217 2218 t += a->s16[2 * i] + a->s16[2 * i + 1]; 2219 r->s32[i] = cvtsdsw(t, &sat); 2220 } 2221 2222 if (sat) { 2223 env->vscr |= (1 << VSCR_SAT); 2224 } 2225 } 2226 2227 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2228 { 2229 int i, j; 2230 int sat = 0; 2231 2232 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2233 uint64_t t = (uint64_t)b->u32[i]; 2234 2235 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 2236 t += a->u8[4 * i + j]; 2237 } 2238 r->u32[i] = cvtuduw(t, &sat); 2239 } 2240 2241 if (sat) { 2242 env->vscr |= (1 << VSCR_SAT); 2243 } 2244 } 2245 2246 #if defined(HOST_WORDS_BIGENDIAN) 2247 #define UPKHI 1 2248 #define UPKLO 0 2249 #else 2250 #define UPKHI 0 2251 #define UPKLO 1 2252 #endif 2253 #define VUPKPX(suffix, hi) \ 2254 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2255 { \ 2256 int i; \ 2257 ppc_avr_t result; \ 2258 \ 2259 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 2260 uint16_t e = b->u16[hi ? i : i+4]; \ 2261 uint8_t a = (e >> 15) ? 0xff : 0; \ 2262 uint8_t r = (e >> 10) & 0x1f; \ 2263 uint8_t g = (e >> 5) & 0x1f; \ 2264 uint8_t b = e & 0x1f; \ 2265 \ 2266 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 2267 } \ 2268 *r = result; \ 2269 } 2270 VUPKPX(lpx, UPKLO) 2271 VUPKPX(hpx, UPKHI) 2272 #undef VUPKPX 2273 2274 #define VUPK(suffix, unpacked, packee, hi) \ 2275 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2276 { \ 2277 int i; \ 2278 ppc_avr_t result; \ 2279 \ 2280 if (hi) { \ 2281 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 2282 result.unpacked[i] = b->packee[i]; \ 2283 } \ 2284 } else { \ 2285 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 2286 i++) { \ 2287 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 2288 } \ 2289 } \ 2290 *r = result; \ 2291 } 2292 VUPK(hsb, s16, s8, UPKHI) 2293 VUPK(hsh, s32, s16, UPKHI) 2294 VUPK(hsw, s64, s32, UPKHI) 2295 VUPK(lsb, s16, s8, UPKLO) 2296 VUPK(lsh, s32, s16, UPKLO) 2297 VUPK(lsw, s64, s32, UPKLO) 2298 #undef VUPK 2299 #undef UPKHI 2300 #undef UPKLO 2301 2302 #define VGENERIC_DO(name, element) \ 2303 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 2304 { \ 2305 int i; \ 2306 \ 2307 VECTOR_FOR_INORDER_I(i, element) { \ 2308 r->element[i] = name(b->element[i]); \ 2309 } \ 2310 } 2311 2312 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 2313 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 2314 #define clzw(v) clz32((v)) 2315 #define clzd(v) clz64((v)) 2316 2317 VGENERIC_DO(clzb, u8) 2318 VGENERIC_DO(clzh, u16) 2319 VGENERIC_DO(clzw, u32) 2320 VGENERIC_DO(clzd, u64) 2321 2322 #undef clzb 2323 #undef clzh 2324 #undef clzw 2325 #undef clzd 2326 2327 #define ctzb(v) ((v) ? ctz32(v) : 8) 2328 #define ctzh(v) ((v) ? 
ctz32(v) : 16) 2329 #define ctzw(v) ctz32((v)) 2330 #define ctzd(v) ctz64((v)) 2331 2332 VGENERIC_DO(ctzb, u8) 2333 VGENERIC_DO(ctzh, u16) 2334 VGENERIC_DO(ctzw, u32) 2335 VGENERIC_DO(ctzd, u64) 2336 2337 #undef ctzb 2338 #undef ctzh 2339 #undef ctzw 2340 #undef ctzd 2341 2342 #define popcntb(v) ctpop8(v) 2343 #define popcnth(v) ctpop16(v) 2344 #define popcntw(v) ctpop32(v) 2345 #define popcntd(v) ctpop64(v) 2346 2347 VGENERIC_DO(popcntb, u8) 2348 VGENERIC_DO(popcnth, u16) 2349 VGENERIC_DO(popcntw, u32) 2350 VGENERIC_DO(popcntd, u64) 2351 2352 #undef popcntb 2353 #undef popcnth 2354 #undef popcntw 2355 #undef popcntd 2356 2357 #undef VGENERIC_DO 2358 2359 #if defined(HOST_WORDS_BIGENDIAN) 2360 #define QW_ONE { .u64 = { 0, 1 } } 2361 #else 2362 #define QW_ONE { .u64 = { 1, 0 } } 2363 #endif 2364 2365 #ifndef CONFIG_INT128 2366 2367 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) 2368 { 2369 t->u64[0] = ~a.u64[0]; 2370 t->u64[1] = ~a.u64[1]; 2371 } 2372 2373 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) 2374 { 2375 if (a.u64[HI_IDX] < b.u64[HI_IDX]) { 2376 return -1; 2377 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) { 2378 return 1; 2379 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) { 2380 return -1; 2381 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) { 2382 return 1; 2383 } else { 2384 return 0; 2385 } 2386 } 2387 2388 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2389 { 2390 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX]; 2391 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] + 2392 (~a.u64[LO_IDX] < b.u64[LO_IDX]); 2393 } 2394 2395 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2396 { 2397 ppc_avr_t not_a; 2398 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX]; 2399 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] + 2400 (~a.u64[LO_IDX] < b.u64[LO_IDX]); 2401 avr_qw_not(¬_a, a); 2402 return avr_qw_cmpu(not_a, b) < 0; 2403 } 2404 2405 #endif 2406 2407 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2408 { 2409 #ifdef CONFIG_INT128 2410 r->u128 = a->u128 + b->u128; 2411 #else 2412 avr_qw_add(r, *a, *b); 2413 #endif 2414 } 2415 2416 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2417 { 2418 #ifdef CONFIG_INT128 2419 r->u128 = a->u128 + b->u128 + (c->u128 & 1); 2420 #else 2421 2422 if (c->u64[LO_IDX] & 1) { 2423 ppc_avr_t tmp; 2424 2425 tmp.u64[HI_IDX] = 0; 2426 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1; 2427 avr_qw_add(&tmp, *a, tmp); 2428 avr_qw_add(r, tmp, *b); 2429 } else { 2430 avr_qw_add(r, *a, *b); 2431 } 2432 #endif 2433 } 2434 2435 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2436 { 2437 #ifdef CONFIG_INT128 2438 r->u128 = (~a->u128 < b->u128); 2439 #else 2440 ppc_avr_t not_a; 2441 2442 avr_qw_not(¬_a, *a); 2443 2444 r->u64[HI_IDX] = 0; 2445 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0); 2446 #endif 2447 } 2448 2449 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2450 { 2451 #ifdef CONFIG_INT128 2452 int carry_out = (~a->u128 < b->u128); 2453 if (!carry_out && (c->u128 & 1)) { 2454 carry_out = ((a->u128 + b->u128 + 1) == 0) && 2455 ((a->u128 != 0) || (b->u128 != 0)); 2456 } 2457 r->u128 = carry_out; 2458 #else 2459 2460 int carry_in = c->u64[LO_IDX] & 1; 2461 int carry_out = 0; 2462 ppc_avr_t tmp; 2463 2464 carry_out = avr_qw_addc(&tmp, *a, *b); 2465 2466 if (!carry_out && carry_in) { 2467 ppc_avr_t one = QW_ONE; 2468 carry_out = avr_qw_addc(&tmp, tmp, one); 2469 } 2470 r->u64[HI_IDX] = 0; 2471 r->u64[LO_IDX] = carry_out; 2472 #endif 2473 } 2474 2475 void 
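/* 128-bit subtraction without __int128: a - b is computed as a + ~b + 1
 * (two's complement) using the avr_qw_* helpers above. */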
helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2476 { 2477 #ifdef CONFIG_INT128 2478 r->u128 = a->u128 - b->u128; 2479 #else 2480 ppc_avr_t tmp; 2481 ppc_avr_t one = QW_ONE; 2482 2483 avr_qw_not(&tmp, *b); 2484 avr_qw_add(&tmp, *a, tmp); 2485 avr_qw_add(r, tmp, one); 2486 #endif 2487 } 2488 2489 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2490 { 2491 #ifdef CONFIG_INT128 2492 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); 2493 #else 2494 ppc_avr_t tmp, sum; 2495 2496 avr_qw_not(&tmp, *b); 2497 avr_qw_add(&sum, *a, tmp); 2498 2499 tmp.u64[HI_IDX] = 0; 2500 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1; 2501 avr_qw_add(r, sum, tmp); 2502 #endif 2503 } 2504 2505 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2506 { 2507 #ifdef CONFIG_INT128 2508 r->u128 = (~a->u128 < ~b->u128) || 2509 (a->u128 + ~b->u128 == (__uint128_t)-1); 2510 #else 2511 int carry = (avr_qw_cmpu(*a, *b) > 0); 2512 if (!carry) { 2513 ppc_avr_t tmp; 2514 avr_qw_not(&tmp, *b); 2515 avr_qw_add(&tmp, *a, tmp); 2516 carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull)); 2517 } 2518 r->u64[HI_IDX] = 0; 2519 r->u64[LO_IDX] = carry; 2520 #endif 2521 } 2522 2523 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2524 { 2525 #ifdef CONFIG_INT128 2526 r->u128 = 2527 (~a->u128 < ~b->u128) || 2528 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); 2529 #else 2530 int carry_in = c->u64[LO_IDX] & 1; 2531 int carry_out = (avr_qw_cmpu(*a, *b) > 0); 2532 if (!carry_out && carry_in) { 2533 ppc_avr_t tmp; 2534 avr_qw_not(&tmp, *b); 2535 avr_qw_add(&tmp, *a, tmp); 2536 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull)); 2537 } 2538 2539 r->u64[HI_IDX] = 0; 2540 r->u64[LO_IDX] = carry_out; 2541 #endif 2542 } 2543 2544 #define BCD_PLUS_PREF_1 0xC 2545 #define BCD_PLUS_PREF_2 0xF 2546 #define BCD_PLUS_ALT_1 0xA 2547 #define BCD_NEG_PREF 0xD 2548 #define BCD_NEG_ALT 0xB 2549 #define BCD_PLUS_ALT_2 0xE 2550 #define NATIONAL_PLUS 0x2B 2551 #define NATIONAL_NEG 0x2D 2552 2553 #if defined(HOST_WORDS_BIGENDIAN) 2554 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2555 #else 2556 #define BCD_DIG_BYTE(n) ((n) / 2) 2557 #endif 2558 2559 static int bcd_get_sgn(ppc_avr_t *bcd) 2560 { 2561 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) { 2562 case BCD_PLUS_PREF_1: 2563 case BCD_PLUS_PREF_2: 2564 case BCD_PLUS_ALT_1: 2565 case BCD_PLUS_ALT_2: 2566 { 2567 return 1; 2568 } 2569 2570 case BCD_NEG_PREF: 2571 case BCD_NEG_ALT: 2572 { 2573 return -1; 2574 } 2575 2576 default: 2577 { 2578 return 0; 2579 } 2580 } 2581 } 2582 2583 static int bcd_preferred_sgn(int sgn, int ps) 2584 { 2585 if (sgn >= 0) { 2586 return (ps == 0) ? 
BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2587 } else { 2588 return BCD_NEG_PREF; 2589 } 2590 } 2591 2592 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2593 { 2594 uint8_t result; 2595 if (n & 1) { 2596 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4; 2597 } else { 2598 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF; 2599 } 2600 2601 if (unlikely(result > 9)) { 2602 *invalid = true; 2603 } 2604 return result; 2605 } 2606 2607 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2608 { 2609 if (n & 1) { 2610 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F; 2611 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4); 2612 } else { 2613 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0; 2614 bcd->u8[BCD_DIG_BYTE(n)] |= digit; 2615 } 2616 } 2617 2618 static bool bcd_is_valid(ppc_avr_t *bcd) 2619 { 2620 int i; 2621 int invalid = 0; 2622 2623 if (bcd_get_sgn(bcd) == 0) { 2624 return false; 2625 } 2626 2627 for (i = 1; i < 32; i++) { 2628 bcd_get_digit(bcd, i, &invalid); 2629 if (unlikely(invalid)) { 2630 return false; 2631 } 2632 } 2633 return true; 2634 } 2635 2636 static int bcd_cmp_zero(ppc_avr_t *bcd) 2637 { 2638 if (bcd->u64[HI_IDX] == 0 && (bcd->u64[LO_IDX] >> 4) == 0) { 2639 return CRF_EQ; 2640 } else { 2641 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT; 2642 } 2643 } 2644 2645 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2646 { 2647 #if defined(HOST_WORDS_BIGENDIAN) 2648 return reg->u16[7 - n]; 2649 #else 2650 return reg->u16[n]; 2651 #endif 2652 } 2653 2654 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2655 { 2656 #if defined(HOST_WORDS_BIGENDIAN) 2657 reg->u16[7 - n] = val; 2658 #else 2659 reg->u16[n] = val; 2660 #endif 2661 } 2662 2663 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2664 { 2665 int i; 2666 int invalid = 0; 2667 for (i = 31; i > 0; i--) { 2668 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2669 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2670 if (unlikely(invalid)) { 2671 return 0; /* doesn't matter */ 2672 } else if (dig_a > dig_b) { 2673 return 1; 2674 } else if (dig_a < dig_b) { 2675 return -1; 2676 } 2677 } 2678 2679 return 0; 2680 } 2681 2682 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2683 int *overflow) 2684 { 2685 int carry = 0; 2686 int i; 2687 int is_zero = 1; 2688 for (i = 1; i <= 31; i++) { 2689 uint8_t digit = bcd_get_digit(a, i, invalid) + 2690 bcd_get_digit(b, i, invalid) + carry; 2691 is_zero &= (digit == 0); 2692 if (digit > 9) { 2693 carry = 1; 2694 digit -= 10; 2695 } else { 2696 carry = 0; 2697 } 2698 2699 bcd_put_digit(t, digit, i); 2700 2701 if (unlikely(*invalid)) { 2702 return -1; 2703 } 2704 } 2705 2706 *overflow = carry; 2707 return is_zero; 2708 } 2709 2710 static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2711 int *overflow) 2712 { 2713 int carry = 0; 2714 int i; 2715 int is_zero = 1; 2716 for (i = 1; i <= 31; i++) { 2717 uint8_t digit = bcd_get_digit(a, i, invalid) - 2718 bcd_get_digit(b, i, invalid) + carry; 2719 is_zero &= (digit == 0); 2720 if (digit & 0x80) { 2721 carry = -1; 2722 digit += 10; 2723 } else { 2724 carry = 0; 2725 } 2726 2727 bcd_put_digit(t, digit, i); 2728 2729 if (unlikely(*invalid)) { 2730 return -1; 2731 } 2732 } 2733 2734 *overflow = carry; 2735 return is_zero; 2736 } 2737 2738 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2739 { 2740 2741 int sgna = bcd_get_sgn(a); 2742 int sgnb = bcd_get_sgn(b); 2743 int invalid = (sgna == 0) || (sgnb == 0); 2744 int overflow = 0; 2745 int zero = 0; 2746 uint32_t cr = 0; 2747 
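/* Signed-magnitude BCD addition: when the signs match, add the magnitudes
 * and keep that sign; otherwise subtract the smaller magnitude from the
 * larger and take the sign of the larger operand. The returned condition
 * bits report GT/LT for the sign of a nonzero result, EQ for zero, and SO
 * for overflow or invalid operands. */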
ppc_avr_t result = { .u64 = { 0, 0 } }; 2748 2749 if (!invalid) { 2750 if (sgna == sgnb) { 2751 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps); 2752 zero = bcd_add_mag(&result, a, b, &invalid, &overflow); 2753 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2754 } else if (bcd_cmp_mag(a, b) > 0) { 2755 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps); 2756 zero = bcd_sub_mag(&result, a, b, &invalid, &overflow); 2757 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2758 } else { 2759 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps); 2760 zero = bcd_sub_mag(&result, b, a, &invalid, &overflow); 2761 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2762 } 2763 } 2764 2765 if (unlikely(invalid)) { 2766 result.u64[HI_IDX] = result.u64[LO_IDX] = -1; 2767 cr = CRF_SO; 2768 } else if (overflow) { 2769 cr |= CRF_SO; 2770 } else if (zero) { 2771 cr = CRF_EQ; 2772 } 2773 2774 *r = result; 2775 2776 return cr; 2777 } 2778 2779 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2780 { 2781 ppc_avr_t bcopy = *b; 2782 int sgnb = bcd_get_sgn(b); 2783 if (sgnb < 0) { 2784 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2785 } else if (sgnb > 0) { 2786 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2787 } 2788 /* else invalid ... defer to bcdadd code for proper handling */ 2789 2790 return helper_bcdadd(r, a, &bcopy, ps); 2791 } 2792 2793 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2794 { 2795 int i; 2796 int cr = 0; 2797 uint16_t national = 0; 2798 uint16_t sgnb = get_national_digit(b, 0); 2799 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2800 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2801 2802 for (i = 1; i < 8; i++) { 2803 national = get_national_digit(b, i); 2804 if (unlikely(national < 0x30 || national > 0x39)) { 2805 invalid = 1; 2806 break; 2807 } 2808 2809 bcd_put_digit(&ret, national & 0xf, i); 2810 } 2811 2812 if (sgnb == NATIONAL_PLUS) { 2813 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2814 } else { 2815 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2816 } 2817 2818 cr = bcd_cmp_zero(&ret); 2819 2820 if (unlikely(invalid)) { 2821 cr = CRF_SO; 2822 } 2823 2824 *r = ret; 2825 2826 return cr; 2827 } 2828 2829 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2830 { 2831 int i; 2832 int cr = 0; 2833 int sgnb = bcd_get_sgn(b); 2834 int invalid = (sgnb == 0); 2835 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2836 2837 int ox_flag = (b->u64[HI_IDX] != 0) || ((b->u64[LO_IDX] >> 32) != 0); 2838 2839 for (i = 1; i < 8; i++) { 2840 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2841 2842 if (unlikely(invalid)) { 2843 break; 2844 } 2845 } 2846 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2847 2848 cr = bcd_cmp_zero(b); 2849 2850 if (ox_flag) { 2851 cr |= CRF_SO; 2852 } 2853 2854 if (unlikely(invalid)) { 2855 cr = CRF_SO; 2856 } 2857 2858 *r = ret; 2859 2860 return cr; 2861 } 2862 2863 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2864 { 2865 int i; 2866 int cr = 0; 2867 int invalid = 0; 2868 int zone_digit = 0; 2869 int zone_lead = ps ? 0xF : 0x3; 2870 int digit = 0; 2871 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2872 int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4; 2873 2874 if (unlikely((sgnb < 0xA) && ps)) { 2875 invalid = 1; 2876 } 2877 2878 for (i = 0; i < 16; i++) { 2879 zone_digit = i ? 
b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead; 2880 digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF; 2881 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2882 invalid = 1; 2883 break; 2884 } 2885 2886 bcd_put_digit(&ret, digit, i + 1); 2887 } 2888 2889 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2890 (!ps && (sgnb & 0x4))) { 2891 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2892 } else { 2893 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2894 } 2895 2896 cr = bcd_cmp_zero(&ret); 2897 2898 if (unlikely(invalid)) { 2899 cr = CRF_SO; 2900 } 2901 2902 *r = ret; 2903 2904 return cr; 2905 } 2906 2907 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2908 { 2909 int i; 2910 int cr = 0; 2911 uint8_t digit = 0; 2912 int sgnb = bcd_get_sgn(b); 2913 int zone_lead = (ps) ? 0xF0 : 0x30; 2914 int invalid = (sgnb == 0); 2915 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2916 2917 int ox_flag = ((b->u64[HI_IDX] >> 4) != 0); 2918 2919 for (i = 0; i < 16; i++) { 2920 digit = bcd_get_digit(b, i + 1, &invalid); 2921 2922 if (unlikely(invalid)) { 2923 break; 2924 } 2925 2926 ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit; 2927 } 2928 2929 if (ps) { 2930 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2931 } else { 2932 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1); 2933 } 2934 2935 cr = bcd_cmp_zero(b); 2936 2937 if (ox_flag) { 2938 cr |= CRF_SO; 2939 } 2940 2941 if (unlikely(invalid)) { 2942 cr = CRF_SO; 2943 } 2944 2945 *r = ret; 2946 2947 return cr; 2948 } 2949 2950 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2951 { 2952 int i; 2953 int cr = 0; 2954 uint64_t lo_value; 2955 uint64_t hi_value; 2956 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2957 2958 if (b->s64[HI_IDX] < 0) { 2959 lo_value = -b->s64[LO_IDX]; 2960 hi_value = ~b->u64[HI_IDX] + !lo_value; 2961 bcd_put_digit(&ret, 0xD, 0); 2962 } else { 2963 lo_value = b->u64[LO_IDX]; 2964 hi_value = b->u64[HI_IDX]; 2965 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); 2966 } 2967 2968 if (divu128(&lo_value, &hi_value, 1000000000000000ULL) || 2969 lo_value > 9999999999999999ULL) { 2970 cr = CRF_SO; 2971 } 2972 2973 for (i = 1; i < 16; hi_value /= 10, i++) { 2974 bcd_put_digit(&ret, hi_value % 10, i); 2975 } 2976 2977 for (; i < 32; lo_value /= 10, i++) { 2978 bcd_put_digit(&ret, lo_value % 10, i); 2979 } 2980 2981 cr |= bcd_cmp_zero(&ret); 2982 2983 *r = ret; 2984 2985 return cr; 2986 } 2987 2988 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2989 { 2990 uint8_t i; 2991 int cr; 2992 uint64_t carry; 2993 uint64_t unused; 2994 uint64_t lo_value; 2995 uint64_t hi_value = 0; 2996 int sgnb = bcd_get_sgn(b); 2997 int invalid = (sgnb == 0); 2998 2999 lo_value = bcd_get_digit(b, 31, &invalid); 3000 for (i = 30; i > 0; i--) { 3001 mulu64(&lo_value, &carry, lo_value, 10ULL); 3002 mulu64(&hi_value, &unused, hi_value, 10ULL); 3003 lo_value += bcd_get_digit(b, i, &invalid); 3004 hi_value += carry; 3005 3006 if (unlikely(invalid)) { 3007 break; 3008 } 3009 } 3010 3011 if (sgnb == -1) { 3012 r->s64[LO_IDX] = -lo_value; 3013 r->s64[HI_IDX] = ~hi_value + !r->s64[LO_IDX]; 3014 } else { 3015 r->s64[LO_IDX] = lo_value; 3016 r->s64[HI_IDX] = hi_value; 3017 } 3018 3019 cr = bcd_cmp_zero(b); 3020 3021 if (unlikely(invalid)) { 3022 cr = CRF_SO; 3023 } 3024 3025 return cr; 3026 } 3027 3028 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3029 { 3030 int i; 3031 int invalid = 0; 3032 3033 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 3034 return CRF_SO; 3035 } 3036 3037 *r = *a; 3038 bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 
0xF, 0); 3039 3040 for (i = 1; i < 32; i++) { 3041 bcd_get_digit(a, i, &invalid); 3042 bcd_get_digit(b, i, &invalid); 3043 if (unlikely(invalid)) { 3044 return CRF_SO; 3045 } 3046 } 3047 3048 return bcd_cmp_zero(r); 3049 } 3050 3051 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 3052 { 3053 int sgnb = bcd_get_sgn(b); 3054 3055 *r = *b; 3056 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 3057 3058 if (bcd_is_valid(b) == false) { 3059 return CRF_SO; 3060 } 3061 3062 return bcd_cmp_zero(r); 3063 } 3064 3065 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3066 { 3067 int cr; 3068 #if defined(HOST_WORDS_BIGENDIAN) 3069 int i = a->s8[7]; 3070 #else 3071 int i = a->s8[8]; 3072 #endif 3073 bool ox_flag = false; 3074 int sgnb = bcd_get_sgn(b); 3075 ppc_avr_t ret = *b; 3076 ret.u64[LO_IDX] &= ~0xf; 3077 3078 if (bcd_is_valid(b) == false) { 3079 return CRF_SO; 3080 } 3081 3082 if (unlikely(i > 31)) { 3083 i = 31; 3084 } else if (unlikely(i < -31)) { 3085 i = -31; 3086 } 3087 3088 if (i > 0) { 3089 ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag); 3090 } else { 3091 urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4); 3092 } 3093 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 3094 3095 *r = ret; 3096 3097 cr = bcd_cmp_zero(r); 3098 if (ox_flag) { 3099 cr |= CRF_SO; 3100 } 3101 3102 return cr; 3103 } 3104 3105 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3106 { 3107 int cr; 3108 int i; 3109 int invalid = 0; 3110 bool ox_flag = false; 3111 ppc_avr_t ret = *b; 3112 3113 for (i = 0; i < 32; i++) { 3114 bcd_get_digit(b, i, &invalid); 3115 3116 if (unlikely(invalid)) { 3117 return CRF_SO; 3118 } 3119 } 3120 3121 #if defined(HOST_WORDS_BIGENDIAN) 3122 i = a->s8[7]; 3123 #else 3124 i = a->s8[8]; 3125 #endif 3126 if (i >= 32) { 3127 ox_flag = true; 3128 ret.u64[LO_IDX] = ret.u64[HI_IDX] = 0; 3129 } else if (i <= -32) { 3130 ret.u64[LO_IDX] = ret.u64[HI_IDX] = 0; 3131 } else if (i > 0) { 3132 ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag); 3133 } else { 3134 urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4); 3135 } 3136 *r = ret; 3137 3138 cr = bcd_cmp_zero(r); 3139 if (ox_flag) { 3140 cr |= CRF_SO; 3141 } 3142 3143 return cr; 3144 } 3145 3146 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3147 { 3148 int cr; 3149 int unused = 0; 3150 int invalid = 0; 3151 bool ox_flag = false; 3152 int sgnb = bcd_get_sgn(b); 3153 ppc_avr_t ret = *b; 3154 ret.u64[LO_IDX] &= ~0xf; 3155 3156 #if defined(HOST_WORDS_BIGENDIAN) 3157 int i = a->s8[7]; 3158 ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } }; 3159 #else 3160 int i = a->s8[8]; 3161 ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } }; 3162 #endif 3163 3164 if (bcd_is_valid(b) == false) { 3165 return CRF_SO; 3166 } 3167 3168 if (unlikely(i > 31)) { 3169 i = 31; 3170 } else if (unlikely(i < -31)) { 3171 i = -31; 3172 } 3173 3174 if (i > 0) { 3175 ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag); 3176 } else { 3177 urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4); 3178 3179 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 3180 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 3181 } 3182 } 3183 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 3184 3185 cr = bcd_cmp_zero(&ret); 3186 if (ox_flag) { 3187 cr |= CRF_SO; 3188 } 3189 *r = ret; 3190 3191 return cr; 3192 } 3193 3194 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3195 { 3196 uint64_t mask; 3197 uint32_t ox_flag = 0; 3198 #if 
defined(HOST_WORDS_BIGENDIAN) 3199 int i = a->s16[3] + 1; 3200 #else 3201 int i = a->s16[4] + 1; 3202 #endif 3203 ppc_avr_t ret = *b; 3204 3205 if (bcd_is_valid(b) == false) { 3206 return CRF_SO; 3207 } 3208 3209 if (i > 16 && i < 32) { 3210 mask = (uint64_t)-1 >> (128 - i * 4); 3211 if (ret.u64[HI_IDX] & ~mask) { 3212 ox_flag = CRF_SO; 3213 } 3214 3215 ret.u64[HI_IDX] &= mask; 3216 } else if (i >= 0 && i <= 16) { 3217 mask = (uint64_t)-1 >> (64 - i * 4); 3218 if (ret.u64[HI_IDX] || (ret.u64[LO_IDX] & ~mask)) { 3219 ox_flag = CRF_SO; 3220 } 3221 3222 ret.u64[LO_IDX] &= mask; 3223 ret.u64[HI_IDX] = 0; 3224 } 3225 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 3226 *r = ret; 3227 3228 return bcd_cmp_zero(&ret) | ox_flag; 3229 } 3230 3231 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3232 { 3233 int i; 3234 uint64_t mask; 3235 uint32_t ox_flag = 0; 3236 int invalid = 0; 3237 ppc_avr_t ret = *b; 3238 3239 for (i = 0; i < 32; i++) { 3240 bcd_get_digit(b, i, &invalid); 3241 3242 if (unlikely(invalid)) { 3243 return CRF_SO; 3244 } 3245 } 3246 3247 #if defined(HOST_WORDS_BIGENDIAN) 3248 i = a->s16[3]; 3249 #else 3250 i = a->s16[4]; 3251 #endif 3252 if (i > 16 && i < 33) { 3253 mask = (uint64_t)-1 >> (128 - i * 4); 3254 if (ret.u64[HI_IDX] & ~mask) { 3255 ox_flag = CRF_SO; 3256 } 3257 3258 ret.u64[HI_IDX] &= mask; 3259 } else if (i > 0 && i <= 16) { 3260 mask = (uint64_t)-1 >> (64 - i * 4); 3261 if (ret.u64[HI_IDX] || (ret.u64[LO_IDX] & ~mask)) { 3262 ox_flag = CRF_SO; 3263 } 3264 3265 ret.u64[LO_IDX] &= mask; 3266 ret.u64[HI_IDX] = 0; 3267 } else if (i == 0) { 3268 if (ret.u64[HI_IDX] || ret.u64[LO_IDX]) { 3269 ox_flag = CRF_SO; 3270 } 3271 ret.u64[HI_IDX] = ret.u64[LO_IDX] = 0; 3272 } 3273 3274 *r = ret; 3275 if (r->u64[HI_IDX] == 0 && r->u64[LO_IDX] == 0) { 3276 return ox_flag | CRF_EQ; 3277 } 3278 3279 return ox_flag | CRF_GT; 3280 } 3281 3282 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 3283 { 3284 int i; 3285 VECTOR_FOR_INORDER_I(i, u8) { 3286 r->u8[i] = AES_sbox[a->u8[i]]; 3287 } 3288 } 3289 3290 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3291 { 3292 ppc_avr_t result; 3293 int i; 3294 3295 VECTOR_FOR_INORDER_I(i, u32) { 3296 result.AVRW(i) = b->AVRW(i) ^ 3297 (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^ 3298 AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^ 3299 AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^ 3300 AES_Te3[a->AVRB(AES_shifts[4*i + 3])]); 3301 } 3302 *r = result; 3303 } 3304 3305 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3306 { 3307 ppc_avr_t result; 3308 int i; 3309 3310 VECTOR_FOR_INORDER_I(i, u8) { 3311 result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]); 3312 } 3313 *r = result; 3314 } 3315 3316 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3317 { 3318 /* This differs from what is written in ISA V2.07. The RTL is */ 3319 /* incorrect and will be fixed in V2.07B. 
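(as implemented below, the inverse S-box and ShiftRows are applied to a and the result is XORed with b before InvMixColumns is applied)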
*/ 3320 int i; 3321 ppc_avr_t tmp; 3322 3323 VECTOR_FOR_INORDER_I(i, u8) { 3324 tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])]; 3325 } 3326 3327 VECTOR_FOR_INORDER_I(i, u32) { 3328 r->AVRW(i) = 3329 AES_imc[tmp.AVRB(4*i + 0)][0] ^ 3330 AES_imc[tmp.AVRB(4*i + 1)][1] ^ 3331 AES_imc[tmp.AVRB(4*i + 2)][2] ^ 3332 AES_imc[tmp.AVRB(4*i + 3)][3]; 3333 } 3334 } 3335 3336 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3337 { 3338 ppc_avr_t result; 3339 int i; 3340 3341 VECTOR_FOR_INORDER_I(i, u8) { 3342 result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]); 3343 } 3344 *r = result; 3345 } 3346 3347 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n))) 3348 #if defined(HOST_WORDS_BIGENDIAN) 3349 #define EL_IDX(i) (i) 3350 #else 3351 #define EL_IDX(i) (3 - (i)) 3352 #endif 3353 3354 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3355 { 3356 int st = (st_six & 0x10) != 0; 3357 int six = st_six & 0xF; 3358 int i; 3359 3360 VECTOR_FOR_INORDER_I(i, u32) { 3361 if (st == 0) { 3362 if ((six & (0x8 >> i)) == 0) { 3363 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^ 3364 ROTRu32(a->u32[EL_IDX(i)], 18) ^ 3365 (a->u32[EL_IDX(i)] >> 3); 3366 } else { /* six.bit[i] == 1 */ 3367 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^ 3368 ROTRu32(a->u32[EL_IDX(i)], 19) ^ 3369 (a->u32[EL_IDX(i)] >> 10); 3370 } 3371 } else { /* st == 1 */ 3372 if ((six & (0x8 >> i)) == 0) { 3373 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^ 3374 ROTRu32(a->u32[EL_IDX(i)], 13) ^ 3375 ROTRu32(a->u32[EL_IDX(i)], 22); 3376 } else { /* six.bit[i] == 1 */ 3377 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^ 3378 ROTRu32(a->u32[EL_IDX(i)], 11) ^ 3379 ROTRu32(a->u32[EL_IDX(i)], 25); 3380 } 3381 } 3382 } 3383 } 3384 3385 #undef ROTRu32 3386 #undef EL_IDX 3387 3388 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n))) 3389 #if defined(HOST_WORDS_BIGENDIAN) 3390 #define EL_IDX(i) (i) 3391 #else 3392 #define EL_IDX(i) (1 - (i)) 3393 #endif 3394 3395 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3396 { 3397 int st = (st_six & 0x10) != 0; 3398 int six = st_six & 0xF; 3399 int i; 3400 3401 VECTOR_FOR_INORDER_I(i, u64) { 3402 if (st == 0) { 3403 if ((six & (0x8 >> (2*i))) == 0) { 3404 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^ 3405 ROTRu64(a->u64[EL_IDX(i)], 8) ^ 3406 (a->u64[EL_IDX(i)] >> 7); 3407 } else { /* six.bit[2*i] == 1 */ 3408 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^ 3409 ROTRu64(a->u64[EL_IDX(i)], 61) ^ 3410 (a->u64[EL_IDX(i)] >> 6); 3411 } 3412 } else { /* st == 1 */ 3413 if ((six & (0x8 >> (2*i))) == 0) { 3414 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^ 3415 ROTRu64(a->u64[EL_IDX(i)], 34) ^ 3416 ROTRu64(a->u64[EL_IDX(i)], 39); 3417 } else { /* six.bit[2*i] == 1 */ 3418 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^ 3419 ROTRu64(a->u64[EL_IDX(i)], 18) ^ 3420 ROTRu64(a->u64[EL_IDX(i)], 41); 3421 } 3422 } 3423 } 3424 } 3425 3426 #undef ROTRu64 3427 #undef EL_IDX 3428 3429 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 3430 { 3431 ppc_avr_t result; 3432 int i; 3433 3434 VECTOR_FOR_INORDER_I(i, u8) { 3435 int indexA = c->u8[i] >> 4; 3436 int indexB = c->u8[i] & 0xF; 3437 #if defined(HOST_WORDS_BIGENDIAN) 3438 result.u8[i] = a->u8[indexA] ^ b->u8[indexB]; 3439 #else 3440 result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB]; 3441 #endif 3442 } 3443 *r = result; 3444 } 3445 3446 #undef VECTOR_FOR_INORDER_I 3447 #undef HI_IDX 3448 #undef LO_IDX 3449 3450 
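/* Worked example for helper_vpermxor above (selector values are
 * illustrative): a selector byte of 0x4B in c picks a->u8[4] (high nibble)
 * and b->u8[11] (low nibble) and XORs them on a big-endian host; a
 * little-endian host uses the mirrored indices, a->u8[11] and b->u8[4]. */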
/*****************************************************************************/ 3451 /* SPE extension helpers */ 3452 /* Use a table to make this quicker */ 3453 static const uint8_t hbrev[16] = { 3454 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE, 3455 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF, 3456 }; 3457 3458 static inline uint8_t byte_reverse(uint8_t val) 3459 { 3460 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4); 3461 } 3462 3463 static inline uint32_t word_reverse(uint32_t val) 3464 { 3465 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) | 3466 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24); 3467 } 3468 3469 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */ 3470 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2) 3471 { 3472 uint32_t a, b, d, mask; 3473 3474 mask = UINT32_MAX >> (32 - MASKBITS); 3475 a = arg1 & mask; 3476 b = arg2 & mask; 3477 d = word_reverse(1 + word_reverse(a | ~b)); 3478 return (arg1 & ~mask) | (d & b); 3479 } 3480 3481 uint32_t helper_cntlsw32(uint32_t val) 3482 { 3483 if (val & 0x80000000) { 3484 return clz32(~val); 3485 } else { 3486 return clz32(val); 3487 } 3488 } 3489 3490 uint32_t helper_cntlzw32(uint32_t val) 3491 { 3492 return clz32(val); 3493 } 3494 3495 /* 440 specific */ 3496 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 3497 target_ulong low, uint32_t update_Rc) 3498 { 3499 target_ulong mask; 3500 int i; 3501 3502 i = 1; 3503 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3504 if ((high & mask) == 0) { 3505 if (update_Rc) { 3506 env->crf[0] = 0x4; 3507 } 3508 goto done; 3509 } 3510 i++; 3511 } 3512 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3513 if ((low & mask) == 0) { 3514 if (update_Rc) { 3515 env->crf[0] = 0x8; 3516 } 3517 goto done; 3518 } 3519 i++; 3520 } 3521 i = 8; 3522 if (update_Rc) { 3523 env->crf[0] = 0x2; 3524 } 3525 done: 3526 env->xer = (env->xer & ~0x7F) | i; 3527 if (update_Rc) { 3528 env->crf[0] |= xer_so; 3529 } 3530 return i; 3531 } 3532
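/* Worked example for helper_brinc above (operand values are illustrative):
 * with arg2 = 0xF the low four bits form the buffer mask, so arg1 = 0x3
 * (0011b) is bit-reversed to 1100b, incremented to 1101b, and reversed back,
 * giving 1011b = 0xB as the next bit-reversed index. */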