/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/* Subtract 1 from each byte, AND with the inverse, and check if the MSB is
 * set at each byte.
 * i.e.
((0x00 - 0x01) & ~(0x00)) & 0x80 143 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 144 */ 145 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 146 147 /* When you XOR the pattern and there is a match, that byte will be zero */ 148 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 149 150 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 151 { 152 return hasvalue(rb, ra) ? CRF_GT : 0; 153 } 154 155 #undef pattern 156 #undef haszero 157 #undef hasvalue 158 159 /* Return invalid random number. 160 * 161 * FIXME: Add rng backend or other mechanism to get cryptographically suitable 162 * random number 163 */ 164 target_ulong helper_darn32(void) 165 { 166 return -1; 167 } 168 169 target_ulong helper_darn64(void) 170 { 171 return -1; 172 } 173 174 #endif 175 176 #if defined(TARGET_PPC64) 177 178 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 179 { 180 int i; 181 uint64_t ra = 0; 182 183 for (i = 0; i < 8; i++) { 184 int index = (rs >> (i*8)) & 0xFF; 185 if (index < 64) { 186 if (rb & PPC_BIT(index)) { 187 ra |= 1 << i; 188 } 189 } 190 } 191 return ra; 192 } 193 194 #endif 195 196 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 197 { 198 target_ulong mask = 0xff; 199 target_ulong ra = 0; 200 int i; 201 202 for (i = 0; i < sizeof(target_ulong); i++) { 203 if ((rs & mask) == (rb & mask)) { 204 ra |= mask; 205 } 206 mask <<= 8; 207 } 208 return ra; 209 } 210 211 /* shift right arithmetic helper */ 212 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 213 target_ulong shift) 214 { 215 int32_t ret; 216 217 if (likely(!(shift & 0x20))) { 218 if (likely((uint32_t)shift != 0)) { 219 shift &= 0x1f; 220 ret = (int32_t)value >> shift; 221 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 222 env->ca32 = env->ca = 0; 223 } else { 224 env->ca32 = env->ca = 1; 225 } 226 } else { 227 ret = (int32_t)value; 228 env->ca32 = env->ca = 0; 229 } 230 } else { 231 ret = (int32_t)value >> 31; 232 env->ca32 = env->ca = (ret != 0); 233 } 234 return (target_long)ret; 235 } 236 237 #if defined(TARGET_PPC64) 238 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 239 target_ulong shift) 240 { 241 int64_t ret; 242 243 if (likely(!(shift & 0x40))) { 244 if (likely((uint64_t)shift != 0)) { 245 shift &= 0x3f; 246 ret = (int64_t)value >> shift; 247 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 248 env->ca32 = env->ca = 0; 249 } else { 250 env->ca32 = env->ca = 1; 251 } 252 } else { 253 ret = (int64_t)value; 254 env->ca32 = env->ca = 0; 255 } 256 } else { 257 ret = (int64_t)value >> 63; 258 env->ca32 = env->ca = (ret != 0); 259 } 260 return ret; 261 } 262 #endif 263 264 #if defined(TARGET_PPC64) 265 target_ulong helper_popcntb(target_ulong val) 266 { 267 /* Note that we don't fold past bytes */ 268 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 269 0x5555555555555555ULL); 270 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 271 0x3333333333333333ULL); 272 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 273 0x0f0f0f0f0f0f0f0fULL); 274 return val; 275 } 276 277 target_ulong helper_popcntw(target_ulong val) 278 { 279 /* Note that we don't fold past words. 
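 * For example, an input of 0x0000000300000001 becomes 0x0000000200000001:
 * each 32-bit word ends up holding its own population count.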
*/ 280 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 281 0x5555555555555555ULL); 282 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 283 0x3333333333333333ULL); 284 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 285 0x0f0f0f0f0f0f0f0fULL); 286 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 287 0x00ff00ff00ff00ffULL); 288 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 289 0x0000ffff0000ffffULL); 290 return val; 291 } 292 #else 293 target_ulong helper_popcntb(target_ulong val) 294 { 295 /* Note that we don't fold past bytes */ 296 val = (val & 0x55555555) + ((val >> 1) & 0x55555555); 297 val = (val & 0x33333333) + ((val >> 2) & 0x33333333); 298 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); 299 return val; 300 } 301 #endif 302 303 /*****************************************************************************/ 304 /* PowerPC 601 specific instructions (POWER bridge) */ 305 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2) 306 { 307 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 308 309 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 310 (int32_t)arg2 == 0) { 311 env->spr[SPR_MQ] = 0; 312 return INT32_MIN; 313 } else { 314 env->spr[SPR_MQ] = tmp % arg2; 315 return tmp / (int32_t)arg2; 316 } 317 } 318 319 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1, 320 target_ulong arg2) 321 { 322 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 323 324 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 325 (int32_t)arg2 == 0) { 326 env->so = env->ov = 1; 327 env->spr[SPR_MQ] = 0; 328 return INT32_MIN; 329 } else { 330 env->spr[SPR_MQ] = tmp % arg2; 331 tmp /= (int32_t)arg2; 332 if ((int32_t)tmp != tmp) { 333 env->so = env->ov = 1; 334 } else { 335 env->ov = 0; 336 } 337 return tmp; 338 } 339 } 340 341 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1, 342 target_ulong arg2) 343 { 344 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 345 (int32_t)arg2 == 0) { 346 env->spr[SPR_MQ] = 0; 347 return INT32_MIN; 348 } else { 349 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 350 return (int32_t)arg1 / (int32_t)arg2; 351 } 352 } 353 354 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1, 355 target_ulong arg2) 356 { 357 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 358 (int32_t)arg2 == 0) { 359 env->so = env->ov = 1; 360 env->spr[SPR_MQ] = 0; 361 return INT32_MIN; 362 } else { 363 env->ov = 0; 364 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 365 return (int32_t)arg1 / (int32_t)arg2; 366 } 367 } 368 369 /*****************************************************************************/ 370 /* 602 specific instructions */ 371 /* mfrom is the most crazy instruction ever seen, imho ! */ 372 /* Real implementation uses a ROM table. 
Do the same */
/* Extremely decomposed:
 *     return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.inc.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#define AVRB(i) u8[i]
#define AVRW(i) u32[i]
#else
#define HI_IDX 1
#define LO_IDX 0
#define AVRB(i) u8[15 - (i)]
#define AVRW(i) u32[3 - (i)]
#endif

#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers. */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
#if defined(HOST_WORDS_BIGENDIAN)
    env->vscr = r->u32[3];
#else
    env->vscr = r->u32[0];
#endif
    set_flush_to_zero(vscr_nj, &env->vec_status);
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r,
ppc_avr_t *b) 516 { 517 uint64_t res = b->u64[0] ^ b->u64[1]; 518 res ^= res >> 32; 519 res ^= res >> 16; 520 res ^= res >> 8; 521 r->u64[LO_IDX] = res & 1; 522 r->u64[HI_IDX] = 0; 523 } 524 525 #define VARITH_DO(name, op, element) \ 526 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 527 { \ 528 int i; \ 529 \ 530 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 531 r->element[i] = a->element[i] op b->element[i]; \ 532 } \ 533 } 534 #define VARITH(suffix, element) \ 535 VARITH_DO(add##suffix, +, element) \ 536 VARITH_DO(sub##suffix, -, element) 537 VARITH(ubm, u8) 538 VARITH(uhm, u16) 539 VARITH(uwm, u32) 540 VARITH(udm, u64) 541 VARITH_DO(muluwm, *, u32) 542 #undef VARITH_DO 543 #undef VARITH 544 545 #define VARITHFP(suffix, func) \ 546 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 547 ppc_avr_t *b) \ 548 { \ 549 int i; \ 550 \ 551 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ 552 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \ 553 } \ 554 } 555 VARITHFP(addfp, float32_add) 556 VARITHFP(subfp, float32_sub) 557 VARITHFP(minfp, float32_min) 558 VARITHFP(maxfp, float32_max) 559 #undef VARITHFP 560 561 #define VARITHFPFMA(suffix, type) \ 562 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 563 ppc_avr_t *b, ppc_avr_t *c) \ 564 { \ 565 int i; \ 566 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ 567 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \ 568 type, &env->vec_status); \ 569 } \ 570 } 571 VARITHFPFMA(maddfp, 0); 572 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 573 #undef VARITHFPFMA 574 575 #define VARITHSAT_CASE(type, op, cvt, element) \ 576 { \ 577 type result = (type)a->element[i] op (type)b->element[i]; \ 578 r->element[i] = cvt(result, &sat); \ 579 } 580 581 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 582 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 583 ppc_avr_t *b) \ 584 { \ 585 int sat = 0; \ 586 int i; \ 587 \ 588 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 589 switch (sizeof(r->element[0])) { \ 590 case 1: \ 591 VARITHSAT_CASE(optype, op, cvt, element); \ 592 break; \ 593 case 2: \ 594 VARITHSAT_CASE(optype, op, cvt, element); \ 595 break; \ 596 case 4: \ 597 VARITHSAT_CASE(optype, op, cvt, element); \ 598 break; \ 599 } \ 600 } \ 601 if (sat) { \ 602 env->vscr |= (1 << VSCR_SAT); \ 603 } \ 604 } 605 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 606 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 607 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 608 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 609 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 610 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 611 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 612 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 613 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 614 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 615 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 616 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 617 #undef VARITHSAT_CASE 618 #undef VARITHSAT_DO 619 #undef VARITHSAT_SIGNED 620 #undef VARITHSAT_UNSIGNED 621 622 #define VAVG_DO(name, element, etype) \ 623 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 624 { \ 625 int i; \ 626 \ 627 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 628 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 629 r->element[i] = x >> 1; \ 630 } \ 631 } 632 633 #define VAVG(type, signed_element, signed_type, unsigned_element, \ 634 unsigned_type) \ 635 
VAVG_DO(avgs##type, signed_element, signed_type) \ 636 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 637 VAVG(b, s8, int16_t, u8, uint16_t) 638 VAVG(h, s16, int32_t, u16, uint32_t) 639 VAVG(w, s32, int64_t, u32, uint64_t) 640 #undef VAVG_DO 641 #undef VAVG 642 643 #define VABSDU_DO(name, element) \ 644 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 645 { \ 646 int i; \ 647 \ 648 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 649 r->element[i] = (a->element[i] > b->element[i]) ? \ 650 (a->element[i] - b->element[i]) : \ 651 (b->element[i] - a->element[i]); \ 652 } \ 653 } 654 655 /* VABSDU - Vector absolute difference unsigned 656 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 657 * element - element type to access from vector 658 */ 659 #define VABSDU(type, element) \ 660 VABSDU_DO(absdu##type, element) 661 VABSDU(b, u8) 662 VABSDU(h, u16) 663 VABSDU(w, u32) 664 #undef VABSDU_DO 665 #undef VABSDU 666 667 #define VCF(suffix, cvt, element) \ 668 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 669 ppc_avr_t *b, uint32_t uim) \ 670 { \ 671 int i; \ 672 \ 673 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ 674 float32 t = cvt(b->element[i], &env->vec_status); \ 675 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \ 676 } \ 677 } 678 VCF(ux, uint32_to_float32, u32) 679 VCF(sx, int32_to_float32, s32) 680 #undef VCF 681 682 #define VCMP_DO(suffix, compare, element, record) \ 683 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 684 ppc_avr_t *a, ppc_avr_t *b) \ 685 { \ 686 uint64_t ones = (uint64_t)-1; \ 687 uint64_t all = ones; \ 688 uint64_t none = 0; \ 689 int i; \ 690 \ 691 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 692 uint64_t result = (a->element[i] compare b->element[i] ? \ 693 ones : 0x0); \ 694 switch (sizeof(a->element[0])) { \ 695 case 8: \ 696 r->u64[i] = result; \ 697 break; \ 698 case 4: \ 699 r->u32[i] = result; \ 700 break; \ 701 case 2: \ 702 r->u16[i] = result; \ 703 break; \ 704 case 1: \ 705 r->u8[i] = result; \ 706 break; \ 707 } \ 708 all &= result; \ 709 none |= result; \ 710 } \ 711 if (record) { \ 712 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 713 } \ 714 } 715 #define VCMP(suffix, compare, element) \ 716 VCMP_DO(suffix, compare, element, 0) \ 717 VCMP_DO(suffix##_dot, compare, element, 1) 718 VCMP(equb, ==, u8) 719 VCMP(equh, ==, u16) 720 VCMP(equw, ==, u32) 721 VCMP(equd, ==, u64) 722 VCMP(gtub, >, u8) 723 VCMP(gtuh, >, u16) 724 VCMP(gtuw, >, u32) 725 VCMP(gtud, >, u64) 726 VCMP(gtsb, >, s8) 727 VCMP(gtsh, >, s16) 728 VCMP(gtsw, >, s32) 729 VCMP(gtsd, >, s64) 730 #undef VCMP_DO 731 #undef VCMP 732 733 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \ 734 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \ 735 ppc_avr_t *a, ppc_avr_t *b) \ 736 { \ 737 etype ones = (etype)-1; \ 738 etype all = ones; \ 739 etype result, none = 0; \ 740 int i; \ 741 \ 742 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 743 if (cmpzero) { \ 744 result = ((a->element[i] == 0) \ 745 || (b->element[i] == 0) \ 746 || (a->element[i] != b->element[i]) ? \ 747 ones : 0x0); \ 748 } else { \ 749 result = (a->element[i] != b->element[i]) ? 
ones : 0x0; \ 750 } \ 751 r->element[i] = result; \ 752 all &= result; \ 753 none |= result; \ 754 } \ 755 if (record) { \ 756 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 757 } \ 758 } 759 760 /* VCMPNEZ - Vector compare not equal to zero 761 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word) 762 * element - element type to access from vector 763 */ 764 #define VCMPNE(suffix, element, etype, cmpzero) \ 765 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \ 766 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1) 767 VCMPNE(zb, u8, uint8_t, 1) 768 VCMPNE(zh, u16, uint16_t, 1) 769 VCMPNE(zw, u32, uint32_t, 1) 770 VCMPNE(b, u8, uint8_t, 0) 771 VCMPNE(h, u16, uint16_t, 0) 772 VCMPNE(w, u32, uint32_t, 0) 773 #undef VCMPNE_DO 774 #undef VCMPNE 775 776 #define VCMPFP_DO(suffix, compare, order, record) \ 777 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 778 ppc_avr_t *a, ppc_avr_t *b) \ 779 { \ 780 uint32_t ones = (uint32_t)-1; \ 781 uint32_t all = ones; \ 782 uint32_t none = 0; \ 783 int i; \ 784 \ 785 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ 786 uint32_t result; \ 787 int rel = float32_compare_quiet(a->f[i], b->f[i], \ 788 &env->vec_status); \ 789 if (rel == float_relation_unordered) { \ 790 result = 0; \ 791 } else if (rel compare order) { \ 792 result = ones; \ 793 } else { \ 794 result = 0; \ 795 } \ 796 r->u32[i] = result; \ 797 all &= result; \ 798 none |= result; \ 799 } \ 800 if (record) { \ 801 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 802 } \ 803 } 804 #define VCMPFP(suffix, compare, order) \ 805 VCMPFP_DO(suffix, compare, order, 0) \ 806 VCMPFP_DO(suffix##_dot, compare, order, 1) 807 VCMPFP(eqfp, ==, float_relation_equal) 808 VCMPFP(gefp, !=, float_relation_less) 809 VCMPFP(gtfp, ==, float_relation_greater) 810 #undef VCMPFP_DO 811 #undef VCMPFP 812 813 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 814 ppc_avr_t *a, ppc_avr_t *b, int record) 815 { 816 int i; 817 int all_in = 0; 818 819 for (i = 0; i < ARRAY_SIZE(r->f); i++) { 820 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status); 821 if (le_rel == float_relation_unordered) { 822 r->u32[i] = 0xc0000000; 823 all_in = 1; 824 } else { 825 float32 bneg = float32_chs(b->f[i]); 826 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status); 827 int le = le_rel != float_relation_greater; 828 int ge = ge_rel != float_relation_less; 829 830 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 831 all_in |= (!le | !ge); 832 } 833 } 834 if (record) { 835 env->crf[6] = (all_in == 0) << 1; 836 } 837 } 838 839 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 840 { 841 vcmpbfp_internal(env, r, a, b, 0); 842 } 843 844 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 845 ppc_avr_t *b) 846 { 847 vcmpbfp_internal(env, r, a, b, 1); 848 } 849 850 #define VCT(suffix, satcvt, element) \ 851 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 852 ppc_avr_t *b, uint32_t uim) \ 853 { \ 854 int i; \ 855 int sat = 0; \ 856 float_status s = env->vec_status; \ 857 \ 858 set_float_rounding_mode(float_round_to_zero, &s); \ 859 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ 860 if (float32_is_any_nan(b->f[i])) { \ 861 r->element[i] = 0; \ 862 } else { \ 863 float64 t = float32_to_float64(b->f[i], &s); \ 864 int64_t j; \ 865 \ 866 t = float64_scalbn(t, uim, &s); \ 867 j = float64_to_int64(t, &s); \ 868 r->element[i] = satcvt(j, &sat); \ 869 } \ 870 } \ 871 if (sat) { \ 872 env->vscr |= (1 << VSCR_SAT); \ 873 } \ 874 } 
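/*
 * The two instantiations below implement vctuxs/vctsxs: each float element
 * is scaled by 2**uim, rounded toward zero and converted to a 32-bit
 * integer, so with uim = 1 the element 3.5f becomes 7. NaN inputs convert
 * to 0, and out-of-range values saturate via cvtsduw/cvtsdsw, which also
 * sets VSCR[SAT].
 */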
875 VCT(uxs, cvtsduw, u32) 876 VCT(sxs, cvtsdsw, s32) 877 #undef VCT 878 879 target_ulong helper_vclzlsbb(ppc_avr_t *r) 880 { 881 target_ulong count = 0; 882 int i; 883 VECTOR_FOR_INORDER_I(i, u8) { 884 if (r->u8[i] & 0x01) { 885 break; 886 } 887 count++; 888 } 889 return count; 890 } 891 892 target_ulong helper_vctzlsbb(ppc_avr_t *r) 893 { 894 target_ulong count = 0; 895 int i; 896 #if defined(HOST_WORDS_BIGENDIAN) 897 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 898 #else 899 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 900 #endif 901 if (r->u8[i] & 0x01) { 902 break; 903 } 904 count++; 905 } 906 return count; 907 } 908 909 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 910 ppc_avr_t *b, ppc_avr_t *c) 911 { 912 int sat = 0; 913 int i; 914 915 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 916 int32_t prod = a->s16[i] * b->s16[i]; 917 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 918 919 r->s16[i] = cvtswsh(t, &sat); 920 } 921 922 if (sat) { 923 env->vscr |= (1 << VSCR_SAT); 924 } 925 } 926 927 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 928 ppc_avr_t *b, ppc_avr_t *c) 929 { 930 int sat = 0; 931 int i; 932 933 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 934 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 935 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 936 r->s16[i] = cvtswsh(t, &sat); 937 } 938 939 if (sat) { 940 env->vscr |= (1 << VSCR_SAT); 941 } 942 } 943 944 #define VMINMAX_DO(name, compare, element) \ 945 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 946 { \ 947 int i; \ 948 \ 949 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 950 if (a->element[i] compare b->element[i]) { \ 951 r->element[i] = b->element[i]; \ 952 } else { \ 953 r->element[i] = a->element[i]; \ 954 } \ 955 } \ 956 } 957 #define VMINMAX(suffix, element) \ 958 VMINMAX_DO(min##suffix, >, element) \ 959 VMINMAX_DO(max##suffix, <, element) 960 VMINMAX(sb, s8) 961 VMINMAX(sh, s16) 962 VMINMAX(sw, s32) 963 VMINMAX(sd, s64) 964 VMINMAX(ub, u8) 965 VMINMAX(uh, u16) 966 VMINMAX(uw, u32) 967 VMINMAX(ud, u64) 968 #undef VMINMAX_DO 969 #undef VMINMAX 970 971 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 972 { 973 int i; 974 975 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 976 int32_t prod = a->s16[i] * b->s16[i]; 977 r->s16[i] = (int16_t) (prod + c->s16[i]); 978 } 979 } 980 981 #define VMRG_DO(name, element, highp) \ 982 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 983 { \ 984 ppc_avr_t result; \ 985 int i; \ 986 size_t n_elems = ARRAY_SIZE(r->element); \ 987 \ 988 for (i = 0; i < n_elems / 2; i++) { \ 989 if (highp) { \ 990 result.element[i*2+HI_IDX] = a->element[i]; \ 991 result.element[i*2+LO_IDX] = b->element[i]; \ 992 } else { \ 993 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \ 994 b->element[n_elems - i - 1]; \ 995 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \ 996 a->element[n_elems - i - 1]; \ 997 } \ 998 } \ 999 *r = result; \ 1000 } 1001 #if defined(HOST_WORDS_BIGENDIAN) 1002 #define MRGHI 0 1003 #define MRGLO 1 1004 #else 1005 #define MRGHI 1 1006 #define MRGLO 0 1007 #endif 1008 #define VMRG(suffix, element) \ 1009 VMRG_DO(mrgl##suffix, element, MRGHI) \ 1010 VMRG_DO(mrgh##suffix, element, MRGLO) 1011 VMRG(b, u8) 1012 VMRG(h, u16) 1013 VMRG(w, u32) 1014 #undef VMRG_DO 1015 #undef VMRG 1016 #undef MRGHI 1017 #undef MRGLO 1018 1019 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1020 ppc_avr_t *b, ppc_avr_t *c) 1021 { 1022 int32_t prod[16]; 1023 int i; 1024 1025 for (i = 0; i < 
ARRAY_SIZE(r->s8); i++) { 1026 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 1027 } 1028 1029 VECTOR_FOR_INORDER_I(i, s32) { 1030 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 1031 prod[4 * i + 2] + prod[4 * i + 3]; 1032 } 1033 } 1034 1035 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1036 ppc_avr_t *b, ppc_avr_t *c) 1037 { 1038 int32_t prod[8]; 1039 int i; 1040 1041 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1042 prod[i] = a->s16[i] * b->s16[i]; 1043 } 1044 1045 VECTOR_FOR_INORDER_I(i, s32) { 1046 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1047 } 1048 } 1049 1050 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1051 ppc_avr_t *b, ppc_avr_t *c) 1052 { 1053 int32_t prod[8]; 1054 int i; 1055 int sat = 0; 1056 1057 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1058 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1059 } 1060 1061 VECTOR_FOR_INORDER_I(i, s32) { 1062 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1063 1064 r->u32[i] = cvtsdsw(t, &sat); 1065 } 1066 1067 if (sat) { 1068 env->vscr |= (1 << VSCR_SAT); 1069 } 1070 } 1071 1072 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1073 ppc_avr_t *b, ppc_avr_t *c) 1074 { 1075 uint16_t prod[16]; 1076 int i; 1077 1078 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1079 prod[i] = a->u8[i] * b->u8[i]; 1080 } 1081 1082 VECTOR_FOR_INORDER_I(i, u32) { 1083 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1084 prod[4 * i + 2] + prod[4 * i + 3]; 1085 } 1086 } 1087 1088 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1089 ppc_avr_t *b, ppc_avr_t *c) 1090 { 1091 uint32_t prod[8]; 1092 int i; 1093 1094 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1095 prod[i] = a->u16[i] * b->u16[i]; 1096 } 1097 1098 VECTOR_FOR_INORDER_I(i, u32) { 1099 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1100 } 1101 } 1102 1103 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1104 ppc_avr_t *b, ppc_avr_t *c) 1105 { 1106 uint32_t prod[8]; 1107 int i; 1108 int sat = 0; 1109 1110 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1111 prod[i] = a->u16[i] * b->u16[i]; 1112 } 1113 1114 VECTOR_FOR_INORDER_I(i, s32) { 1115 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1116 1117 r->u32[i] = cvtuduw(t, &sat); 1118 } 1119 1120 if (sat) { 1121 env->vscr |= (1 << VSCR_SAT); 1122 } 1123 } 1124 1125 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \ 1126 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1127 { \ 1128 int i; \ 1129 \ 1130 VECTOR_FOR_INORDER_I(i, prod_element) { \ 1131 if (evenp) { \ 1132 r->prod_element[i] = \ 1133 (cast)a->mul_element[i * 2 + HI_IDX] * \ 1134 (cast)b->mul_element[i * 2 + HI_IDX]; \ 1135 } else { \ 1136 r->prod_element[i] = \ 1137 (cast)a->mul_element[i * 2 + LO_IDX] * \ 1138 (cast)b->mul_element[i * 2 + LO_IDX]; \ 1139 } \ 1140 } \ 1141 } 1142 #define VMUL(suffix, mul_element, prod_element, cast) \ 1143 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \ 1144 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0) 1145 VMUL(sb, s8, s16, int16_t) 1146 VMUL(sh, s16, s32, int32_t) 1147 VMUL(sw, s32, s64, int64_t) 1148 VMUL(ub, u8, u16, uint16_t) 1149 VMUL(uh, u16, u32, uint32_t) 1150 VMUL(uw, u32, u64, uint64_t) 1151 #undef VMUL_DO 1152 #undef VMUL 1153 1154 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1155 ppc_avr_t *c) 1156 { 1157 ppc_avr_t result; 1158 int i; 1159 1160 VECTOR_FOR_INORDER_I(i, u8) { 1161 int s = c->u8[i] & 0x1f; 1162 #if 
defined(HOST_WORDS_BIGENDIAN) 1163 int index = s & 0xf; 1164 #else 1165 int index = 15 - (s & 0xf); 1166 #endif 1167 1168 if (s & 0x10) { 1169 result.u8[i] = b->u8[index]; 1170 } else { 1171 result.u8[i] = a->u8[index]; 1172 } 1173 } 1174 *r = result; 1175 } 1176 1177 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1178 ppc_avr_t *c) 1179 { 1180 ppc_avr_t result; 1181 int i; 1182 1183 VECTOR_FOR_INORDER_I(i, u8) { 1184 int s = c->u8[i] & 0x1f; 1185 #if defined(HOST_WORDS_BIGENDIAN) 1186 int index = 15 - (s & 0xf); 1187 #else 1188 int index = s & 0xf; 1189 #endif 1190 1191 if (s & 0x10) { 1192 result.u8[i] = a->u8[index]; 1193 } else { 1194 result.u8[i] = b->u8[index]; 1195 } 1196 } 1197 *r = result; 1198 } 1199 1200 #if defined(HOST_WORDS_BIGENDIAN) 1201 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1202 #define VBPERMD_INDEX(i) (i) 1203 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1204 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) 1205 #else 1206 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)]) 1207 #define VBPERMD_INDEX(i) (1 - i) 1208 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1209 #define EXTRACT_BIT(avr, i, index) \ 1210 (extract64((avr)->u64[1 - i], 63 - index, 1)) 1211 #endif 1212 1213 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1214 { 1215 int i, j; 1216 ppc_avr_t result = { .u64 = { 0, 0 } }; 1217 VECTOR_FOR_INORDER_I(i, u64) { 1218 for (j = 0; j < 8; j++) { 1219 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1220 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1221 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1222 } 1223 } 1224 } 1225 *r = result; 1226 } 1227 1228 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1229 { 1230 int i; 1231 uint64_t perm = 0; 1232 1233 VECTOR_FOR_INORDER_I(i, u8) { 1234 int index = VBPERMQ_INDEX(b, i); 1235 1236 if (index < 128) { 1237 uint64_t mask = (1ull << (63-(index & 0x3F))); 1238 if (a->u64[VBPERMQ_DW(index)] & mask) { 1239 perm |= (0x8000 >> i); 1240 } 1241 } 1242 } 1243 1244 r->u64[HI_IDX] = perm; 1245 r->u64[LO_IDX] = 0; 1246 } 1247 1248 #undef VBPERMQ_INDEX 1249 #undef VBPERMQ_DW 1250 1251 static const uint64_t VGBBD_MASKS[256] = { 1252 0x0000000000000000ull, /* 00 */ 1253 0x0000000000000080ull, /* 01 */ 1254 0x0000000000008000ull, /* 02 */ 1255 0x0000000000008080ull, /* 03 */ 1256 0x0000000000800000ull, /* 04 */ 1257 0x0000000000800080ull, /* 05 */ 1258 0x0000000000808000ull, /* 06 */ 1259 0x0000000000808080ull, /* 07 */ 1260 0x0000000080000000ull, /* 08 */ 1261 0x0000000080000080ull, /* 09 */ 1262 0x0000000080008000ull, /* 0A */ 1263 0x0000000080008080ull, /* 0B */ 1264 0x0000000080800000ull, /* 0C */ 1265 0x0000000080800080ull, /* 0D */ 1266 0x0000000080808000ull, /* 0E */ 1267 0x0000000080808080ull, /* 0F */ 1268 0x0000008000000000ull, /* 10 */ 1269 0x0000008000000080ull, /* 11 */ 1270 0x0000008000008000ull, /* 12 */ 1271 0x0000008000008080ull, /* 13 */ 1272 0x0000008000800000ull, /* 14 */ 1273 0x0000008000800080ull, /* 15 */ 1274 0x0000008000808000ull, /* 16 */ 1275 0x0000008000808080ull, /* 17 */ 1276 0x0000008080000000ull, /* 18 */ 1277 0x0000008080000080ull, /* 19 */ 1278 0x0000008080008000ull, /* 1A */ 1279 0x0000008080008080ull, /* 1B */ 1280 0x0000008080800000ull, /* 1C */ 1281 0x0000008080800080ull, /* 1D */ 1282 0x0000008080808000ull, /* 1E */ 1283 0x0000008080808080ull, /* 1F */ 1284 0x0000800000000000ull, /* 20 */ 1285 0x0000800000000080ull, /* 21 */ 1286 0x0000800000008000ull, /* 22 */ 1287 0x0000800000008080ull, /* 23 */ 1288 
0x0000800000800000ull, /* 24 */ 1289 0x0000800000800080ull, /* 25 */ 1290 0x0000800000808000ull, /* 26 */ 1291 0x0000800000808080ull, /* 27 */ 1292 0x0000800080000000ull, /* 28 */ 1293 0x0000800080000080ull, /* 29 */ 1294 0x0000800080008000ull, /* 2A */ 1295 0x0000800080008080ull, /* 2B */ 1296 0x0000800080800000ull, /* 2C */ 1297 0x0000800080800080ull, /* 2D */ 1298 0x0000800080808000ull, /* 2E */ 1299 0x0000800080808080ull, /* 2F */ 1300 0x0000808000000000ull, /* 30 */ 1301 0x0000808000000080ull, /* 31 */ 1302 0x0000808000008000ull, /* 32 */ 1303 0x0000808000008080ull, /* 33 */ 1304 0x0000808000800000ull, /* 34 */ 1305 0x0000808000800080ull, /* 35 */ 1306 0x0000808000808000ull, /* 36 */ 1307 0x0000808000808080ull, /* 37 */ 1308 0x0000808080000000ull, /* 38 */ 1309 0x0000808080000080ull, /* 39 */ 1310 0x0000808080008000ull, /* 3A */ 1311 0x0000808080008080ull, /* 3B */ 1312 0x0000808080800000ull, /* 3C */ 1313 0x0000808080800080ull, /* 3D */ 1314 0x0000808080808000ull, /* 3E */ 1315 0x0000808080808080ull, /* 3F */ 1316 0x0080000000000000ull, /* 40 */ 1317 0x0080000000000080ull, /* 41 */ 1318 0x0080000000008000ull, /* 42 */ 1319 0x0080000000008080ull, /* 43 */ 1320 0x0080000000800000ull, /* 44 */ 1321 0x0080000000800080ull, /* 45 */ 1322 0x0080000000808000ull, /* 46 */ 1323 0x0080000000808080ull, /* 47 */ 1324 0x0080000080000000ull, /* 48 */ 1325 0x0080000080000080ull, /* 49 */ 1326 0x0080000080008000ull, /* 4A */ 1327 0x0080000080008080ull, /* 4B */ 1328 0x0080000080800000ull, /* 4C */ 1329 0x0080000080800080ull, /* 4D */ 1330 0x0080000080808000ull, /* 4E */ 1331 0x0080000080808080ull, /* 4F */ 1332 0x0080008000000000ull, /* 50 */ 1333 0x0080008000000080ull, /* 51 */ 1334 0x0080008000008000ull, /* 52 */ 1335 0x0080008000008080ull, /* 53 */ 1336 0x0080008000800000ull, /* 54 */ 1337 0x0080008000800080ull, /* 55 */ 1338 0x0080008000808000ull, /* 56 */ 1339 0x0080008000808080ull, /* 57 */ 1340 0x0080008080000000ull, /* 58 */ 1341 0x0080008080000080ull, /* 59 */ 1342 0x0080008080008000ull, /* 5A */ 1343 0x0080008080008080ull, /* 5B */ 1344 0x0080008080800000ull, /* 5C */ 1345 0x0080008080800080ull, /* 5D */ 1346 0x0080008080808000ull, /* 5E */ 1347 0x0080008080808080ull, /* 5F */ 1348 0x0080800000000000ull, /* 60 */ 1349 0x0080800000000080ull, /* 61 */ 1350 0x0080800000008000ull, /* 62 */ 1351 0x0080800000008080ull, /* 63 */ 1352 0x0080800000800000ull, /* 64 */ 1353 0x0080800000800080ull, /* 65 */ 1354 0x0080800000808000ull, /* 66 */ 1355 0x0080800000808080ull, /* 67 */ 1356 0x0080800080000000ull, /* 68 */ 1357 0x0080800080000080ull, /* 69 */ 1358 0x0080800080008000ull, /* 6A */ 1359 0x0080800080008080ull, /* 6B */ 1360 0x0080800080800000ull, /* 6C */ 1361 0x0080800080800080ull, /* 6D */ 1362 0x0080800080808000ull, /* 6E */ 1363 0x0080800080808080ull, /* 6F */ 1364 0x0080808000000000ull, /* 70 */ 1365 0x0080808000000080ull, /* 71 */ 1366 0x0080808000008000ull, /* 72 */ 1367 0x0080808000008080ull, /* 73 */ 1368 0x0080808000800000ull, /* 74 */ 1369 0x0080808000800080ull, /* 75 */ 1370 0x0080808000808000ull, /* 76 */ 1371 0x0080808000808080ull, /* 77 */ 1372 0x0080808080000000ull, /* 78 */ 1373 0x0080808080000080ull, /* 79 */ 1374 0x0080808080008000ull, /* 7A */ 1375 0x0080808080008080ull, /* 7B */ 1376 0x0080808080800000ull, /* 7C */ 1377 0x0080808080800080ull, /* 7D */ 1378 0x0080808080808000ull, /* 7E */ 1379 0x0080808080808080ull, /* 7F */ 1380 0x8000000000000000ull, /* 80 */ 1381 0x8000000000000080ull, /* 81 */ 1382 0x8000000000008000ull, /* 82 */ 1383 0x8000000000008080ull, /* 83 */ 1384 
0x8000000000800000ull, /* 84 */ 1385 0x8000000000800080ull, /* 85 */ 1386 0x8000000000808000ull, /* 86 */ 1387 0x8000000000808080ull, /* 87 */ 1388 0x8000000080000000ull, /* 88 */ 1389 0x8000000080000080ull, /* 89 */ 1390 0x8000000080008000ull, /* 8A */ 1391 0x8000000080008080ull, /* 8B */ 1392 0x8000000080800000ull, /* 8C */ 1393 0x8000000080800080ull, /* 8D */ 1394 0x8000000080808000ull, /* 8E */ 1395 0x8000000080808080ull, /* 8F */ 1396 0x8000008000000000ull, /* 90 */ 1397 0x8000008000000080ull, /* 91 */ 1398 0x8000008000008000ull, /* 92 */ 1399 0x8000008000008080ull, /* 93 */ 1400 0x8000008000800000ull, /* 94 */ 1401 0x8000008000800080ull, /* 95 */ 1402 0x8000008000808000ull, /* 96 */ 1403 0x8000008000808080ull, /* 97 */ 1404 0x8000008080000000ull, /* 98 */ 1405 0x8000008080000080ull, /* 99 */ 1406 0x8000008080008000ull, /* 9A */ 1407 0x8000008080008080ull, /* 9B */ 1408 0x8000008080800000ull, /* 9C */ 1409 0x8000008080800080ull, /* 9D */ 1410 0x8000008080808000ull, /* 9E */ 1411 0x8000008080808080ull, /* 9F */ 1412 0x8000800000000000ull, /* A0 */ 1413 0x8000800000000080ull, /* A1 */ 1414 0x8000800000008000ull, /* A2 */ 1415 0x8000800000008080ull, /* A3 */ 1416 0x8000800000800000ull, /* A4 */ 1417 0x8000800000800080ull, /* A5 */ 1418 0x8000800000808000ull, /* A6 */ 1419 0x8000800000808080ull, /* A7 */ 1420 0x8000800080000000ull, /* A8 */ 1421 0x8000800080000080ull, /* A9 */ 1422 0x8000800080008000ull, /* AA */ 1423 0x8000800080008080ull, /* AB */ 1424 0x8000800080800000ull, /* AC */ 1425 0x8000800080800080ull, /* AD */ 1426 0x8000800080808000ull, /* AE */ 1427 0x8000800080808080ull, /* AF */ 1428 0x8000808000000000ull, /* B0 */ 1429 0x8000808000000080ull, /* B1 */ 1430 0x8000808000008000ull, /* B2 */ 1431 0x8000808000008080ull, /* B3 */ 1432 0x8000808000800000ull, /* B4 */ 1433 0x8000808000800080ull, /* B5 */ 1434 0x8000808000808000ull, /* B6 */ 1435 0x8000808000808080ull, /* B7 */ 1436 0x8000808080000000ull, /* B8 */ 1437 0x8000808080000080ull, /* B9 */ 1438 0x8000808080008000ull, /* BA */ 1439 0x8000808080008080ull, /* BB */ 1440 0x8000808080800000ull, /* BC */ 1441 0x8000808080800080ull, /* BD */ 1442 0x8000808080808000ull, /* BE */ 1443 0x8000808080808080ull, /* BF */ 1444 0x8080000000000000ull, /* C0 */ 1445 0x8080000000000080ull, /* C1 */ 1446 0x8080000000008000ull, /* C2 */ 1447 0x8080000000008080ull, /* C3 */ 1448 0x8080000000800000ull, /* C4 */ 1449 0x8080000000800080ull, /* C5 */ 1450 0x8080000000808000ull, /* C6 */ 1451 0x8080000000808080ull, /* C7 */ 1452 0x8080000080000000ull, /* C8 */ 1453 0x8080000080000080ull, /* C9 */ 1454 0x8080000080008000ull, /* CA */ 1455 0x8080000080008080ull, /* CB */ 1456 0x8080000080800000ull, /* CC */ 1457 0x8080000080800080ull, /* CD */ 1458 0x8080000080808000ull, /* CE */ 1459 0x8080000080808080ull, /* CF */ 1460 0x8080008000000000ull, /* D0 */ 1461 0x8080008000000080ull, /* D1 */ 1462 0x8080008000008000ull, /* D2 */ 1463 0x8080008000008080ull, /* D3 */ 1464 0x8080008000800000ull, /* D4 */ 1465 0x8080008000800080ull, /* D5 */ 1466 0x8080008000808000ull, /* D6 */ 1467 0x8080008000808080ull, /* D7 */ 1468 0x8080008080000000ull, /* D8 */ 1469 0x8080008080000080ull, /* D9 */ 1470 0x8080008080008000ull, /* DA */ 1471 0x8080008080008080ull, /* DB */ 1472 0x8080008080800000ull, /* DC */ 1473 0x8080008080800080ull, /* DD */ 1474 0x8080008080808000ull, /* DE */ 1475 0x8080008080808080ull, /* DF */ 1476 0x8080800000000000ull, /* E0 */ 1477 0x8080800000000080ull, /* E1 */ 1478 0x8080800000008000ull, /* E2 */ 1479 0x8080800000008080ull, /* E3 */ 1480 
0x8080800000800000ull, /* E4 */ 1481 0x8080800000800080ull, /* E5 */ 1482 0x8080800000808000ull, /* E6 */ 1483 0x8080800000808080ull, /* E7 */ 1484 0x8080800080000000ull, /* E8 */ 1485 0x8080800080000080ull, /* E9 */ 1486 0x8080800080008000ull, /* EA */ 1487 0x8080800080008080ull, /* EB */ 1488 0x8080800080800000ull, /* EC */ 1489 0x8080800080800080ull, /* ED */ 1490 0x8080800080808000ull, /* EE */ 1491 0x8080800080808080ull, /* EF */ 1492 0x8080808000000000ull, /* F0 */ 1493 0x8080808000000080ull, /* F1 */ 1494 0x8080808000008000ull, /* F2 */ 1495 0x8080808000008080ull, /* F3 */ 1496 0x8080808000800000ull, /* F4 */ 1497 0x8080808000800080ull, /* F5 */ 1498 0x8080808000808000ull, /* F6 */ 1499 0x8080808000808080ull, /* F7 */ 1500 0x8080808080000000ull, /* F8 */ 1501 0x8080808080000080ull, /* F9 */ 1502 0x8080808080008000ull, /* FA */ 1503 0x8080808080008080ull, /* FB */ 1504 0x8080808080800000ull, /* FC */ 1505 0x8080808080800080ull, /* FD */ 1506 0x8080808080808000ull, /* FE */ 1507 0x8080808080808080ull, /* FF */ 1508 }; 1509 1510 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b) 1511 { 1512 int i; 1513 uint64_t t[2] = { 0, 0 }; 1514 1515 VECTOR_FOR_INORDER_I(i, u8) { 1516 #if defined(HOST_WORDS_BIGENDIAN) 1517 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7); 1518 #else 1519 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7)); 1520 #endif 1521 } 1522 1523 r->u64[0] = t[0]; 1524 r->u64[1] = t[1]; 1525 } 1526 1527 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1528 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1529 { \ 1530 int i, j; \ 1531 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \ 1532 \ 1533 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1534 prod[i] = 0; \ 1535 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1536 if (a->srcfld[i] & (1ull<<j)) { \ 1537 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1538 } \ 1539 } \ 1540 } \ 1541 \ 1542 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1543 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \ 1544 } \ 1545 } 1546 1547 PMSUM(vpmsumb, u8, u16, uint16_t) 1548 PMSUM(vpmsumh, u16, u32, uint32_t) 1549 PMSUM(vpmsumw, u32, u64, uint64_t) 1550 1551 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1552 { 1553 1554 #ifdef CONFIG_INT128 1555 int i, j; 1556 __uint128_t prod[2]; 1557 1558 VECTOR_FOR_INORDER_I(i, u64) { 1559 prod[i] = 0; 1560 for (j = 0; j < 64; j++) { 1561 if (a->u64[i] & (1ull<<j)) { 1562 prod[i] ^= (((__uint128_t)b->u64[i]) << j); 1563 } 1564 } 1565 } 1566 1567 r->u128 = prod[0] ^ prod[1]; 1568 1569 #else 1570 int i, j; 1571 ppc_avr_t prod[2]; 1572 1573 VECTOR_FOR_INORDER_I(i, u64) { 1574 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0; 1575 for (j = 0; j < 64; j++) { 1576 if (a->u64[i] & (1ull<<j)) { 1577 ppc_avr_t bshift; 1578 if (j == 0) { 1579 bshift.u64[HI_IDX] = 0; 1580 bshift.u64[LO_IDX] = b->u64[i]; 1581 } else { 1582 bshift.u64[HI_IDX] = b->u64[i] >> (64-j); 1583 bshift.u64[LO_IDX] = b->u64[i] << j; 1584 } 1585 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX]; 1586 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX]; 1587 } 1588 } 1589 } 1590 1591 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX]; 1592 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX]; 1593 #endif 1594 } 1595 1596 1597 #if defined(HOST_WORDS_BIGENDIAN) 1598 #define PKBIG 1 1599 #else 1600 #define PKBIG 0 1601 #endif 1602 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1603 { 1604 int i, j; 1605 ppc_avr_t result; 1606 #if defined(HOST_WORDS_BIGENDIAN) 1607 const ppc_avr_t *x[2] = { a, b }; 1608 #else 1609 const ppc_avr_t *x[2] = { b, a }; 1610 #endif 1611 
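    /*
     * Each 32-bit pixel is packed to 1/5/5/5: result bit 15 is taken from
     * bit 24 of the source word, and bits 14..10, 9..5 and 4..0 come from
     * the top five bits of the three low bytes respectively.
     */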
1612 VECTOR_FOR_INORDER_I(i, u64) { 1613 VECTOR_FOR_INORDER_I(j, u32) { 1614 uint32_t e = x[i]->u32[j]; 1615 1616 result.u16[4*i+j] = (((e >> 9) & 0xfc00) | 1617 ((e >> 6) & 0x3e0) | 1618 ((e >> 3) & 0x1f)); 1619 } 1620 } 1621 *r = result; 1622 } 1623 1624 #define VPK(suffix, from, to, cvt, dosat) \ 1625 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1626 ppc_avr_t *a, ppc_avr_t *b) \ 1627 { \ 1628 int i; \ 1629 int sat = 0; \ 1630 ppc_avr_t result; \ 1631 ppc_avr_t *a0 = PKBIG ? a : b; \ 1632 ppc_avr_t *a1 = PKBIG ? b : a; \ 1633 \ 1634 VECTOR_FOR_INORDER_I(i, from) { \ 1635 result.to[i] = cvt(a0->from[i], &sat); \ 1636 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \ 1637 } \ 1638 *r = result; \ 1639 if (dosat && sat) { \ 1640 env->vscr |= (1 << VSCR_SAT); \ 1641 } \ 1642 } 1643 #define I(x, y) (x) 1644 VPK(shss, s16, s8, cvtshsb, 1) 1645 VPK(shus, s16, u8, cvtshub, 1) 1646 VPK(swss, s32, s16, cvtswsh, 1) 1647 VPK(swus, s32, u16, cvtswuh, 1) 1648 VPK(sdss, s64, s32, cvtsdsw, 1) 1649 VPK(sdus, s64, u32, cvtsduw, 1) 1650 VPK(uhus, u16, u8, cvtuhub, 1) 1651 VPK(uwus, u32, u16, cvtuwuh, 1) 1652 VPK(udus, u64, u32, cvtuduw, 1) 1653 VPK(uhum, u16, u8, I, 0) 1654 VPK(uwum, u32, u16, I, 0) 1655 VPK(udum, u64, u32, I, 0) 1656 #undef I 1657 #undef VPK 1658 #undef PKBIG 1659 1660 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1661 { 1662 int i; 1663 1664 for (i = 0; i < ARRAY_SIZE(r->f); i++) { 1665 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status); 1666 } 1667 } 1668 1669 #define VRFI(suffix, rounding) \ 1670 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1671 ppc_avr_t *b) \ 1672 { \ 1673 int i; \ 1674 float_status s = env->vec_status; \ 1675 \ 1676 set_float_rounding_mode(rounding, &s); \ 1677 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ 1678 r->f[i] = float32_round_to_int (b->f[i], &s); \ 1679 } \ 1680 } 1681 VRFI(n, float_round_nearest_even) 1682 VRFI(m, float_round_down) 1683 VRFI(p, float_round_up) 1684 VRFI(z, float_round_to_zero) 1685 #undef VRFI 1686 1687 #define VROTATE(suffix, element, mask) \ 1688 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1689 { \ 1690 int i; \ 1691 \ 1692 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1693 unsigned int shift = b->element[i] & mask; \ 1694 r->element[i] = (a->element[i] << shift) | \ 1695 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \ 1696 } \ 1697 } 1698 VROTATE(b, u8, 0x7) 1699 VROTATE(h, u16, 0xF) 1700 VROTATE(w, u32, 0x1F) 1701 VROTATE(d, u64, 0x3F) 1702 #undef VROTATE 1703 1704 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1705 { 1706 int i; 1707 1708 for (i = 0; i < ARRAY_SIZE(r->f); i++) { 1709 float32 t = float32_sqrt(b->f[i], &env->vec_status); 1710 1711 r->f[i] = float32_div(float32_one, t, &env->vec_status); 1712 } 1713 } 1714 1715 #define VRLMI(name, size, element, insert) \ 1716 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1717 { \ 1718 int i; \ 1719 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1720 uint##size##_t src1 = a->element[i]; \ 1721 uint##size##_t src2 = b->element[i]; \ 1722 uint##size##_t src3 = r->element[i]; \ 1723 uint##size##_t begin, end, shift, mask, rot_val; \ 1724 \ 1725 shift = extract##size(src2, 0, 6); \ 1726 end = extract##size(src2, 8, 6); \ 1727 begin = extract##size(src2, 16, 6); \ 1728 rot_val = rol##size(src1, shift); \ 1729 mask = mask_u##size(begin, end); \ 1730 if (insert) { \ 1731 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1732 } else { \ 1733 
r->element[i] = (rot_val & mask); \ 1734 } \ 1735 } \ 1736 } 1737 1738 VRLMI(vrldmi, 64, u64, 1); 1739 VRLMI(vrlwmi, 32, u32, 1); 1740 VRLMI(vrldnm, 64, u64, 0); 1741 VRLMI(vrlwnm, 32, u32, 0); 1742 1743 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1744 ppc_avr_t *c) 1745 { 1746 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]); 1747 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]); 1748 } 1749 1750 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1751 { 1752 int i; 1753 1754 for (i = 0; i < ARRAY_SIZE(r->f); i++) { 1755 r->f[i] = float32_exp2(b->f[i], &env->vec_status); 1756 } 1757 } 1758 1759 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1760 { 1761 int i; 1762 1763 for (i = 0; i < ARRAY_SIZE(r->f); i++) { 1764 r->f[i] = float32_log2(b->f[i], &env->vec_status); 1765 } 1766 } 1767 1768 #if defined(HOST_WORDS_BIGENDIAN) 1769 #define VEXTU_X_DO(name, size, left) \ 1770 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1771 { \ 1772 int index; \ 1773 if (left) { \ 1774 index = (a & 0xf) * 8; \ 1775 } else { \ 1776 index = ((15 - (a & 0xf) + 1) * 8) - size; \ 1777 } \ 1778 return int128_getlo(int128_rshift(b->s128, index)) & \ 1779 MAKE_64BIT_MASK(0, size); \ 1780 } 1781 #else 1782 #define VEXTU_X_DO(name, size, left) \ 1783 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1784 { \ 1785 int index; \ 1786 if (left) { \ 1787 index = ((15 - (a & 0xf) + 1) * 8) - size; \ 1788 } else { \ 1789 index = (a & 0xf) * 8; \ 1790 } \ 1791 return int128_getlo(int128_rshift(b->s128, index)) & \ 1792 MAKE_64BIT_MASK(0, size); \ 1793 } 1794 #endif 1795 1796 VEXTU_X_DO(vextublx, 8, 1) 1797 VEXTU_X_DO(vextuhlx, 16, 1) 1798 VEXTU_X_DO(vextuwlx, 32, 1) 1799 VEXTU_X_DO(vextubrx, 8, 0) 1800 VEXTU_X_DO(vextuhrx, 16, 0) 1801 VEXTU_X_DO(vextuwrx, 32, 0) 1802 #undef VEXTU_X_DO 1803 1804 /* The specification says that the results are undefined if all of the 1805 * shift counts are not identical. We check to make sure that they are 1806 * to conform to what real hardware appears to do. 
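 * If the low three bits of every byte of VB do not all hold the same
 * count, the destination register is left unmodified here.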
*/ 1807 #define VSHIFT(suffix, leftp) \ 1808 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1809 { \ 1810 int shift = b->u8[LO_IDX*15] & 0x7; \ 1811 int doit = 1; \ 1812 int i; \ 1813 \ 1814 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \ 1815 doit = doit && ((b->u8[i] & 0x7) == shift); \ 1816 } \ 1817 if (doit) { \ 1818 if (shift == 0) { \ 1819 *r = *a; \ 1820 } else if (leftp) { \ 1821 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \ 1822 \ 1823 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \ 1824 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \ 1825 } else { \ 1826 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \ 1827 \ 1828 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \ 1829 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \ 1830 } \ 1831 } \ 1832 } 1833 VSHIFT(l, 1) 1834 VSHIFT(r, 0) 1835 #undef VSHIFT 1836 1837 #define VSL(suffix, element, mask) \ 1838 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1839 { \ 1840 int i; \ 1841 \ 1842 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1843 unsigned int shift = b->element[i] & mask; \ 1844 \ 1845 r->element[i] = a->element[i] << shift; \ 1846 } \ 1847 } 1848 VSL(b, u8, 0x7) 1849 VSL(h, u16, 0x0F) 1850 VSL(w, u32, 0x1F) 1851 VSL(d, u64, 0x3F) 1852 #undef VSL 1853 1854 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1855 { 1856 int i; 1857 unsigned int shift, bytes, size; 1858 1859 size = ARRAY_SIZE(r->u8); 1860 for (i = 0; i < size; i++) { 1861 shift = b->u8[i] & 0x7; /* extract shift value */ 1862 bytes = (a->u8[i] << 8) + /* extract adjacent bytes */ 1863 (((i + 1) < size) ? a->u8[i + 1] : 0); 1864 r->u8[i] = (bytes << shift) >> 8; /* shift and store result */ 1865 } 1866 } 1867 1868 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1869 { 1870 int i; 1871 unsigned int shift, bytes; 1872 1873 /* Use reverse order, as destination and source register can be same. Its 1874 * being modified in place saving temporary, reverse order will guarantee 1875 * that computed result is not fed back. 1876 */ 1877 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1878 shift = b->u8[i] & 0x7; /* extract shift value */ 1879 bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i]; 1880 /* extract adjacent bytes */ 1881 r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */ 1882 } 1883 } 1884 1885 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1886 { 1887 int sh = shift & 0xf; 1888 int i; 1889 ppc_avr_t result; 1890 1891 #if defined(HOST_WORDS_BIGENDIAN) 1892 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1893 int index = sh + i; 1894 if (index > 0xf) { 1895 result.u8[i] = b->u8[index - 0x10]; 1896 } else { 1897 result.u8[i] = a->u8[index]; 1898 } 1899 } 1900 #else 1901 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1902 int index = (16 - sh) + i; 1903 if (index > 0xf) { 1904 result.u8[i] = a->u8[index - 0x10]; 1905 } else { 1906 result.u8[i] = b->u8[index]; 1907 } 1908 } 1909 #endif 1910 *r = result; 1911 } 1912 1913 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1914 { 1915 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf; 1916 1917 #if defined(HOST_WORDS_BIGENDIAN) 1918 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1919 memset(&r->u8[16-sh], 0, sh); 1920 #else 1921 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1922 memset(&r->u8[0], 0, sh); 1923 #endif 1924 } 1925 1926 /* Experimental testing shows that hardware masks the immediate. 
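 * Only the bits needed to index an element are used: splat & 15 for bytes,
 * splat & 7 for halfwords and splat & 3 for words.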
*/ 1927 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1)) 1928 #if defined(HOST_WORDS_BIGENDIAN) 1929 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element) 1930 #else 1931 #define SPLAT_ELEMENT(element) \ 1932 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element)) 1933 #endif 1934 #define VSPLT(suffix, element) \ 1935 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \ 1936 { \ 1937 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \ 1938 int i; \ 1939 \ 1940 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1941 r->element[i] = s; \ 1942 } \ 1943 } 1944 VSPLT(b, u8) 1945 VSPLT(h, u16) 1946 VSPLT(w, u32) 1947 #undef VSPLT 1948 #undef SPLAT_ELEMENT 1949 #undef _SPLAT_MASKED 1950 #if defined(HOST_WORDS_BIGENDIAN) 1951 #define VINSERT(suffix, element) \ 1952 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1953 { \ 1954 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \ 1955 sizeof(r->element[0])); \ 1956 } 1957 #else 1958 #define VINSERT(suffix, element) \ 1959 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1960 { \ 1961 uint32_t d = (16 - index) - sizeof(r->element[0]); \ 1962 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \ 1963 } 1964 #endif 1965 VINSERT(b, u8) 1966 VINSERT(h, u16) 1967 VINSERT(w, u32) 1968 VINSERT(d, u64) 1969 #undef VINSERT 1970 #if defined(HOST_WORDS_BIGENDIAN) 1971 #define VEXTRACT(suffix, element) \ 1972 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1973 { \ 1974 uint32_t es = sizeof(r->element[0]); \ 1975 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1976 memset(&r->u8[8], 0, 8); \ 1977 memset(&r->u8[0], 0, 8 - es); \ 1978 } 1979 #else 1980 #define VEXTRACT(suffix, element) \ 1981 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1982 { \ 1983 uint32_t es = sizeof(r->element[0]); \ 1984 uint32_t s = (16 - index) - es; \ 1985 memmove(&r->u8[8], &b->u8[s], es); \ 1986 memset(&r->u8[0], 0, 8); \ 1987 memset(&r->u8[8 + es], 0, 8 - es); \ 1988 } 1989 #endif 1990 VEXTRACT(ub, u8) 1991 VEXTRACT(uh, u16) 1992 VEXTRACT(uw, u32) 1993 VEXTRACT(d, u64) 1994 #undef VEXTRACT 1995 1996 void helper_xxextractuw(CPUPPCState *env, target_ulong xtn, 1997 target_ulong xbn, uint32_t index) 1998 { 1999 ppc_vsr_t xt, xb; 2000 size_t es = sizeof(uint32_t); 2001 uint32_t ext_index; 2002 int i; 2003 2004 getVSR(xbn, &xb, env); 2005 memset(&xt, 0, sizeof(xt)); 2006 2007 #if defined(HOST_WORDS_BIGENDIAN) 2008 ext_index = index; 2009 for (i = 0; i < es; i++, ext_index++) { 2010 xt.u8[8 - es + i] = xb.u8[ext_index % 16]; 2011 } 2012 #else 2013 ext_index = 15 - index; 2014 for (i = es - 1; i >= 0; i--, ext_index--) { 2015 xt.u8[8 + i] = xb.u8[ext_index % 16]; 2016 } 2017 #endif 2018 2019 putVSR(xtn, &xt, env); 2020 } 2021 2022 void helper_xxinsertw(CPUPPCState *env, target_ulong xtn, 2023 target_ulong xbn, uint32_t index) 2024 { 2025 ppc_vsr_t xt, xb; 2026 size_t es = sizeof(uint32_t); 2027 int ins_index, i = 0; 2028 2029 getVSR(xbn, &xb, env); 2030 getVSR(xtn, &xt, env); 2031 2032 #if defined(HOST_WORDS_BIGENDIAN) 2033 ins_index = index; 2034 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 2035 xt.u8[ins_index] = xb.u8[8 - es + i]; 2036 } 2037 #else 2038 ins_index = 15 - index; 2039 for (i = es - 1; i >= 0 && ins_index >= 0; i--, ins_index--) { 2040 xt.u8[ins_index] = xb.u8[8 + i]; 2041 } 2042 #endif 2043 2044 putVSR(xtn, &xt, env); 2045 } 2046 2047 #define VEXT_SIGNED(name, element, mask, cast, recast) \ 2048 void 
helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 2049 { \ 2050 int i; \ 2051 VECTOR_FOR_INORDER_I(i, element) { \ 2052 r->element[i] = (recast)((cast)(b->element[i] & mask)); \ 2053 } \ 2054 } 2055 VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t) 2056 VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t) 2057 VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t) 2058 VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t) 2059 VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t) 2060 #undef VEXT_SIGNED 2061 2062 #define VNEG(name, element) \ 2063 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 2064 { \ 2065 int i; \ 2066 VECTOR_FOR_INORDER_I(i, element) { \ 2067 r->element[i] = -b->element[i]; \ 2068 } \ 2069 } 2070 VNEG(vnegw, s32) 2071 VNEG(vnegd, s64) 2072 #undef VNEG 2073 2074 #define VSPLTI(suffix, element, splat_type) \ 2075 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \ 2076 { \ 2077 splat_type x = (int8_t)(splat << 3) >> 3; \ 2078 int i; \ 2079 \ 2080 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 2081 r->element[i] = x; \ 2082 } \ 2083 } 2084 VSPLTI(b, s8, int8_t) 2085 VSPLTI(h, s16, int16_t) 2086 VSPLTI(w, s32, int32_t) 2087 #undef VSPLTI 2088 2089 #define VSR(suffix, element, mask) \ 2090 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 2091 { \ 2092 int i; \ 2093 \ 2094 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 2095 unsigned int shift = b->element[i] & mask; \ 2096 r->element[i] = a->element[i] >> shift; \ 2097 } \ 2098 } 2099 VSR(ab, s8, 0x7) 2100 VSR(ah, s16, 0xF) 2101 VSR(aw, s32, 0x1F) 2102 VSR(ad, s64, 0x3F) 2103 VSR(b, u8, 0x7) 2104 VSR(h, u16, 0xF) 2105 VSR(w, u32, 0x1F) 2106 VSR(d, u64, 0x3F) 2107 #undef VSR 2108 2109 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2110 { 2111 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf; 2112 2113 #if defined(HOST_WORDS_BIGENDIAN) 2114 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 2115 memset(&r->u8[0], 0, sh); 2116 #else 2117 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 2118 memset(&r->u8[16 - sh], 0, sh); 2119 #endif 2120 } 2121 2122 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2123 { 2124 int i; 2125 2126 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2127 r->u32[i] = a->u32[i] >= b->u32[i]; 2128 } 2129 } 2130 2131 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2132 { 2133 int64_t t; 2134 int i, upper; 2135 ppc_avr_t result; 2136 int sat = 0; 2137 2138 #if defined(HOST_WORDS_BIGENDIAN) 2139 upper = ARRAY_SIZE(r->s32)-1; 2140 #else 2141 upper = 0; 2142 #endif 2143 t = (int64_t)b->s32[upper]; 2144 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2145 t += a->s32[i]; 2146 result.s32[i] = 0; 2147 } 2148 result.s32[upper] = cvtsdsw(t, &sat); 2149 *r = result; 2150 2151 if (sat) { 2152 env->vscr |= (1 << VSCR_SAT); 2153 } 2154 } 2155 2156 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2157 { 2158 int i, j, upper; 2159 ppc_avr_t result; 2160 int sat = 0; 2161 2162 #if defined(HOST_WORDS_BIGENDIAN) 2163 upper = 1; 2164 #else 2165 upper = 0; 2166 #endif 2167 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 2168 int64_t t = (int64_t)b->s32[upper + i * 2]; 2169 2170 result.u64[i] = 0; 2171 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 2172 t += a->s32[2 * i + j]; 2173 } 2174 result.s32[upper + i * 2] = cvtsdsw(t, &sat); 2175 } 2176 2177 *r = result; 2178 if (sat) { 2179 env->vscr |= (1 << VSCR_SAT); 2180 } 2181 } 2182 2183 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2184 { 2185 int i, j; 2186 
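    /* For each word element: add the four signed bytes of a to the signed
     * word of b and saturate the sum to 32 bits, setting VSCR[SAT] if the
     * result saturates. */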
int sat = 0; 2187 2188 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2189 int64_t t = (int64_t)b->s32[i]; 2190 2191 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 2192 t += a->s8[4 * i + j]; 2193 } 2194 r->s32[i] = cvtsdsw(t, &sat); 2195 } 2196 2197 if (sat) { 2198 env->vscr |= (1 << VSCR_SAT); 2199 } 2200 } 2201 2202 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2203 { 2204 int sat = 0; 2205 int i; 2206 2207 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2208 int64_t t = (int64_t)b->s32[i]; 2209 2210 t += a->s16[2 * i] + a->s16[2 * i + 1]; 2211 r->s32[i] = cvtsdsw(t, &sat); 2212 } 2213 2214 if (sat) { 2215 env->vscr |= (1 << VSCR_SAT); 2216 } 2217 } 2218 2219 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2220 { 2221 int i, j; 2222 int sat = 0; 2223 2224 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2225 uint64_t t = (uint64_t)b->u32[i]; 2226 2227 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 2228 t += a->u8[4 * i + j]; 2229 } 2230 r->u32[i] = cvtuduw(t, &sat); 2231 } 2232 2233 if (sat) { 2234 env->vscr |= (1 << VSCR_SAT); 2235 } 2236 } 2237 2238 #if defined(HOST_WORDS_BIGENDIAN) 2239 #define UPKHI 1 2240 #define UPKLO 0 2241 #else 2242 #define UPKHI 0 2243 #define UPKLO 1 2244 #endif 2245 #define VUPKPX(suffix, hi) \ 2246 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2247 { \ 2248 int i; \ 2249 ppc_avr_t result; \ 2250 \ 2251 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 2252 uint16_t e = b->u16[hi ? i : i+4]; \ 2253 uint8_t a = (e >> 15) ? 0xff : 0; \ 2254 uint8_t r = (e >> 10) & 0x1f; \ 2255 uint8_t g = (e >> 5) & 0x1f; \ 2256 uint8_t b = e & 0x1f; \ 2257 \ 2258 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 2259 } \ 2260 *r = result; \ 2261 } 2262 VUPKPX(lpx, UPKLO) 2263 VUPKPX(hpx, UPKHI) 2264 #undef VUPKPX 2265 2266 #define VUPK(suffix, unpacked, packee, hi) \ 2267 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2268 { \ 2269 int i; \ 2270 ppc_avr_t result; \ 2271 \ 2272 if (hi) { \ 2273 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 2274 result.unpacked[i] = b->packee[i]; \ 2275 } \ 2276 } else { \ 2277 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 2278 i++) { \ 2279 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 2280 } \ 2281 } \ 2282 *r = result; \ 2283 } 2284 VUPK(hsb, s16, s8, UPKHI) 2285 VUPK(hsh, s32, s16, UPKHI) 2286 VUPK(hsw, s64, s32, UPKHI) 2287 VUPK(lsb, s16, s8, UPKLO) 2288 VUPK(lsh, s32, s16, UPKLO) 2289 VUPK(lsw, s64, s32, UPKLO) 2290 #undef VUPK 2291 #undef UPKHI 2292 #undef UPKLO 2293 2294 #define VGENERIC_DO(name, element) \ 2295 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 2296 { \ 2297 int i; \ 2298 \ 2299 VECTOR_FOR_INORDER_I(i, element) { \ 2300 r->element[i] = name(b->element[i]); \ 2301 } \ 2302 } 2303 2304 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 2305 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 2306 #define clzw(v) clz32((v)) 2307 #define clzd(v) clz64((v)) 2308 2309 VGENERIC_DO(clzb, u8) 2310 VGENERIC_DO(clzh, u16) 2311 VGENERIC_DO(clzw, u32) 2312 VGENERIC_DO(clzd, u64) 2313 2314 #undef clzb 2315 #undef clzh 2316 #undef clzw 2317 #undef clzd 2318 2319 #define ctzb(v) ((v) ? ctz32(v) : 8) 2320 #define ctzh(v) ((v) ? 
ctz32(v) : 16) 2321 #define ctzw(v) ctz32((v)) 2322 #define ctzd(v) ctz64((v)) 2323 2324 VGENERIC_DO(ctzb, u8) 2325 VGENERIC_DO(ctzh, u16) 2326 VGENERIC_DO(ctzw, u32) 2327 VGENERIC_DO(ctzd, u64) 2328 2329 #undef ctzb 2330 #undef ctzh 2331 #undef ctzw 2332 #undef ctzd 2333 2334 #define popcntb(v) ctpop8(v) 2335 #define popcnth(v) ctpop16(v) 2336 #define popcntw(v) ctpop32(v) 2337 #define popcntd(v) ctpop64(v) 2338 2339 VGENERIC_DO(popcntb, u8) 2340 VGENERIC_DO(popcnth, u16) 2341 VGENERIC_DO(popcntw, u32) 2342 VGENERIC_DO(popcntd, u64) 2343 2344 #undef popcntb 2345 #undef popcnth 2346 #undef popcntw 2347 #undef popcntd 2348 2349 #undef VGENERIC_DO 2350 2351 #if defined(HOST_WORDS_BIGENDIAN) 2352 #define QW_ONE { .u64 = { 0, 1 } } 2353 #else 2354 #define QW_ONE { .u64 = { 1, 0 } } 2355 #endif 2356 2357 #ifndef CONFIG_INT128 2358 2359 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) 2360 { 2361 t->u64[0] = ~a.u64[0]; 2362 t->u64[1] = ~a.u64[1]; 2363 } 2364 2365 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) 2366 { 2367 if (a.u64[HI_IDX] < b.u64[HI_IDX]) { 2368 return -1; 2369 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) { 2370 return 1; 2371 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) { 2372 return -1; 2373 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) { 2374 return 1; 2375 } else { 2376 return 0; 2377 } 2378 } 2379 2380 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2381 { 2382 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX]; 2383 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] + 2384 (~a.u64[LO_IDX] < b.u64[LO_IDX]); 2385 } 2386 2387 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2388 { 2389 ppc_avr_t not_a; 2390 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX]; 2391 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] + 2392 (~a.u64[LO_IDX] < b.u64[LO_IDX]); 2393 avr_qw_not(¬_a, a); 2394 return avr_qw_cmpu(not_a, b) < 0; 2395 } 2396 2397 #endif 2398 2399 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2400 { 2401 #ifdef CONFIG_INT128 2402 r->u128 = a->u128 + b->u128; 2403 #else 2404 avr_qw_add(r, *a, *b); 2405 #endif 2406 } 2407 2408 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2409 { 2410 #ifdef CONFIG_INT128 2411 r->u128 = a->u128 + b->u128 + (c->u128 & 1); 2412 #else 2413 2414 if (c->u64[LO_IDX] & 1) { 2415 ppc_avr_t tmp; 2416 2417 tmp.u64[HI_IDX] = 0; 2418 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1; 2419 avr_qw_add(&tmp, *a, tmp); 2420 avr_qw_add(r, tmp, *b); 2421 } else { 2422 avr_qw_add(r, *a, *b); 2423 } 2424 #endif 2425 } 2426 2427 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2428 { 2429 #ifdef CONFIG_INT128 2430 r->u128 = (~a->u128 < b->u128); 2431 #else 2432 ppc_avr_t not_a; 2433 2434 avr_qw_not(¬_a, *a); 2435 2436 r->u64[HI_IDX] = 0; 2437 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0); 2438 #endif 2439 } 2440 2441 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2442 { 2443 #ifdef CONFIG_INT128 2444 int carry_out = (~a->u128 < b->u128); 2445 if (!carry_out && (c->u128 & 1)) { 2446 carry_out = ((a->u128 + b->u128 + 1) == 0) && 2447 ((a->u128 != 0) || (b->u128 != 0)); 2448 } 2449 r->u128 = carry_out; 2450 #else 2451 2452 int carry_in = c->u64[LO_IDX] & 1; 2453 int carry_out = 0; 2454 ppc_avr_t tmp; 2455 2456 carry_out = avr_qw_addc(&tmp, *a, *b); 2457 2458 if (!carry_out && carry_in) { 2459 ppc_avr_t one = QW_ONE; 2460 carry_out = avr_qw_addc(&tmp, tmp, one); 2461 } 2462 r->u64[HI_IDX] = 0; 2463 r->u64[LO_IDX] = carry_out; 2464 #endif 2465 } 2466 2467 void 
helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2468 { 2469 #ifdef CONFIG_INT128 2470 r->u128 = a->u128 - b->u128; 2471 #else 2472 ppc_avr_t tmp; 2473 ppc_avr_t one = QW_ONE; 2474 2475 avr_qw_not(&tmp, *b); 2476 avr_qw_add(&tmp, *a, tmp); 2477 avr_qw_add(r, tmp, one); 2478 #endif 2479 } 2480 2481 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2482 { 2483 #ifdef CONFIG_INT128 2484 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); 2485 #else 2486 ppc_avr_t tmp, sum; 2487 2488 avr_qw_not(&tmp, *b); 2489 avr_qw_add(&sum, *a, tmp); 2490 2491 tmp.u64[HI_IDX] = 0; 2492 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1; 2493 avr_qw_add(r, sum, tmp); 2494 #endif 2495 } 2496 2497 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2498 { 2499 #ifdef CONFIG_INT128 2500 r->u128 = (~a->u128 < ~b->u128) || 2501 (a->u128 + ~b->u128 == (__uint128_t)-1); 2502 #else 2503 int carry = (avr_qw_cmpu(*a, *b) > 0); 2504 if (!carry) { 2505 ppc_avr_t tmp; 2506 avr_qw_not(&tmp, *b); 2507 avr_qw_add(&tmp, *a, tmp); 2508 carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull)); 2509 } 2510 r->u64[HI_IDX] = 0; 2511 r->u64[LO_IDX] = carry; 2512 #endif 2513 } 2514 2515 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2516 { 2517 #ifdef CONFIG_INT128 2518 r->u128 = 2519 (~a->u128 < ~b->u128) || 2520 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); 2521 #else 2522 int carry_in = c->u64[LO_IDX] & 1; 2523 int carry_out = (avr_qw_cmpu(*a, *b) > 0); 2524 if (!carry_out && carry_in) { 2525 ppc_avr_t tmp; 2526 avr_qw_not(&tmp, *b); 2527 avr_qw_add(&tmp, *a, tmp); 2528 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull)); 2529 } 2530 2531 r->u64[HI_IDX] = 0; 2532 r->u64[LO_IDX] = carry_out; 2533 #endif 2534 } 2535 2536 #define BCD_PLUS_PREF_1 0xC 2537 #define BCD_PLUS_PREF_2 0xF 2538 #define BCD_PLUS_ALT_1 0xA 2539 #define BCD_NEG_PREF 0xD 2540 #define BCD_NEG_ALT 0xB 2541 #define BCD_PLUS_ALT_2 0xE 2542 #define NATIONAL_PLUS 0x2B 2543 #define NATIONAL_NEG 0x2D 2544 2545 #if defined(HOST_WORDS_BIGENDIAN) 2546 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2547 #else 2548 #define BCD_DIG_BYTE(n) ((n) / 2) 2549 #endif 2550 2551 static int bcd_get_sgn(ppc_avr_t *bcd) 2552 { 2553 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) { 2554 case BCD_PLUS_PREF_1: 2555 case BCD_PLUS_PREF_2: 2556 case BCD_PLUS_ALT_1: 2557 case BCD_PLUS_ALT_2: 2558 { 2559 return 1; 2560 } 2561 2562 case BCD_NEG_PREF: 2563 case BCD_NEG_ALT: 2564 { 2565 return -1; 2566 } 2567 2568 default: 2569 { 2570 return 0; 2571 } 2572 } 2573 } 2574 2575 static int bcd_preferred_sgn(int sgn, int ps) 2576 { 2577 if (sgn >= 0) { 2578 return (ps == 0) ? 
BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2579 } else { 2580 return BCD_NEG_PREF; 2581 } 2582 } 2583 2584 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2585 { 2586 uint8_t result; 2587 if (n & 1) { 2588 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4; 2589 } else { 2590 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF; 2591 } 2592 2593 if (unlikely(result > 9)) { 2594 *invalid = true; 2595 } 2596 return result; 2597 } 2598 2599 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2600 { 2601 if (n & 1) { 2602 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F; 2603 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4); 2604 } else { 2605 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0; 2606 bcd->u8[BCD_DIG_BYTE(n)] |= digit; 2607 } 2608 } 2609 2610 static bool bcd_is_valid(ppc_avr_t *bcd) 2611 { 2612 int i; 2613 int invalid = 0; 2614 2615 if (bcd_get_sgn(bcd) == 0) { 2616 return false; 2617 } 2618 2619 for (i = 1; i < 32; i++) { 2620 bcd_get_digit(bcd, i, &invalid); 2621 if (unlikely(invalid)) { 2622 return false; 2623 } 2624 } 2625 return true; 2626 } 2627 2628 static int bcd_cmp_zero(ppc_avr_t *bcd) 2629 { 2630 if (bcd->u64[HI_IDX] == 0 && (bcd->u64[LO_IDX] >> 4) == 0) { 2631 return CRF_EQ; 2632 } else { 2633 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT; 2634 } 2635 } 2636 2637 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2638 { 2639 #if defined(HOST_WORDS_BIGENDIAN) 2640 return reg->u16[7 - n]; 2641 #else 2642 return reg->u16[n]; 2643 #endif 2644 } 2645 2646 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2647 { 2648 #if defined(HOST_WORDS_BIGENDIAN) 2649 reg->u16[7 - n] = val; 2650 #else 2651 reg->u16[n] = val; 2652 #endif 2653 } 2654 2655 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2656 { 2657 int i; 2658 int invalid = 0; 2659 for (i = 31; i > 0; i--) { 2660 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2661 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2662 if (unlikely(invalid)) { 2663 return 0; /* doesn't matter */ 2664 } else if (dig_a > dig_b) { 2665 return 1; 2666 } else if (dig_a < dig_b) { 2667 return -1; 2668 } 2669 } 2670 2671 return 0; 2672 } 2673 2674 static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2675 int *overflow) 2676 { 2677 int carry = 0; 2678 int i; 2679 for (i = 1; i <= 31; i++) { 2680 uint8_t digit = bcd_get_digit(a, i, invalid) + 2681 bcd_get_digit(b, i, invalid) + carry; 2682 if (digit > 9) { 2683 carry = 1; 2684 digit -= 10; 2685 } else { 2686 carry = 0; 2687 } 2688 2689 bcd_put_digit(t, digit, i); 2690 } 2691 2692 *overflow = carry; 2693 } 2694 2695 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2696 int *overflow) 2697 { 2698 int carry = 0; 2699 int i; 2700 2701 for (i = 1; i <= 31; i++) { 2702 uint8_t digit = bcd_get_digit(a, i, invalid) - 2703 bcd_get_digit(b, i, invalid) + carry; 2704 if (digit & 0x80) { 2705 carry = -1; 2706 digit += 10; 2707 } else { 2708 carry = 0; 2709 } 2710 2711 bcd_put_digit(t, digit, i); 2712 } 2713 2714 *overflow = carry; 2715 } 2716 2717 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2718 { 2719 2720 int sgna = bcd_get_sgn(a); 2721 int sgnb = bcd_get_sgn(b); 2722 int invalid = (sgna == 0) || (sgnb == 0); 2723 int overflow = 0; 2724 uint32_t cr = 0; 2725 ppc_avr_t result = { .u64 = { 0, 0 } }; 2726 2727 if (!invalid) { 2728 if (sgna == sgnb) { 2729 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps); 2730 bcd_add_mag(&result, a, b, &invalid, &overflow); 2731 cr = bcd_cmp_zero(&result); 2732 } else { 2733 int magnitude = 
bcd_cmp_mag(a, b); 2734 if (magnitude > 0) { 2735 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps); 2736 bcd_sub_mag(&result, a, b, &invalid, &overflow); 2737 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2738 } else if (magnitude < 0) { 2739 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps); 2740 bcd_sub_mag(&result, b, a, &invalid, &overflow); 2741 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2742 } else { 2743 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(0, ps); 2744 cr = CRF_EQ; 2745 } 2746 } 2747 } 2748 2749 if (unlikely(invalid)) { 2750 result.u64[HI_IDX] = result.u64[LO_IDX] = -1; 2751 cr = CRF_SO; 2752 } else if (overflow) { 2753 cr |= CRF_SO; 2754 } 2755 2756 *r = result; 2757 2758 return cr; 2759 } 2760 2761 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2762 { 2763 ppc_avr_t bcopy = *b; 2764 int sgnb = bcd_get_sgn(b); 2765 if (sgnb < 0) { 2766 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2767 } else if (sgnb > 0) { 2768 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2769 } 2770 /* else invalid ... defer to bcdadd code for proper handling */ 2771 2772 return helper_bcdadd(r, a, &bcopy, ps); 2773 } 2774 2775 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2776 { 2777 int i; 2778 int cr = 0; 2779 uint16_t national = 0; 2780 uint16_t sgnb = get_national_digit(b, 0); 2781 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2782 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2783 2784 for (i = 1; i < 8; i++) { 2785 national = get_national_digit(b, i); 2786 if (unlikely(national < 0x30 || national > 0x39)) { 2787 invalid = 1; 2788 break; 2789 } 2790 2791 bcd_put_digit(&ret, national & 0xf, i); 2792 } 2793 2794 if (sgnb == NATIONAL_PLUS) { 2795 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2796 } else { 2797 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2798 } 2799 2800 cr = bcd_cmp_zero(&ret); 2801 2802 if (unlikely(invalid)) { 2803 cr = CRF_SO; 2804 } 2805 2806 *r = ret; 2807 2808 return cr; 2809 } 2810 2811 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2812 { 2813 int i; 2814 int cr = 0; 2815 int sgnb = bcd_get_sgn(b); 2816 int invalid = (sgnb == 0); 2817 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2818 2819 int ox_flag = (b->u64[HI_IDX] != 0) || ((b->u64[LO_IDX] >> 32) != 0); 2820 2821 for (i = 1; i < 8; i++) { 2822 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2823 2824 if (unlikely(invalid)) { 2825 break; 2826 } 2827 } 2828 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2829 2830 cr = bcd_cmp_zero(b); 2831 2832 if (ox_flag) { 2833 cr |= CRF_SO; 2834 } 2835 2836 if (unlikely(invalid)) { 2837 cr = CRF_SO; 2838 } 2839 2840 *r = ret; 2841 2842 return cr; 2843 } 2844 2845 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2846 { 2847 int i; 2848 int cr = 0; 2849 int invalid = 0; 2850 int zone_digit = 0; 2851 int zone_lead = ps ? 0xF : 0x3; 2852 int digit = 0; 2853 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2854 int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4; 2855 2856 if (unlikely((sgnb < 0xA) && ps)) { 2857 invalid = 1; 2858 } 2859 2860 for (i = 0; i < 16; i++) { 2861 zone_digit = i ? 
b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead; 2862 digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF; 2863 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2864 invalid = 1; 2865 break; 2866 } 2867 2868 bcd_put_digit(&ret, digit, i + 1); 2869 } 2870 2871 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2872 (!ps && (sgnb & 0x4))) { 2873 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2874 } else { 2875 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2876 } 2877 2878 cr = bcd_cmp_zero(&ret); 2879 2880 if (unlikely(invalid)) { 2881 cr = CRF_SO; 2882 } 2883 2884 *r = ret; 2885 2886 return cr; 2887 } 2888 2889 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2890 { 2891 int i; 2892 int cr = 0; 2893 uint8_t digit = 0; 2894 int sgnb = bcd_get_sgn(b); 2895 int zone_lead = (ps) ? 0xF0 : 0x30; 2896 int invalid = (sgnb == 0); 2897 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2898 2899 int ox_flag = ((b->u64[HI_IDX] >> 4) != 0); 2900 2901 for (i = 0; i < 16; i++) { 2902 digit = bcd_get_digit(b, i + 1, &invalid); 2903 2904 if (unlikely(invalid)) { 2905 break; 2906 } 2907 2908 ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit; 2909 } 2910 2911 if (ps) { 2912 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2913 } else { 2914 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1); 2915 } 2916 2917 cr = bcd_cmp_zero(b); 2918 2919 if (ox_flag) { 2920 cr |= CRF_SO; 2921 } 2922 2923 if (unlikely(invalid)) { 2924 cr = CRF_SO; 2925 } 2926 2927 *r = ret; 2928 2929 return cr; 2930 } 2931 2932 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2933 { 2934 int i; 2935 int cr = 0; 2936 uint64_t lo_value; 2937 uint64_t hi_value; 2938 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2939 2940 if (b->s64[HI_IDX] < 0) { 2941 lo_value = -b->s64[LO_IDX]; 2942 hi_value = ~b->u64[HI_IDX] + !lo_value; 2943 bcd_put_digit(&ret, 0xD, 0); 2944 } else { 2945 lo_value = b->u64[LO_IDX]; 2946 hi_value = b->u64[HI_IDX]; 2947 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); 2948 } 2949 2950 if (divu128(&lo_value, &hi_value, 1000000000000000ULL) || 2951 lo_value > 9999999999999999ULL) { 2952 cr = CRF_SO; 2953 } 2954 2955 for (i = 1; i < 16; hi_value /= 10, i++) { 2956 bcd_put_digit(&ret, hi_value % 10, i); 2957 } 2958 2959 for (; i < 32; lo_value /= 10, i++) { 2960 bcd_put_digit(&ret, lo_value % 10, i); 2961 } 2962 2963 cr |= bcd_cmp_zero(&ret); 2964 2965 *r = ret; 2966 2967 return cr; 2968 } 2969 2970 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2971 { 2972 uint8_t i; 2973 int cr; 2974 uint64_t carry; 2975 uint64_t unused; 2976 uint64_t lo_value; 2977 uint64_t hi_value = 0; 2978 int sgnb = bcd_get_sgn(b); 2979 int invalid = (sgnb == 0); 2980 2981 lo_value = bcd_get_digit(b, 31, &invalid); 2982 for (i = 30; i > 0; i--) { 2983 mulu64(&lo_value, &carry, lo_value, 10ULL); 2984 mulu64(&hi_value, &unused, hi_value, 10ULL); 2985 lo_value += bcd_get_digit(b, i, &invalid); 2986 hi_value += carry; 2987 2988 if (unlikely(invalid)) { 2989 break; 2990 } 2991 } 2992 2993 if (sgnb == -1) { 2994 r->s64[LO_IDX] = -lo_value; 2995 r->s64[HI_IDX] = ~hi_value + !r->s64[LO_IDX]; 2996 } else { 2997 r->s64[LO_IDX] = lo_value; 2998 r->s64[HI_IDX] = hi_value; 2999 } 3000 3001 cr = bcd_cmp_zero(b); 3002 3003 if (unlikely(invalid)) { 3004 cr = CRF_SO; 3005 } 3006 3007 return cr; 3008 } 3009 3010 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3011 { 3012 int i; 3013 int invalid = 0; 3014 3015 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 3016 return CRF_SO; 3017 } 3018 3019 *r = *a; 3020 bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 
0xF, 0); 3021 3022 for (i = 1; i < 32; i++) { 3023 bcd_get_digit(a, i, &invalid); 3024 bcd_get_digit(b, i, &invalid); 3025 if (unlikely(invalid)) { 3026 return CRF_SO; 3027 } 3028 } 3029 3030 return bcd_cmp_zero(r); 3031 } 3032 3033 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 3034 { 3035 int sgnb = bcd_get_sgn(b); 3036 3037 *r = *b; 3038 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 3039 3040 if (bcd_is_valid(b) == false) { 3041 return CRF_SO; 3042 } 3043 3044 return bcd_cmp_zero(r); 3045 } 3046 3047 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3048 { 3049 int cr; 3050 #if defined(HOST_WORDS_BIGENDIAN) 3051 int i = a->s8[7]; 3052 #else 3053 int i = a->s8[8]; 3054 #endif 3055 bool ox_flag = false; 3056 int sgnb = bcd_get_sgn(b); 3057 ppc_avr_t ret = *b; 3058 ret.u64[LO_IDX] &= ~0xf; 3059 3060 if (bcd_is_valid(b) == false) { 3061 return CRF_SO; 3062 } 3063 3064 if (unlikely(i > 31)) { 3065 i = 31; 3066 } else if (unlikely(i < -31)) { 3067 i = -31; 3068 } 3069 3070 if (i > 0) { 3071 ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag); 3072 } else { 3073 urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4); 3074 } 3075 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 3076 3077 *r = ret; 3078 3079 cr = bcd_cmp_zero(r); 3080 if (ox_flag) { 3081 cr |= CRF_SO; 3082 } 3083 3084 return cr; 3085 } 3086 3087 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3088 { 3089 int cr; 3090 int i; 3091 int invalid = 0; 3092 bool ox_flag = false; 3093 ppc_avr_t ret = *b; 3094 3095 for (i = 0; i < 32; i++) { 3096 bcd_get_digit(b, i, &invalid); 3097 3098 if (unlikely(invalid)) { 3099 return CRF_SO; 3100 } 3101 } 3102 3103 #if defined(HOST_WORDS_BIGENDIAN) 3104 i = a->s8[7]; 3105 #else 3106 i = a->s8[8]; 3107 #endif 3108 if (i >= 32) { 3109 ox_flag = true; 3110 ret.u64[LO_IDX] = ret.u64[HI_IDX] = 0; 3111 } else if (i <= -32) { 3112 ret.u64[LO_IDX] = ret.u64[HI_IDX] = 0; 3113 } else if (i > 0) { 3114 ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag); 3115 } else { 3116 urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4); 3117 } 3118 *r = ret; 3119 3120 cr = bcd_cmp_zero(r); 3121 if (ox_flag) { 3122 cr |= CRF_SO; 3123 } 3124 3125 return cr; 3126 } 3127 3128 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3129 { 3130 int cr; 3131 int unused = 0; 3132 int invalid = 0; 3133 bool ox_flag = false; 3134 int sgnb = bcd_get_sgn(b); 3135 ppc_avr_t ret = *b; 3136 ret.u64[LO_IDX] &= ~0xf; 3137 3138 #if defined(HOST_WORDS_BIGENDIAN) 3139 int i = a->s8[7]; 3140 ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } }; 3141 #else 3142 int i = a->s8[8]; 3143 ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } }; 3144 #endif 3145 3146 if (bcd_is_valid(b) == false) { 3147 return CRF_SO; 3148 } 3149 3150 if (unlikely(i > 31)) { 3151 i = 31; 3152 } else if (unlikely(i < -31)) { 3153 i = -31; 3154 } 3155 3156 if (i > 0) { 3157 ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag); 3158 } else { 3159 urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4); 3160 3161 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 3162 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 3163 } 3164 } 3165 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 3166 3167 cr = bcd_cmp_zero(&ret); 3168 if (ox_flag) { 3169 cr |= CRF_SO; 3170 } 3171 *r = ret; 3172 3173 return cr; 3174 } 3175 3176 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3177 { 3178 uint64_t mask; 3179 uint32_t ox_flag = 0; 3180 #if 
defined(HOST_WORDS_BIGENDIAN) 3181 int i = a->s16[3] + 1; 3182 #else 3183 int i = a->s16[4] + 1; 3184 #endif 3185 ppc_avr_t ret = *b; 3186 3187 if (bcd_is_valid(b) == false) { 3188 return CRF_SO; 3189 } 3190 3191 if (i > 16 && i < 32) { 3192 mask = (uint64_t)-1 >> (128 - i * 4); 3193 if (ret.u64[HI_IDX] & ~mask) { 3194 ox_flag = CRF_SO; 3195 } 3196 3197 ret.u64[HI_IDX] &= mask; 3198 } else if (i >= 0 && i <= 16) { 3199 mask = (uint64_t)-1 >> (64 - i * 4); 3200 if (ret.u64[HI_IDX] || (ret.u64[LO_IDX] & ~mask)) { 3201 ox_flag = CRF_SO; 3202 } 3203 3204 ret.u64[LO_IDX] &= mask; 3205 ret.u64[HI_IDX] = 0; 3206 } 3207 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 3208 *r = ret; 3209 3210 return bcd_cmp_zero(&ret) | ox_flag; 3211 } 3212 3213 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3214 { 3215 int i; 3216 uint64_t mask; 3217 uint32_t ox_flag = 0; 3218 int invalid = 0; 3219 ppc_avr_t ret = *b; 3220 3221 for (i = 0; i < 32; i++) { 3222 bcd_get_digit(b, i, &invalid); 3223 3224 if (unlikely(invalid)) { 3225 return CRF_SO; 3226 } 3227 } 3228 3229 #if defined(HOST_WORDS_BIGENDIAN) 3230 i = a->s16[3]; 3231 #else 3232 i = a->s16[4]; 3233 #endif 3234 if (i > 16 && i < 33) { 3235 mask = (uint64_t)-1 >> (128 - i * 4); 3236 if (ret.u64[HI_IDX] & ~mask) { 3237 ox_flag = CRF_SO; 3238 } 3239 3240 ret.u64[HI_IDX] &= mask; 3241 } else if (i > 0 && i <= 16) { 3242 mask = (uint64_t)-1 >> (64 - i * 4); 3243 if (ret.u64[HI_IDX] || (ret.u64[LO_IDX] & ~mask)) { 3244 ox_flag = CRF_SO; 3245 } 3246 3247 ret.u64[LO_IDX] &= mask; 3248 ret.u64[HI_IDX] = 0; 3249 } else if (i == 0) { 3250 if (ret.u64[HI_IDX] || ret.u64[LO_IDX]) { 3251 ox_flag = CRF_SO; 3252 } 3253 ret.u64[HI_IDX] = ret.u64[LO_IDX] = 0; 3254 } 3255 3256 *r = ret; 3257 if (r->u64[HI_IDX] == 0 && r->u64[LO_IDX] == 0) { 3258 return ox_flag | CRF_EQ; 3259 } 3260 3261 return ox_flag | CRF_GT; 3262 } 3263 3264 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 3265 { 3266 int i; 3267 VECTOR_FOR_INORDER_I(i, u8) { 3268 r->u8[i] = AES_sbox[a->u8[i]]; 3269 } 3270 } 3271 3272 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3273 { 3274 ppc_avr_t result; 3275 int i; 3276 3277 VECTOR_FOR_INORDER_I(i, u32) { 3278 result.AVRW(i) = b->AVRW(i) ^ 3279 (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^ 3280 AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^ 3281 AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^ 3282 AES_Te3[a->AVRB(AES_shifts[4*i + 3])]); 3283 } 3284 *r = result; 3285 } 3286 3287 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3288 { 3289 ppc_avr_t result; 3290 int i; 3291 3292 VECTOR_FOR_INORDER_I(i, u8) { 3293 result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]); 3294 } 3295 *r = result; 3296 } 3297 3298 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3299 { 3300 /* This differs from what is written in ISA V2.07. The RTL is */ 3301 /* incorrect and will be fixed in V2.07B. 
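 * (In the code below the round key in *b is XORed into the state after the
 * InvShiftRows/InvSubBytes step and before InvMixColumns.)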
*/ 3302 int i; 3303 ppc_avr_t tmp; 3304 3305 VECTOR_FOR_INORDER_I(i, u8) { 3306 tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])]; 3307 } 3308 3309 VECTOR_FOR_INORDER_I(i, u32) { 3310 r->AVRW(i) = 3311 AES_imc[tmp.AVRB(4*i + 0)][0] ^ 3312 AES_imc[tmp.AVRB(4*i + 1)][1] ^ 3313 AES_imc[tmp.AVRB(4*i + 2)][2] ^ 3314 AES_imc[tmp.AVRB(4*i + 3)][3]; 3315 } 3316 } 3317 3318 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3319 { 3320 ppc_avr_t result; 3321 int i; 3322 3323 VECTOR_FOR_INORDER_I(i, u8) { 3324 result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]); 3325 } 3326 *r = result; 3327 } 3328 3329 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n))) 3330 #if defined(HOST_WORDS_BIGENDIAN) 3331 #define EL_IDX(i) (i) 3332 #else 3333 #define EL_IDX(i) (3 - (i)) 3334 #endif 3335 3336 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3337 { 3338 int st = (st_six & 0x10) != 0; 3339 int six = st_six & 0xF; 3340 int i; 3341 3342 VECTOR_FOR_INORDER_I(i, u32) { 3343 if (st == 0) { 3344 if ((six & (0x8 >> i)) == 0) { 3345 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^ 3346 ROTRu32(a->u32[EL_IDX(i)], 18) ^ 3347 (a->u32[EL_IDX(i)] >> 3); 3348 } else { /* six.bit[i] == 1 */ 3349 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^ 3350 ROTRu32(a->u32[EL_IDX(i)], 19) ^ 3351 (a->u32[EL_IDX(i)] >> 10); 3352 } 3353 } else { /* st == 1 */ 3354 if ((six & (0x8 >> i)) == 0) { 3355 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^ 3356 ROTRu32(a->u32[EL_IDX(i)], 13) ^ 3357 ROTRu32(a->u32[EL_IDX(i)], 22); 3358 } else { /* six.bit[i] == 1 */ 3359 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^ 3360 ROTRu32(a->u32[EL_IDX(i)], 11) ^ 3361 ROTRu32(a->u32[EL_IDX(i)], 25); 3362 } 3363 } 3364 } 3365 } 3366 3367 #undef ROTRu32 3368 #undef EL_IDX 3369 3370 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n))) 3371 #if defined(HOST_WORDS_BIGENDIAN) 3372 #define EL_IDX(i) (i) 3373 #else 3374 #define EL_IDX(i) (1 - (i)) 3375 #endif 3376 3377 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3378 { 3379 int st = (st_six & 0x10) != 0; 3380 int six = st_six & 0xF; 3381 int i; 3382 3383 VECTOR_FOR_INORDER_I(i, u64) { 3384 if (st == 0) { 3385 if ((six & (0x8 >> (2*i))) == 0) { 3386 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^ 3387 ROTRu64(a->u64[EL_IDX(i)], 8) ^ 3388 (a->u64[EL_IDX(i)] >> 7); 3389 } else { /* six.bit[2*i] == 1 */ 3390 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^ 3391 ROTRu64(a->u64[EL_IDX(i)], 61) ^ 3392 (a->u64[EL_IDX(i)] >> 6); 3393 } 3394 } else { /* st == 1 */ 3395 if ((six & (0x8 >> (2*i))) == 0) { 3396 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^ 3397 ROTRu64(a->u64[EL_IDX(i)], 34) ^ 3398 ROTRu64(a->u64[EL_IDX(i)], 39); 3399 } else { /* six.bit[2*i] == 1 */ 3400 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^ 3401 ROTRu64(a->u64[EL_IDX(i)], 18) ^ 3402 ROTRu64(a->u64[EL_IDX(i)], 41); 3403 } 3404 } 3405 } 3406 } 3407 3408 #undef ROTRu64 3409 #undef EL_IDX 3410 3411 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 3412 { 3413 ppc_avr_t result; 3414 int i; 3415 3416 VECTOR_FOR_INORDER_I(i, u8) { 3417 int indexA = c->u8[i] >> 4; 3418 int indexB = c->u8[i] & 0xF; 3419 #if defined(HOST_WORDS_BIGENDIAN) 3420 result.u8[i] = a->u8[indexA] ^ b->u8[indexB]; 3421 #else 3422 result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB]; 3423 #endif 3424 } 3425 *r = result; 3426 } 3427 3428 #undef VECTOR_FOR_INORDER_I 3429 #undef HI_IDX 3430 #undef LO_IDX 3431 3432 
/*****************************************************************************/ 3433 /* SPE extension helpers */ 3434 /* Use a table to make this quicker */ 3435 static const uint8_t hbrev[16] = { 3436 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE, 3437 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF, 3438 }; 3439 3440 static inline uint8_t byte_reverse(uint8_t val) 3441 { 3442 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4); 3443 } 3444 3445 static inline uint32_t word_reverse(uint32_t val) 3446 { 3447 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) | 3448 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24); 3449 } 3450 3451 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */ 3452 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2) 3453 { 3454 uint32_t a, b, d, mask; 3455 3456 mask = UINT32_MAX >> (32 - MASKBITS); 3457 a = arg1 & mask; 3458 b = arg2 & mask; 3459 d = word_reverse(1 + word_reverse(a | ~b)); 3460 return (arg1 & ~mask) | (d & b); 3461 } 3462 3463 uint32_t helper_cntlsw32(uint32_t val) 3464 { 3465 if (val & 0x80000000) { 3466 return clz32(~val); 3467 } else { 3468 return clz32(val); 3469 } 3470 } 3471 3472 uint32_t helper_cntlzw32(uint32_t val) 3473 { 3474 return clz32(val); 3475 } 3476 3477 /* 440 specific */ 3478 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 3479 target_ulong low, uint32_t update_Rc) 3480 { 3481 target_ulong mask; 3482 int i; 3483 3484 i = 1; 3485 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3486 if ((high & mask) == 0) { 3487 if (update_Rc) { 3488 env->crf[0] = 0x4; 3489 } 3490 goto done; 3491 } 3492 i++; 3493 } 3494 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3495 if ((low & mask) == 0) { 3496 if (update_Rc) { 3497 env->crf[0] = 0x8; 3498 } 3499 goto done; 3500 } 3501 i++; 3502 } 3503 i = 8; 3504 if (update_Rc) { 3505 env->crf[0] = 0x2; 3506 } 3507 done: 3508 env->xer = (env->xer & ~0x7F) | i; 3509 if (update_Rc) { 3510 env->crf[0] |= xer_so; 3511 } 3512 return i; 3513 } 3514