/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif

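/*
 * Worked example for the extended-divide helpers above (an illustration
 * derived from the code, not a quotation from the ISA): divweu with ra = 1,
 * rb = 2 divides the 64-bit dividend 0x1_0000_0000 by 2 and returns
 * 0x8000_0000, which still fits in 32 bits.  With ra = 1, rb = 1 the
 * quotient would be 0x1_0000_0000, larger than UINT32_MAX, so the helper
 * reports overflow and forces the architecturally undefined result to 0.
 */
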
#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/*
 * subtract 1 from each byte, and with inverse, check if MSB is set at each
 * byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n)  (haszero((x) ^ pattern(n)))

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

/*
 * Return invalid random number.
 *
 * FIXME: Add rng backend or other mechanism to get cryptographically suitable
 * random number
 */
target_ulong helper_darn32(void)
{
    return -1;
}

target_ulong helper_darn64(void)
{
    return -1;
}

#endif

#if defined(TARGET_PPC64)

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & PPC_BIT(index)) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca32 = env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca32 = env->ca = (ret != 0);
    }
    return ret;
}
#endif

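/*
 * Note on the carry computed by sraw/srad above: CA (and CA32) is set only
 * when the source is negative and at least one 1-bit is shifted out.  For
 * example, sraw of 0xffff_fff5 (-11) by 1 gives -6 with CA = 1, while sraw
 * of 0x0000_0014 (20) by 1 gives 10 with CA = 0.  (Illustrative values
 * derived from the code above, not quoted from the ISA.)
 */
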
#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words.  */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}
#endif

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

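/*
 * The POWER-bridge divides above build their 64-bit dividend from the GPR
 * operand (high word) and the MQ special register (low word), and leave the
 * remainder in MQ.  For instance, with arg1 = 0 and MQ = 100, "div" by 7
 * returns 14 and leaves MQ = 2 (an illustration of the code above, not an
 * ISA quotation).
 */
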
/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/*
 * Extremely decomposed:
 *                      -arg / 256
 * return 256 * log10(10           + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.inc.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        r->VsrB(i) = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        r->VsrB(i) = j++;
    }
}

void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
{
    env->vscr = vscr & ~(1u << VSCR_SAT);
    /* Which bit we set is completely arbitrary, but clear the rest.  */
    env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT);
    env->vscr_sat.u64[1] = 0;
    set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
}

uint32_t helper_mfvscr(CPUPPCState *env)
{
    uint32_t sat = (env->vscr_sat.u64[0] | env->vscr_sat.u64[1]) != 0;
    return env->vscr | (sat << VSCR_SAT);
}

static inline void set_vscr_sat(CPUPPCState *env)
{
    /* The choice of non-zero value is arbitrary.
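All that matters is that the 128-bit flag vector becomes non-zero, since helper_mfvscr above only ORs the two halves together and tests the result.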
*/ 481 env->vscr_sat.u32[0] = 1; 482 } 483 484 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 485 { 486 int i; 487 488 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 489 r->u32[i] = ~a->u32[i] < b->u32[i]; 490 } 491 } 492 493 /* vprtybw */ 494 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b) 495 { 496 int i; 497 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 498 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16); 499 res ^= res >> 8; 500 r->u32[i] = res & 1; 501 } 502 } 503 504 /* vprtybd */ 505 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b) 506 { 507 int i; 508 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 509 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32); 510 res ^= res >> 16; 511 res ^= res >> 8; 512 r->u64[i] = res & 1; 513 } 514 } 515 516 /* vprtybq */ 517 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b) 518 { 519 uint64_t res = b->u64[0] ^ b->u64[1]; 520 res ^= res >> 32; 521 res ^= res >> 16; 522 res ^= res >> 8; 523 r->VsrD(1) = res & 1; 524 r->VsrD(0) = 0; 525 } 526 527 #define VARITH_DO(name, op, element) \ 528 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 529 { \ 530 int i; \ 531 \ 532 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 533 r->element[i] = a->element[i] op b->element[i]; \ 534 } \ 535 } 536 VARITH_DO(muluwm, *, u32) 537 #undef VARITH_DO 538 #undef VARITH 539 540 #define VARITHFP(suffix, func) \ 541 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 542 ppc_avr_t *b) \ 543 { \ 544 int i; \ 545 \ 546 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 547 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 548 } \ 549 } 550 VARITHFP(addfp, float32_add) 551 VARITHFP(subfp, float32_sub) 552 VARITHFP(minfp, float32_min) 553 VARITHFP(maxfp, float32_max) 554 #undef VARITHFP 555 556 #define VARITHFPFMA(suffix, type) \ 557 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 558 ppc_avr_t *b, ppc_avr_t *c) \ 559 { \ 560 int i; \ 561 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 562 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 563 type, &env->vec_status); \ 564 } \ 565 } 566 VARITHFPFMA(maddfp, 0); 567 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 568 #undef VARITHFPFMA 569 570 #define VARITHSAT_CASE(type, op, cvt, element) \ 571 { \ 572 type result = (type)a->element[i] op (type)b->element[i]; \ 573 r->element[i] = cvt(result, &sat); \ 574 } 575 576 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 577 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 578 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 579 { \ 580 int sat = 0; \ 581 int i; \ 582 \ 583 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 584 VARITHSAT_CASE(optype, op, cvt, element); \ 585 } \ 586 if (sat) { \ 587 vscr_sat->u32[0] = 1; \ 588 } \ 589 } 590 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 591 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 592 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 593 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 594 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 595 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 596 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 597 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 598 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 599 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 600 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 601 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 602 #undef VARITHSAT_CASE 603 #undef VARITHSAT_DO 604 #undef VARITHSAT_SIGNED 605 #undef VARITHSAT_UNSIGNED 606 607 
#define VAVG_DO(name, element, etype) \ 608 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 609 { \ 610 int i; \ 611 \ 612 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 613 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 614 r->element[i] = x >> 1; \ 615 } \ 616 } 617 618 #define VAVG(type, signed_element, signed_type, unsigned_element, \ 619 unsigned_type) \ 620 VAVG_DO(avgs##type, signed_element, signed_type) \ 621 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 622 VAVG(b, s8, int16_t, u8, uint16_t) 623 VAVG(h, s16, int32_t, u16, uint32_t) 624 VAVG(w, s32, int64_t, u32, uint64_t) 625 #undef VAVG_DO 626 #undef VAVG 627 628 #define VABSDU_DO(name, element) \ 629 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 630 { \ 631 int i; \ 632 \ 633 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 634 r->element[i] = (a->element[i] > b->element[i]) ? \ 635 (a->element[i] - b->element[i]) : \ 636 (b->element[i] - a->element[i]); \ 637 } \ 638 } 639 640 /* 641 * VABSDU - Vector absolute difference unsigned 642 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 643 * element - element type to access from vector 644 */ 645 #define VABSDU(type, element) \ 646 VABSDU_DO(absdu##type, element) 647 VABSDU(b, u8) 648 VABSDU(h, u16) 649 VABSDU(w, u32) 650 #undef VABSDU_DO 651 #undef VABSDU 652 653 #define VCF(suffix, cvt, element) \ 654 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 655 ppc_avr_t *b, uint32_t uim) \ 656 { \ 657 int i; \ 658 \ 659 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 660 float32 t = cvt(b->element[i], &env->vec_status); \ 661 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 662 } \ 663 } 664 VCF(ux, uint32_to_float32, u32) 665 VCF(sx, int32_to_float32, s32) 666 #undef VCF 667 668 #define VCMP_DO(suffix, compare, element, record) \ 669 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 670 ppc_avr_t *a, ppc_avr_t *b) \ 671 { \ 672 uint64_t ones = (uint64_t)-1; \ 673 uint64_t all = ones; \ 674 uint64_t none = 0; \ 675 int i; \ 676 \ 677 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 678 uint64_t result = (a->element[i] compare b->element[i] ? \ 679 ones : 0x0); \ 680 switch (sizeof(a->element[0])) { \ 681 case 8: \ 682 r->u64[i] = result; \ 683 break; \ 684 case 4: \ 685 r->u32[i] = result; \ 686 break; \ 687 case 2: \ 688 r->u16[i] = result; \ 689 break; \ 690 case 1: \ 691 r->u8[i] = result; \ 692 break; \ 693 } \ 694 all &= result; \ 695 none |= result; \ 696 } \ 697 if (record) { \ 698 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 699 } \ 700 } 701 #define VCMP(suffix, compare, element) \ 702 VCMP_DO(suffix, compare, element, 0) \ 703 VCMP_DO(suffix##_dot, compare, element, 1) 704 VCMP(equb, ==, u8) 705 VCMP(equh, ==, u16) 706 VCMP(equw, ==, u32) 707 VCMP(equd, ==, u64) 708 VCMP(gtub, >, u8) 709 VCMP(gtuh, >, u16) 710 VCMP(gtuw, >, u32) 711 VCMP(gtud, >, u64) 712 VCMP(gtsb, >, s8) 713 VCMP(gtsh, >, s16) 714 VCMP(gtsw, >, s32) 715 VCMP(gtsd, >, s64) 716 #undef VCMP_DO 717 #undef VCMP 718 719 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \ 720 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \ 721 ppc_avr_t *a, ppc_avr_t *b) \ 722 { \ 723 etype ones = (etype)-1; \ 724 etype all = ones; \ 725 etype result, none = 0; \ 726 int i; \ 727 \ 728 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 729 if (cmpzero) { \ 730 result = ((a->element[i] == 0) \ 731 || (b->element[i] == 0) \ 732 || (a->element[i] != b->element[i]) ? 
\ 733 ones : 0x0); \ 734 } else { \ 735 result = (a->element[i] != b->element[i]) ? ones : 0x0; \ 736 } \ 737 r->element[i] = result; \ 738 all &= result; \ 739 none |= result; \ 740 } \ 741 if (record) { \ 742 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 743 } \ 744 } 745 746 /* 747 * VCMPNEZ - Vector compare not equal to zero 748 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word) 749 * element - element type to access from vector 750 */ 751 #define VCMPNE(suffix, element, etype, cmpzero) \ 752 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \ 753 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1) 754 VCMPNE(zb, u8, uint8_t, 1) 755 VCMPNE(zh, u16, uint16_t, 1) 756 VCMPNE(zw, u32, uint32_t, 1) 757 VCMPNE(b, u8, uint8_t, 0) 758 VCMPNE(h, u16, uint16_t, 0) 759 VCMPNE(w, u32, uint32_t, 0) 760 #undef VCMPNE_DO 761 #undef VCMPNE 762 763 #define VCMPFP_DO(suffix, compare, order, record) \ 764 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 765 ppc_avr_t *a, ppc_avr_t *b) \ 766 { \ 767 uint32_t ones = (uint32_t)-1; \ 768 uint32_t all = ones; \ 769 uint32_t none = 0; \ 770 int i; \ 771 \ 772 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 773 uint32_t result; \ 774 int rel = float32_compare_quiet(a->f32[i], b->f32[i], \ 775 &env->vec_status); \ 776 if (rel == float_relation_unordered) { \ 777 result = 0; \ 778 } else if (rel compare order) { \ 779 result = ones; \ 780 } else { \ 781 result = 0; \ 782 } \ 783 r->u32[i] = result; \ 784 all &= result; \ 785 none |= result; \ 786 } \ 787 if (record) { \ 788 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 789 } \ 790 } 791 #define VCMPFP(suffix, compare, order) \ 792 VCMPFP_DO(suffix, compare, order, 0) \ 793 VCMPFP_DO(suffix##_dot, compare, order, 1) 794 VCMPFP(eqfp, ==, float_relation_equal) 795 VCMPFP(gefp, !=, float_relation_less) 796 VCMPFP(gtfp, ==, float_relation_greater) 797 #undef VCMPFP_DO 798 #undef VCMPFP 799 800 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 801 ppc_avr_t *a, ppc_avr_t *b, int record) 802 { 803 int i; 804 int all_in = 0; 805 806 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 807 int le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 808 &env->vec_status); 809 if (le_rel == float_relation_unordered) { 810 r->u32[i] = 0xc0000000; 811 all_in = 1; 812 } else { 813 float32 bneg = float32_chs(b->f32[i]); 814 int ge_rel = float32_compare_quiet(a->f32[i], bneg, 815 &env->vec_status); 816 int le = le_rel != float_relation_greater; 817 int ge = ge_rel != float_relation_less; 818 819 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 820 all_in |= (!le | !ge); 821 } 822 } 823 if (record) { 824 env->crf[6] = (all_in == 0) << 1; 825 } 826 } 827 828 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 829 { 830 vcmpbfp_internal(env, r, a, b, 0); 831 } 832 833 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 834 ppc_avr_t *b) 835 { 836 vcmpbfp_internal(env, r, a, b, 1); 837 } 838 839 #define VCT(suffix, satcvt, element) \ 840 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 841 ppc_avr_t *b, uint32_t uim) \ 842 { \ 843 int i; \ 844 int sat = 0; \ 845 float_status s = env->vec_status; \ 846 \ 847 set_float_rounding_mode(float_round_to_zero, &s); \ 848 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 849 if (float32_is_any_nan(b->f32[i])) { \ 850 r->element[i] = 0; \ 851 } else { \ 852 float64 t = float32_to_float64(b->f32[i], &s); \ 853 int64_t j; \ 854 \ 855 t = float64_scalbn(t, uim, &s); \ 856 j = float64_to_int64(t, &s); \ 857 
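            /* saturate the 64-bit conversion result into the element type */ \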
r->element[i] = satcvt(j, &sat); \ 858 } \ 859 } \ 860 if (sat) { \ 861 set_vscr_sat(env); \ 862 } \ 863 } 864 VCT(uxs, cvtsduw, u32) 865 VCT(sxs, cvtsdsw, s32) 866 #undef VCT 867 868 target_ulong helper_vclzlsbb(ppc_avr_t *r) 869 { 870 target_ulong count = 0; 871 int i; 872 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 873 if (r->VsrB(i) & 0x01) { 874 break; 875 } 876 count++; 877 } 878 return count; 879 } 880 881 target_ulong helper_vctzlsbb(ppc_avr_t *r) 882 { 883 target_ulong count = 0; 884 int i; 885 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 886 if (r->VsrB(i) & 0x01) { 887 break; 888 } 889 count++; 890 } 891 return count; 892 } 893 894 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 895 ppc_avr_t *b, ppc_avr_t *c) 896 { 897 int sat = 0; 898 int i; 899 900 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 901 int32_t prod = a->s16[i] * b->s16[i]; 902 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 903 904 r->s16[i] = cvtswsh(t, &sat); 905 } 906 907 if (sat) { 908 set_vscr_sat(env); 909 } 910 } 911 912 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 913 ppc_avr_t *b, ppc_avr_t *c) 914 { 915 int sat = 0; 916 int i; 917 918 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 919 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 920 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 921 r->s16[i] = cvtswsh(t, &sat); 922 } 923 924 if (sat) { 925 set_vscr_sat(env); 926 } 927 } 928 929 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 930 { 931 int i; 932 933 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 934 int32_t prod = a->s16[i] * b->s16[i]; 935 r->s16[i] = (int16_t) (prod + c->s16[i]); 936 } 937 } 938 939 #define VMRG_DO(name, element, access, ofs) \ 940 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 941 { \ 942 ppc_avr_t result; \ 943 int i, half = ARRAY_SIZE(r->element) / 2; \ 944 \ 945 for (i = 0; i < half; i++) { \ 946 result.access(i * 2 + 0) = a->access(i + ofs); \ 947 result.access(i * 2 + 1) = b->access(i + ofs); \ 948 } \ 949 *r = result; \ 950 } 951 952 #define VMRG(suffix, element, access) \ 953 VMRG_DO(mrgl##suffix, element, access, half) \ 954 VMRG_DO(mrgh##suffix, element, access, 0) 955 VMRG(b, u8, VsrB) 956 VMRG(h, u16, VsrH) 957 VMRG(w, u32, VsrW) 958 #undef VMRG_DO 959 #undef VMRG 960 961 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 962 ppc_avr_t *b, ppc_avr_t *c) 963 { 964 int32_t prod[16]; 965 int i; 966 967 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 968 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 969 } 970 971 VECTOR_FOR_INORDER_I(i, s32) { 972 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 973 prod[4 * i + 2] + prod[4 * i + 3]; 974 } 975 } 976 977 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 978 ppc_avr_t *b, ppc_avr_t *c) 979 { 980 int32_t prod[8]; 981 int i; 982 983 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 984 prod[i] = a->s16[i] * b->s16[i]; 985 } 986 987 VECTOR_FOR_INORDER_I(i, s32) { 988 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 989 } 990 } 991 992 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 993 ppc_avr_t *b, ppc_avr_t *c) 994 { 995 int32_t prod[8]; 996 int i; 997 int sat = 0; 998 999 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1000 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1001 } 1002 1003 VECTOR_FOR_INORDER_I(i, s32) { 1004 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1005 1006 r->u32[i] = cvtsdsw(t, &sat); 1007 } 1008 1009 if (sat) { 1010 set_vscr_sat(env); 1011 } 1012 } 1013 1014 void 
helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1015 ppc_avr_t *b, ppc_avr_t *c) 1016 { 1017 uint16_t prod[16]; 1018 int i; 1019 1020 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1021 prod[i] = a->u8[i] * b->u8[i]; 1022 } 1023 1024 VECTOR_FOR_INORDER_I(i, u32) { 1025 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1026 prod[4 * i + 2] + prod[4 * i + 3]; 1027 } 1028 } 1029 1030 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1031 ppc_avr_t *b, ppc_avr_t *c) 1032 { 1033 uint32_t prod[8]; 1034 int i; 1035 1036 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1037 prod[i] = a->u16[i] * b->u16[i]; 1038 } 1039 1040 VECTOR_FOR_INORDER_I(i, u32) { 1041 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1042 } 1043 } 1044 1045 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1046 ppc_avr_t *b, ppc_avr_t *c) 1047 { 1048 uint32_t prod[8]; 1049 int i; 1050 int sat = 0; 1051 1052 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1053 prod[i] = a->u16[i] * b->u16[i]; 1054 } 1055 1056 VECTOR_FOR_INORDER_I(i, s32) { 1057 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1058 1059 r->u32[i] = cvtuduw(t, &sat); 1060 } 1061 1062 if (sat) { 1063 set_vscr_sat(env); 1064 } 1065 } 1066 1067 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 1068 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1069 { \ 1070 int i; \ 1071 \ 1072 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1073 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 1074 (cast)b->mul_access(i); \ 1075 } \ 1076 } 1077 1078 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 1079 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1080 { \ 1081 int i; \ 1082 \ 1083 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1084 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1085 (cast)b->mul_access(i + 1); \ 1086 } \ 1087 } 1088 1089 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1090 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \ 1091 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast) 1092 VMUL(sb, s8, VsrSB, VsrSH, int16_t) 1093 VMUL(sh, s16, VsrSH, VsrSW, int32_t) 1094 VMUL(sw, s32, VsrSW, VsrSD, int64_t) 1095 VMUL(ub, u8, VsrB, VsrH, uint16_t) 1096 VMUL(uh, u16, VsrH, VsrW, uint32_t) 1097 VMUL(uw, u32, VsrW, VsrD, uint64_t) 1098 #undef VMUL_DO_EVN 1099 #undef VMUL_DO_ODD 1100 #undef VMUL 1101 1102 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1103 ppc_avr_t *c) 1104 { 1105 ppc_avr_t result; 1106 int i; 1107 1108 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1109 int s = c->VsrB(i) & 0x1f; 1110 int index = s & 0xf; 1111 1112 if (s & 0x10) { 1113 result.VsrB(i) = b->VsrB(index); 1114 } else { 1115 result.VsrB(i) = a->VsrB(index); 1116 } 1117 } 1118 *r = result; 1119 } 1120 1121 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1122 ppc_avr_t *c) 1123 { 1124 ppc_avr_t result; 1125 int i; 1126 1127 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1128 int s = c->VsrB(i) & 0x1f; 1129 int index = 15 - (s & 0xf); 1130 1131 if (s & 0x10) { 1132 result.VsrB(i) = a->VsrB(index); 1133 } else { 1134 result.VsrB(i) = b->VsrB(index); 1135 } 1136 } 1137 *r = result; 1138 } 1139 1140 #if defined(HOST_WORDS_BIGENDIAN) 1141 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1142 #define VBPERMD_INDEX(i) (i) 1143 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1144 #define EXTRACT_BIT(avr, i, index) 
(extract64((avr)->u64[i], index, 1)) 1145 #else 1146 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1147 #define VBPERMD_INDEX(i) (1 - i) 1148 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1149 #define EXTRACT_BIT(avr, i, index) \ 1150 (extract64((avr)->u64[1 - i], 63 - index, 1)) 1151 #endif 1152 1153 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1154 { 1155 int i, j; 1156 ppc_avr_t result = { .u64 = { 0, 0 } }; 1157 VECTOR_FOR_INORDER_I(i, u64) { 1158 for (j = 0; j < 8; j++) { 1159 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1160 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1161 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1162 } 1163 } 1164 } 1165 *r = result; 1166 } 1167 1168 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1169 { 1170 int i; 1171 uint64_t perm = 0; 1172 1173 VECTOR_FOR_INORDER_I(i, u8) { 1174 int index = VBPERMQ_INDEX(b, i); 1175 1176 if (index < 128) { 1177 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1178 if (a->u64[VBPERMQ_DW(index)] & mask) { 1179 perm |= (0x8000 >> i); 1180 } 1181 } 1182 } 1183 1184 r->VsrD(0) = perm; 1185 r->VsrD(1) = 0; 1186 } 1187 1188 #undef VBPERMQ_INDEX 1189 #undef VBPERMQ_DW 1190 1191 static const uint64_t VGBBD_MASKS[256] = { 1192 0x0000000000000000ull, /* 00 */ 1193 0x0000000000000080ull, /* 01 */ 1194 0x0000000000008000ull, /* 02 */ 1195 0x0000000000008080ull, /* 03 */ 1196 0x0000000000800000ull, /* 04 */ 1197 0x0000000000800080ull, /* 05 */ 1198 0x0000000000808000ull, /* 06 */ 1199 0x0000000000808080ull, /* 07 */ 1200 0x0000000080000000ull, /* 08 */ 1201 0x0000000080000080ull, /* 09 */ 1202 0x0000000080008000ull, /* 0A */ 1203 0x0000000080008080ull, /* 0B */ 1204 0x0000000080800000ull, /* 0C */ 1205 0x0000000080800080ull, /* 0D */ 1206 0x0000000080808000ull, /* 0E */ 1207 0x0000000080808080ull, /* 0F */ 1208 0x0000008000000000ull, /* 10 */ 1209 0x0000008000000080ull, /* 11 */ 1210 0x0000008000008000ull, /* 12 */ 1211 0x0000008000008080ull, /* 13 */ 1212 0x0000008000800000ull, /* 14 */ 1213 0x0000008000800080ull, /* 15 */ 1214 0x0000008000808000ull, /* 16 */ 1215 0x0000008000808080ull, /* 17 */ 1216 0x0000008080000000ull, /* 18 */ 1217 0x0000008080000080ull, /* 19 */ 1218 0x0000008080008000ull, /* 1A */ 1219 0x0000008080008080ull, /* 1B */ 1220 0x0000008080800000ull, /* 1C */ 1221 0x0000008080800080ull, /* 1D */ 1222 0x0000008080808000ull, /* 1E */ 1223 0x0000008080808080ull, /* 1F */ 1224 0x0000800000000000ull, /* 20 */ 1225 0x0000800000000080ull, /* 21 */ 1226 0x0000800000008000ull, /* 22 */ 1227 0x0000800000008080ull, /* 23 */ 1228 0x0000800000800000ull, /* 24 */ 1229 0x0000800000800080ull, /* 25 */ 1230 0x0000800000808000ull, /* 26 */ 1231 0x0000800000808080ull, /* 27 */ 1232 0x0000800080000000ull, /* 28 */ 1233 0x0000800080000080ull, /* 29 */ 1234 0x0000800080008000ull, /* 2A */ 1235 0x0000800080008080ull, /* 2B */ 1236 0x0000800080800000ull, /* 2C */ 1237 0x0000800080800080ull, /* 2D */ 1238 0x0000800080808000ull, /* 2E */ 1239 0x0000800080808080ull, /* 2F */ 1240 0x0000808000000000ull, /* 30 */ 1241 0x0000808000000080ull, /* 31 */ 1242 0x0000808000008000ull, /* 32 */ 1243 0x0000808000008080ull, /* 33 */ 1244 0x0000808000800000ull, /* 34 */ 1245 0x0000808000800080ull, /* 35 */ 1246 0x0000808000808000ull, /* 36 */ 1247 0x0000808000808080ull, /* 37 */ 1248 0x0000808080000000ull, /* 38 */ 1249 0x0000808080000080ull, /* 39 */ 1250 0x0000808080008000ull, /* 3A */ 1251 0x0000808080008080ull, /* 3B */ 1252 0x0000808080800000ull, /* 3C */ 1253 0x0000808080800080ull, /* 3D */ 1254 
0x0000808080808000ull, /* 3E */ 1255 0x0000808080808080ull, /* 3F */ 1256 0x0080000000000000ull, /* 40 */ 1257 0x0080000000000080ull, /* 41 */ 1258 0x0080000000008000ull, /* 42 */ 1259 0x0080000000008080ull, /* 43 */ 1260 0x0080000000800000ull, /* 44 */ 1261 0x0080000000800080ull, /* 45 */ 1262 0x0080000000808000ull, /* 46 */ 1263 0x0080000000808080ull, /* 47 */ 1264 0x0080000080000000ull, /* 48 */ 1265 0x0080000080000080ull, /* 49 */ 1266 0x0080000080008000ull, /* 4A */ 1267 0x0080000080008080ull, /* 4B */ 1268 0x0080000080800000ull, /* 4C */ 1269 0x0080000080800080ull, /* 4D */ 1270 0x0080000080808000ull, /* 4E */ 1271 0x0080000080808080ull, /* 4F */ 1272 0x0080008000000000ull, /* 50 */ 1273 0x0080008000000080ull, /* 51 */ 1274 0x0080008000008000ull, /* 52 */ 1275 0x0080008000008080ull, /* 53 */ 1276 0x0080008000800000ull, /* 54 */ 1277 0x0080008000800080ull, /* 55 */ 1278 0x0080008000808000ull, /* 56 */ 1279 0x0080008000808080ull, /* 57 */ 1280 0x0080008080000000ull, /* 58 */ 1281 0x0080008080000080ull, /* 59 */ 1282 0x0080008080008000ull, /* 5A */ 1283 0x0080008080008080ull, /* 5B */ 1284 0x0080008080800000ull, /* 5C */ 1285 0x0080008080800080ull, /* 5D */ 1286 0x0080008080808000ull, /* 5E */ 1287 0x0080008080808080ull, /* 5F */ 1288 0x0080800000000000ull, /* 60 */ 1289 0x0080800000000080ull, /* 61 */ 1290 0x0080800000008000ull, /* 62 */ 1291 0x0080800000008080ull, /* 63 */ 1292 0x0080800000800000ull, /* 64 */ 1293 0x0080800000800080ull, /* 65 */ 1294 0x0080800000808000ull, /* 66 */ 1295 0x0080800000808080ull, /* 67 */ 1296 0x0080800080000000ull, /* 68 */ 1297 0x0080800080000080ull, /* 69 */ 1298 0x0080800080008000ull, /* 6A */ 1299 0x0080800080008080ull, /* 6B */ 1300 0x0080800080800000ull, /* 6C */ 1301 0x0080800080800080ull, /* 6D */ 1302 0x0080800080808000ull, /* 6E */ 1303 0x0080800080808080ull, /* 6F */ 1304 0x0080808000000000ull, /* 70 */ 1305 0x0080808000000080ull, /* 71 */ 1306 0x0080808000008000ull, /* 72 */ 1307 0x0080808000008080ull, /* 73 */ 1308 0x0080808000800000ull, /* 74 */ 1309 0x0080808000800080ull, /* 75 */ 1310 0x0080808000808000ull, /* 76 */ 1311 0x0080808000808080ull, /* 77 */ 1312 0x0080808080000000ull, /* 78 */ 1313 0x0080808080000080ull, /* 79 */ 1314 0x0080808080008000ull, /* 7A */ 1315 0x0080808080008080ull, /* 7B */ 1316 0x0080808080800000ull, /* 7C */ 1317 0x0080808080800080ull, /* 7D */ 1318 0x0080808080808000ull, /* 7E */ 1319 0x0080808080808080ull, /* 7F */ 1320 0x8000000000000000ull, /* 80 */ 1321 0x8000000000000080ull, /* 81 */ 1322 0x8000000000008000ull, /* 82 */ 1323 0x8000000000008080ull, /* 83 */ 1324 0x8000000000800000ull, /* 84 */ 1325 0x8000000000800080ull, /* 85 */ 1326 0x8000000000808000ull, /* 86 */ 1327 0x8000000000808080ull, /* 87 */ 1328 0x8000000080000000ull, /* 88 */ 1329 0x8000000080000080ull, /* 89 */ 1330 0x8000000080008000ull, /* 8A */ 1331 0x8000000080008080ull, /* 8B */ 1332 0x8000000080800000ull, /* 8C */ 1333 0x8000000080800080ull, /* 8D */ 1334 0x8000000080808000ull, /* 8E */ 1335 0x8000000080808080ull, /* 8F */ 1336 0x8000008000000000ull, /* 90 */ 1337 0x8000008000000080ull, /* 91 */ 1338 0x8000008000008000ull, /* 92 */ 1339 0x8000008000008080ull, /* 93 */ 1340 0x8000008000800000ull, /* 94 */ 1341 0x8000008000800080ull, /* 95 */ 1342 0x8000008000808000ull, /* 96 */ 1343 0x8000008000808080ull, /* 97 */ 1344 0x8000008080000000ull, /* 98 */ 1345 0x8000008080000080ull, /* 99 */ 1346 0x8000008080008000ull, /* 9A */ 1347 0x8000008080008080ull, /* 9B */ 1348 0x8000008080800000ull, /* 9C */ 1349 0x8000008080800080ull, /* 9D */ 1350 
0x8000008080808000ull, /* 9E */ 1351 0x8000008080808080ull, /* 9F */ 1352 0x8000800000000000ull, /* A0 */ 1353 0x8000800000000080ull, /* A1 */ 1354 0x8000800000008000ull, /* A2 */ 1355 0x8000800000008080ull, /* A3 */ 1356 0x8000800000800000ull, /* A4 */ 1357 0x8000800000800080ull, /* A5 */ 1358 0x8000800000808000ull, /* A6 */ 1359 0x8000800000808080ull, /* A7 */ 1360 0x8000800080000000ull, /* A8 */ 1361 0x8000800080000080ull, /* A9 */ 1362 0x8000800080008000ull, /* AA */ 1363 0x8000800080008080ull, /* AB */ 1364 0x8000800080800000ull, /* AC */ 1365 0x8000800080800080ull, /* AD */ 1366 0x8000800080808000ull, /* AE */ 1367 0x8000800080808080ull, /* AF */ 1368 0x8000808000000000ull, /* B0 */ 1369 0x8000808000000080ull, /* B1 */ 1370 0x8000808000008000ull, /* B2 */ 1371 0x8000808000008080ull, /* B3 */ 1372 0x8000808000800000ull, /* B4 */ 1373 0x8000808000800080ull, /* B5 */ 1374 0x8000808000808000ull, /* B6 */ 1375 0x8000808000808080ull, /* B7 */ 1376 0x8000808080000000ull, /* B8 */ 1377 0x8000808080000080ull, /* B9 */ 1378 0x8000808080008000ull, /* BA */ 1379 0x8000808080008080ull, /* BB */ 1380 0x8000808080800000ull, /* BC */ 1381 0x8000808080800080ull, /* BD */ 1382 0x8000808080808000ull, /* BE */ 1383 0x8000808080808080ull, /* BF */ 1384 0x8080000000000000ull, /* C0 */ 1385 0x8080000000000080ull, /* C1 */ 1386 0x8080000000008000ull, /* C2 */ 1387 0x8080000000008080ull, /* C3 */ 1388 0x8080000000800000ull, /* C4 */ 1389 0x8080000000800080ull, /* C5 */ 1390 0x8080000000808000ull, /* C6 */ 1391 0x8080000000808080ull, /* C7 */ 1392 0x8080000080000000ull, /* C8 */ 1393 0x8080000080000080ull, /* C9 */ 1394 0x8080000080008000ull, /* CA */ 1395 0x8080000080008080ull, /* CB */ 1396 0x8080000080800000ull, /* CC */ 1397 0x8080000080800080ull, /* CD */ 1398 0x8080000080808000ull, /* CE */ 1399 0x8080000080808080ull, /* CF */ 1400 0x8080008000000000ull, /* D0 */ 1401 0x8080008000000080ull, /* D1 */ 1402 0x8080008000008000ull, /* D2 */ 1403 0x8080008000008080ull, /* D3 */ 1404 0x8080008000800000ull, /* D4 */ 1405 0x8080008000800080ull, /* D5 */ 1406 0x8080008000808000ull, /* D6 */ 1407 0x8080008000808080ull, /* D7 */ 1408 0x8080008080000000ull, /* D8 */ 1409 0x8080008080000080ull, /* D9 */ 1410 0x8080008080008000ull, /* DA */ 1411 0x8080008080008080ull, /* DB */ 1412 0x8080008080800000ull, /* DC */ 1413 0x8080008080800080ull, /* DD */ 1414 0x8080008080808000ull, /* DE */ 1415 0x8080008080808080ull, /* DF */ 1416 0x8080800000000000ull, /* E0 */ 1417 0x8080800000000080ull, /* E1 */ 1418 0x8080800000008000ull, /* E2 */ 1419 0x8080800000008080ull, /* E3 */ 1420 0x8080800000800000ull, /* E4 */ 1421 0x8080800000800080ull, /* E5 */ 1422 0x8080800000808000ull, /* E6 */ 1423 0x8080800000808080ull, /* E7 */ 1424 0x8080800080000000ull, /* E8 */ 1425 0x8080800080000080ull, /* E9 */ 1426 0x8080800080008000ull, /* EA */ 1427 0x8080800080008080ull, /* EB */ 1428 0x8080800080800000ull, /* EC */ 1429 0x8080800080800080ull, /* ED */ 1430 0x8080800080808000ull, /* EE */ 1431 0x8080800080808080ull, /* EF */ 1432 0x8080808000000000ull, /* F0 */ 1433 0x8080808000000080ull, /* F1 */ 1434 0x8080808000008000ull, /* F2 */ 1435 0x8080808000008080ull, /* F3 */ 1436 0x8080808000800000ull, /* F4 */ 1437 0x8080808000800080ull, /* F5 */ 1438 0x8080808000808000ull, /* F6 */ 1439 0x8080808000808080ull, /* F7 */ 1440 0x8080808080000000ull, /* F8 */ 1441 0x8080808080000080ull, /* F9 */ 1442 0x8080808080008000ull, /* FA */ 1443 0x8080808080008080ull, /* FB */ 1444 0x8080808080800000ull, /* FC */ 1445 0x8080808080800080ull, /* FD */ 1446 
0x8080808080808000ull, /* FE */ 1447 0x8080808080808080ull, /* FF */ 1448 }; 1449 1450 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b) 1451 { 1452 int i; 1453 uint64_t t[2] = { 0, 0 }; 1454 1455 VECTOR_FOR_INORDER_I(i, u8) { 1456 #if defined(HOST_WORDS_BIGENDIAN) 1457 t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7); 1458 #else 1459 t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (7 - (i & 7)); 1460 #endif 1461 } 1462 1463 r->u64[0] = t[0]; 1464 r->u64[1] = t[1]; 1465 } 1466 1467 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1468 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1469 { \ 1470 int i, j; \ 1471 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ 1472 \ 1473 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1474 prod[i] = 0; \ 1475 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1476 if (a->srcfld[i] & (1ull << j)) { \ 1477 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1478 } \ 1479 } \ 1480 } \ 1481 \ 1482 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1483 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ 1484 } \ 1485 } 1486 1487 PMSUM(vpmsumb, u8, u16, uint16_t) 1488 PMSUM(vpmsumh, u16, u32, uint32_t) 1489 PMSUM(vpmsumw, u32, u64, uint64_t) 1490 1491 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1492 { 1493 1494 #ifdef CONFIG_INT128 1495 int i, j; 1496 __uint128_t prod[2]; 1497 1498 VECTOR_FOR_INORDER_I(i, u64) { 1499 prod[i] = 0; 1500 for (j = 0; j < 64; j++) { 1501 if (a->u64[i] & (1ull << j)) { 1502 prod[i] ^= (((__uint128_t)b->u64[i]) << j); 1503 } 1504 } 1505 } 1506 1507 r->u128 = prod[0] ^ prod[1]; 1508 1509 #else 1510 int i, j; 1511 ppc_avr_t prod[2]; 1512 1513 VECTOR_FOR_INORDER_I(i, u64) { 1514 prod[i].VsrD(1) = prod[i].VsrD(0) = 0; 1515 for (j = 0; j < 64; j++) { 1516 if (a->u64[i] & (1ull << j)) { 1517 ppc_avr_t bshift; 1518 if (j == 0) { 1519 bshift.VsrD(0) = 0; 1520 bshift.VsrD(1) = b->u64[i]; 1521 } else { 1522 bshift.VsrD(0) = b->u64[i] >> (64 - j); 1523 bshift.VsrD(1) = b->u64[i] << j; 1524 } 1525 prod[i].VsrD(1) ^= bshift.VsrD(1); 1526 prod[i].VsrD(0) ^= bshift.VsrD(0); 1527 } 1528 } 1529 } 1530 1531 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1); 1532 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0); 1533 #endif 1534 } 1535 1536 1537 #if defined(HOST_WORDS_BIGENDIAN) 1538 #define PKBIG 1 1539 #else 1540 #define PKBIG 0 1541 #endif 1542 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1543 { 1544 int i, j; 1545 ppc_avr_t result; 1546 #if defined(HOST_WORDS_BIGENDIAN) 1547 const ppc_avr_t *x[2] = { a, b }; 1548 #else 1549 const ppc_avr_t *x[2] = { b, a }; 1550 #endif 1551 1552 VECTOR_FOR_INORDER_I(i, u64) { 1553 VECTOR_FOR_INORDER_I(j, u32) { 1554 uint32_t e = x[i]->u32[j]; 1555 1556 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1557 ((e >> 6) & 0x3e0) | 1558 ((e >> 3) & 0x1f)); 1559 } 1560 } 1561 *r = result; 1562 } 1563 1564 #define VPK(suffix, from, to, cvt, dosat) \ 1565 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1566 ppc_avr_t *a, ppc_avr_t *b) \ 1567 { \ 1568 int i; \ 1569 int sat = 0; \ 1570 ppc_avr_t result; \ 1571 ppc_avr_t *a0 = PKBIG ? a : b; \ 1572 ppc_avr_t *a1 = PKBIG ? 
b : a; \ 1573 \ 1574 VECTOR_FOR_INORDER_I(i, from) { \ 1575 result.to[i] = cvt(a0->from[i], &sat); \ 1576 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1577 } \ 1578 *r = result; \ 1579 if (dosat && sat) { \ 1580 set_vscr_sat(env); \ 1581 } \ 1582 } 1583 #define I(x, y) (x) 1584 VPK(shss, s16, s8, cvtshsb, 1) 1585 VPK(shus, s16, u8, cvtshub, 1) 1586 VPK(swss, s32, s16, cvtswsh, 1) 1587 VPK(swus, s32, u16, cvtswuh, 1) 1588 VPK(sdss, s64, s32, cvtsdsw, 1) 1589 VPK(sdus, s64, u32, cvtsduw, 1) 1590 VPK(uhus, u16, u8, cvtuhub, 1) 1591 VPK(uwus, u32, u16, cvtuwuh, 1) 1592 VPK(udus, u64, u32, cvtuduw, 1) 1593 VPK(uhum, u16, u8, I, 0) 1594 VPK(uwum, u32, u16, I, 0) 1595 VPK(udum, u64, u32, I, 0) 1596 #undef I 1597 #undef VPK 1598 #undef PKBIG 1599 1600 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1601 { 1602 int i; 1603 1604 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1605 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1606 } 1607 } 1608 1609 #define VRFI(suffix, rounding) \ 1610 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1611 ppc_avr_t *b) \ 1612 { \ 1613 int i; \ 1614 float_status s = env->vec_status; \ 1615 \ 1616 set_float_rounding_mode(rounding, &s); \ 1617 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1618 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1619 } \ 1620 } 1621 VRFI(n, float_round_nearest_even) 1622 VRFI(m, float_round_down) 1623 VRFI(p, float_round_up) 1624 VRFI(z, float_round_to_zero) 1625 #undef VRFI 1626 1627 #define VROTATE(suffix, element, mask) \ 1628 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1629 { \ 1630 int i; \ 1631 \ 1632 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1633 unsigned int shift = b->element[i] & mask; \ 1634 r->element[i] = (a->element[i] << shift) | \ 1635 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \ 1636 } \ 1637 } 1638 VROTATE(b, u8, 0x7) 1639 VROTATE(h, u16, 0xF) 1640 VROTATE(w, u32, 0x1F) 1641 VROTATE(d, u64, 0x3F) 1642 #undef VROTATE 1643 1644 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1645 { 1646 int i; 1647 1648 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1649 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1650 1651 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1652 } 1653 } 1654 1655 #define VRLMI(name, size, element, insert) \ 1656 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1657 { \ 1658 int i; \ 1659 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1660 uint##size##_t src1 = a->element[i]; \ 1661 uint##size##_t src2 = b->element[i]; \ 1662 uint##size##_t src3 = r->element[i]; \ 1663 uint##size##_t begin, end, shift, mask, rot_val; \ 1664 \ 1665 shift = extract##size(src2, 0, 6); \ 1666 end = extract##size(src2, 8, 6); \ 1667 begin = extract##size(src2, 16, 6); \ 1668 rot_val = rol##size(src1, shift); \ 1669 mask = mask_u##size(begin, end); \ 1670 if (insert) { \ 1671 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1672 } else { \ 1673 r->element[i] = (rot_val & mask); \ 1674 } \ 1675 } \ 1676 } 1677 1678 VRLMI(vrldmi, 64, u64, 1); 1679 VRLMI(vrlwmi, 32, u32, 1); 1680 VRLMI(vrldnm, 64, u64, 0); 1681 VRLMI(vrlwnm, 32, u32, 0); 1682 1683 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1684 ppc_avr_t *c) 1685 { 1686 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]); 1687 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]); 1688 } 1689 1690 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1691 { 
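    /* per-element 2**x estimate, delegated to the softfloat float32_exp2 */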
1692 int i; 1693 1694 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1695 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status); 1696 } 1697 } 1698 1699 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1700 { 1701 int i; 1702 1703 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1704 r->f32[i] = float32_log2(b->f32[i], &env->vec_status); 1705 } 1706 } 1707 1708 #if defined(HOST_WORDS_BIGENDIAN) 1709 #define VEXTU_X_DO(name, size, left) \ 1710 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1711 { \ 1712 int index; \ 1713 if (left) { \ 1714 index = (a & 0xf) * 8; \ 1715 } else { \ 1716 index = ((15 - (a & 0xf) + 1) * 8) - size; \ 1717 } \ 1718 return int128_getlo(int128_rshift(b->s128, index)) & \ 1719 MAKE_64BIT_MASK(0, size); \ 1720 } 1721 #else 1722 #define VEXTU_X_DO(name, size, left) \ 1723 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1724 { \ 1725 int index; \ 1726 if (left) { \ 1727 index = ((15 - (a & 0xf) + 1) * 8) - size; \ 1728 } else { \ 1729 index = (a & 0xf) * 8; \ 1730 } \ 1731 return int128_getlo(int128_rshift(b->s128, index)) & \ 1732 MAKE_64BIT_MASK(0, size); \ 1733 } 1734 #endif 1735 1736 VEXTU_X_DO(vextublx, 8, 1) 1737 VEXTU_X_DO(vextuhlx, 16, 1) 1738 VEXTU_X_DO(vextuwlx, 32, 1) 1739 VEXTU_X_DO(vextubrx, 8, 0) 1740 VEXTU_X_DO(vextuhrx, 16, 0) 1741 VEXTU_X_DO(vextuwrx, 32, 0) 1742 #undef VEXTU_X_DO 1743 1744 /* 1745 * The specification says that the results are undefined if all of the 1746 * shift counts are not identical. We check to make sure that they 1747 * are to conform to what real hardware appears to do. 1748 */ 1749 #define VSHIFT(suffix, leftp) \ 1750 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1751 { \ 1752 int shift = b->VsrB(15) & 0x7; \ 1753 int doit = 1; \ 1754 int i; \ 1755 \ 1756 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \ 1757 doit = doit && ((b->u8[i] & 0x7) == shift); \ 1758 } \ 1759 if (doit) { \ 1760 if (shift == 0) { \ 1761 *r = *a; \ 1762 } else if (leftp) { \ 1763 uint64_t carry = a->VsrD(1) >> (64 - shift); \ 1764 \ 1765 r->VsrD(0) = (a->VsrD(0) << shift) | carry; \ 1766 r->VsrD(1) = a->VsrD(1) << shift; \ 1767 } else { \ 1768 uint64_t carry = a->VsrD(0) << (64 - shift); \ 1769 \ 1770 r->VsrD(1) = (a->VsrD(1) >> shift) | carry; \ 1771 r->VsrD(0) = a->VsrD(0) >> shift; \ 1772 } \ 1773 } \ 1774 } 1775 VSHIFT(l, 1) 1776 VSHIFT(r, 0) 1777 #undef VSHIFT 1778 1779 #define VSL(suffix, element, mask) \ 1780 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1781 { \ 1782 int i; \ 1783 \ 1784 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1785 unsigned int shift = b->element[i] & mask; \ 1786 \ 1787 r->element[i] = a->element[i] << shift; \ 1788 } \ 1789 } 1790 VSL(b, u8, 0x7) 1791 VSL(h, u16, 0x0F) 1792 VSL(w, u32, 0x1F) 1793 VSL(d, u64, 0x3F) 1794 #undef VSL 1795 1796 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1797 { 1798 int i; 1799 unsigned int shift, bytes, size; 1800 1801 size = ARRAY_SIZE(r->u8); 1802 for (i = 0; i < size; i++) { 1803 shift = b->u8[i] & 0x7; /* extract shift value */ 1804 bytes = (a->u8[i] << 8) + /* extract adjacent bytes */ 1805 (((i + 1) < size) ? a->u8[i + 1] : 0); 1806 r->u8[i] = (bytes << shift) >> 8; /* shift and store result */ 1807 } 1808 } 1809 1810 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1811 { 1812 int i; 1813 unsigned int shift, bytes; 1814 1815 /* 1816 * Use reverse order, as destination and source register can be 1817 * same. 
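Each result byte needs only the source byte at the same index and the one just before it, so the backward walk never reads a byte that was already overwritten.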
Its being modified in place saving temporary, reverse 1818 * order will guarantee that computed result is not fed back. 1819 */ 1820 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1821 shift = b->u8[i] & 0x7; /* extract shift value */ 1822 bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i]; 1823 /* extract adjacent bytes */ 1824 r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */ 1825 } 1826 } 1827 1828 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1829 { 1830 int sh = shift & 0xf; 1831 int i; 1832 ppc_avr_t result; 1833 1834 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1835 int index = sh + i; 1836 if (index > 0xf) { 1837 result.VsrB(i) = b->VsrB(index - 0x10); 1838 } else { 1839 result.VsrB(i) = a->VsrB(index); 1840 } 1841 } 1842 *r = result; 1843 } 1844 1845 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1846 { 1847 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1848 1849 #if defined(HOST_WORDS_BIGENDIAN) 1850 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1851 memset(&r->u8[16 - sh], 0, sh); 1852 #else 1853 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1854 memset(&r->u8[0], 0, sh); 1855 #endif 1856 } 1857 1858 #if defined(HOST_WORDS_BIGENDIAN) 1859 #define VINSERT(suffix, element) \ 1860 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1861 { \ 1862 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \ 1863 sizeof(r->element[0])); \ 1864 } 1865 #else 1866 #define VINSERT(suffix, element) \ 1867 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1868 { \ 1869 uint32_t d = (16 - index) - sizeof(r->element[0]); \ 1870 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \ 1871 } 1872 #endif 1873 VINSERT(b, u8) 1874 VINSERT(h, u16) 1875 VINSERT(w, u32) 1876 VINSERT(d, u64) 1877 #undef VINSERT 1878 #if defined(HOST_WORDS_BIGENDIAN) 1879 #define VEXTRACT(suffix, element) \ 1880 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1881 { \ 1882 uint32_t es = sizeof(r->element[0]); \ 1883 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1884 memset(&r->u8[8], 0, 8); \ 1885 memset(&r->u8[0], 0, 8 - es); \ 1886 } 1887 #else 1888 #define VEXTRACT(suffix, element) \ 1889 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1890 { \ 1891 uint32_t es = sizeof(r->element[0]); \ 1892 uint32_t s = (16 - index) - es; \ 1893 memmove(&r->u8[8], &b->u8[s], es); \ 1894 memset(&r->u8[0], 0, 8); \ 1895 memset(&r->u8[8 + es], 0, 8 - es); \ 1896 } 1897 #endif 1898 VEXTRACT(ub, u8) 1899 VEXTRACT(uh, u16) 1900 VEXTRACT(uw, u32) 1901 VEXTRACT(d, u64) 1902 #undef VEXTRACT 1903 1904 void helper_xxextractuw(CPUPPCState *env, target_ulong xtn, 1905 target_ulong xbn, uint32_t index) 1906 { 1907 ppc_vsr_t xt, xb; 1908 size_t es = sizeof(uint32_t); 1909 uint32_t ext_index; 1910 int i; 1911 1912 getVSR(xbn, &xb, env); 1913 memset(&xt, 0, sizeof(xt)); 1914 1915 ext_index = index; 1916 for (i = 0; i < es; i++, ext_index++) { 1917 xt.VsrB(8 - es + i) = xb.VsrB(ext_index % 16); 1918 } 1919 1920 putVSR(xtn, &xt, env); 1921 } 1922 1923 void helper_xxinsertw(CPUPPCState *env, target_ulong xtn, 1924 target_ulong xbn, uint32_t index) 1925 { 1926 ppc_vsr_t xt, xb; 1927 size_t es = sizeof(uint32_t); 1928 int ins_index, i = 0; 1929 1930 getVSR(xbn, &xb, env); 1931 getVSR(xtn, &xt, env); 1932 1933 ins_index = index; 1934 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1935 xt.VsrB(ins_index) = xb.VsrB(8 - es + i); 1936 } 1937 1938 putVSR(xtn, &xt, env); 1939 } 1940 1941 #define VEXT_SIGNED(name, 
element, cast) \ 1942 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1943 { \ 1944 int i; \ 1945 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1946 r->element[i] = (cast)b->element[i]; \ 1947 } \ 1948 } 1949 VEXT_SIGNED(vextsb2w, s32, int8_t) 1950 VEXT_SIGNED(vextsb2d, s64, int8_t) 1951 VEXT_SIGNED(vextsh2w, s32, int16_t) 1952 VEXT_SIGNED(vextsh2d, s64, int16_t) 1953 VEXT_SIGNED(vextsw2d, s64, int32_t) 1954 #undef VEXT_SIGNED 1955 1956 #define VNEG(name, element) \ 1957 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1958 { \ 1959 int i; \ 1960 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1961 r->element[i] = -b->element[i]; \ 1962 } \ 1963 } 1964 VNEG(vnegw, s32) 1965 VNEG(vnegd, s64) 1966 #undef VNEG 1967 1968 #define VSR(suffix, element, mask) \ 1969 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1970 { \ 1971 int i; \ 1972 \ 1973 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1974 unsigned int shift = b->element[i] & mask; \ 1975 r->element[i] = a->element[i] >> shift; \ 1976 } \ 1977 } 1978 VSR(ab, s8, 0x7) 1979 VSR(ah, s16, 0xF) 1980 VSR(aw, s32, 0x1F) 1981 VSR(ad, s64, 0x3F) 1982 VSR(b, u8, 0x7) 1983 VSR(h, u16, 0xF) 1984 VSR(w, u32, 0x1F) 1985 VSR(d, u64, 0x3F) 1986 #undef VSR 1987 1988 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1989 { 1990 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1991 1992 #if defined(HOST_WORDS_BIGENDIAN) 1993 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1994 memset(&r->u8[0], 0, sh); 1995 #else 1996 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1997 memset(&r->u8[16 - sh], 0, sh); 1998 #endif 1999 } 2000 2001 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2002 { 2003 int i; 2004 2005 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2006 r->u32[i] = a->u32[i] >= b->u32[i]; 2007 } 2008 } 2009 2010 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2011 { 2012 int64_t t; 2013 int i, upper; 2014 ppc_avr_t result; 2015 int sat = 0; 2016 2017 upper = ARRAY_SIZE(r->s32) - 1; 2018 t = (int64_t)b->VsrSW(upper); 2019 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2020 t += a->VsrSW(i); 2021 result.VsrSW(i) = 0; 2022 } 2023 result.VsrSW(upper) = cvtsdsw(t, &sat); 2024 *r = result; 2025 2026 if (sat) { 2027 set_vscr_sat(env); 2028 } 2029 } 2030 2031 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2032 { 2033 int i, j, upper; 2034 ppc_avr_t result; 2035 int sat = 0; 2036 2037 upper = 1; 2038 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 2039 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 2040 2041 result.VsrW(i) = 0; 2042 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 2043 t += a->VsrSW(2 * i + j); 2044 } 2045 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 2046 } 2047 2048 *r = result; 2049 if (sat) { 2050 set_vscr_sat(env); 2051 } 2052 } 2053 2054 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2055 { 2056 int i, j; 2057 int sat = 0; 2058 2059 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2060 int64_t t = (int64_t)b->s32[i]; 2061 2062 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 2063 t += a->s8[4 * i + j]; 2064 } 2065 r->s32[i] = cvtsdsw(t, &sat); 2066 } 2067 2068 if (sat) { 2069 set_vscr_sat(env); 2070 } 2071 } 2072 2073 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2074 { 2075 int sat = 0; 2076 int i; 2077 2078 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2079 int64_t t = (int64_t)b->s32[i]; 2080 2081 t += a->s16[2 * i] + a->s16[2 * i + 1]; 2082 r->s32[i] = cvtsdsw(t, &sat); 2083 } 2084 2085 if (sat) { 2086 
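        /* any element that saturated sets the sticky VSCR[SAT] bit */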
set_vscr_sat(env); 2087 } 2088 } 2089 2090 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2091 { 2092 int i, j; 2093 int sat = 0; 2094 2095 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2096 uint64_t t = (uint64_t)b->u32[i]; 2097 2098 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 2099 t += a->u8[4 * i + j]; 2100 } 2101 r->u32[i] = cvtuduw(t, &sat); 2102 } 2103 2104 if (sat) { 2105 set_vscr_sat(env); 2106 } 2107 } 2108 2109 #if defined(HOST_WORDS_BIGENDIAN) 2110 #define UPKHI 1 2111 #define UPKLO 0 2112 #else 2113 #define UPKHI 0 2114 #define UPKLO 1 2115 #endif 2116 #define VUPKPX(suffix, hi) \ 2117 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2118 { \ 2119 int i; \ 2120 ppc_avr_t result; \ 2121 \ 2122 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 2123 uint16_t e = b->u16[hi ? i : i + 4]; \ 2124 uint8_t a = (e >> 15) ? 0xff : 0; \ 2125 uint8_t r = (e >> 10) & 0x1f; \ 2126 uint8_t g = (e >> 5) & 0x1f; \ 2127 uint8_t b = e & 0x1f; \ 2128 \ 2129 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 2130 } \ 2131 *r = result; \ 2132 } 2133 VUPKPX(lpx, UPKLO) 2134 VUPKPX(hpx, UPKHI) 2135 #undef VUPKPX 2136 2137 #define VUPK(suffix, unpacked, packee, hi) \ 2138 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2139 { \ 2140 int i; \ 2141 ppc_avr_t result; \ 2142 \ 2143 if (hi) { \ 2144 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 2145 result.unpacked[i] = b->packee[i]; \ 2146 } \ 2147 } else { \ 2148 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 2149 i++) { \ 2150 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 2151 } \ 2152 } \ 2153 *r = result; \ 2154 } 2155 VUPK(hsb, s16, s8, UPKHI) 2156 VUPK(hsh, s32, s16, UPKHI) 2157 VUPK(hsw, s64, s32, UPKHI) 2158 VUPK(lsb, s16, s8, UPKLO) 2159 VUPK(lsh, s32, s16, UPKLO) 2160 VUPK(lsw, s64, s32, UPKLO) 2161 #undef VUPK 2162 #undef UPKHI 2163 #undef UPKLO 2164 2165 #define VGENERIC_DO(name, element) \ 2166 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 2167 { \ 2168 int i; \ 2169 \ 2170 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 2171 r->element[i] = name(b->element[i]); \ 2172 } \ 2173 } 2174 2175 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 2176 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 2177 #define clzw(v) clz32((v)) 2178 #define clzd(v) clz64((v)) 2179 2180 VGENERIC_DO(clzb, u8) 2181 VGENERIC_DO(clzh, u16) 2182 VGENERIC_DO(clzw, u32) 2183 VGENERIC_DO(clzd, u64) 2184 2185 #undef clzb 2186 #undef clzh 2187 #undef clzw 2188 #undef clzd 2189 2190 #define ctzb(v) ((v) ? ctz32(v) : 8) 2191 #define ctzh(v) ((v) ? 
ctz32(v) : 16) 2192 #define ctzw(v) ctz32((v)) 2193 #define ctzd(v) ctz64((v)) 2194 2195 VGENERIC_DO(ctzb, u8) 2196 VGENERIC_DO(ctzh, u16) 2197 VGENERIC_DO(ctzw, u32) 2198 VGENERIC_DO(ctzd, u64) 2199 2200 #undef ctzb 2201 #undef ctzh 2202 #undef ctzw 2203 #undef ctzd 2204 2205 #define popcntb(v) ctpop8(v) 2206 #define popcnth(v) ctpop16(v) 2207 #define popcntw(v) ctpop32(v) 2208 #define popcntd(v) ctpop64(v) 2209 2210 VGENERIC_DO(popcntb, u8) 2211 VGENERIC_DO(popcnth, u16) 2212 VGENERIC_DO(popcntw, u32) 2213 VGENERIC_DO(popcntd, u64) 2214 2215 #undef popcntb 2216 #undef popcnth 2217 #undef popcntw 2218 #undef popcntd 2219 2220 #undef VGENERIC_DO 2221 2222 #if defined(HOST_WORDS_BIGENDIAN) 2223 #define QW_ONE { .u64 = { 0, 1 } } 2224 #else 2225 #define QW_ONE { .u64 = { 1, 0 } } 2226 #endif 2227 2228 #ifndef CONFIG_INT128 2229 2230 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) 2231 { 2232 t->u64[0] = ~a.u64[0]; 2233 t->u64[1] = ~a.u64[1]; 2234 } 2235 2236 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) 2237 { 2238 if (a.VsrD(0) < b.VsrD(0)) { 2239 return -1; 2240 } else if (a.VsrD(0) > b.VsrD(0)) { 2241 return 1; 2242 } else if (a.VsrD(1) < b.VsrD(1)) { 2243 return -1; 2244 } else if (a.VsrD(1) > b.VsrD(1)) { 2245 return 1; 2246 } else { 2247 return 0; 2248 } 2249 } 2250 2251 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2252 { 2253 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 2254 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 2255 (~a.VsrD(1) < b.VsrD(1)); 2256 } 2257 2258 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2259 { 2260 ppc_avr_t not_a; 2261 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 2262 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 2263 (~a.VsrD(1) < b.VsrD(1)); 2264 avr_qw_not(&not_a, a); 2265 return avr_qw_cmpu(not_a, b) < 0; 2266 } 2267 2268 #endif 2269 2270 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2271 { 2272 #ifdef CONFIG_INT128 2273 r->u128 = a->u128 + b->u128; 2274 #else 2275 avr_qw_add(r, *a, *b); 2276 #endif 2277 } 2278 2279 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2280 { 2281 #ifdef CONFIG_INT128 2282 r->u128 = a->u128 + b->u128 + (c->u128 & 1); 2283 #else 2284 2285 if (c->VsrD(1) & 1) { 2286 ppc_avr_t tmp; 2287 2288 tmp.VsrD(0) = 0; 2289 tmp.VsrD(1) = c->VsrD(1) & 1; 2290 avr_qw_add(&tmp, *a, tmp); 2291 avr_qw_add(r, tmp, *b); 2292 } else { 2293 avr_qw_add(r, *a, *b); 2294 } 2295 #endif 2296 } 2297 2298 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2299 { 2300 #ifdef CONFIG_INT128 2301 r->u128 = (~a->u128 < b->u128); 2302 #else 2303 ppc_avr_t not_a; 2304 2305 avr_qw_not(&not_a, *a); 2306 2307 r->VsrD(0) = 0; 2308 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0); 2309 #endif 2310 } 2311 2312 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2313 { 2314 #ifdef CONFIG_INT128 2315 int carry_out = (~a->u128 < b->u128); 2316 if (!carry_out && (c->u128 & 1)) { 2317 carry_out = ((a->u128 + b->u128 + 1) == 0) && 2318 ((a->u128 != 0) || (b->u128 != 0)); 2319 } 2320 r->u128 = carry_out; 2321 #else 2322 2323 int carry_in = c->VsrD(1) & 1; 2324 int carry_out = 0; 2325 ppc_avr_t tmp; 2326 2327 carry_out = avr_qw_addc(&tmp, *a, *b); 2328 2329 if (!carry_out && carry_in) { 2330 ppc_avr_t one = QW_ONE; 2331 carry_out = avr_qw_addc(&tmp, tmp, one); 2332 } 2333 r->VsrD(0) = 0; 2334 r->VsrD(1) = carry_out; 2335 #endif 2336 } 2337 2338 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2339 { 2340 #ifdef CONFIG_INT128 2341 r->u128 = a->u128 - b->u128; 2342 #else 2343 ppc_avr_t
tmp; 2344 ppc_avr_t one = QW_ONE; 2345 2346 avr_qw_not(&tmp, *b); 2347 avr_qw_add(&tmp, *a, tmp); 2348 avr_qw_add(r, tmp, one); 2349 #endif 2350 } 2351 2352 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2353 { 2354 #ifdef CONFIG_INT128 2355 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); 2356 #else 2357 ppc_avr_t tmp, sum; 2358 2359 avr_qw_not(&tmp, *b); 2360 avr_qw_add(&sum, *a, tmp); 2361 2362 tmp.VsrD(0) = 0; 2363 tmp.VsrD(1) = c->VsrD(1) & 1; 2364 avr_qw_add(r, sum, tmp); 2365 #endif 2366 } 2367 2368 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2369 { 2370 #ifdef CONFIG_INT128 2371 r->u128 = (~a->u128 < ~b->u128) || 2372 (a->u128 + ~b->u128 == (__uint128_t)-1); 2373 #else 2374 int carry = (avr_qw_cmpu(*a, *b) > 0); 2375 if (!carry) { 2376 ppc_avr_t tmp; 2377 avr_qw_not(&tmp, *b); 2378 avr_qw_add(&tmp, *a, tmp); 2379 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull)); 2380 } 2381 r->VsrD(0) = 0; 2382 r->VsrD(1) = carry; 2383 #endif 2384 } 2385 2386 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2387 { 2388 #ifdef CONFIG_INT128 2389 r->u128 = 2390 (~a->u128 < ~b->u128) || 2391 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); 2392 #else 2393 int carry_in = c->VsrD(1) & 1; 2394 int carry_out = (avr_qw_cmpu(*a, *b) > 0); 2395 if (!carry_out && carry_in) { 2396 ppc_avr_t tmp; 2397 avr_qw_not(&tmp, *b); 2398 avr_qw_add(&tmp, *a, tmp); 2399 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull)); 2400 } 2401 2402 r->VsrD(0) = 0; 2403 r->VsrD(1) = carry_out; 2404 #endif 2405 } 2406 2407 #define BCD_PLUS_PREF_1 0xC 2408 #define BCD_PLUS_PREF_2 0xF 2409 #define BCD_PLUS_ALT_1 0xA 2410 #define BCD_NEG_PREF 0xD 2411 #define BCD_NEG_ALT 0xB 2412 #define BCD_PLUS_ALT_2 0xE 2413 #define NATIONAL_PLUS 0x2B 2414 #define NATIONAL_NEG 0x2D 2415 2416 #if defined(HOST_WORDS_BIGENDIAN) 2417 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2418 #else 2419 #define BCD_DIG_BYTE(n) ((n) / 2) 2420 #endif 2421 2422 static int bcd_get_sgn(ppc_avr_t *bcd) 2423 { 2424 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) { 2425 case BCD_PLUS_PREF_1: 2426 case BCD_PLUS_PREF_2: 2427 case BCD_PLUS_ALT_1: 2428 case BCD_PLUS_ALT_2: 2429 { 2430 return 1; 2431 } 2432 2433 case BCD_NEG_PREF: 2434 case BCD_NEG_ALT: 2435 { 2436 return -1; 2437 } 2438 2439 default: 2440 { 2441 return 0; 2442 } 2443 } 2444 } 2445 2446 static int bcd_preferred_sgn(int sgn, int ps) 2447 { 2448 if (sgn >= 0) { 2449 return (ps == 0) ? 
BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2450 } else { 2451 return BCD_NEG_PREF; 2452 } 2453 } 2454 2455 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2456 { 2457 uint8_t result; 2458 if (n & 1) { 2459 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4; 2460 } else { 2461 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF; 2462 } 2463 2464 if (unlikely(result > 9)) { 2465 *invalid = true; 2466 } 2467 return result; 2468 } 2469 2470 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2471 { 2472 if (n & 1) { 2473 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F; 2474 bcd->u8[BCD_DIG_BYTE(n)] |= (digit << 4); 2475 } else { 2476 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0; 2477 bcd->u8[BCD_DIG_BYTE(n)] |= digit; 2478 } 2479 } 2480 2481 static bool bcd_is_valid(ppc_avr_t *bcd) 2482 { 2483 int i; 2484 int invalid = 0; 2485 2486 if (bcd_get_sgn(bcd) == 0) { 2487 return false; 2488 } 2489 2490 for (i = 1; i < 32; i++) { 2491 bcd_get_digit(bcd, i, &invalid); 2492 if (unlikely(invalid)) { 2493 return false; 2494 } 2495 } 2496 return true; 2497 } 2498 2499 static int bcd_cmp_zero(ppc_avr_t *bcd) 2500 { 2501 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) { 2502 return CRF_EQ; 2503 } else { 2504 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT; 2505 } 2506 } 2507 2508 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2509 { 2510 return reg->VsrH(7 - n); 2511 } 2512 2513 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2514 { 2515 reg->VsrH(7 - n) = val; 2516 } 2517 2518 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2519 { 2520 int i; 2521 int invalid = 0; 2522 for (i = 31; i > 0; i--) { 2523 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2524 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2525 if (unlikely(invalid)) { 2526 return 0; /* doesn't matter */ 2527 } else if (dig_a > dig_b) { 2528 return 1; 2529 } else if (dig_a < dig_b) { 2530 return -1; 2531 } 2532 } 2533 2534 return 0; 2535 } 2536 2537 static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2538 int *overflow) 2539 { 2540 int carry = 0; 2541 int i; 2542 for (i = 1; i <= 31; i++) { 2543 uint8_t digit = bcd_get_digit(a, i, invalid) + 2544 bcd_get_digit(b, i, invalid) + carry; 2545 if (digit > 9) { 2546 carry = 1; 2547 digit -= 10; 2548 } else { 2549 carry = 0; 2550 } 2551 2552 bcd_put_digit(t, digit, i); 2553 } 2554 2555 *overflow = carry; 2556 } 2557 2558 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2559 int *overflow) 2560 { 2561 int carry = 0; 2562 int i; 2563 2564 for (i = 1; i <= 31; i++) { 2565 uint8_t digit = bcd_get_digit(a, i, invalid) - 2566 bcd_get_digit(b, i, invalid) + carry; 2567 if (digit & 0x80) { 2568 carry = -1; 2569 digit += 10; 2570 } else { 2571 carry = 0; 2572 } 2573 2574 bcd_put_digit(t, digit, i); 2575 } 2576 2577 *overflow = carry; 2578 } 2579 2580 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2581 { 2582 2583 int sgna = bcd_get_sgn(a); 2584 int sgnb = bcd_get_sgn(b); 2585 int invalid = (sgna == 0) || (sgnb == 0); 2586 int overflow = 0; 2587 uint32_t cr = 0; 2588 ppc_avr_t result = { .u64 = { 0, 0 } }; 2589 2590 if (!invalid) { 2591 if (sgna == sgnb) { 2592 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps); 2593 bcd_add_mag(&result, a, b, &invalid, &overflow); 2594 cr = bcd_cmp_zero(&result); 2595 } else { 2596 int magnitude = bcd_cmp_mag(a, b); 2597 if (magnitude > 0) { 2598 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps); 2599 bcd_sub_mag(&result, a, b, &invalid, &overflow); 2600 cr = (sgna > 0) ? 
CRF_GT : CRF_LT; 2601 } else if (magnitude < 0) { 2602 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps); 2603 bcd_sub_mag(&result, b, a, &invalid, &overflow); 2604 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2605 } else { 2606 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(0, ps); 2607 cr = CRF_EQ; 2608 } 2609 } 2610 } 2611 2612 if (unlikely(invalid)) { 2613 result.VsrD(0) = result.VsrD(1) = -1; 2614 cr = CRF_SO; 2615 } else if (overflow) { 2616 cr |= CRF_SO; 2617 } 2618 2619 *r = result; 2620 2621 return cr; 2622 } 2623 2624 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2625 { 2626 ppc_avr_t bcopy = *b; 2627 int sgnb = bcd_get_sgn(b); 2628 if (sgnb < 0) { 2629 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2630 } else if (sgnb > 0) { 2631 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2632 } 2633 /* else invalid ... defer to bcdadd code for proper handling */ 2634 2635 return helper_bcdadd(r, a, &bcopy, ps); 2636 } 2637 2638 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2639 { 2640 int i; 2641 int cr = 0; 2642 uint16_t national = 0; 2643 uint16_t sgnb = get_national_digit(b, 0); 2644 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2645 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2646 2647 for (i = 1; i < 8; i++) { 2648 national = get_national_digit(b, i); 2649 if (unlikely(national < 0x30 || national > 0x39)) { 2650 invalid = 1; 2651 break; 2652 } 2653 2654 bcd_put_digit(&ret, national & 0xf, i); 2655 } 2656 2657 if (sgnb == NATIONAL_PLUS) { 2658 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2659 } else { 2660 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2661 } 2662 2663 cr = bcd_cmp_zero(&ret); 2664 2665 if (unlikely(invalid)) { 2666 cr = CRF_SO; 2667 } 2668 2669 *r = ret; 2670 2671 return cr; 2672 } 2673 2674 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2675 { 2676 int i; 2677 int cr = 0; 2678 int sgnb = bcd_get_sgn(b); 2679 int invalid = (sgnb == 0); 2680 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2681 2682 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0); 2683 2684 for (i = 1; i < 8; i++) { 2685 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2686 2687 if (unlikely(invalid)) { 2688 break; 2689 } 2690 } 2691 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2692 2693 cr = bcd_cmp_zero(b); 2694 2695 if (ox_flag) { 2696 cr |= CRF_SO; 2697 } 2698 2699 if (unlikely(invalid)) { 2700 cr = CRF_SO; 2701 } 2702 2703 *r = ret; 2704 2705 return cr; 2706 } 2707 2708 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2709 { 2710 int i; 2711 int cr = 0; 2712 int invalid = 0; 2713 int zone_digit = 0; 2714 int zone_lead = ps ? 0xF : 0x3; 2715 int digit = 0; 2716 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2717 int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4; 2718 2719 if (unlikely((sgnb < 0xA) && ps)) { 2720 invalid = 1; 2721 } 2722 2723 for (i = 0; i < 16; i++) { 2724 zone_digit = i ? 
b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead; 2725 digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF; 2726 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2727 invalid = 1; 2728 break; 2729 } 2730 2731 bcd_put_digit(&ret, digit, i + 1); 2732 } 2733 2734 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2735 (!ps && (sgnb & 0x4))) { 2736 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2737 } else { 2738 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2739 } 2740 2741 cr = bcd_cmp_zero(&ret); 2742 2743 if (unlikely(invalid)) { 2744 cr = CRF_SO; 2745 } 2746 2747 *r = ret; 2748 2749 return cr; 2750 } 2751 2752 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2753 { 2754 int i; 2755 int cr = 0; 2756 uint8_t digit = 0; 2757 int sgnb = bcd_get_sgn(b); 2758 int zone_lead = (ps) ? 0xF0 : 0x30; 2759 int invalid = (sgnb == 0); 2760 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2761 2762 int ox_flag = ((b->VsrD(0) >> 4) != 0); 2763 2764 for (i = 0; i < 16; i++) { 2765 digit = bcd_get_digit(b, i + 1, &invalid); 2766 2767 if (unlikely(invalid)) { 2768 break; 2769 } 2770 2771 ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit; 2772 } 2773 2774 if (ps) { 2775 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2776 } else { 2777 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1); 2778 } 2779 2780 cr = bcd_cmp_zero(b); 2781 2782 if (ox_flag) { 2783 cr |= CRF_SO; 2784 } 2785 2786 if (unlikely(invalid)) { 2787 cr = CRF_SO; 2788 } 2789 2790 *r = ret; 2791 2792 return cr; 2793 } 2794 2795 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2796 { 2797 int i; 2798 int cr = 0; 2799 uint64_t lo_value; 2800 uint64_t hi_value; 2801 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2802 2803 if (b->VsrSD(0) < 0) { 2804 lo_value = -b->VsrSD(1); 2805 hi_value = ~b->VsrD(0) + !lo_value; 2806 bcd_put_digit(&ret, 0xD, 0); 2807 } else { 2808 lo_value = b->VsrD(1); 2809 hi_value = b->VsrD(0); 2810 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); 2811 } 2812 2813 if (divu128(&lo_value, &hi_value, 1000000000000000ULL) || 2814 lo_value > 9999999999999999ULL) { 2815 cr = CRF_SO; 2816 } 2817 2818 for (i = 1; i < 16; hi_value /= 10, i++) { 2819 bcd_put_digit(&ret, hi_value % 10, i); 2820 } 2821 2822 for (; i < 32; lo_value /= 10, i++) { 2823 bcd_put_digit(&ret, lo_value % 10, i); 2824 } 2825 2826 cr |= bcd_cmp_zero(&ret); 2827 2828 *r = ret; 2829 2830 return cr; 2831 } 2832 2833 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2834 { 2835 uint8_t i; 2836 int cr; 2837 uint64_t carry; 2838 uint64_t unused; 2839 uint64_t lo_value; 2840 uint64_t hi_value = 0; 2841 int sgnb = bcd_get_sgn(b); 2842 int invalid = (sgnb == 0); 2843 2844 lo_value = bcd_get_digit(b, 31, &invalid); 2845 for (i = 30; i > 0; i--) { 2846 mulu64(&lo_value, &carry, lo_value, 10ULL); 2847 mulu64(&hi_value, &unused, hi_value, 10ULL); 2848 lo_value += bcd_get_digit(b, i, &invalid); 2849 hi_value += carry; 2850 2851 if (unlikely(invalid)) { 2852 break; 2853 } 2854 } 2855 2856 if (sgnb == -1) { 2857 r->VsrSD(1) = -lo_value; 2858 r->VsrSD(0) = ~hi_value + !r->VsrSD(1); 2859 } else { 2860 r->VsrSD(1) = lo_value; 2861 r->VsrSD(0) = hi_value; 2862 } 2863 2864 cr = bcd_cmp_zero(b); 2865 2866 if (unlikely(invalid)) { 2867 cr = CRF_SO; 2868 } 2869 2870 return cr; 2871 } 2872 2873 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2874 { 2875 int i; 2876 int invalid = 0; 2877 2878 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 2879 return CRF_SO; 2880 } 2881 2882 *r = *a; 2883 bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0); 2884 2885 for (i = 1; i < 
32; i++) { 2886 bcd_get_digit(a, i, &invalid); 2887 bcd_get_digit(b, i, &invalid); 2888 if (unlikely(invalid)) { 2889 return CRF_SO; 2890 } 2891 } 2892 2893 return bcd_cmp_zero(r); 2894 } 2895 2896 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2897 { 2898 int sgnb = bcd_get_sgn(b); 2899 2900 *r = *b; 2901 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 2902 2903 if (bcd_is_valid(b) == false) { 2904 return CRF_SO; 2905 } 2906 2907 return bcd_cmp_zero(r); 2908 } 2909 2910 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2911 { 2912 int cr; 2913 #if defined(HOST_WORDS_BIGENDIAN) 2914 int i = a->s8[7]; 2915 #else 2916 int i = a->s8[8]; 2917 #endif 2918 bool ox_flag = false; 2919 int sgnb = bcd_get_sgn(b); 2920 ppc_avr_t ret = *b; 2921 ret.VsrD(1) &= ~0xf; 2922 2923 if (bcd_is_valid(b) == false) { 2924 return CRF_SO; 2925 } 2926 2927 if (unlikely(i > 31)) { 2928 i = 31; 2929 } else if (unlikely(i < -31)) { 2930 i = -31; 2931 } 2932 2933 if (i > 0) { 2934 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2935 } else { 2936 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2937 } 2938 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2939 2940 *r = ret; 2941 2942 cr = bcd_cmp_zero(r); 2943 if (ox_flag) { 2944 cr |= CRF_SO; 2945 } 2946 2947 return cr; 2948 } 2949 2950 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2951 { 2952 int cr; 2953 int i; 2954 int invalid = 0; 2955 bool ox_flag = false; 2956 ppc_avr_t ret = *b; 2957 2958 for (i = 0; i < 32; i++) { 2959 bcd_get_digit(b, i, &invalid); 2960 2961 if (unlikely(invalid)) { 2962 return CRF_SO; 2963 } 2964 } 2965 2966 #if defined(HOST_WORDS_BIGENDIAN) 2967 i = a->s8[7]; 2968 #else 2969 i = a->s8[8]; 2970 #endif 2971 if (i >= 32) { 2972 ox_flag = true; 2973 ret.VsrD(1) = ret.VsrD(0) = 0; 2974 } else if (i <= -32) { 2975 ret.VsrD(1) = ret.VsrD(0) = 0; 2976 } else if (i > 0) { 2977 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2978 } else { 2979 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2980 } 2981 *r = ret; 2982 2983 cr = bcd_cmp_zero(r); 2984 if (ox_flag) { 2985 cr |= CRF_SO; 2986 } 2987 2988 return cr; 2989 } 2990 2991 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2992 { 2993 int cr; 2994 int unused = 0; 2995 int invalid = 0; 2996 bool ox_flag = false; 2997 int sgnb = bcd_get_sgn(b); 2998 ppc_avr_t ret = *b; 2999 ret.VsrD(1) &= ~0xf; 3000 3001 #if defined(HOST_WORDS_BIGENDIAN) 3002 int i = a->s8[7]; 3003 ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } }; 3004 #else 3005 int i = a->s8[8]; 3006 ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } }; 3007 #endif 3008 3009 if (bcd_is_valid(b) == false) { 3010 return CRF_SO; 3011 } 3012 3013 if (unlikely(i > 31)) { 3014 i = 31; 3015 } else if (unlikely(i < -31)) { 3016 i = -31; 3017 } 3018 3019 if (i > 0) { 3020 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 3021 } else { 3022 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 3023 3024 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 3025 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 3026 } 3027 } 3028 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 3029 3030 cr = bcd_cmp_zero(&ret); 3031 if (ox_flag) { 3032 cr |= CRF_SO; 3033 } 3034 *r = ret; 3035 3036 return cr; 3037 } 3038 3039 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3040 { 3041 uint64_t mask; 3042 uint32_t ox_flag = 0; 3043 #if defined(HOST_WORDS_BIGENDIAN) 3044 int i = a->s16[3] + 1; 3045 #else 3046 int i = a->s16[4] + 1; 3047 #endif 3048 
ppc_avr_t ret = *b; 3049 3050 if (bcd_is_valid(b) == false) { 3051 return CRF_SO; 3052 } 3053 3054 if (i > 16 && i < 32) { 3055 mask = (uint64_t)-1 >> (128 - i * 4); 3056 if (ret.VsrD(0) & ~mask) { 3057 ox_flag = CRF_SO; 3058 } 3059 3060 ret.VsrD(0) &= mask; 3061 } else if (i >= 0 && i <= 16) { 3062 mask = (uint64_t)-1 >> (64 - i * 4); 3063 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 3064 ox_flag = CRF_SO; 3065 } 3066 3067 ret.VsrD(1) &= mask; 3068 ret.VsrD(0) = 0; 3069 } 3070 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 3071 *r = ret; 3072 3073 return bcd_cmp_zero(&ret) | ox_flag; 3074 } 3075 3076 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3077 { 3078 int i; 3079 uint64_t mask; 3080 uint32_t ox_flag = 0; 3081 int invalid = 0; 3082 ppc_avr_t ret = *b; 3083 3084 for (i = 0; i < 32; i++) { 3085 bcd_get_digit(b, i, &invalid); 3086 3087 if (unlikely(invalid)) { 3088 return CRF_SO; 3089 } 3090 } 3091 3092 #if defined(HOST_WORDS_BIGENDIAN) 3093 i = a->s16[3]; 3094 #else 3095 i = a->s16[4]; 3096 #endif 3097 if (i > 16 && i < 33) { 3098 mask = (uint64_t)-1 >> (128 - i * 4); 3099 if (ret.VsrD(0) & ~mask) { 3100 ox_flag = CRF_SO; 3101 } 3102 3103 ret.VsrD(0) &= mask; 3104 } else if (i > 0 && i <= 16) { 3105 mask = (uint64_t)-1 >> (64 - i * 4); 3106 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 3107 ox_flag = CRF_SO; 3108 } 3109 3110 ret.VsrD(1) &= mask; 3111 ret.VsrD(0) = 0; 3112 } else if (i == 0) { 3113 if (ret.VsrD(0) || ret.VsrD(1)) { 3114 ox_flag = CRF_SO; 3115 } 3116 ret.VsrD(0) = ret.VsrD(1) = 0; 3117 } 3118 3119 *r = ret; 3120 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) { 3121 return ox_flag | CRF_EQ; 3122 } 3123 3124 return ox_flag | CRF_GT; 3125 } 3126 3127 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 3128 { 3129 int i; 3130 VECTOR_FOR_INORDER_I(i, u8) { 3131 r->u8[i] = AES_sbox[a->u8[i]]; 3132 } 3133 } 3134 3135 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3136 { 3137 ppc_avr_t result; 3138 int i; 3139 3140 VECTOR_FOR_INORDER_I(i, u32) { 3141 result.VsrW(i) = b->VsrW(i) ^ 3142 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^ 3143 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^ 3144 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^ 3145 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]); 3146 } 3147 *r = result; 3148 } 3149 3150 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3151 { 3152 ppc_avr_t result; 3153 int i; 3154 3155 VECTOR_FOR_INORDER_I(i, u8) { 3156 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]); 3157 } 3158 *r = result; 3159 } 3160 3161 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3162 { 3163 /* This differs from what is written in ISA V2.07. The RTL is */ 3164 /* incorrect and will be fixed in V2.07B. 
*/ 3165 int i; 3166 ppc_avr_t tmp; 3167 3168 VECTOR_FOR_INORDER_I(i, u8) { 3169 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])]; 3170 } 3171 3172 VECTOR_FOR_INORDER_I(i, u32) { 3173 r->VsrW(i) = 3174 AES_imc[tmp.VsrB(4 * i + 0)][0] ^ 3175 AES_imc[tmp.VsrB(4 * i + 1)][1] ^ 3176 AES_imc[tmp.VsrB(4 * i + 2)][2] ^ 3177 AES_imc[tmp.VsrB(4 * i + 3)][3]; 3178 } 3179 } 3180 3181 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3182 { 3183 ppc_avr_t result; 3184 int i; 3185 3186 VECTOR_FOR_INORDER_I(i, u8) { 3187 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]); 3188 } 3189 *r = result; 3190 } 3191 3192 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3193 { 3194 int st = (st_six & 0x10) != 0; 3195 int six = st_six & 0xF; 3196 int i; 3197 3198 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 3199 if (st == 0) { 3200 if ((six & (0x8 >> i)) == 0) { 3201 r->VsrW(i) = ror32(a->VsrW(i), 7) ^ 3202 ror32(a->VsrW(i), 18) ^ 3203 (a->VsrW(i) >> 3); 3204 } else { /* six.bit[i] == 1 */ 3205 r->VsrW(i) = ror32(a->VsrW(i), 17) ^ 3206 ror32(a->VsrW(i), 19) ^ 3207 (a->VsrW(i) >> 10); 3208 } 3209 } else { /* st == 1 */ 3210 if ((six & (0x8 >> i)) == 0) { 3211 r->VsrW(i) = ror32(a->VsrW(i), 2) ^ 3212 ror32(a->VsrW(i), 13) ^ 3213 ror32(a->VsrW(i), 22); 3214 } else { /* six.bit[i] == 1 */ 3215 r->VsrW(i) = ror32(a->VsrW(i), 6) ^ 3216 ror32(a->VsrW(i), 11) ^ 3217 ror32(a->VsrW(i), 25); 3218 } 3219 } 3220 } 3221 } 3222 3223 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3224 { 3225 int st = (st_six & 0x10) != 0; 3226 int six = st_six & 0xF; 3227 int i; 3228 3229 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 3230 if (st == 0) { 3231 if ((six & (0x8 >> (2 * i))) == 0) { 3232 r->VsrD(i) = ror64(a->VsrD(i), 1) ^ 3233 ror64(a->VsrD(i), 8) ^ 3234 (a->VsrD(i) >> 7); 3235 } else { /* six.bit[2*i] == 1 */ 3236 r->VsrD(i) = ror64(a->VsrD(i), 19) ^ 3237 ror64(a->VsrD(i), 61) ^ 3238 (a->VsrD(i) >> 6); 3239 } 3240 } else { /* st == 1 */ 3241 if ((six & (0x8 >> (2 * i))) == 0) { 3242 r->VsrD(i) = ror64(a->VsrD(i), 28) ^ 3243 ror64(a->VsrD(i), 34) ^ 3244 ror64(a->VsrD(i), 39); 3245 } else { /* six.bit[2*i] == 1 */ 3246 r->VsrD(i) = ror64(a->VsrD(i), 14) ^ 3247 ror64(a->VsrD(i), 18) ^ 3248 ror64(a->VsrD(i), 41); 3249 } 3250 } 3251 } 3252 } 3253 3254 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 3255 { 3256 ppc_avr_t result; 3257 int i; 3258 3259 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 3260 int indexA = c->VsrB(i) >> 4; 3261 int indexB = c->VsrB(i) & 0xF; 3262 3263 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB); 3264 } 3265 *r = result; 3266 } 3267 3268 #undef VECTOR_FOR_INORDER_I 3269 3270 /*****************************************************************************/ 3271 /* SPE extension helpers */ 3272 /* Use a table to make this quicker */ 3273 static const uint8_t hbrev[16] = { 3274 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE, 3275 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF, 3276 }; 3277 3278 static inline uint8_t byte_reverse(uint8_t val) 3279 { 3280 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4); 3281 } 3282 3283 static inline uint32_t word_reverse(uint32_t val) 3284 { 3285 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) | 3286 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24); 3287 } 3288 3289 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */ 3290 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2) 3291 { 3292 uint32_t a, b, d, mask; 3293 3294 mask = 
UINT32_MAX >> (32 - MASKBITS); 3295 a = arg1 & mask; 3296 b = arg2 & mask; 3297 d = word_reverse(1 + word_reverse(a | ~b)); 3298 return (arg1 & ~mask) | (d & b); 3299 } 3300 3301 uint32_t helper_cntlsw32(uint32_t val) 3302 { 3303 if (val & 0x80000000) { 3304 return clz32(~val); 3305 } else { 3306 return clz32(val); 3307 } 3308 } 3309 3310 uint32_t helper_cntlzw32(uint32_t val) 3311 { 3312 return clz32(val); 3313 } 3314 3315 /* 440 specific */ 3316 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 3317 target_ulong low, uint32_t update_Rc) 3318 { 3319 target_ulong mask; 3320 int i; 3321 3322 i = 1; 3323 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3324 if ((high & mask) == 0) { 3325 if (update_Rc) { 3326 env->crf[0] = 0x4; 3327 } 3328 goto done; 3329 } 3330 i++; 3331 } 3332 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3333 if ((low & mask) == 0) { 3334 if (update_Rc) { 3335 env->crf[0] = 0x8; 3336 } 3337 goto done; 3338 } 3339 i++; 3340 } 3341 i = 8; 3342 if (update_Rc) { 3343 env->crf[0] = 0x2; 3344 } 3345 done: 3346 env->xer = (env->xer & ~0x7F) | i; 3347 if (update_Rc) { 3348 env->crf[0] |= xer_so; 3349 } 3350 return i; 3351 } 3352
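
/*
 * The quadword arithmetic helpers above (avr_qw_add(), avr_qw_addc(),
 * helper_vaddcuq() and friends) rely on a small identity: for unsigned
 * 64-bit x and y, the addition x + y carries out of bit 63 exactly when
 * ~x < y, i.e. when y exceeds the headroom left above x.  The sketch below
 * is a minimal illustration of that identity; the PPC_INT_HELPER_DEMO guard
 * and the demo function are hypothetical and not part of the QEMU build.
 */
#ifdef PPC_INT_HELPER_DEMO
#include <assert.h>
#include <stdint.h>

static void qw_carry_demo(void)
{
    uint64_t x = UINT64_MAX - 4; /* only 4 of headroom above x */
    uint64_t y;

    for (y = 0; y < 10; y++) {
        /* Reference: unsigned addition wrapped iff the truncated sum < x. */
        unsigned carry_ref = (uint64_t)(x + y) < x;
        /* Headroom test, as used by avr_qw_add() and helper_vaddcuq(). */
        unsigned carry_trick = ~x < y;

        assert(carry_ref == carry_trick);
    }
}
#endif /* PPC_INT_HELPER_DEMO */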
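
/*
 * A second sketch, under the same hypothetical PPC_INT_HELPER_DEMO guard as
 * above, showing the packed-decimal layout assumed by bcd_get_digit() and
 * bcd_put_digit(): digit 0 is the sign nibble, digits 1..31 hold the value
 * starting from the least significant digit, two digits per byte, with
 * BCD_DIG_BYTE() hiding the host byte order.  It encodes +123 and reads it
 * back through the same accessors the bcdadd/bcdsub helpers use.
 */
#ifdef PPC_INT_HELPER_DEMO
static void bcd_layout_demo(void)
{
    ppc_avr_t v = { .u64 = { 0, 0 } };
    int invalid = 0;

    bcd_put_digit(&v, BCD_PLUS_PREF_1, 0); /* preferred '+' sign nibble */
    bcd_put_digit(&v, 3, 1);               /* ones digit */
    bcd_put_digit(&v, 2, 2);               /* tens digit */
    bcd_put_digit(&v, 1, 3);               /* hundreds digit */

    assert(bcd_get_sgn(&v) == 1);
    assert(bcd_get_digit(&v, 1, &invalid) == 3);
    assert(bcd_get_digit(&v, 2, &invalid) == 2);
    assert(bcd_get_digit(&v, 3, &invalid) == 1);
    assert(bcd_is_valid(&v) && !invalid);
}
#endif /* PPC_INT_HELPER_DEMO */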
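
/*
 * A last sketch, again under the hypothetical PPC_INT_HELPER_DEMO guard,
 * decoding the st_six immediate consumed by helper_vshasigmaw() and
 * helper_vshasigmad(): bit 4 (st) selects the "capital" Sigma functions,
 * and in the word form bit (3 - i) of the low nibble selects sigma1/Sigma1
 * instead of sigma0/Sigma0 for word i (the doubleword form looks at bits 3
 * and 1 only).
 */
#ifdef PPC_INT_HELPER_DEMO
static void vshasigma_control_demo(void)
{
    uint32_t st_six = (1 << 4) | 0x5; /* capital Sigma; words 1, 3 use Sigma1 */
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    assert(st == 1);
    for (i = 0; i < 4; i++) {
        int use_sigma1 = (six & (0x8 >> i)) != 0;

        assert(use_sigma1 == (i == 1 || i == 3));
    }
}
#endif /* PPC_INT_HELPER_DEMO */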