/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

/* Update legacy XER overflow state: OV is set/cleared, SO is sticky. */
static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

/*
 * divweu: divide word extended unsigned.  Computes (RA << 32) / RB.
 * The result is undefined (returned as 0 here) when the divisor is zero
 * or the quotient does not fit in 32 bits; with 'oe' set, OV/SO are
 * updated from the overflow condition.
 */
target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

/*
 * divwe: divide word extended signed.  Computes (RA << 32) / RB with
 * signed operands.  Overflow (divide by zero, INT64_MIN / -1, or a
 * quotient that does not fit in 32 bits) yields 0 and, with 'oe' set,
 * raises OV/SO.
 */
target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        /* quotient must fit in 32 bits */
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

/* divdeu: divide doubleword extended unsigned; 128-bit dividend (RA:0). */
uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

/* divde: divide doubleword extended signed. */
uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xababababababababa */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/*
 * subtract 1 from each byte, and with inverse, check if MSB is set at each
 * byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n) (haszero((x) ^ pattern(n)))

/* cmpeqb: CRF_GT if any byte of RB matches the low byte of RA. */
uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

/*
 * Return a random number.
 */
uint64_t helper_darn32(void)
{
    Error *err = NULL;
    uint32_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        /* failure: return all-ones */
        return -1;
    }

    return ret;
}

uint64_t helper_darn64(void)
{
    Error *err = NULL;
    uint64_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        /* failure: return all-ones */
        return -1;
    }

    return ret;
}

/*
 * bpermd: bit permute doubleword.  Each of the 8 bytes of RS selects a
 * bit of RB (indices >= 64 select 0); the gathered bits form the low
 * byte of the result.
 */
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & PPC_BIT(index)) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

/* cmpb: byte-wise compare; result byte is 0xff where RS and RB agree. */
target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            /* CA/CA32 set iff a negative value had 1-bits shifted out */
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        /* shift amount >= 32: result is all copies of the sign bit */
        ret = (int32_t)value >> 31;
        env->ca32 = env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
/* 64-bit shift right arithmetic; same CA/CA32 rules as helper_sraw. */
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca32 = env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words.
     */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}
#endif

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */

/* div: 64-bit dividend formed as RA:MQ; quotient returned, remainder to MQ. */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        /* overflow/divide-by-zero: MQ cleared, INT32_MIN returned */
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

/* divo: as helper_div, additionally updating OV/SO on overflow. */
target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        /* overflow if the quotient does not fit in 32 bits */
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

/* divs: signed 32-bit divide; remainder goes to MQ. */
target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/* divso: as helper_divs, additionally updating OV/SO on overflow. */
target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/*
 * Extremely decomposed:
 *                      -arg / 256
 * return 256 * log10(10          + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.inc.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers. */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

/* Write VSCR; the SAT bit is tracked out-of-line in vscr_sat. */
void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
{
    env->vscr = vscr & ~(1u << VSCR_SAT);
    /* Which bit we set is completely arbitrary, but clear the rest. */
    env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT);
    env->vscr_sat.u64[1] = 0;
    set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
}

/* Read VSCR, folding the out-of-line SAT state back into the SAT bit. */
uint32_t helper_mfvscr(CPUPPCState *env)
{
    uint32_t sat = (env->vscr_sat.u64[0] | env->vscr_sat.u64[1]) != 0;
    return env->vscr | (sat << VSCR_SAT);
}

static inline void set_vscr_sat(CPUPPCState *env)
{
    /* The choice of non-zero value is arbitrary. */
    env->vscr_sat.u32[0] = 1;
}

/* vaddcuw: per-word carry-out of the unsigned add (0 or 1). */
void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        /* carry iff a + b would wrap, i.e. ~a < b */
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        /* XOR-fold the word down to its parity bit */
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->VsrD(1) = res & 1;
    r->VsrD(0) = 0;
}

#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
VARITH_DO(muluwm, *, u32)
#undef VARITH_DO
#undef VARITH

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status);   \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

/* Fused multiply-add variants; 'type' selects the softfloat negate flags. */
#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] =
float32_muladd(a->f32[i], c->f32[i], b->f32[i],         \
                                       type, &env->vec_status);         \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

/* Saturating add/sub; 'optype' is wide enough that op cannot overflow. */
#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat,              \
                        ppc_avr_t *a, ppc_avr_t *b, uint32_t desc)      \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            VARITHSAT_CASE(optype, op, cvt, element);                   \
        }                                                               \
        if (sat) {                                                      \
            vscr_sat->u32[0] = 1;                                       \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

/* Rounded average computed in a wider type so (a + b + 1) cannot overflow. */
#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG

#define VABSDU_DO(name, element)                                        \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = (a->element[i] > b->element[i]) ?           \
                (a->element[i] - b->element[i]) :                       \
                (b->element[i] - a->element[i]);                        \
        }                                                               \
    }

/*
 * VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element)                   \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU

/* vcfux/vcfsx: integer-to-float conversion, then scale by 2^-uim. */
#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f32[i] = float32_scalbn(t, -uim, &env->vec_status);      \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

/* Vector compare; with 'record', CR6 gets the all-true/all-false summary. */
#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP

/* Compare not-equal; with 'cmpzero', also true when either element is 0. */
#define VCMPNE_DO(suffix, element, etype, cmpzero, record)              \
    void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r,          \
                               ppc_avr_t *a, ppc_avr_t *b)              \
    {                                                                   \
        etype ones = (etype)-1;                                         \
        etype all = ones;                                               \
        etype result, none = 0;                                         \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (cmpzero) {                                              \
                result = ((a->element[i] == 0)                          \
                          || (b->element[i] == 0)                       \
                          || (a->element[i] != b->element[i]) ?         \
                          ones : 0x0);                                  \
            } else {                                                    \
                result = (a->element[i] != b->element[i]) ? ones : 0x0; \
            }                                                           \
            r->element[i] = result;                                     \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }

/*
 * VCMPNEZ - Vector compare not equal to zero
 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VCMPNE(suffix, element, etype, cmpzero)         \
    VCMPNE_DO(suffix, element, etype, cmpzero, 0)       \
    VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
VCMPNE(zb, u8, uint8_t, 1)
VCMPNE(zh, u16, uint16_t, 1)
VCMPNE(zw, u32, uint32_t, 1)
VCMPNE(b, u8, uint8_t, 0)
VCMPNE(h, u16, uint16_t, 0)
VCMPNE(w, u32, uint32_t, 0)
#undef VCMPNE_DO
#undef VCMPNE

/* Float compares: unordered (NaN) elements always compare false. */
#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f32[i], b->f32[i],       \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

/*
 * vcmpbfp: bounds compare.  Per element, bit 31 is set when a > b and
 * bit 30 when a < -b; with 'record', CR6 bit 1 reports all-in-bounds.
 */
static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int
all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        int le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
                                           &env->vec_status);
        if (le_rel == float_relation_unordered) {
            /* NaN: both bounds bits set, element is out of bounds */
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f32[i]);
            int ge_rel = float32_compare_quiet(a->f32[i], bneg,
                                               &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

/* vctuxs/vctsxs: scale by 2^uim, convert to integer with saturation. */
#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            if (float32_is_any_nan(b->f32[i])) {                        \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f32[i], &s);          \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            set_vscr_sat(env);                                          \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

/* Count leading bytes whose least-significant bit is clear. */
target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

/* Count trailing bytes whose least-significant bit is clear. */
target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

/* vmhaddshs: multiply-high halfwords, add, signed saturate. */
void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

/* vmhraddshs: as vmhaddshs, but rounding (+0x4000 before the shift). */
void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

/* vmladduhm: multiply-low halfwords, add, modulo arithmetic. */
void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

/* Merge high/low: interleave element pairs from a and b into the result. */
#define VMRG_DO(name, element, access, ofs)                             \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i, half = ARRAY_SIZE(r->element) / 2;                       \
                                                                        \
        for (i = 0; i < half; i++) {                                    \
            result.access(i * 2 + 0) = a->access(i + ofs);              \
            result.access(i * 2 + 1) = b->access(i + ofs);              \
        }                                                               \
        *r = result;                                                    \
    }

#define VMRG(suffix, element, access)                   \
    VMRG_DO(mrgl##suffix, element, access, half)        \
    VMRG_DO(mrgh##suffix, element, access, 0)
VMRG(b, u8, VsrB)
VMRG(h, u16, VsrH)
VMRG(w, u32, VsrW)
#undef VMRG_DO
#undef VMRG

/* vmsummbm: multiply signed-by-unsigned bytes, sum 4 products per word. */
void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

/* vmsumshm: multiply signed halfwords, sum 2 products per word, modulo. */
void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

/* vmsumshs: as vmsumshm, but with signed saturation of the word sums. */
void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

/* vmsumubm: multiply unsigned bytes, sum 4 products per word, modulo. */
void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

/* vmsumuhm: multiply unsigned halfwords, sum 2 products per word, modulo. */
void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

/* vmsumuhs: as vmsumuhm, but with unsigned saturation of the word sums. */
void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

/* Even/odd element multiplies producing double-width products. */
#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
                                     (cast)b->mul_access(i);            \
        }                                                               \
    }

#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) *       \
                                     (cast)b->mul_access(i + 1);        \
        }                                                               \
    }

#define VMUL(suffix, mul_element, mul_access, prod_access, cast)        \
    VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \
    VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
VMUL(sb, s8, VsrSB, VsrSH, int16_t)
VMUL(sh, s16, VsrSH, VsrSW, int32_t)
VMUL(sw, s32, VsrSW, VsrSD, int64_t)
VMUL(ub, u8, VsrB, VsrH, uint16_t)
VMUL(uh, u16, VsrH, VsrW, uint32_t)
VMUL(uw, u32, VsrW, VsrD, uint64_t)
#undef VMUL_DO_EVN
#undef VMUL_DO_ODD
#undef VMUL

/* vperm: select 16 bytes from the concatenation a:b, indexed by c. */
void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = s & 0xf;

        if (s & 0x10) {
            result.VsrB(i) = b->VsrB(index);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_vpermr(CPUPPCState *env,
ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                   ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    /* vpermr: like vperm, but with the index selection reversed */
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = 15 - (s & 0xf);

        if (s & 0x10) {
            result.VsrB(i) = a->VsrB(index);
        } else {
            result.VsrB(i) = b->VsrB(index);
        }
    }
    *r = result;
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index)                      \
    (extract64((avr)->u64[1 - i], 63 - index, 1))
#endif

/* vbpermd: per-doubleword bit permute; indices >= 64 select 0. */
void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}

/* vbpermq: gather 16 bits of a, selected by the byte indices in b. */
void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63 - (index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->VsrD(0) = perm;
    r->VsrD(1) = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

/*
 * ROM table indexed by a source byte; each entry spreads the byte's bits
 * into the most-significant bit of each result byte.
 */
static const uint64_t VGBBD_MASKS[256] = {
    0x0000000000000000ull, /* 00 */
    0x0000000000000080ull, /* 01 */
    0x0000000000008000ull, /* 02 */
    0x0000000000008080ull, /* 03 */
0x0000000000800000ull, /* 04 */ 1196 0x0000000000800080ull, /* 05 */ 1197 0x0000000000808000ull, /* 06 */ 1198 0x0000000000808080ull, /* 07 */ 1199 0x0000000080000000ull, /* 08 */ 1200 0x0000000080000080ull, /* 09 */ 1201 0x0000000080008000ull, /* 0A */ 1202 0x0000000080008080ull, /* 0B */ 1203 0x0000000080800000ull, /* 0C */ 1204 0x0000000080800080ull, /* 0D */ 1205 0x0000000080808000ull, /* 0E */ 1206 0x0000000080808080ull, /* 0F */ 1207 0x0000008000000000ull, /* 10 */ 1208 0x0000008000000080ull, /* 11 */ 1209 0x0000008000008000ull, /* 12 */ 1210 0x0000008000008080ull, /* 13 */ 1211 0x0000008000800000ull, /* 14 */ 1212 0x0000008000800080ull, /* 15 */ 1213 0x0000008000808000ull, /* 16 */ 1214 0x0000008000808080ull, /* 17 */ 1215 0x0000008080000000ull, /* 18 */ 1216 0x0000008080000080ull, /* 19 */ 1217 0x0000008080008000ull, /* 1A */ 1218 0x0000008080008080ull, /* 1B */ 1219 0x0000008080800000ull, /* 1C */ 1220 0x0000008080800080ull, /* 1D */ 1221 0x0000008080808000ull, /* 1E */ 1222 0x0000008080808080ull, /* 1F */ 1223 0x0000800000000000ull, /* 20 */ 1224 0x0000800000000080ull, /* 21 */ 1225 0x0000800000008000ull, /* 22 */ 1226 0x0000800000008080ull, /* 23 */ 1227 0x0000800000800000ull, /* 24 */ 1228 0x0000800000800080ull, /* 25 */ 1229 0x0000800000808000ull, /* 26 */ 1230 0x0000800000808080ull, /* 27 */ 1231 0x0000800080000000ull, /* 28 */ 1232 0x0000800080000080ull, /* 29 */ 1233 0x0000800080008000ull, /* 2A */ 1234 0x0000800080008080ull, /* 2B */ 1235 0x0000800080800000ull, /* 2C */ 1236 0x0000800080800080ull, /* 2D */ 1237 0x0000800080808000ull, /* 2E */ 1238 0x0000800080808080ull, /* 2F */ 1239 0x0000808000000000ull, /* 30 */ 1240 0x0000808000000080ull, /* 31 */ 1241 0x0000808000008000ull, /* 32 */ 1242 0x0000808000008080ull, /* 33 */ 1243 0x0000808000800000ull, /* 34 */ 1244 0x0000808000800080ull, /* 35 */ 1245 0x0000808000808000ull, /* 36 */ 1246 0x0000808000808080ull, /* 37 */ 1247 0x0000808080000000ull, /* 38 */ 1248 0x0000808080000080ull, /* 39 */ 1249 
0x0000808080008000ull, /* 3A */ 1250 0x0000808080008080ull, /* 3B */ 1251 0x0000808080800000ull, /* 3C */ 1252 0x0000808080800080ull, /* 3D */ 1253 0x0000808080808000ull, /* 3E */ 1254 0x0000808080808080ull, /* 3F */ 1255 0x0080000000000000ull, /* 40 */ 1256 0x0080000000000080ull, /* 41 */ 1257 0x0080000000008000ull, /* 42 */ 1258 0x0080000000008080ull, /* 43 */ 1259 0x0080000000800000ull, /* 44 */ 1260 0x0080000000800080ull, /* 45 */ 1261 0x0080000000808000ull, /* 46 */ 1262 0x0080000000808080ull, /* 47 */ 1263 0x0080000080000000ull, /* 48 */ 1264 0x0080000080000080ull, /* 49 */ 1265 0x0080000080008000ull, /* 4A */ 1266 0x0080000080008080ull, /* 4B */ 1267 0x0080000080800000ull, /* 4C */ 1268 0x0080000080800080ull, /* 4D */ 1269 0x0080000080808000ull, /* 4E */ 1270 0x0080000080808080ull, /* 4F */ 1271 0x0080008000000000ull, /* 50 */ 1272 0x0080008000000080ull, /* 51 */ 1273 0x0080008000008000ull, /* 52 */ 1274 0x0080008000008080ull, /* 53 */ 1275 0x0080008000800000ull, /* 54 */ 1276 0x0080008000800080ull, /* 55 */ 1277 0x0080008000808000ull, /* 56 */ 1278 0x0080008000808080ull, /* 57 */ 1279 0x0080008080000000ull, /* 58 */ 1280 0x0080008080000080ull, /* 59 */ 1281 0x0080008080008000ull, /* 5A */ 1282 0x0080008080008080ull, /* 5B */ 1283 0x0080008080800000ull, /* 5C */ 1284 0x0080008080800080ull, /* 5D */ 1285 0x0080008080808000ull, /* 5E */ 1286 0x0080008080808080ull, /* 5F */ 1287 0x0080800000000000ull, /* 60 */ 1288 0x0080800000000080ull, /* 61 */ 1289 0x0080800000008000ull, /* 62 */ 1290 0x0080800000008080ull, /* 63 */ 1291 0x0080800000800000ull, /* 64 */ 1292 0x0080800000800080ull, /* 65 */ 1293 0x0080800000808000ull, /* 66 */ 1294 0x0080800000808080ull, /* 67 */ 1295 0x0080800080000000ull, /* 68 */ 1296 0x0080800080000080ull, /* 69 */ 1297 0x0080800080008000ull, /* 6A */ 1298 0x0080800080008080ull, /* 6B */ 1299 0x0080800080800000ull, /* 6C */ 1300 0x0080800080800080ull, /* 6D */ 1301 0x0080800080808000ull, /* 6E */ 1302 0x0080800080808080ull, /* 6F */ 1303 
0x0080808000000000ull, /* 70 */ 1304 0x0080808000000080ull, /* 71 */ 1305 0x0080808000008000ull, /* 72 */ 1306 0x0080808000008080ull, /* 73 */ 1307 0x0080808000800000ull, /* 74 */ 1308 0x0080808000800080ull, /* 75 */ 1309 0x0080808000808000ull, /* 76 */ 1310 0x0080808000808080ull, /* 77 */ 1311 0x0080808080000000ull, /* 78 */ 1312 0x0080808080000080ull, /* 79 */ 1313 0x0080808080008000ull, /* 7A */ 1314 0x0080808080008080ull, /* 7B */ 1315 0x0080808080800000ull, /* 7C */ 1316 0x0080808080800080ull, /* 7D */ 1317 0x0080808080808000ull, /* 7E */ 1318 0x0080808080808080ull, /* 7F */ 1319 0x8000000000000000ull, /* 80 */ 1320 0x8000000000000080ull, /* 81 */ 1321 0x8000000000008000ull, /* 82 */ 1322 0x8000000000008080ull, /* 83 */ 1323 0x8000000000800000ull, /* 84 */ 1324 0x8000000000800080ull, /* 85 */ 1325 0x8000000000808000ull, /* 86 */ 1326 0x8000000000808080ull, /* 87 */ 1327 0x8000000080000000ull, /* 88 */ 1328 0x8000000080000080ull, /* 89 */ 1329 0x8000000080008000ull, /* 8A */ 1330 0x8000000080008080ull, /* 8B */ 1331 0x8000000080800000ull, /* 8C */ 1332 0x8000000080800080ull, /* 8D */ 1333 0x8000000080808000ull, /* 8E */ 1334 0x8000000080808080ull, /* 8F */ 1335 0x8000008000000000ull, /* 90 */ 1336 0x8000008000000080ull, /* 91 */ 1337 0x8000008000008000ull, /* 92 */ 1338 0x8000008000008080ull, /* 93 */ 1339 0x8000008000800000ull, /* 94 */ 1340 0x8000008000800080ull, /* 95 */ 1341 0x8000008000808000ull, /* 96 */ 1342 0x8000008000808080ull, /* 97 */ 1343 0x8000008080000000ull, /* 98 */ 1344 0x8000008080000080ull, /* 99 */ 1345 0x8000008080008000ull, /* 9A */ 1346 0x8000008080008080ull, /* 9B */ 1347 0x8000008080800000ull, /* 9C */ 1348 0x8000008080800080ull, /* 9D */ 1349 0x8000008080808000ull, /* 9E */ 1350 0x8000008080808080ull, /* 9F */ 1351 0x8000800000000000ull, /* A0 */ 1352 0x8000800000000080ull, /* A1 */ 1353 0x8000800000008000ull, /* A2 */ 1354 0x8000800000008080ull, /* A3 */ 1355 0x8000800000800000ull, /* A4 */ 1356 0x8000800000800080ull, /* A5 */ 1357 
0x8000800000808000ull, /* A6 */ 1358 0x8000800000808080ull, /* A7 */ 1359 0x8000800080000000ull, /* A8 */ 1360 0x8000800080000080ull, /* A9 */ 1361 0x8000800080008000ull, /* AA */ 1362 0x8000800080008080ull, /* AB */ 1363 0x8000800080800000ull, /* AC */ 1364 0x8000800080800080ull, /* AD */ 1365 0x8000800080808000ull, /* AE */ 1366 0x8000800080808080ull, /* AF */ 1367 0x8000808000000000ull, /* B0 */ 1368 0x8000808000000080ull, /* B1 */ 1369 0x8000808000008000ull, /* B2 */ 1370 0x8000808000008080ull, /* B3 */ 1371 0x8000808000800000ull, /* B4 */ 1372 0x8000808000800080ull, /* B5 */ 1373 0x8000808000808000ull, /* B6 */ 1374 0x8000808000808080ull, /* B7 */ 1375 0x8000808080000000ull, /* B8 */ 1376 0x8000808080000080ull, /* B9 */ 1377 0x8000808080008000ull, /* BA */ 1378 0x8000808080008080ull, /* BB */ 1379 0x8000808080800000ull, /* BC */ 1380 0x8000808080800080ull, /* BD */ 1381 0x8000808080808000ull, /* BE */ 1382 0x8000808080808080ull, /* BF */ 1383 0x8080000000000000ull, /* C0 */ 1384 0x8080000000000080ull, /* C1 */ 1385 0x8080000000008000ull, /* C2 */ 1386 0x8080000000008080ull, /* C3 */ 1387 0x8080000000800000ull, /* C4 */ 1388 0x8080000000800080ull, /* C5 */ 1389 0x8080000000808000ull, /* C6 */ 1390 0x8080000000808080ull, /* C7 */ 1391 0x8080000080000000ull, /* C8 */ 1392 0x8080000080000080ull, /* C9 */ 1393 0x8080000080008000ull, /* CA */ 1394 0x8080000080008080ull, /* CB */ 1395 0x8080000080800000ull, /* CC */ 1396 0x8080000080800080ull, /* CD */ 1397 0x8080000080808000ull, /* CE */ 1398 0x8080000080808080ull, /* CF */ 1399 0x8080008000000000ull, /* D0 */ 1400 0x8080008000000080ull, /* D1 */ 1401 0x8080008000008000ull, /* D2 */ 1402 0x8080008000008080ull, /* D3 */ 1403 0x8080008000800000ull, /* D4 */ 1404 0x8080008000800080ull, /* D5 */ 1405 0x8080008000808000ull, /* D6 */ 1406 0x8080008000808080ull, /* D7 */ 1407 0x8080008080000000ull, /* D8 */ 1408 0x8080008080000080ull, /* D9 */ 1409 0x8080008080008000ull, /* DA */ 1410 0x8080008080008080ull, /* DB */ 1411 
    0x8080008080800000ull, /* DC */
    0x8080008080800080ull, /* DD */
    0x8080008080808000ull, /* DE */
    0x8080008080808080ull, /* DF */
    0x8080800000000000ull, /* E0 */
    0x8080800000000080ull, /* E1 */
    0x8080800000008000ull, /* E2 */
    0x8080800000008080ull, /* E3 */
    0x8080800000800000ull, /* E4 */
    0x8080800000800080ull, /* E5 */
    0x8080800000808000ull, /* E6 */
    0x8080800000808080ull, /* E7 */
    0x8080800080000000ull, /* E8 */
    0x8080800080000080ull, /* E9 */
    0x8080800080008000ull, /* EA */
    0x8080800080008080ull, /* EB */
    0x8080800080800000ull, /* EC */
    0x8080800080800080ull, /* ED */
    0x8080800080808000ull, /* EE */
    0x8080800080808080ull, /* EF */
    0x8080808000000000ull, /* F0 */
    0x8080808000000080ull, /* F1 */
    0x8080808000008000ull, /* F2 */
    0x8080808000008080ull, /* F3 */
    0x8080808000800000ull, /* F4 */
    0x8080808000800080ull, /* F5 */
    0x8080808000808000ull, /* F6 */
    0x8080808000808080ull, /* F7 */
    0x8080808080000000ull, /* F8 */
    0x8080808080000080ull, /* F9 */
    0x8080808080008000ull, /* FA */
    0x8080808080008080ull, /* FB */
    0x8080808080800000ull, /* FC */
    0x8080808080800080ull, /* FD */
    0x8080808080808000ull, /* FE */
    0x8080808080808080ull, /* FF */
};

/*
 * vgbbd: 8x8 bit-matrix transpose per doubleword.  Each source byte is
 * spread across the 8 result bytes via VGBBD_MASKS and shifted into the
 * bit column corresponding to its byte position.
 */
void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    uint64_t t[2] = { 0, 0 };

    VECTOR_FOR_INORDER_I(i, u8) {
#if defined(HOST_WORDS_BIGENDIAN)
        t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
#else
        t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (7 - (i & 7));
#endif
    }

    r->u64[0] = t[0];
    r->u64[1] = t[1];
}

/*
 * Polynomial (carry-less) multiply-sum: multiply each pair of source
 * elements over GF(2) and XOR adjacent products into the wider target
 * elements (vpmsumb/h/w).
 */
#define PMSUM(name, srcfld, trgfld, trgtyp)                             \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i, j;                                                           \
    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])];              \
                                                                        \
    VECTOR_FOR_INORDER_I(i, srcfld) {                                   \
        prod[i] = 0;                                                    \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {                \
            if (a->srcfld[i] & (1ull << j)) {                           \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);                 \
            }                                                           \
        }                                                               \
    }                                                                   \
                                                                        \
    VECTOR_FOR_INORDER_I(i, trgfld) {                                   \
        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1];                   \
    }                                                                   \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

/*
 * vpmsumd: carry-less multiply of the two doublewords, XOR of both
 * 128-bit products.  Uses __uint128_t when available, otherwise a
 * manual two-doubleword shift/XOR.
 */
void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{

#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                ppc_avr_t bshift;
                /* bshift = (__uint128_t)b->u64[i] << j, split in halves */
                if (j == 0) {
                    bshift.VsrD(0) = 0;
                    bshift.VsrD(1) = b->u64[i];
                } else {
                    bshift.VsrD(0) = b->u64[i] >> (64 - j);
                    bshift.VsrD(1) = b->u64[i] << j;
                }
                prod[i].VsrD(1) ^= bshift.VsrD(1);
                prod[i].VsrD(0) ^= bshift.VsrD(0);
            }
        }
    }

    r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
    r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
#endif
}


#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
/* vpkpx: pack 32-bit pixels down to 1:5:5:5 16-bit pixels. */
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
                                     ((e >> 6) & 0x3e0) |
1557 ((e >> 3) & 0x1f)); 1558 } 1559 } 1560 *r = result; 1561 } 1562 1563 #define VPK(suffix, from, to, cvt, dosat) \ 1564 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1565 ppc_avr_t *a, ppc_avr_t *b) \ 1566 { \ 1567 int i; \ 1568 int sat = 0; \ 1569 ppc_avr_t result; \ 1570 ppc_avr_t *a0 = PKBIG ? a : b; \ 1571 ppc_avr_t *a1 = PKBIG ? b : a; \ 1572 \ 1573 VECTOR_FOR_INORDER_I(i, from) { \ 1574 result.to[i] = cvt(a0->from[i], &sat); \ 1575 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1576 } \ 1577 *r = result; \ 1578 if (dosat && sat) { \ 1579 set_vscr_sat(env); \ 1580 } \ 1581 } 1582 #define I(x, y) (x) 1583 VPK(shss, s16, s8, cvtshsb, 1) 1584 VPK(shus, s16, u8, cvtshub, 1) 1585 VPK(swss, s32, s16, cvtswsh, 1) 1586 VPK(swus, s32, u16, cvtswuh, 1) 1587 VPK(sdss, s64, s32, cvtsdsw, 1) 1588 VPK(sdus, s64, u32, cvtsduw, 1) 1589 VPK(uhus, u16, u8, cvtuhub, 1) 1590 VPK(uwus, u32, u16, cvtuwuh, 1) 1591 VPK(udus, u64, u32, cvtuduw, 1) 1592 VPK(uhum, u16, u8, I, 0) 1593 VPK(uwum, u32, u16, I, 0) 1594 VPK(udum, u64, u32, I, 0) 1595 #undef I 1596 #undef VPK 1597 #undef PKBIG 1598 1599 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1600 { 1601 int i; 1602 1603 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1604 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1605 } 1606 } 1607 1608 #define VRFI(suffix, rounding) \ 1609 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1610 ppc_avr_t *b) \ 1611 { \ 1612 int i; \ 1613 float_status s = env->vec_status; \ 1614 \ 1615 set_float_rounding_mode(rounding, &s); \ 1616 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1617 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1618 } \ 1619 } 1620 VRFI(n, float_round_nearest_even) 1621 VRFI(m, float_round_down) 1622 VRFI(p, float_round_up) 1623 VRFI(z, float_round_to_zero) 1624 #undef VRFI 1625 1626 #define VROTATE(suffix, element, mask) \ 1627 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1628 { \ 1629 
int i; \ 1630 \ 1631 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1632 unsigned int shift = b->element[i] & mask; \ 1633 r->element[i] = (a->element[i] << shift) | \ 1634 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \ 1635 } \ 1636 } 1637 VROTATE(b, u8, 0x7) 1638 VROTATE(h, u16, 0xF) 1639 VROTATE(w, u32, 0x1F) 1640 VROTATE(d, u64, 0x3F) 1641 #undef VROTATE 1642 1643 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1644 { 1645 int i; 1646 1647 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1648 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1649 1650 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1651 } 1652 } 1653 1654 #define VRLMI(name, size, element, insert) \ 1655 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1656 { \ 1657 int i; \ 1658 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1659 uint##size##_t src1 = a->element[i]; \ 1660 uint##size##_t src2 = b->element[i]; \ 1661 uint##size##_t src3 = r->element[i]; \ 1662 uint##size##_t begin, end, shift, mask, rot_val; \ 1663 \ 1664 shift = extract##size(src2, 0, 6); \ 1665 end = extract##size(src2, 8, 6); \ 1666 begin = extract##size(src2, 16, 6); \ 1667 rot_val = rol##size(src1, shift); \ 1668 mask = mask_u##size(begin, end); \ 1669 if (insert) { \ 1670 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1671 } else { \ 1672 r->element[i] = (rot_val & mask); \ 1673 } \ 1674 } \ 1675 } 1676 1677 VRLMI(vrldmi, 64, u64, 1); 1678 VRLMI(vrlwmi, 32, u32, 1); 1679 VRLMI(vrldnm, 64, u64, 0); 1680 VRLMI(vrlwnm, 32, u32, 0); 1681 1682 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1683 ppc_avr_t *c) 1684 { 1685 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]); 1686 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]); 1687 } 1688 1689 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1690 { 1691 int i; 1692 1693 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1694 r->f32[i] = 
float32_exp2(b->f32[i], &env->vec_status);
    }
}

/* vlogefp: per-element log2(x) estimate. */
void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
    }
}

/*
 * vextu[bhw][lr]x: extract an unsigned element from the vector at the
 * byte offset in the low nibble of a, counted from the left (left=1)
 * or right (left=0) end of the register.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTU_X_DO(name, size, left)                            \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
{                                                               \
    int index;                                                  \
    if (left) {                                                 \
        index = (a & 0xf) * 8;                                  \
    } else {                                                    \
        index = ((15 - (a & 0xf) + 1) * 8) - size;              \
    }                                                           \
    return int128_getlo(int128_rshift(b->s128, index)) &        \
        MAKE_64BIT_MASK(0, size);                               \
}
#else
#define VEXTU_X_DO(name, size, left)                            \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
{                                                               \
    int index;                                                  \
    if (left) {                                                 \
        index = ((15 - (a & 0xf) + 1) * 8) - size;              \
    } else {                                                    \
        index = (a & 0xf) * 8;                                  \
    }                                                           \
    return int128_getlo(int128_rshift(b->s128, index)) &        \
        MAKE_64BIT_MASK(0, size);                               \
}
#endif

VEXTU_X_DO(vextublx, 8, 1)
VEXTU_X_DO(vextuhlx, 16, 1)
VEXTU_X_DO(vextuwlx, 32, 1)
VEXTU_X_DO(vextubrx, 8, 0)
VEXTU_X_DO(vextuhrx, 16, 0)
VEXTU_X_DO(vextuwrx, 32, 0)
#undef VEXTU_X_DO

/*
 * vslv: shift each byte left by its per-byte shift count, pulling in
 * bits from the next byte to the right.
 */
void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->VsrB(i) & 0x7;               /* extract shift value */
        bytes = (a->VsrB(i) << 8) +             /* extract adjacent bytes */
            (((i + 1) < size) ? a->VsrB(i + 1) : 0);
        r->VsrB(i) = (bytes << shift) >> 8;     /* shift and store result */
    }
}

/*
 * vsrv: shift each byte right by its per-byte shift count, pulling in
 * bits from the previous byte.
 */
void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /*
     * Use reverse order, as destination and source register can be
     * the same.  It is modified in place, saving a temporary; reverse
     * order guarantees that a computed result is not fed back.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->VsrB(i) & 0x7;               /* extract shift value */
        bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
                                                /* extract adjacent bytes */
        r->VsrB(i) = (bytes >> shift) & 0xFF;   /* shift and store result */
    }
}

/* vsldoi: byte-wise shift of the a:b concatenation left by sh bytes. */
void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.VsrB(i) = b->VsrB(index - 0x10);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

/* vslo: shift the whole vector left by the octet count in b's last byte. */
void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

/*
 * vinsert[bhwd]: insert one element, taken from a fixed position of b,
 * at the given byte offset of r.  The two variants account for host
 * byte order.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])],           \
                sizeof(r->element[0]));                                     \
    }
#else
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        uint32_t d = (16 - index) - sizeof(r->element[0]);                  \
        memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0]));               \
    }
#endif
VINSERT(b, u8)
VINSERT(h, u16)
VINSERT(w, u32)
VINSERT(d, u64)
#undef VINSERT

/*
 * vextract*: extract one element at the given byte offset of b into a
 * fixed position of r, zeroing the rest of the register.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
        memset(&r->u8[8], 0, 8);                                             \
        memset(&r->u8[0], 0, 8 - es);                                        \
    }
#else
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        uint32_t s = (16 - index) - es;                                      \
        memmove(&r->u8[8], &b->u8[s], es);                                   \
        memset(&r->u8[0], 0, 8);                                             \
        memset(&r->u8[8 + es], 0, 8 - es);                                   \
    }
#endif
VEXTRACT(ub, u8)
VEXTRACT(uh, u16)
VEXTRACT(uw, u32)
VEXTRACT(d, u64)
#undef VEXTRACT

/*
 * xxextractuw: copy the word at byte offset 'index' of xb (wrapping
 * modulo 16) into the word position of xt; the rest is zeroed.
 */
void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
                        ppc_vsr_t *xb, uint32_t index)
{
    ppc_vsr_t t = { };
    size_t es = sizeof(uint32_t);
    uint32_t ext_index;
    int i;

    ext_index = index;
    for (i = 0; i < es; i++, ext_index++) {
        t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
    }

    *xt = t;
}

/*
 * xxinsertw: insert the word from xb into xt at byte offset 'index',
 * stopping at the end of the register; other bytes are preserved.
 */
void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
                      ppc_vsr_t *xb, uint32_t index)
{
    ppc_vsr_t t = *xt;
    size_t es = sizeof(uint32_t);
    int ins_index, i = 0;

    ins_index = index;
    for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
        t.VsrB(ins_index) = xb->VsrB(8 - es + i);
    }

    *xt = t;
}

/* vexts*: sign-extend each element from the given narrower type. */
#define VEXT_SIGNED(name, element, cast)                            \
    void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                  \
    {                                                               \
        int i;                                                      \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {              \
            r->element[i] = (cast)b->element[i];                    \
        }                                                           \
    }
VEXT_SIGNED(vextsb2w, s32, int8_t)
VEXT_SIGNED(vextsb2d, s64, int8_t)
VEXT_SIGNED(vextsh2w, s32, int16_t)
VEXT_SIGNED(vextsh2d, s64, int16_t)
VEXT_SIGNED(vextsw2d, s64, int32_t)
#undef VEXT_SIGNED

/* vneg[wd]: per-element two's-complement negation. */
#define VNEG(name, element)                                         \
    void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                  \
    {                                                               \
        int i;                                                      \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {              \
            r->element[i] = -b->element[i];                         \
        }                                                           \
    }
VNEG(vnegw, s32)
VNEG(vnegd, s64)
#undef VNEG

/* vsro: shift the whole vector right by the octet count in b's last byte. */
void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

/* vsubcuw: per-word borrow-out of a - b (1 when no borrow occurs). */
void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = a->u32[i] >= b->u32[i];
    }
}

/*
 * vsumsws: saturating sum of all signed words of a plus the last word
 * of b, placed in the last word of r; other words are zeroed.
 */
void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

    upper = ARRAY_SIZE(r->s32) - 1;
    t = (int64_t)b->VsrSW(upper);
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->VsrSW(i);
        result.VsrSW(i) = 0;
    }
    result.VsrSW(upper) = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        set_vscr_sat(env);
    }
}

/*
 * vsum2sws: per doubleword, saturating sum of its two signed words of
 * a plus the odd word of b.
 */
void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

    upper = 1;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->VsrSW(upper + i * 2);

        result.VsrD(i) = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->VsrSW(2 * i + j);
        }
        result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        set_vscr_sat(env);
    }
}

/* vsum4sbs: per word, saturating sum of four signed bytes of a plus b. */
void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

/* vsum4shs: per word, saturating sum of two signed halfwords of a plus b. */
void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

/* vsum4ubs: per word, saturating sum of four unsigned bytes of a plus b. */
void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
/*
 * vupk[hl]px: unpack 1:5:5:5 16-bit pixels into 32-bit pixels with
 * the alpha bit replicated to a full byte.  NOTE: the locals a/r/b/g
 * intentionally shadow the function parameters within the loop.
 */
#define VUPKPX(suffix, hi)                                      \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)        \
    {                                                           \
        int i;                                                  \
        ppc_avr_t result;                                       \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {              \
            uint16_t e = b->u16[hi ? i : i + 4];                \
            uint8_t a = (e >> 15) ? 0xff : 0;                   \
            uint8_t r = (e >> 10) & 0x1f;                       \
            uint8_t g = (e >> 5) & 0x1f;                        \
            uint8_t b = e & 0x1f;                               \
                                                                \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
        }                                                       \
        *r = result;                                            \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

/*
 * vupk[hl]s[bhw]: sign-extend the high (hi=1) or low (hi=0) half of
 * the packed source into the wider destination elements.
 */
#define VUPK(suffix, unpacked, packee, hi)                      \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)        \
    {                                                           \
        int i;                                                  \
        ppc_avr_t result;                                       \
                                                                \
        if (hi) {                                               \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {     \
                result.unpacked[i] = b->packee[i];              \
            }                                                   \
        } else {                                                \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                         \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                   \
        }                                                       \
        *r = result;                                            \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(hsw, s64, s32, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
VUPK(lsw, s64, s32, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

/* Element-wise application of a scalar bit op (clz/ctz/popcnt below). */
#define VGENERIC_DO(name, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)             \
    {                                                           \
        int i;                                                  \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
            r->element[i] = name(b->element[i]);                \
        }                                                       \
    }

#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
#define clzw(v) clz32((v))
#define clzd(v) clz64((v))

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)
VGENERIC_DO(clzw, u32)
VGENERIC_DO(clzd, u64)

#undef clzb
#undef clzh
#undef clzw
#undef clzd

#define ctzb(v) ((v) ? ctz32(v) : 8)
#define ctzh(v) ((v) ? ctz32(v) : 16)
#define ctzw(v) ctz32((v))
#define ctzd(v) ctz64((v))

VGENERIC_DO(ctzb, u8)
VGENERIC_DO(ctzh, u16)
VGENERIC_DO(ctzw, u32)
VGENERIC_DO(ctzd, u64)

#undef ctzb
#undef ctzh
#undef ctzw
#undef ctzd

#define popcntb(v) ctpop8(v)
#define popcnth(v) ctpop16(v)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)

VGENERIC_DO(popcntb, u8)
VGENERIC_DO(popcnth, u16)
VGENERIC_DO(popcntw, u32)
VGENERIC_DO(popcntd, u64)

#undef popcntb
#undef popcnth
#undef popcntw
#undef popcntd

#undef VGENERIC_DO

/* 128-bit constant 1, laid out as two host-order doublewords. */
#if defined(HOST_WORDS_BIGENDIAN)
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif

/*
 * 128-bit arithmetic fallbacks for hosts without __int128 support,
 * operating on the two 64-bit halves.
 */
#ifndef CONFIG_INT128

static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
{
    t->u64[0] = ~a.u64[0];
    t->u64[1] = ~a.u64[1];
}

/* Unsigned 128-bit compare: -1, 0 or 1. */
static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
{
    if (a.VsrD(0) < b.VsrD(0)) {
        return -1;
    } else if (a.VsrD(0) > b.VsrD(0)) {
        return 1;
    } else if (a.VsrD(1) < b.VsrD(1)) {
        return -1;
    } else if (a.VsrD(1) > b.VsrD(1)) {
        return 1;
    } else {
        return 0;
    }
}

/* 128-bit add; carry between halves via ~lo(a) < lo(b). */
static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
                 (~a.VsrD(1) < b.VsrD(1));
}

/* 128-bit add returning the carry out (1 when a + b wraps). */
static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    ppc_avr_t not_a;
    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
                 (~a.VsrD(1) < b.VsrD(1));
    avr_qw_not(&not_a, a);
    return avr_qw_cmpu(not_a, b) < 0;
}

#endif

/* vadduqm: 128-bit modulo addition. */
void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128;
#else
    avr_qw_add(r, *a, *b);
#endif
}

/* vaddeuqm: 128-bit add extended — a + b + (carry-in bit 0 of c). */
void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else

    if (c->VsrD(1) & 1) {
        ppc_avr_t tmp;

        tmp.VsrD(0) = 0;
        tmp.VsrD(1) = c->VsrD(1) & 1;
        avr_qw_add(&tmp, *a, tmp);
        avr_qw_add(r, tmp, *b);
    } else {
        avr_qw_add(r, *a, *b);
    }
#endif
}

/* vaddcuq: carry out of the 128-bit unsigned add a + b. */
void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < b->u128);
#else
    ppc_avr_t not_a;

    avr_qw_not(&not_a, *a);

    r->VsrD(0) = 0;
    r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
#endif
}

/* vaddecuq: carry out of the 128-bit add a + b + carry-in. */
void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    int carry_out = (~a->u128 < b->u128);
    if (!carry_out && (c->u128 & 1)) {
        /* carry only from the +1 when a + b wrapped exactly to 0 */
        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
                    ((a->u128 != 0) || (b->u128 != 0));
    }
    r->u128 = carry_out;
#else

    int carry_in = c->VsrD(1) & 1;
    int carry_out = 0;
    ppc_avr_t tmp;

    carry_out = avr_qw_addc(&tmp, *a, *b);

    if (!carry_out && carry_in) {
        ppc_avr_t one = QW_ONE;
        carry_out = avr_qw_addc(&tmp, tmp, one);
    }
    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
#endif
}

/* vsubuqm: 128-bit modulo subtraction (a + ~b + 1). */
void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 - b->u128;
#else
    ppc_avr_t tmp;
    ppc_avr_t one = QW_ONE;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&tmp, *a, tmp);
    avr_qw_add(r, tmp, one);
#endif
}

/* vsubeuqm: 128-bit subtract extended — a + ~b + carry-in. */
void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
#else
    ppc_avr_t tmp, sum;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&sum, *a, tmp);

    tmp.VsrD(0) = 0;
    tmp.VsrD(1) = c->VsrD(1) & 1;
    avr_qw_add(r, sum, tmp);
#endif
}

/* vsubcuq: carry out of the 128-bit subtraction a - b (a + ~b + 1). */
void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < ~b->u128) ||
              (a->u128 + ~b->u128 == (__uint128_t)-1);
#else
    int carry = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
    }
    r->VsrD(0) = 0;
    r->VsrD(1) = carry;
#endif
}

/* vsubecuq: carry out of the extended subtraction a + ~b + carry-in. */
void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 =
        (~a->u128 < ~b->u128) ||
        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
#else
    int carry_in = c->VsrD(1) & 1;
    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry_out && carry_in) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
    }

    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
#endif
}

/* BCD sign-nibble encodings (Power ISA bcd* instructions). */
#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

/* Map a BCD digit index (0 = sign nibble end) to its byte in the vector. */
#if defined(HOST_WORDS_BIGENDIAN)
#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
#else
#define BCD_DIG_BYTE(n) ((n) / 2)
#endif

/* Decode the sign nibble: 1 = positive, -1 = negative, 0 = invalid. */
static int bcd_get_sgn(ppc_avr_t *bcd)
{
    switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
    case BCD_PLUS_PREF_1:
    case BCD_PLUS_PREF_2:
    case BCD_PLUS_ALT_1:
    case BCD_PLUS_ALT_2:
    {
        return 1;
    }

    case BCD_NEG_PREF:
    case BCD_NEG_ALT:
    {
        return -1;
    }

    default:
    {
        return 0;
    }
    }
}

static int bcd_preferred_sgn(int
sgn, int ps)
{
    /* Preferred sign nibble for the given sign, honoring the PS field. */
    if (sgn >= 0) {
        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
    } else {
        return BCD_NEG_PREF;
    }
}

/* Read digit n (1..31); flags *invalid when the nibble is not 0..9. */
static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
{
    uint8_t result;
    if (n & 1) {
        result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
    } else {
        result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
    }

    if (unlikely(result > 9)) {
        *invalid = true;
    }
    return result;
}

/* Write digit n (1..31) into its nibble, preserving the other nibble. */
static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
{
    if (n & 1) {
        bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
        bcd->u8[BCD_DIG_BYTE(n)] |= (digit << 4);
    } else {
        bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
        bcd->u8[BCD_DIG_BYTE(n)] |= digit;
    }
}

/* True when the sign nibble and all 31 digits are valid BCD. */
static bool bcd_is_valid(ppc_avr_t *bcd)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(bcd) == 0) {
        return false;
    }

    for (i = 1; i < 32; i++) {
        bcd_get_digit(bcd, i, &invalid);
        if (unlikely(invalid)) {
            return false;
        }
    }
    return true;
}

/* CR field for comparing a signed BCD value against zero. */
static int bcd_cmp_zero(ppc_avr_t *bcd)
{
    /* all digit nibbles zero (the low nibble is the sign) */
    if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
        return CRF_EQ;
    } else {
        return (bcd_get_sgn(bcd) == 1) ?
CRF_GT : CRF_LT; 2426 } 2427 } 2428 2429 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2430 { 2431 return reg->VsrH(7 - n); 2432 } 2433 2434 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2435 { 2436 reg->VsrH(7 - n) = val; 2437 } 2438 2439 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2440 { 2441 int i; 2442 int invalid = 0; 2443 for (i = 31; i > 0; i--) { 2444 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2445 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2446 if (unlikely(invalid)) { 2447 return 0; /* doesn't matter */ 2448 } else if (dig_a > dig_b) { 2449 return 1; 2450 } else if (dig_a < dig_b) { 2451 return -1; 2452 } 2453 } 2454 2455 return 0; 2456 } 2457 2458 static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2459 int *overflow) 2460 { 2461 int carry = 0; 2462 int i; 2463 for (i = 1; i <= 31; i++) { 2464 uint8_t digit = bcd_get_digit(a, i, invalid) + 2465 bcd_get_digit(b, i, invalid) + carry; 2466 if (digit > 9) { 2467 carry = 1; 2468 digit -= 10; 2469 } else { 2470 carry = 0; 2471 } 2472 2473 bcd_put_digit(t, digit, i); 2474 } 2475 2476 *overflow = carry; 2477 } 2478 2479 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2480 int *overflow) 2481 { 2482 int carry = 0; 2483 int i; 2484 2485 for (i = 1; i <= 31; i++) { 2486 uint8_t digit = bcd_get_digit(a, i, invalid) - 2487 bcd_get_digit(b, i, invalid) + carry; 2488 if (digit & 0x80) { 2489 carry = -1; 2490 digit += 10; 2491 } else { 2492 carry = 0; 2493 } 2494 2495 bcd_put_digit(t, digit, i); 2496 } 2497 2498 *overflow = carry; 2499 } 2500 2501 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2502 { 2503 2504 int sgna = bcd_get_sgn(a); 2505 int sgnb = bcd_get_sgn(b); 2506 int invalid = (sgna == 0) || (sgnb == 0); 2507 int overflow = 0; 2508 uint32_t cr = 0; 2509 ppc_avr_t result = { .u64 = { 0, 0 } }; 2510 2511 if (!invalid) { 2512 if (sgna == sgnb) { 2513 result.u8[BCD_DIG_BYTE(0)] 
= bcd_preferred_sgn(sgna, ps);
            /* like signs: add magnitudes, sign follows the operands */
            bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = bcd_cmp_zero(&result);
        } else {
            /* unlike signs: subtract smaller magnitude from larger */
            int magnitude = bcd_cmp_mag(a, b);
            if (magnitude > 0) {
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
                bcd_sub_mag(&result, a, b, &invalid, &overflow);
                cr = (sgna > 0) ? CRF_GT : CRF_LT;
            } else if (magnitude < 0) {
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
                bcd_sub_mag(&result, b, a, &invalid, &overflow);
                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
            } else {
                /* equal magnitudes cancel: zero result, non-negative sign */
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(0, ps);
                cr = CRF_EQ;
            }
        }
    }

    if (unlikely(invalid)) {
        result.VsrD(0) = result.VsrD(1) = -1; /* Undefined */
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    }

    *r = result;

    return cr;
}

/*
 * bcdsub: implemented as bcdadd of a and -b by flipping b's sign nibble
 * on a copy first; an invalid sign in b is left as-is so bcdadd flags it.
 */
uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

/*
 * bcdcfn: convert from national format (sign halfword at position 0 plus
 * seven ASCII digit code units '0'..'9') to signed packed BCD.
 * CRF_SO on a bad sign or a non-digit code unit.
 */
uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        /* low nibble of the ASCII code is the digit value */
        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

/*
 * bcdctn: convert signed packed BCD to national format (7 digits).
 * ox (CRF_SO ORed in) when any digit above the 7 that fit is nonzero,
 * i.e. anything outside the low 32 bits (sign nibble + digits 1-7).
 */
uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

/*
 * bcdcfz: convert from zoned format (one byte per digit: zone nibble in
 * the high half, digit in the low half) to signed packed BCD.  The zone
 * must match zone_lead (0x3, or 0xF when ps -- presumably ASCII vs EBCDIC
 * zones); the sign comes from the zone of the low-order byte.
 */
uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        /* i == 0 is the sign byte: its zone is not checked against the lead */
        zone_digit = i ?
b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
        digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    /*
     * Sign decode: with ps, 0xB/0xD mean negative; without ps, bit 2 of
     * the sign zone distinguishes negative.
     */
    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
            (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

/*
 * bcdctz: convert signed packed BCD to zoned format (16 digits).
 * ox when any of digits 17-31 is nonzero (the (VsrD(0) >> 4) test drops
 * digit 16, which still fits).  The sign is encoded in the zone nibble of
 * the low-order byte (digit position 1).
 */
uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->VsrD(0) >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

/*
 * bcdcfsq: convert a signed 128-bit integer to packed BCD.
 * The magnitude is split by divu128 into value % 10^15 (low 15 digits)
 * and value / 10^15 (up to 16 more digits).  NOTE(review): this assumes
 * divu128 leaves the quotient in lo_value and the remainder in hi_value
 * (see host-utils) -- consistent with the digit loops below.  SO when the
 * quotient needs more than 16 digits or divu128 reports overflow.
 */
uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint64_t lo_value;
    uint64_t hi_value;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->VsrSD(0) < 0) {
        /* two's-complement negate of the 128-bit value, piecewise */
        lo_value = -b->VsrSD(1);
        hi_value = ~b->VsrD(0) + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);
    } else {
        lo_value = b->VsrD(1);
        hi_value = b->VsrD(0);
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
    }

    if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
            lo_value > 9999999999999999ULL) {
        cr = CRF_SO;
    }

    /* digits 1-15 from the remainder, digits 16-31 from the quotient */
    for (i = 1; i < 16; hi_value /= 10, i++) {
        bcd_put_digit(&ret, hi_value % 10, i);
    }

    for (; i < 32; lo_value /= 10, i++) {
        bcd_put_digit(&ret, lo_value % 10, i);
    }

    cr |= bcd_cmp_zero(&ret);

    *r = ret;

    return cr;
}

/*
 * bcdctsq: convert packed BCD to a signed 128-bit integer by Horner
 * evaluation from the most significant digit, with the 64x64 products
 * widened through mulu64.  31 digits (< 10^31 < 2^127) always fit, so no
 * arithmetic overflow is possible; CRF_SO only flags invalid digits.
 */
uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->VsrSD(1) = -lo_value;
        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
    } else {
        r->VsrSD(1) = lo_value;
        r->VsrSD(0) = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}
2794 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2795 { 2796 int i; 2797 int invalid = 0; 2798 2799 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 2800 return CRF_SO; 2801 } 2802 2803 *r = *a; 2804 bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0); 2805 2806 for (i = 1; i < 32; i++) { 2807 bcd_get_digit(a, i, &invalid); 2808 bcd_get_digit(b, i, &invalid); 2809 if (unlikely(invalid)) { 2810 return CRF_SO; 2811 } 2812 } 2813 2814 return bcd_cmp_zero(r); 2815 } 2816 2817 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2818 { 2819 int sgnb = bcd_get_sgn(b); 2820 2821 *r = *b; 2822 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 2823 2824 if (bcd_is_valid(b) == false) { 2825 return CRF_SO; 2826 } 2827 2828 return bcd_cmp_zero(r); 2829 } 2830 2831 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2832 { 2833 int cr; 2834 #if defined(HOST_WORDS_BIGENDIAN) 2835 int i = a->s8[7]; 2836 #else 2837 int i = a->s8[8]; 2838 #endif 2839 bool ox_flag = false; 2840 int sgnb = bcd_get_sgn(b); 2841 ppc_avr_t ret = *b; 2842 ret.VsrD(1) &= ~0xf; 2843 2844 if (bcd_is_valid(b) == false) { 2845 return CRF_SO; 2846 } 2847 2848 if (unlikely(i > 31)) { 2849 i = 31; 2850 } else if (unlikely(i < -31)) { 2851 i = -31; 2852 } 2853 2854 if (i > 0) { 2855 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2856 } else { 2857 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2858 } 2859 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2860 2861 *r = ret; 2862 2863 cr = bcd_cmp_zero(r); 2864 if (ox_flag) { 2865 cr |= CRF_SO; 2866 } 2867 2868 return cr; 2869 } 2870 2871 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2872 { 2873 int cr; 2874 int i; 2875 int invalid = 0; 2876 bool ox_flag = false; 2877 ppc_avr_t ret = *b; 2878 2879 for (i = 0; i < 32; i++) { 2880 bcd_get_digit(b, i, &invalid); 2881 2882 if (unlikely(invalid)) { 2883 return CRF_SO; 2884 } 2885 } 2886 2887 #if 
defined(HOST_WORDS_BIGENDIAN) 2888 i = a->s8[7]; 2889 #else 2890 i = a->s8[8]; 2891 #endif 2892 if (i >= 32) { 2893 ox_flag = true; 2894 ret.VsrD(1) = ret.VsrD(0) = 0; 2895 } else if (i <= -32) { 2896 ret.VsrD(1) = ret.VsrD(0) = 0; 2897 } else if (i > 0) { 2898 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2899 } else { 2900 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2901 } 2902 *r = ret; 2903 2904 cr = bcd_cmp_zero(r); 2905 if (ox_flag) { 2906 cr |= CRF_SO; 2907 } 2908 2909 return cr; 2910 } 2911 2912 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2913 { 2914 int cr; 2915 int unused = 0; 2916 int invalid = 0; 2917 bool ox_flag = false; 2918 int sgnb = bcd_get_sgn(b); 2919 ppc_avr_t ret = *b; 2920 ret.VsrD(1) &= ~0xf; 2921 2922 #if defined(HOST_WORDS_BIGENDIAN) 2923 int i = a->s8[7]; 2924 ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } }; 2925 #else 2926 int i = a->s8[8]; 2927 ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } }; 2928 #endif 2929 2930 if (bcd_is_valid(b) == false) { 2931 return CRF_SO; 2932 } 2933 2934 if (unlikely(i > 31)) { 2935 i = 31; 2936 } else if (unlikely(i < -31)) { 2937 i = -31; 2938 } 2939 2940 if (i > 0) { 2941 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2942 } else { 2943 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2944 2945 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 2946 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 2947 } 2948 } 2949 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2950 2951 cr = bcd_cmp_zero(&ret); 2952 if (ox_flag) { 2953 cr |= CRF_SO; 2954 } 2955 *r = ret; 2956 2957 return cr; 2958 } 2959 2960 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2961 { 2962 uint64_t mask; 2963 uint32_t ox_flag = 0; 2964 #if defined(HOST_WORDS_BIGENDIAN) 2965 int i = a->s16[3] + 1; 2966 #else 2967 int i = a->s16[4] + 1; 2968 #endif 2969 ppc_avr_t ret = *b; 2970 2971 if (bcd_is_valid(b) == false) { 2972 return CRF_SO; 2973 } 2974 2975 if (i > 16 && i < 32) { 
2976 mask = (uint64_t)-1 >> (128 - i * 4); 2977 if (ret.VsrD(0) & ~mask) { 2978 ox_flag = CRF_SO; 2979 } 2980 2981 ret.VsrD(0) &= mask; 2982 } else if (i >= 0 && i <= 16) { 2983 mask = (uint64_t)-1 >> (64 - i * 4); 2984 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 2985 ox_flag = CRF_SO; 2986 } 2987 2988 ret.VsrD(1) &= mask; 2989 ret.VsrD(0) = 0; 2990 } 2991 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 2992 *r = ret; 2993 2994 return bcd_cmp_zero(&ret) | ox_flag; 2995 } 2996 2997 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2998 { 2999 int i; 3000 uint64_t mask; 3001 uint32_t ox_flag = 0; 3002 int invalid = 0; 3003 ppc_avr_t ret = *b; 3004 3005 for (i = 0; i < 32; i++) { 3006 bcd_get_digit(b, i, &invalid); 3007 3008 if (unlikely(invalid)) { 3009 return CRF_SO; 3010 } 3011 } 3012 3013 #if defined(HOST_WORDS_BIGENDIAN) 3014 i = a->s16[3]; 3015 #else 3016 i = a->s16[4]; 3017 #endif 3018 if (i > 16 && i < 33) { 3019 mask = (uint64_t)-1 >> (128 - i * 4); 3020 if (ret.VsrD(0) & ~mask) { 3021 ox_flag = CRF_SO; 3022 } 3023 3024 ret.VsrD(0) &= mask; 3025 } else if (i > 0 && i <= 16) { 3026 mask = (uint64_t)-1 >> (64 - i * 4); 3027 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 3028 ox_flag = CRF_SO; 3029 } 3030 3031 ret.VsrD(1) &= mask; 3032 ret.VsrD(0) = 0; 3033 } else if (i == 0) { 3034 if (ret.VsrD(0) || ret.VsrD(1)) { 3035 ox_flag = CRF_SO; 3036 } 3037 ret.VsrD(0) = ret.VsrD(1) = 0; 3038 } 3039 3040 *r = ret; 3041 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) { 3042 return ox_flag | CRF_EQ; 3043 } 3044 3045 return ox_flag | CRF_GT; 3046 } 3047 3048 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 3049 { 3050 int i; 3051 VECTOR_FOR_INORDER_I(i, u8) { 3052 r->u8[i] = AES_sbox[a->u8[i]]; 3053 } 3054 } 3055 3056 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3057 { 3058 ppc_avr_t result; 3059 int i; 3060 3061 VECTOR_FOR_INORDER_I(i, u32) { 3062 result.VsrW(i) = b->VsrW(i) ^ 3063 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] 
^
             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}

/* vcipherlast: final AES encryption round (SubBytes + ShiftRows, no
 * MixColumns) XORed with round key b. */
void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
    }
    *r = result;
}

/* vncipher: one AES decryption round (InvShiftRows + InvSubBytes, key add,
 * then InvMixColumns via the imc tables). */
void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07. The RTL is */
    /* incorrect and will be fixed in V2.07B. */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->VsrW(i) =
            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
            AES_imc[tmp.VsrB(4 * i + 3)][3];
    }
}

/* vncipherlast: final AES decryption round (no InvMixColumns). */
void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
    }
    *r = result;
}

/*
 * vshasigmaw: per-word SHA-256 sigma functions (FIPS 180-4).
 * st selects lower-case sigma (st=0) vs upper-case Sigma (st=1); bit i of
 * 'six' selects sigma-0 vs sigma-1 for word i.  The rotation/shift
 * constants below are exactly the FIPS 180-4 definitions.
 */
void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                /* sigma0: ROTR7 ^ ROTR18 ^ SHR3 */
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                /* sigma1: ROTR17 ^ ROTR19 ^ SHR10 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                /* Sigma0: ROTR2 ^ ROTR13 ^ ROTR22 */
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                /* Sigma1: ROTR6 ^ ROTR11 ^ ROTR25 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}

/*
 * vshasigmad: per-doubleword SHA-512 sigma functions (FIPS 180-4); bit
 * 2*i of 'six' selects sigma-0 vs sigma-1 for doubleword i.
 */
void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                /* sigma0: ROTR1 ^ ROTR8 ^ SHR7 */
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                /* sigma1: ROTR19 ^ ROTR61 ^ SHR6 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                /* Sigma0: ROTR28 ^ ROTR34 ^ ROTR39 */
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                /* Sigma1: ROTR14 ^ ROTR18 ^ ROTR41 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}

/*
 * vpermxor: result byte i is a[high nibble of c[i]] XOR b[low nibble of
 * c[i]] -- a table-lookup permute with XOR combine.
 */
void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
/* hbrev[n] is the 4-bit value n with its bits reversed */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Reverse the bit order of a byte via two nibble lookups. */
static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

/* Reverse the bit order of a 32-bit word. */
static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
/*
 * brinc: bit-reversed increment of the low MASKBITS bits of arg1, masked
 * by arg2 -- the SPE helper for bit-reversed (FFT-style) addressing.
 */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    /* increment in bit-reversed order, then reverse back */
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

/* Count leading bits equal to the sign bit (leading ones for negative
 * values, leading zeros otherwise). */
uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

/* Count leading zeros of a 32-bit word. */
uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
/*
 * dlmzb (PPC 440): scan the 8 bytes of high:low from the most significant
 * byte for a zero byte.  Returns (and writes to the low 7 bits of XER)
 * the 1-based position of the first zero byte, or 8 if none.  With
 * update_Rc, CR0 is set to 0x4 (zero in high), 0x8 (zero in low) or 0x2
 * (no zero byte), ORed with the XER summary-overflow bit.
 */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}