/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/*
 * subtract 1 from each byte, AND with the inverse, then check if the MSB
 * is set in each byte.
 * i.e.
((0x00 - 0x01) & ~(0x00)) & 0x80 146 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 147 */ 148 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 149 150 /* When you XOR the pattern and there is a match, that byte will be zero */ 151 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 152 153 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 154 { 155 return hasvalue(rb, ra) ? CRF_GT : 0; 156 } 157 158 #undef pattern 159 #undef haszero 160 #undef hasvalue 161 162 /* 163 * Return a random number. 164 */ 165 uint64_t helper_darn32(void) 166 { 167 Error *err = NULL; 168 uint32_t ret; 169 170 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 171 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 172 error_get_pretty(err)); 173 error_free(err); 174 return -1; 175 } 176 177 return ret; 178 } 179 180 uint64_t helper_darn64(void) 181 { 182 Error *err = NULL; 183 uint64_t ret; 184 185 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 186 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 187 error_get_pretty(err)); 188 error_free(err); 189 return -1; 190 } 191 192 return ret; 193 } 194 195 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 196 { 197 int i; 198 uint64_t ra = 0; 199 200 for (i = 0; i < 8; i++) { 201 int index = (rs >> (i * 8)) & 0xFF; 202 if (index < 64) { 203 if (rb & PPC_BIT(index)) { 204 ra |= 1 << i; 205 } 206 } 207 } 208 return ra; 209 } 210 211 #endif 212 213 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 214 { 215 target_ulong mask = 0xff; 216 target_ulong ra = 0; 217 int i; 218 219 for (i = 0; i < sizeof(target_ulong); i++) { 220 if ((rs & mask) == (rb & mask)) { 221 ra |= mask; 222 } 223 mask <<= 8; 224 } 225 return ra; 226 } 227 228 /* shift right arithmetic helper */ 229 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 230 target_ulong shift) 231 { 232 int32_t ret; 233 234 if (likely(!(shift & 0x20))) { 235 if (likely((uint32_t)shift != 0)) { 236 shift &= 0x1f; 237 ret = (int32_t)value >> shift; 238 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 239 env->ca32 = env->ca = 0; 240 } else { 241 env->ca32 = env->ca = 1; 242 } 243 } else { 244 ret = (int32_t)value; 245 env->ca32 = env->ca = 0; 246 } 247 } else { 248 ret = (int32_t)value >> 31; 249 env->ca32 = env->ca = (ret != 0); 250 } 251 return (target_long)ret; 252 } 253 254 #if defined(TARGET_PPC64) 255 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 256 target_ulong shift) 257 { 258 int64_t ret; 259 260 if (likely(!(shift & 0x40))) { 261 if (likely((uint64_t)shift != 0)) { 262 shift &= 0x3f; 263 ret = (int64_t)value >> shift; 264 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 265 env->ca32 = env->ca = 0; 266 } else { 267 env->ca32 = env->ca = 1; 268 } 269 } else { 270 ret = (int64_t)value; 271 env->ca32 = env->ca = 0; 272 } 273 } else { 274 ret = (int64_t)value >> 63; 275 env->ca32 = env->ca = (ret != 0); 276 } 277 return ret; 278 } 279 #endif 280 281 #if defined(TARGET_PPC64) 282 target_ulong helper_popcntb(target_ulong val) 283 { 284 /* Note that we don't fold past bytes */ 285 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 286 0x5555555555555555ULL); 287 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 288 0x3333333333333333ULL); 289 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 290 0x0f0f0f0f0f0f0f0fULL); 291 return val; 292 } 293 294 target_ulong helper_popcntw(target_ulong val) 295 { 296 /* Note that we don't fold past words. 
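   Each word of the result thus ends up holding the population count of the
   corresponding word of the source.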
*/ 297 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 298 0x5555555555555555ULL); 299 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 300 0x3333333333333333ULL); 301 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 302 0x0f0f0f0f0f0f0f0fULL); 303 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 304 0x00ff00ff00ff00ffULL); 305 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 306 0x0000ffff0000ffffULL); 307 return val; 308 } 309 #else 310 target_ulong helper_popcntb(target_ulong val) 311 { 312 /* Note that we don't fold past bytes */ 313 val = (val & 0x55555555) + ((val >> 1) & 0x55555555); 314 val = (val & 0x33333333) + ((val >> 2) & 0x33333333); 315 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); 316 return val; 317 } 318 #endif 319 320 /*****************************************************************************/ 321 /* PowerPC 601 specific instructions (POWER bridge) */ 322 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2) 323 { 324 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 325 326 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 327 (int32_t)arg2 == 0) { 328 env->spr[SPR_MQ] = 0; 329 return INT32_MIN; 330 } else { 331 env->spr[SPR_MQ] = tmp % arg2; 332 return tmp / (int32_t)arg2; 333 } 334 } 335 336 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1, 337 target_ulong arg2) 338 { 339 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 340 341 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 342 (int32_t)arg2 == 0) { 343 env->so = env->ov = 1; 344 env->spr[SPR_MQ] = 0; 345 return INT32_MIN; 346 } else { 347 env->spr[SPR_MQ] = tmp % arg2; 348 tmp /= (int32_t)arg2; 349 if ((int32_t)tmp != tmp) { 350 env->so = env->ov = 1; 351 } else { 352 env->ov = 0; 353 } 354 return tmp; 355 } 356 } 357 358 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1, 359 target_ulong arg2) 360 { 361 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 362 (int32_t)arg2 == 0) { 363 env->spr[SPR_MQ] = 0; 364 return INT32_MIN; 365 } else { 366 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 367 return (int32_t)arg1 / (int32_t)arg2; 368 } 369 } 370 371 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1, 372 target_ulong arg2) 373 { 374 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 375 (int32_t)arg2 == 0) { 376 env->so = env->ov = 1; 377 env->spr[SPR_MQ] = 0; 378 return INT32_MIN; 379 } else { 380 env->ov = 0; 381 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 382 return (int32_t)arg1 / (int32_t)arg2; 383 } 384 } 385 386 /*****************************************************************************/ 387 /* 602 specific instructions */ 388 /* mfrom is the most crazy instruction ever seen, imho ! */ 389 /* Real implementation uses a ROM table. 
Do the same */
/*
 * Extremely decomposed:
 * return 256 * log10(pow(10, -arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.inc.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                            \
        to_type r;                                               \
                                                                 \
        if (x < (from_type)min) {                                \
            r = min;                                             \
            *sat = 1;                                            \
        } else if (x > (from_type)max) {                         \
            r = max;                                             \
            *sat = 1;                                            \
        } else {                                                 \
            r = x;                                               \
        }                                                        \
        return r;                                                \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)   \
    {                                                            \
        to_type r;                                               \
                                                                 \
        if (x > (from_type)max) {                                \
            r = max;                                             \
            *sat = 1;                                            \
        } else {                                                 \
            r = x;                                               \
        }                                                        \
        return r;                                                \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        r->VsrB(i) = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        r->VsrB(i) = j++;
    }
}

void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
{
    env->vscr = vscr & ~(1u << VSCR_SAT);
    /* Which bit we set is completely arbitrary, but clear the rest.  */
    env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT);
    env->vscr_sat.u64[1] = 0;
    set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
}

uint32_t helper_mfvscr(CPUPPCState *env)
{
    uint32_t sat = (env->vscr_sat.u64[0] | env->vscr_sat.u64[1]) != 0;
    return env->vscr | (sat << VSCR_SAT);
}

static inline void set_vscr_sat(CPUPPCState *env)
{
    /* The choice of non-zero value is arbitrary.
*/ 496 env->vscr_sat.u32[0] = 1; 497 } 498 499 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 500 { 501 int i; 502 503 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 504 r->u32[i] = ~a->u32[i] < b->u32[i]; 505 } 506 } 507 508 /* vprtybw */ 509 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b) 510 { 511 int i; 512 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 513 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16); 514 res ^= res >> 8; 515 r->u32[i] = res & 1; 516 } 517 } 518 519 /* vprtybd */ 520 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b) 521 { 522 int i; 523 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 524 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32); 525 res ^= res >> 16; 526 res ^= res >> 8; 527 r->u64[i] = res & 1; 528 } 529 } 530 531 /* vprtybq */ 532 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b) 533 { 534 uint64_t res = b->u64[0] ^ b->u64[1]; 535 res ^= res >> 32; 536 res ^= res >> 16; 537 res ^= res >> 8; 538 r->VsrD(1) = res & 1; 539 r->VsrD(0) = 0; 540 } 541 542 #define VARITH_DO(name, op, element) \ 543 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 544 { \ 545 int i; \ 546 \ 547 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 548 r->element[i] = a->element[i] op b->element[i]; \ 549 } \ 550 } 551 VARITH_DO(muluwm, *, u32) 552 #undef VARITH_DO 553 #undef VARITH 554 555 #define VARITHFP(suffix, func) \ 556 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 557 ppc_avr_t *b) \ 558 { \ 559 int i; \ 560 \ 561 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 562 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 563 } \ 564 } 565 VARITHFP(addfp, float32_add) 566 VARITHFP(subfp, float32_sub) 567 VARITHFP(minfp, float32_min) 568 VARITHFP(maxfp, float32_max) 569 #undef VARITHFP 570 571 #define VARITHFPFMA(suffix, type) \ 572 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 573 ppc_avr_t *b, ppc_avr_t *c) \ 574 { \ 575 int i; \ 576 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 577 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 578 type, &env->vec_status); \ 579 } \ 580 } 581 VARITHFPFMA(maddfp, 0); 582 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 583 #undef VARITHFPFMA 584 585 #define VARITHSAT_CASE(type, op, cvt, element) \ 586 { \ 587 type result = (type)a->element[i] op (type)b->element[i]; \ 588 r->element[i] = cvt(result, &sat); \ 589 } 590 591 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 592 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 593 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 594 { \ 595 int sat = 0; \ 596 int i; \ 597 \ 598 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 599 VARITHSAT_CASE(optype, op, cvt, element); \ 600 } \ 601 if (sat) { \ 602 vscr_sat->u32[0] = 1; \ 603 } \ 604 } 605 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 606 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 607 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 608 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 609 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 610 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 611 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 612 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 613 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 614 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 615 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 616 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 617 #undef VARITHSAT_CASE 618 #undef VARITHSAT_DO 619 #undef VARITHSAT_SIGNED 620 #undef VARITHSAT_UNSIGNED 621 622 
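/*
 * Vector average helpers: each result element is the rounded average of the
 * corresponding elements of a and b.  The sum is formed in a type twice as
 * wide as the element so that a + b + 1 cannot wrap before the final shift,
 * e.g. for vavgub, 0xfe and 0x01 give (0xfe + 0x01 + 1) >> 1 = 0x80.
 */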
#define VAVG_DO(name, element, etype) \ 623 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 624 { \ 625 int i; \ 626 \ 627 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 628 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 629 r->element[i] = x >> 1; \ 630 } \ 631 } 632 633 #define VAVG(type, signed_element, signed_type, unsigned_element, \ 634 unsigned_type) \ 635 VAVG_DO(avgs##type, signed_element, signed_type) \ 636 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 637 VAVG(b, s8, int16_t, u8, uint16_t) 638 VAVG(h, s16, int32_t, u16, uint32_t) 639 VAVG(w, s32, int64_t, u32, uint64_t) 640 #undef VAVG_DO 641 #undef VAVG 642 643 #define VABSDU_DO(name, element) \ 644 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 645 { \ 646 int i; \ 647 \ 648 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 649 r->element[i] = (a->element[i] > b->element[i]) ? \ 650 (a->element[i] - b->element[i]) : \ 651 (b->element[i] - a->element[i]); \ 652 } \ 653 } 654 655 /* 656 * VABSDU - Vector absolute difference unsigned 657 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 658 * element - element type to access from vector 659 */ 660 #define VABSDU(type, element) \ 661 VABSDU_DO(absdu##type, element) 662 VABSDU(b, u8) 663 VABSDU(h, u16) 664 VABSDU(w, u32) 665 #undef VABSDU_DO 666 #undef VABSDU 667 668 #define VCF(suffix, cvt, element) \ 669 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 670 ppc_avr_t *b, uint32_t uim) \ 671 { \ 672 int i; \ 673 \ 674 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 675 float32 t = cvt(b->element[i], &env->vec_status); \ 676 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 677 } \ 678 } 679 VCF(ux, uint32_to_float32, u32) 680 VCF(sx, int32_to_float32, s32) 681 #undef VCF 682 683 #define VCMP_DO(suffix, compare, element, record) \ 684 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 685 ppc_avr_t *a, ppc_avr_t *b) \ 686 { \ 687 uint64_t ones = (uint64_t)-1; \ 688 uint64_t all = ones; \ 689 uint64_t none = 0; \ 690 int i; \ 691 \ 692 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 693 uint64_t result = (a->element[i] compare b->element[i] ? \ 694 ones : 0x0); \ 695 switch (sizeof(a->element[0])) { \ 696 case 8: \ 697 r->u64[i] = result; \ 698 break; \ 699 case 4: \ 700 r->u32[i] = result; \ 701 break; \ 702 case 2: \ 703 r->u16[i] = result; \ 704 break; \ 705 case 1: \ 706 r->u8[i] = result; \ 707 break; \ 708 } \ 709 all &= result; \ 710 none |= result; \ 711 } \ 712 if (record) { \ 713 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 714 } \ 715 } 716 #define VCMP(suffix, compare, element) \ 717 VCMP_DO(suffix, compare, element, 0) \ 718 VCMP_DO(suffix##_dot, compare, element, 1) 719 VCMP(equb, ==, u8) 720 VCMP(equh, ==, u16) 721 VCMP(equw, ==, u32) 722 VCMP(equd, ==, u64) 723 VCMP(gtub, >, u8) 724 VCMP(gtuh, >, u16) 725 VCMP(gtuw, >, u32) 726 VCMP(gtud, >, u64) 727 VCMP(gtsb, >, s8) 728 VCMP(gtsh, >, s16) 729 VCMP(gtsw, >, s32) 730 VCMP(gtsd, >, s64) 731 #undef VCMP_DO 732 #undef VCMP 733 734 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \ 735 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \ 736 ppc_avr_t *a, ppc_avr_t *b) \ 737 { \ 738 etype ones = (etype)-1; \ 739 etype all = ones; \ 740 etype result, none = 0; \ 741 int i; \ 742 \ 743 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 744 if (cmpzero) { \ 745 result = ((a->element[i] == 0) \ 746 || (b->element[i] == 0) \ 747 || (a->element[i] != b->element[i]) ? 
\ 748 ones : 0x0); \ 749 } else { \ 750 result = (a->element[i] != b->element[i]) ? ones : 0x0; \ 751 } \ 752 r->element[i] = result; \ 753 all &= result; \ 754 none |= result; \ 755 } \ 756 if (record) { \ 757 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 758 } \ 759 } 760 761 /* 762 * VCMPNEZ - Vector compare not equal to zero 763 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word) 764 * element - element type to access from vector 765 */ 766 #define VCMPNE(suffix, element, etype, cmpzero) \ 767 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \ 768 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1) 769 VCMPNE(zb, u8, uint8_t, 1) 770 VCMPNE(zh, u16, uint16_t, 1) 771 VCMPNE(zw, u32, uint32_t, 1) 772 VCMPNE(b, u8, uint8_t, 0) 773 VCMPNE(h, u16, uint16_t, 0) 774 VCMPNE(w, u32, uint32_t, 0) 775 #undef VCMPNE_DO 776 #undef VCMPNE 777 778 #define VCMPFP_DO(suffix, compare, order, record) \ 779 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 780 ppc_avr_t *a, ppc_avr_t *b) \ 781 { \ 782 uint32_t ones = (uint32_t)-1; \ 783 uint32_t all = ones; \ 784 uint32_t none = 0; \ 785 int i; \ 786 \ 787 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 788 uint32_t result; \ 789 int rel = float32_compare_quiet(a->f32[i], b->f32[i], \ 790 &env->vec_status); \ 791 if (rel == float_relation_unordered) { \ 792 result = 0; \ 793 } else if (rel compare order) { \ 794 result = ones; \ 795 } else { \ 796 result = 0; \ 797 } \ 798 r->u32[i] = result; \ 799 all &= result; \ 800 none |= result; \ 801 } \ 802 if (record) { \ 803 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 804 } \ 805 } 806 #define VCMPFP(suffix, compare, order) \ 807 VCMPFP_DO(suffix, compare, order, 0) \ 808 VCMPFP_DO(suffix##_dot, compare, order, 1) 809 VCMPFP(eqfp, ==, float_relation_equal) 810 VCMPFP(gefp, !=, float_relation_less) 811 VCMPFP(gtfp, ==, float_relation_greater) 812 #undef VCMPFP_DO 813 #undef VCMPFP 814 815 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 816 ppc_avr_t *a, ppc_avr_t *b, int record) 817 { 818 int i; 819 int all_in = 0; 820 821 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 822 int le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 823 &env->vec_status); 824 if (le_rel == float_relation_unordered) { 825 r->u32[i] = 0xc0000000; 826 all_in = 1; 827 } else { 828 float32 bneg = float32_chs(b->f32[i]); 829 int ge_rel = float32_compare_quiet(a->f32[i], bneg, 830 &env->vec_status); 831 int le = le_rel != float_relation_greater; 832 int ge = ge_rel != float_relation_less; 833 834 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 835 all_in |= (!le | !ge); 836 } 837 } 838 if (record) { 839 env->crf[6] = (all_in == 0) << 1; 840 } 841 } 842 843 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 844 { 845 vcmpbfp_internal(env, r, a, b, 0); 846 } 847 848 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 849 ppc_avr_t *b) 850 { 851 vcmpbfp_internal(env, r, a, b, 1); 852 } 853 854 #define VCT(suffix, satcvt, element) \ 855 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 856 ppc_avr_t *b, uint32_t uim) \ 857 { \ 858 int i; \ 859 int sat = 0; \ 860 float_status s = env->vec_status; \ 861 \ 862 set_float_rounding_mode(float_round_to_zero, &s); \ 863 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 864 if (float32_is_any_nan(b->f32[i])) { \ 865 r->element[i] = 0; \ 866 } else { \ 867 float64 t = float32_to_float64(b->f32[i], &s); \ 868 int64_t j; \ 869 \ 870 t = float64_scalbn(t, uim, &s); \ 871 j = float64_to_int64(t, &s); \ 872 
r->element[i] = satcvt(j, &sat); \ 873 } \ 874 } \ 875 if (sat) { \ 876 set_vscr_sat(env); \ 877 } \ 878 } 879 VCT(uxs, cvtsduw, u32) 880 VCT(sxs, cvtsdsw, s32) 881 #undef VCT 882 883 target_ulong helper_vclzlsbb(ppc_avr_t *r) 884 { 885 target_ulong count = 0; 886 int i; 887 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 888 if (r->VsrB(i) & 0x01) { 889 break; 890 } 891 count++; 892 } 893 return count; 894 } 895 896 target_ulong helper_vctzlsbb(ppc_avr_t *r) 897 { 898 target_ulong count = 0; 899 int i; 900 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 901 if (r->VsrB(i) & 0x01) { 902 break; 903 } 904 count++; 905 } 906 return count; 907 } 908 909 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 910 ppc_avr_t *b, ppc_avr_t *c) 911 { 912 int sat = 0; 913 int i; 914 915 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 916 int32_t prod = a->s16[i] * b->s16[i]; 917 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 918 919 r->s16[i] = cvtswsh(t, &sat); 920 } 921 922 if (sat) { 923 set_vscr_sat(env); 924 } 925 } 926 927 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 928 ppc_avr_t *b, ppc_avr_t *c) 929 { 930 int sat = 0; 931 int i; 932 933 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 934 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 935 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 936 r->s16[i] = cvtswsh(t, &sat); 937 } 938 939 if (sat) { 940 set_vscr_sat(env); 941 } 942 } 943 944 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 945 { 946 int i; 947 948 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 949 int32_t prod = a->s16[i] * b->s16[i]; 950 r->s16[i] = (int16_t) (prod + c->s16[i]); 951 } 952 } 953 954 #define VMRG_DO(name, element, access, ofs) \ 955 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 956 { \ 957 ppc_avr_t result; \ 958 int i, half = ARRAY_SIZE(r->element) / 2; \ 959 \ 960 for (i = 0; i < half; i++) { \ 961 result.access(i * 2 + 0) = a->access(i + ofs); \ 962 result.access(i * 2 + 1) = b->access(i + ofs); \ 963 } \ 964 *r = result; \ 965 } 966 967 #define VMRG(suffix, element, access) \ 968 VMRG_DO(mrgl##suffix, element, access, half) \ 969 VMRG_DO(mrgh##suffix, element, access, 0) 970 VMRG(b, u8, VsrB) 971 VMRG(h, u16, VsrH) 972 VMRG(w, u32, VsrW) 973 #undef VMRG_DO 974 #undef VMRG 975 976 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 977 ppc_avr_t *b, ppc_avr_t *c) 978 { 979 int32_t prod[16]; 980 int i; 981 982 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 983 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 984 } 985 986 VECTOR_FOR_INORDER_I(i, s32) { 987 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 988 prod[4 * i + 2] + prod[4 * i + 3]; 989 } 990 } 991 992 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 993 ppc_avr_t *b, ppc_avr_t *c) 994 { 995 int32_t prod[8]; 996 int i; 997 998 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 999 prod[i] = a->s16[i] * b->s16[i]; 1000 } 1001 1002 VECTOR_FOR_INORDER_I(i, s32) { 1003 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1004 } 1005 } 1006 1007 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1008 ppc_avr_t *b, ppc_avr_t *c) 1009 { 1010 int32_t prod[8]; 1011 int i; 1012 int sat = 0; 1013 1014 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1015 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1016 } 1017 1018 VECTOR_FOR_INORDER_I(i, s32) { 1019 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1020 1021 r->u32[i] = cvtsdsw(t, &sat); 1022 } 1023 1024 if (sat) { 1025 set_vscr_sat(env); 1026 } 1027 } 1028 
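/*
 * vmsumubm: multiply-sum of unsigned bytes.  For each word element i the
 * four byte products a->u8[4 * i + j] * b->u8[4 * i + j] are added to
 * c->u32[i] modulo 2^32.  E.g. bytes {1, 2, 3, 4} times {10, 20, 30, 40}
 * with c = 5 gives 5 + 10 + 40 + 90 + 160 = 305.
 */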
1029 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1030 ppc_avr_t *b, ppc_avr_t *c) 1031 { 1032 uint16_t prod[16]; 1033 int i; 1034 1035 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1036 prod[i] = a->u8[i] * b->u8[i]; 1037 } 1038 1039 VECTOR_FOR_INORDER_I(i, u32) { 1040 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1041 prod[4 * i + 2] + prod[4 * i + 3]; 1042 } 1043 } 1044 1045 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1046 ppc_avr_t *b, ppc_avr_t *c) 1047 { 1048 uint32_t prod[8]; 1049 int i; 1050 1051 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1052 prod[i] = a->u16[i] * b->u16[i]; 1053 } 1054 1055 VECTOR_FOR_INORDER_I(i, u32) { 1056 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1057 } 1058 } 1059 1060 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1061 ppc_avr_t *b, ppc_avr_t *c) 1062 { 1063 uint32_t prod[8]; 1064 int i; 1065 int sat = 0; 1066 1067 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1068 prod[i] = a->u16[i] * b->u16[i]; 1069 } 1070 1071 VECTOR_FOR_INORDER_I(i, s32) { 1072 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1073 1074 r->u32[i] = cvtuduw(t, &sat); 1075 } 1076 1077 if (sat) { 1078 set_vscr_sat(env); 1079 } 1080 } 1081 1082 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 1083 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1084 { \ 1085 int i; \ 1086 \ 1087 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1088 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 1089 (cast)b->mul_access(i); \ 1090 } \ 1091 } 1092 1093 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 1094 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1095 { \ 1096 int i; \ 1097 \ 1098 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1099 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1100 (cast)b->mul_access(i + 1); \ 1101 } \ 1102 } 1103 1104 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1105 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \ 1106 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast) 1107 VMUL(sb, s8, VsrSB, VsrSH, int16_t) 1108 VMUL(sh, s16, VsrSH, VsrSW, int32_t) 1109 VMUL(sw, s32, VsrSW, VsrSD, int64_t) 1110 VMUL(ub, u8, VsrB, VsrH, uint16_t) 1111 VMUL(uh, u16, VsrH, VsrW, uint32_t) 1112 VMUL(uw, u32, VsrW, VsrD, uint64_t) 1113 #undef VMUL_DO_EVN 1114 #undef VMUL_DO_ODD 1115 #undef VMUL 1116 1117 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1118 ppc_avr_t *c) 1119 { 1120 ppc_avr_t result; 1121 int i; 1122 1123 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1124 int s = c->VsrB(i) & 0x1f; 1125 int index = s & 0xf; 1126 1127 if (s & 0x10) { 1128 result.VsrB(i) = b->VsrB(index); 1129 } else { 1130 result.VsrB(i) = a->VsrB(index); 1131 } 1132 } 1133 *r = result; 1134 } 1135 1136 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1137 ppc_avr_t *c) 1138 { 1139 ppc_avr_t result; 1140 int i; 1141 1142 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1143 int s = c->VsrB(i) & 0x1f; 1144 int index = 15 - (s & 0xf); 1145 1146 if (s & 0x10) { 1147 result.VsrB(i) = a->VsrB(index); 1148 } else { 1149 result.VsrB(i) = b->VsrB(index); 1150 } 1151 } 1152 *r = result; 1153 } 1154 1155 #if defined(HOST_WORDS_BIGENDIAN) 1156 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1157 #define VBPERMD_INDEX(i) (i) 1158 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1159 #define EXTRACT_BIT(avr, i, index) 
(extract64((avr)->u64[i], index, 1)) 1160 #else 1161 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1162 #define VBPERMD_INDEX(i) (1 - i) 1163 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1164 #define EXTRACT_BIT(avr, i, index) \ 1165 (extract64((avr)->u64[1 - i], 63 - index, 1)) 1166 #endif 1167 1168 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1169 { 1170 int i, j; 1171 ppc_avr_t result = { .u64 = { 0, 0 } }; 1172 VECTOR_FOR_INORDER_I(i, u64) { 1173 for (j = 0; j < 8; j++) { 1174 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1175 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1176 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1177 } 1178 } 1179 } 1180 *r = result; 1181 } 1182 1183 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1184 { 1185 int i; 1186 uint64_t perm = 0; 1187 1188 VECTOR_FOR_INORDER_I(i, u8) { 1189 int index = VBPERMQ_INDEX(b, i); 1190 1191 if (index < 128) { 1192 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1193 if (a->u64[VBPERMQ_DW(index)] & mask) { 1194 perm |= (0x8000 >> i); 1195 } 1196 } 1197 } 1198 1199 r->VsrD(0) = perm; 1200 r->VsrD(1) = 0; 1201 } 1202 1203 #undef VBPERMQ_INDEX 1204 #undef VBPERMQ_DW 1205 1206 static const uint64_t VGBBD_MASKS[256] = { 1207 0x0000000000000000ull, /* 00 */ 1208 0x0000000000000080ull, /* 01 */ 1209 0x0000000000008000ull, /* 02 */ 1210 0x0000000000008080ull, /* 03 */ 1211 0x0000000000800000ull, /* 04 */ 1212 0x0000000000800080ull, /* 05 */ 1213 0x0000000000808000ull, /* 06 */ 1214 0x0000000000808080ull, /* 07 */ 1215 0x0000000080000000ull, /* 08 */ 1216 0x0000000080000080ull, /* 09 */ 1217 0x0000000080008000ull, /* 0A */ 1218 0x0000000080008080ull, /* 0B */ 1219 0x0000000080800000ull, /* 0C */ 1220 0x0000000080800080ull, /* 0D */ 1221 0x0000000080808000ull, /* 0E */ 1222 0x0000000080808080ull, /* 0F */ 1223 0x0000008000000000ull, /* 10 */ 1224 0x0000008000000080ull, /* 11 */ 1225 0x0000008000008000ull, /* 12 */ 1226 0x0000008000008080ull, /* 13 */ 1227 0x0000008000800000ull, /* 14 */ 1228 0x0000008000800080ull, /* 15 */ 1229 0x0000008000808000ull, /* 16 */ 1230 0x0000008000808080ull, /* 17 */ 1231 0x0000008080000000ull, /* 18 */ 1232 0x0000008080000080ull, /* 19 */ 1233 0x0000008080008000ull, /* 1A */ 1234 0x0000008080008080ull, /* 1B */ 1235 0x0000008080800000ull, /* 1C */ 1236 0x0000008080800080ull, /* 1D */ 1237 0x0000008080808000ull, /* 1E */ 1238 0x0000008080808080ull, /* 1F */ 1239 0x0000800000000000ull, /* 20 */ 1240 0x0000800000000080ull, /* 21 */ 1241 0x0000800000008000ull, /* 22 */ 1242 0x0000800000008080ull, /* 23 */ 1243 0x0000800000800000ull, /* 24 */ 1244 0x0000800000800080ull, /* 25 */ 1245 0x0000800000808000ull, /* 26 */ 1246 0x0000800000808080ull, /* 27 */ 1247 0x0000800080000000ull, /* 28 */ 1248 0x0000800080000080ull, /* 29 */ 1249 0x0000800080008000ull, /* 2A */ 1250 0x0000800080008080ull, /* 2B */ 1251 0x0000800080800000ull, /* 2C */ 1252 0x0000800080800080ull, /* 2D */ 1253 0x0000800080808000ull, /* 2E */ 1254 0x0000800080808080ull, /* 2F */ 1255 0x0000808000000000ull, /* 30 */ 1256 0x0000808000000080ull, /* 31 */ 1257 0x0000808000008000ull, /* 32 */ 1258 0x0000808000008080ull, /* 33 */ 1259 0x0000808000800000ull, /* 34 */ 1260 0x0000808000800080ull, /* 35 */ 1261 0x0000808000808000ull, /* 36 */ 1262 0x0000808000808080ull, /* 37 */ 1263 0x0000808080000000ull, /* 38 */ 1264 0x0000808080000080ull, /* 39 */ 1265 0x0000808080008000ull, /* 3A */ 1266 0x0000808080008080ull, /* 3B */ 1267 0x0000808080800000ull, /* 3C */ 1268 0x0000808080800080ull, /* 3D */ 1269 
0x0000808080808000ull, /* 3E */ 1270 0x0000808080808080ull, /* 3F */ 1271 0x0080000000000000ull, /* 40 */ 1272 0x0080000000000080ull, /* 41 */ 1273 0x0080000000008000ull, /* 42 */ 1274 0x0080000000008080ull, /* 43 */ 1275 0x0080000000800000ull, /* 44 */ 1276 0x0080000000800080ull, /* 45 */ 1277 0x0080000000808000ull, /* 46 */ 1278 0x0080000000808080ull, /* 47 */ 1279 0x0080000080000000ull, /* 48 */ 1280 0x0080000080000080ull, /* 49 */ 1281 0x0080000080008000ull, /* 4A */ 1282 0x0080000080008080ull, /* 4B */ 1283 0x0080000080800000ull, /* 4C */ 1284 0x0080000080800080ull, /* 4D */ 1285 0x0080000080808000ull, /* 4E */ 1286 0x0080000080808080ull, /* 4F */ 1287 0x0080008000000000ull, /* 50 */ 1288 0x0080008000000080ull, /* 51 */ 1289 0x0080008000008000ull, /* 52 */ 1290 0x0080008000008080ull, /* 53 */ 1291 0x0080008000800000ull, /* 54 */ 1292 0x0080008000800080ull, /* 55 */ 1293 0x0080008000808000ull, /* 56 */ 1294 0x0080008000808080ull, /* 57 */ 1295 0x0080008080000000ull, /* 58 */ 1296 0x0080008080000080ull, /* 59 */ 1297 0x0080008080008000ull, /* 5A */ 1298 0x0080008080008080ull, /* 5B */ 1299 0x0080008080800000ull, /* 5C */ 1300 0x0080008080800080ull, /* 5D */ 1301 0x0080008080808000ull, /* 5E */ 1302 0x0080008080808080ull, /* 5F */ 1303 0x0080800000000000ull, /* 60 */ 1304 0x0080800000000080ull, /* 61 */ 1305 0x0080800000008000ull, /* 62 */ 1306 0x0080800000008080ull, /* 63 */ 1307 0x0080800000800000ull, /* 64 */ 1308 0x0080800000800080ull, /* 65 */ 1309 0x0080800000808000ull, /* 66 */ 1310 0x0080800000808080ull, /* 67 */ 1311 0x0080800080000000ull, /* 68 */ 1312 0x0080800080000080ull, /* 69 */ 1313 0x0080800080008000ull, /* 6A */ 1314 0x0080800080008080ull, /* 6B */ 1315 0x0080800080800000ull, /* 6C */ 1316 0x0080800080800080ull, /* 6D */ 1317 0x0080800080808000ull, /* 6E */ 1318 0x0080800080808080ull, /* 6F */ 1319 0x0080808000000000ull, /* 70 */ 1320 0x0080808000000080ull, /* 71 */ 1321 0x0080808000008000ull, /* 72 */ 1322 0x0080808000008080ull, /* 73 */ 1323 0x0080808000800000ull, /* 74 */ 1324 0x0080808000800080ull, /* 75 */ 1325 0x0080808000808000ull, /* 76 */ 1326 0x0080808000808080ull, /* 77 */ 1327 0x0080808080000000ull, /* 78 */ 1328 0x0080808080000080ull, /* 79 */ 1329 0x0080808080008000ull, /* 7A */ 1330 0x0080808080008080ull, /* 7B */ 1331 0x0080808080800000ull, /* 7C */ 1332 0x0080808080800080ull, /* 7D */ 1333 0x0080808080808000ull, /* 7E */ 1334 0x0080808080808080ull, /* 7F */ 1335 0x8000000000000000ull, /* 80 */ 1336 0x8000000000000080ull, /* 81 */ 1337 0x8000000000008000ull, /* 82 */ 1338 0x8000000000008080ull, /* 83 */ 1339 0x8000000000800000ull, /* 84 */ 1340 0x8000000000800080ull, /* 85 */ 1341 0x8000000000808000ull, /* 86 */ 1342 0x8000000000808080ull, /* 87 */ 1343 0x8000000080000000ull, /* 88 */ 1344 0x8000000080000080ull, /* 89 */ 1345 0x8000000080008000ull, /* 8A */ 1346 0x8000000080008080ull, /* 8B */ 1347 0x8000000080800000ull, /* 8C */ 1348 0x8000000080800080ull, /* 8D */ 1349 0x8000000080808000ull, /* 8E */ 1350 0x8000000080808080ull, /* 8F */ 1351 0x8000008000000000ull, /* 90 */ 1352 0x8000008000000080ull, /* 91 */ 1353 0x8000008000008000ull, /* 92 */ 1354 0x8000008000008080ull, /* 93 */ 1355 0x8000008000800000ull, /* 94 */ 1356 0x8000008000800080ull, /* 95 */ 1357 0x8000008000808000ull, /* 96 */ 1358 0x8000008000808080ull, /* 97 */ 1359 0x8000008080000000ull, /* 98 */ 1360 0x8000008080000080ull, /* 99 */ 1361 0x8000008080008000ull, /* 9A */ 1362 0x8000008080008080ull, /* 9B */ 1363 0x8000008080800000ull, /* 9C */ 1364 0x8000008080800080ull, /* 9D */ 1365 
0x8000008080808000ull, /* 9E */ 1366 0x8000008080808080ull, /* 9F */ 1367 0x8000800000000000ull, /* A0 */ 1368 0x8000800000000080ull, /* A1 */ 1369 0x8000800000008000ull, /* A2 */ 1370 0x8000800000008080ull, /* A3 */ 1371 0x8000800000800000ull, /* A4 */ 1372 0x8000800000800080ull, /* A5 */ 1373 0x8000800000808000ull, /* A6 */ 1374 0x8000800000808080ull, /* A7 */ 1375 0x8000800080000000ull, /* A8 */ 1376 0x8000800080000080ull, /* A9 */ 1377 0x8000800080008000ull, /* AA */ 1378 0x8000800080008080ull, /* AB */ 1379 0x8000800080800000ull, /* AC */ 1380 0x8000800080800080ull, /* AD */ 1381 0x8000800080808000ull, /* AE */ 1382 0x8000800080808080ull, /* AF */ 1383 0x8000808000000000ull, /* B0 */ 1384 0x8000808000000080ull, /* B1 */ 1385 0x8000808000008000ull, /* B2 */ 1386 0x8000808000008080ull, /* B3 */ 1387 0x8000808000800000ull, /* B4 */ 1388 0x8000808000800080ull, /* B5 */ 1389 0x8000808000808000ull, /* B6 */ 1390 0x8000808000808080ull, /* B7 */ 1391 0x8000808080000000ull, /* B8 */ 1392 0x8000808080000080ull, /* B9 */ 1393 0x8000808080008000ull, /* BA */ 1394 0x8000808080008080ull, /* BB */ 1395 0x8000808080800000ull, /* BC */ 1396 0x8000808080800080ull, /* BD */ 1397 0x8000808080808000ull, /* BE */ 1398 0x8000808080808080ull, /* BF */ 1399 0x8080000000000000ull, /* C0 */ 1400 0x8080000000000080ull, /* C1 */ 1401 0x8080000000008000ull, /* C2 */ 1402 0x8080000000008080ull, /* C3 */ 1403 0x8080000000800000ull, /* C4 */ 1404 0x8080000000800080ull, /* C5 */ 1405 0x8080000000808000ull, /* C6 */ 1406 0x8080000000808080ull, /* C7 */ 1407 0x8080000080000000ull, /* C8 */ 1408 0x8080000080000080ull, /* C9 */ 1409 0x8080000080008000ull, /* CA */ 1410 0x8080000080008080ull, /* CB */ 1411 0x8080000080800000ull, /* CC */ 1412 0x8080000080800080ull, /* CD */ 1413 0x8080000080808000ull, /* CE */ 1414 0x8080000080808080ull, /* CF */ 1415 0x8080008000000000ull, /* D0 */ 1416 0x8080008000000080ull, /* D1 */ 1417 0x8080008000008000ull, /* D2 */ 1418 0x8080008000008080ull, /* D3 */ 1419 0x8080008000800000ull, /* D4 */ 1420 0x8080008000800080ull, /* D5 */ 1421 0x8080008000808000ull, /* D6 */ 1422 0x8080008000808080ull, /* D7 */ 1423 0x8080008080000000ull, /* D8 */ 1424 0x8080008080000080ull, /* D9 */ 1425 0x8080008080008000ull, /* DA */ 1426 0x8080008080008080ull, /* DB */ 1427 0x8080008080800000ull, /* DC */ 1428 0x8080008080800080ull, /* DD */ 1429 0x8080008080808000ull, /* DE */ 1430 0x8080008080808080ull, /* DF */ 1431 0x8080800000000000ull, /* E0 */ 1432 0x8080800000000080ull, /* E1 */ 1433 0x8080800000008000ull, /* E2 */ 1434 0x8080800000008080ull, /* E3 */ 1435 0x8080800000800000ull, /* E4 */ 1436 0x8080800000800080ull, /* E5 */ 1437 0x8080800000808000ull, /* E6 */ 1438 0x8080800000808080ull, /* E7 */ 1439 0x8080800080000000ull, /* E8 */ 1440 0x8080800080000080ull, /* E9 */ 1441 0x8080800080008000ull, /* EA */ 1442 0x8080800080008080ull, /* EB */ 1443 0x8080800080800000ull, /* EC */ 1444 0x8080800080800080ull, /* ED */ 1445 0x8080800080808000ull, /* EE */ 1446 0x8080800080808080ull, /* EF */ 1447 0x8080808000000000ull, /* F0 */ 1448 0x8080808000000080ull, /* F1 */ 1449 0x8080808000008000ull, /* F2 */ 1450 0x8080808000008080ull, /* F3 */ 1451 0x8080808000800000ull, /* F4 */ 1452 0x8080808000800080ull, /* F5 */ 1453 0x8080808000808000ull, /* F6 */ 1454 0x8080808000808080ull, /* F7 */ 1455 0x8080808080000000ull, /* F8 */ 1456 0x8080808080000080ull, /* F9 */ 1457 0x8080808080008000ull, /* FA */ 1458 0x8080808080008080ull, /* FB */ 1459 0x8080808080800000ull, /* FC */ 1460 0x8080808080800080ull, /* FD */ 1461 
0x8080808080808000ull, /* FE */ 1462 0x8080808080808080ull, /* FF */ 1463 }; 1464 1465 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b) 1466 { 1467 int i; 1468 uint64_t t[2] = { 0, 0 }; 1469 1470 VECTOR_FOR_INORDER_I(i, u8) { 1471 #if defined(HOST_WORDS_BIGENDIAN) 1472 t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7); 1473 #else 1474 t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (7 - (i & 7)); 1475 #endif 1476 } 1477 1478 r->u64[0] = t[0]; 1479 r->u64[1] = t[1]; 1480 } 1481 1482 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1483 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1484 { \ 1485 int i, j; \ 1486 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ 1487 \ 1488 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1489 prod[i] = 0; \ 1490 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1491 if (a->srcfld[i] & (1ull << j)) { \ 1492 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1493 } \ 1494 } \ 1495 } \ 1496 \ 1497 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1498 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ 1499 } \ 1500 } 1501 1502 PMSUM(vpmsumb, u8, u16, uint16_t) 1503 PMSUM(vpmsumh, u16, u32, uint32_t) 1504 PMSUM(vpmsumw, u32, u64, uint64_t) 1505 1506 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1507 { 1508 1509 #ifdef CONFIG_INT128 1510 int i, j; 1511 __uint128_t prod[2]; 1512 1513 VECTOR_FOR_INORDER_I(i, u64) { 1514 prod[i] = 0; 1515 for (j = 0; j < 64; j++) { 1516 if (a->u64[i] & (1ull << j)) { 1517 prod[i] ^= (((__uint128_t)b->u64[i]) << j); 1518 } 1519 } 1520 } 1521 1522 r->u128 = prod[0] ^ prod[1]; 1523 1524 #else 1525 int i, j; 1526 ppc_avr_t prod[2]; 1527 1528 VECTOR_FOR_INORDER_I(i, u64) { 1529 prod[i].VsrD(1) = prod[i].VsrD(0) = 0; 1530 for (j = 0; j < 64; j++) { 1531 if (a->u64[i] & (1ull << j)) { 1532 ppc_avr_t bshift; 1533 if (j == 0) { 1534 bshift.VsrD(0) = 0; 1535 bshift.VsrD(1) = b->u64[i]; 1536 } else { 1537 bshift.VsrD(0) = b->u64[i] >> (64 - j); 1538 bshift.VsrD(1) = b->u64[i] << j; 1539 } 1540 prod[i].VsrD(1) ^= bshift.VsrD(1); 1541 prod[i].VsrD(0) ^= bshift.VsrD(0); 1542 } 1543 } 1544 } 1545 1546 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1); 1547 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0); 1548 #endif 1549 } 1550 1551 1552 #if defined(HOST_WORDS_BIGENDIAN) 1553 #define PKBIG 1 1554 #else 1555 #define PKBIG 0 1556 #endif 1557 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1558 { 1559 int i, j; 1560 ppc_avr_t result; 1561 #if defined(HOST_WORDS_BIGENDIAN) 1562 const ppc_avr_t *x[2] = { a, b }; 1563 #else 1564 const ppc_avr_t *x[2] = { b, a }; 1565 #endif 1566 1567 VECTOR_FOR_INORDER_I(i, u64) { 1568 VECTOR_FOR_INORDER_I(j, u32) { 1569 uint32_t e = x[i]->u32[j]; 1570 1571 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1572 ((e >> 6) & 0x3e0) | 1573 ((e >> 3) & 0x1f)); 1574 } 1575 } 1576 *r = result; 1577 } 1578 1579 #define VPK(suffix, from, to, cvt, dosat) \ 1580 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1581 ppc_avr_t *a, ppc_avr_t *b) \ 1582 { \ 1583 int i; \ 1584 int sat = 0; \ 1585 ppc_avr_t result; \ 1586 ppc_avr_t *a0 = PKBIG ? a : b; \ 1587 ppc_avr_t *a1 = PKBIG ? 
b : a; \ 1588 \ 1589 VECTOR_FOR_INORDER_I(i, from) { \ 1590 result.to[i] = cvt(a0->from[i], &sat); \ 1591 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1592 } \ 1593 *r = result; \ 1594 if (dosat && sat) { \ 1595 set_vscr_sat(env); \ 1596 } \ 1597 } 1598 #define I(x, y) (x) 1599 VPK(shss, s16, s8, cvtshsb, 1) 1600 VPK(shus, s16, u8, cvtshub, 1) 1601 VPK(swss, s32, s16, cvtswsh, 1) 1602 VPK(swus, s32, u16, cvtswuh, 1) 1603 VPK(sdss, s64, s32, cvtsdsw, 1) 1604 VPK(sdus, s64, u32, cvtsduw, 1) 1605 VPK(uhus, u16, u8, cvtuhub, 1) 1606 VPK(uwus, u32, u16, cvtuwuh, 1) 1607 VPK(udus, u64, u32, cvtuduw, 1) 1608 VPK(uhum, u16, u8, I, 0) 1609 VPK(uwum, u32, u16, I, 0) 1610 VPK(udum, u64, u32, I, 0) 1611 #undef I 1612 #undef VPK 1613 #undef PKBIG 1614 1615 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1616 { 1617 int i; 1618 1619 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1620 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1621 } 1622 } 1623 1624 #define VRFI(suffix, rounding) \ 1625 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1626 ppc_avr_t *b) \ 1627 { \ 1628 int i; \ 1629 float_status s = env->vec_status; \ 1630 \ 1631 set_float_rounding_mode(rounding, &s); \ 1632 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1633 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1634 } \ 1635 } 1636 VRFI(n, float_round_nearest_even) 1637 VRFI(m, float_round_down) 1638 VRFI(p, float_round_up) 1639 VRFI(z, float_round_to_zero) 1640 #undef VRFI 1641 1642 #define VROTATE(suffix, element, mask) \ 1643 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1644 { \ 1645 int i; \ 1646 \ 1647 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1648 unsigned int shift = b->element[i] & mask; \ 1649 r->element[i] = (a->element[i] << shift) | \ 1650 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \ 1651 } \ 1652 } 1653 VROTATE(b, u8, 0x7) 1654 VROTATE(h, u16, 0xF) 1655 VROTATE(w, u32, 0x1F) 1656 VROTATE(d, u64, 0x3F) 1657 #undef VROTATE 1658 1659 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1660 { 1661 int i; 1662 1663 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1664 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1665 1666 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1667 } 1668 } 1669 1670 #define VRLMI(name, size, element, insert) \ 1671 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1672 { \ 1673 int i; \ 1674 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1675 uint##size##_t src1 = a->element[i]; \ 1676 uint##size##_t src2 = b->element[i]; \ 1677 uint##size##_t src3 = r->element[i]; \ 1678 uint##size##_t begin, end, shift, mask, rot_val; \ 1679 \ 1680 shift = extract##size(src2, 0, 6); \ 1681 end = extract##size(src2, 8, 6); \ 1682 begin = extract##size(src2, 16, 6); \ 1683 rot_val = rol##size(src1, shift); \ 1684 mask = mask_u##size(begin, end); \ 1685 if (insert) { \ 1686 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1687 } else { \ 1688 r->element[i] = (rot_val & mask); \ 1689 } \ 1690 } \ 1691 } 1692 1693 VRLMI(vrldmi, 64, u64, 1); 1694 VRLMI(vrlwmi, 32, u32, 1); 1695 VRLMI(vrldnm, 64, u64, 0); 1696 VRLMI(vrlwnm, 32, u32, 0); 1697 1698 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1699 ppc_avr_t *c) 1700 { 1701 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]); 1702 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]); 1703 } 1704 1705 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1706 { 
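    /* vexptefp: estimate 2**x (exp2) for each single-precision element. */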
1707 int i; 1708 1709 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1710 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status); 1711 } 1712 } 1713 1714 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1715 { 1716 int i; 1717 1718 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1719 r->f32[i] = float32_log2(b->f32[i], &env->vec_status); 1720 } 1721 } 1722 1723 #if defined(HOST_WORDS_BIGENDIAN) 1724 #define VEXTU_X_DO(name, size, left) \ 1725 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1726 { \ 1727 int index; \ 1728 if (left) { \ 1729 index = (a & 0xf) * 8; \ 1730 } else { \ 1731 index = ((15 - (a & 0xf) + 1) * 8) - size; \ 1732 } \ 1733 return int128_getlo(int128_rshift(b->s128, index)) & \ 1734 MAKE_64BIT_MASK(0, size); \ 1735 } 1736 #else 1737 #define VEXTU_X_DO(name, size, left) \ 1738 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1739 { \ 1740 int index; \ 1741 if (left) { \ 1742 index = ((15 - (a & 0xf) + 1) * 8) - size; \ 1743 } else { \ 1744 index = (a & 0xf) * 8; \ 1745 } \ 1746 return int128_getlo(int128_rshift(b->s128, index)) & \ 1747 MAKE_64BIT_MASK(0, size); \ 1748 } 1749 #endif 1750 1751 VEXTU_X_DO(vextublx, 8, 1) 1752 VEXTU_X_DO(vextuhlx, 16, 1) 1753 VEXTU_X_DO(vextuwlx, 32, 1) 1754 VEXTU_X_DO(vextubrx, 8, 0) 1755 VEXTU_X_DO(vextuhrx, 16, 0) 1756 VEXTU_X_DO(vextuwrx, 32, 0) 1757 #undef VEXTU_X_DO 1758 1759 /* 1760 * The specification says that the results are undefined if all of the 1761 * shift counts are not identical. We check to make sure that they 1762 * are to conform to what real hardware appears to do. 1763 */ 1764 #define VSHIFT(suffix, leftp) \ 1765 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1766 { \ 1767 int shift = b->VsrB(15) & 0x7; \ 1768 int doit = 1; \ 1769 int i; \ 1770 \ 1771 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \ 1772 doit = doit && ((b->u8[i] & 0x7) == shift); \ 1773 } \ 1774 if (doit) { \ 1775 if (shift == 0) { \ 1776 *r = *a; \ 1777 } else if (leftp) { \ 1778 uint64_t carry = a->VsrD(1) >> (64 - shift); \ 1779 \ 1780 r->VsrD(0) = (a->VsrD(0) << shift) | carry; \ 1781 r->VsrD(1) = a->VsrD(1) << shift; \ 1782 } else { \ 1783 uint64_t carry = a->VsrD(0) << (64 - shift); \ 1784 \ 1785 r->VsrD(1) = (a->VsrD(1) >> shift) | carry; \ 1786 r->VsrD(0) = a->VsrD(0) >> shift; \ 1787 } \ 1788 } \ 1789 } 1790 VSHIFT(l, 1) 1791 VSHIFT(r, 0) 1792 #undef VSHIFT 1793 1794 #define VSL(suffix, element, mask) \ 1795 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1796 { \ 1797 int i; \ 1798 \ 1799 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1800 unsigned int shift = b->element[i] & mask; \ 1801 \ 1802 r->element[i] = a->element[i] << shift; \ 1803 } \ 1804 } 1805 VSL(b, u8, 0x7) 1806 VSL(h, u16, 0x0F) 1807 VSL(w, u32, 0x1F) 1808 VSL(d, u64, 0x3F) 1809 #undef VSL 1810 1811 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1812 { 1813 int i; 1814 unsigned int shift, bytes, size; 1815 1816 size = ARRAY_SIZE(r->u8); 1817 for (i = 0; i < size; i++) { 1818 shift = b->u8[i] & 0x7; /* extract shift value */ 1819 bytes = (a->u8[i] << 8) + /* extract adjacent bytes */ 1820 (((i + 1) < size) ? a->u8[i + 1] : 0); 1821 r->u8[i] = (bytes << shift) >> 8; /* shift and store result */ 1822 } 1823 } 1824 1825 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1826 { 1827 int i; 1828 unsigned int shift, bytes; 1829 1830 /* 1831 * Use reverse order, as destination and source register can be 1832 * same. 
Its being modified in place saving temporary, reverse 1833 * order will guarantee that computed result is not fed back. 1834 */ 1835 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1836 shift = b->u8[i] & 0x7; /* extract shift value */ 1837 bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i]; 1838 /* extract adjacent bytes */ 1839 r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */ 1840 } 1841 } 1842 1843 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1844 { 1845 int sh = shift & 0xf; 1846 int i; 1847 ppc_avr_t result; 1848 1849 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1850 int index = sh + i; 1851 if (index > 0xf) { 1852 result.VsrB(i) = b->VsrB(index - 0x10); 1853 } else { 1854 result.VsrB(i) = a->VsrB(index); 1855 } 1856 } 1857 *r = result; 1858 } 1859 1860 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1861 { 1862 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1863 1864 #if defined(HOST_WORDS_BIGENDIAN) 1865 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1866 memset(&r->u8[16 - sh], 0, sh); 1867 #else 1868 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1869 memset(&r->u8[0], 0, sh); 1870 #endif 1871 } 1872 1873 #if defined(HOST_WORDS_BIGENDIAN) 1874 #define VINSERT(suffix, element) \ 1875 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1876 { \ 1877 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \ 1878 sizeof(r->element[0])); \ 1879 } 1880 #else 1881 #define VINSERT(suffix, element) \ 1882 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1883 { \ 1884 uint32_t d = (16 - index) - sizeof(r->element[0]); \ 1885 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \ 1886 } 1887 #endif 1888 VINSERT(b, u8) 1889 VINSERT(h, u16) 1890 VINSERT(w, u32) 1891 VINSERT(d, u64) 1892 #undef VINSERT 1893 #if defined(HOST_WORDS_BIGENDIAN) 1894 #define VEXTRACT(suffix, element) \ 1895 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1896 { \ 1897 uint32_t es = sizeof(r->element[0]); \ 1898 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1899 memset(&r->u8[8], 0, 8); \ 1900 memset(&r->u8[0], 0, 8 - es); \ 1901 } 1902 #else 1903 #define VEXTRACT(suffix, element) \ 1904 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1905 { \ 1906 uint32_t es = sizeof(r->element[0]); \ 1907 uint32_t s = (16 - index) - es; \ 1908 memmove(&r->u8[8], &b->u8[s], es); \ 1909 memset(&r->u8[0], 0, 8); \ 1910 memset(&r->u8[8 + es], 0, 8 - es); \ 1911 } 1912 #endif 1913 VEXTRACT(ub, u8) 1914 VEXTRACT(uh, u16) 1915 VEXTRACT(uw, u32) 1916 VEXTRACT(d, u64) 1917 #undef VEXTRACT 1918 1919 void helper_xxextractuw(CPUPPCState *env, target_ulong xtn, 1920 target_ulong xbn, uint32_t index) 1921 { 1922 ppc_vsr_t xt, xb; 1923 size_t es = sizeof(uint32_t); 1924 uint32_t ext_index; 1925 int i; 1926 1927 getVSR(xbn, &xb, env); 1928 memset(&xt, 0, sizeof(xt)); 1929 1930 ext_index = index; 1931 for (i = 0; i < es; i++, ext_index++) { 1932 xt.VsrB(8 - es + i) = xb.VsrB(ext_index % 16); 1933 } 1934 1935 putVSR(xtn, &xt, env); 1936 } 1937 1938 void helper_xxinsertw(CPUPPCState *env, target_ulong xtn, 1939 target_ulong xbn, uint32_t index) 1940 { 1941 ppc_vsr_t xt, xb; 1942 size_t es = sizeof(uint32_t); 1943 int ins_index, i = 0; 1944 1945 getVSR(xbn, &xb, env); 1946 getVSR(xtn, &xt, env); 1947 1948 ins_index = index; 1949 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1950 xt.VsrB(ins_index) = xb.VsrB(8 - es + i); 1951 } 1952 1953 putVSR(xtn, &xt, env); 1954 } 1955 1956 #define VEXT_SIGNED(name, 
element, cast) \ 1957 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1958 { \ 1959 int i; \ 1960 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1961 r->element[i] = (cast)b->element[i]; \ 1962 } \ 1963 } 1964 VEXT_SIGNED(vextsb2w, s32, int8_t) 1965 VEXT_SIGNED(vextsb2d, s64, int8_t) 1966 VEXT_SIGNED(vextsh2w, s32, int16_t) 1967 VEXT_SIGNED(vextsh2d, s64, int16_t) 1968 VEXT_SIGNED(vextsw2d, s64, int32_t) 1969 #undef VEXT_SIGNED 1970 1971 #define VNEG(name, element) \ 1972 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1973 { \ 1974 int i; \ 1975 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1976 r->element[i] = -b->element[i]; \ 1977 } \ 1978 } 1979 VNEG(vnegw, s32) 1980 VNEG(vnegd, s64) 1981 #undef VNEG 1982 1983 #define VSR(suffix, element, mask) \ 1984 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1985 { \ 1986 int i; \ 1987 \ 1988 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1989 unsigned int shift = b->element[i] & mask; \ 1990 r->element[i] = a->element[i] >> shift; \ 1991 } \ 1992 } 1993 VSR(ab, s8, 0x7) 1994 VSR(ah, s16, 0xF) 1995 VSR(aw, s32, 0x1F) 1996 VSR(ad, s64, 0x3F) 1997 VSR(b, u8, 0x7) 1998 VSR(h, u16, 0xF) 1999 VSR(w, u32, 0x1F) 2000 VSR(d, u64, 0x3F) 2001 #undef VSR 2002 2003 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2004 { 2005 int sh = (b->VsrB(0xf) >> 3) & 0xf; 2006 2007 #if defined(HOST_WORDS_BIGENDIAN) 2008 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 2009 memset(&r->u8[0], 0, sh); 2010 #else 2011 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 2012 memset(&r->u8[16 - sh], 0, sh); 2013 #endif 2014 } 2015 2016 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2017 { 2018 int i; 2019 2020 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2021 r->u32[i] = a->u32[i] >= b->u32[i]; 2022 } 2023 } 2024 2025 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2026 { 2027 int64_t t; 2028 int i, upper; 2029 ppc_avr_t result; 2030 int sat = 0; 2031 2032 upper = ARRAY_SIZE(r->s32) - 1; 2033 t = (int64_t)b->VsrSW(upper); 2034 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2035 t += a->VsrSW(i); 2036 result.VsrSW(i) = 0; 2037 } 2038 result.VsrSW(upper) = cvtsdsw(t, &sat); 2039 *r = result; 2040 2041 if (sat) { 2042 set_vscr_sat(env); 2043 } 2044 } 2045 2046 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2047 { 2048 int i, j, upper; 2049 ppc_avr_t result; 2050 int sat = 0; 2051 2052 upper = 1; 2053 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 2054 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 2055 2056 result.VsrW(i) = 0; 2057 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 2058 t += a->VsrSW(2 * i + j); 2059 } 2060 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 2061 } 2062 2063 *r = result; 2064 if (sat) { 2065 set_vscr_sat(env); 2066 } 2067 } 2068 2069 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2070 { 2071 int i, j; 2072 int sat = 0; 2073 2074 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2075 int64_t t = (int64_t)b->s32[i]; 2076 2077 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 2078 t += a->s8[4 * i + j]; 2079 } 2080 r->s32[i] = cvtsdsw(t, &sat); 2081 } 2082 2083 if (sat) { 2084 set_vscr_sat(env); 2085 } 2086 } 2087 2088 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2089 { 2090 int sat = 0; 2091 int i; 2092 2093 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2094 int64_t t = (int64_t)b->s32[i]; 2095 2096 t += a->s16[2 * i] + a->s16[2 * i + 1]; 2097 r->s32[i] = cvtsdsw(t, &sat); 2098 } 2099 2100 if (sat) { 2101 
set_vscr_sat(env); 2102 } 2103 } 2104 2105 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2106 { 2107 int i, j; 2108 int sat = 0; 2109 2110 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2111 uint64_t t = (uint64_t)b->u32[i]; 2112 2113 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 2114 t += a->u8[4 * i + j]; 2115 } 2116 r->u32[i] = cvtuduw(t, &sat); 2117 } 2118 2119 if (sat) { 2120 set_vscr_sat(env); 2121 } 2122 } 2123 2124 #if defined(HOST_WORDS_BIGENDIAN) 2125 #define UPKHI 1 2126 #define UPKLO 0 2127 #else 2128 #define UPKHI 0 2129 #define UPKLO 1 2130 #endif 2131 #define VUPKPX(suffix, hi) \ 2132 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2133 { \ 2134 int i; \ 2135 ppc_avr_t result; \ 2136 \ 2137 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 2138 uint16_t e = b->u16[hi ? i : i + 4]; \ 2139 uint8_t a = (e >> 15) ? 0xff : 0; \ 2140 uint8_t r = (e >> 10) & 0x1f; \ 2141 uint8_t g = (e >> 5) & 0x1f; \ 2142 uint8_t b = e & 0x1f; \ 2143 \ 2144 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 2145 } \ 2146 *r = result; \ 2147 } 2148 VUPKPX(lpx, UPKLO) 2149 VUPKPX(hpx, UPKHI) 2150 #undef VUPKPX 2151 2152 #define VUPK(suffix, unpacked, packee, hi) \ 2153 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2154 { \ 2155 int i; \ 2156 ppc_avr_t result; \ 2157 \ 2158 if (hi) { \ 2159 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 2160 result.unpacked[i] = b->packee[i]; \ 2161 } \ 2162 } else { \ 2163 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 2164 i++) { \ 2165 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 2166 } \ 2167 } \ 2168 *r = result; \ 2169 } 2170 VUPK(hsb, s16, s8, UPKHI) 2171 VUPK(hsh, s32, s16, UPKHI) 2172 VUPK(hsw, s64, s32, UPKHI) 2173 VUPK(lsb, s16, s8, UPKLO) 2174 VUPK(lsh, s32, s16, UPKLO) 2175 VUPK(lsw, s64, s32, UPKLO) 2176 #undef VUPK 2177 #undef UPKHI 2178 #undef UPKLO 2179 2180 #define VGENERIC_DO(name, element) \ 2181 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 2182 { \ 2183 int i; \ 2184 \ 2185 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 2186 r->element[i] = name(b->element[i]); \ 2187 } \ 2188 } 2189 2190 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 2191 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 2192 #define clzw(v) clz32((v)) 2193 #define clzd(v) clz64((v)) 2194 2195 VGENERIC_DO(clzb, u8) 2196 VGENERIC_DO(clzh, u16) 2197 VGENERIC_DO(clzw, u32) 2198 VGENERIC_DO(clzd, u64) 2199 2200 #undef clzb 2201 #undef clzh 2202 #undef clzw 2203 #undef clzd 2204 2205 #define ctzb(v) ((v) ? ctz32(v) : 8) 2206 #define ctzh(v) ((v) ? 
ctz32(v) : 16) 2207 #define ctzw(v) ctz32((v)) 2208 #define ctzd(v) ctz64((v)) 2209 2210 VGENERIC_DO(ctzb, u8) 2211 VGENERIC_DO(ctzh, u16) 2212 VGENERIC_DO(ctzw, u32) 2213 VGENERIC_DO(ctzd, u64) 2214 2215 #undef ctzb 2216 #undef ctzh 2217 #undef ctzw 2218 #undef ctzd 2219 2220 #define popcntb(v) ctpop8(v) 2221 #define popcnth(v) ctpop16(v) 2222 #define popcntw(v) ctpop32(v) 2223 #define popcntd(v) ctpop64(v) 2224 2225 VGENERIC_DO(popcntb, u8) 2226 VGENERIC_DO(popcnth, u16) 2227 VGENERIC_DO(popcntw, u32) 2228 VGENERIC_DO(popcntd, u64) 2229 2230 #undef popcntb 2231 #undef popcnth 2232 #undef popcntw 2233 #undef popcntd 2234 2235 #undef VGENERIC_DO 2236 2237 #if defined(HOST_WORDS_BIGENDIAN) 2238 #define QW_ONE { .u64 = { 0, 1 } } 2239 #else 2240 #define QW_ONE { .u64 = { 1, 0 } } 2241 #endif 2242 2243 #ifndef CONFIG_INT128 2244 2245 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) 2246 { 2247 t->u64[0] = ~a.u64[0]; 2248 t->u64[1] = ~a.u64[1]; 2249 } 2250 2251 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) 2252 { 2253 if (a.VsrD(0) < b.VsrD(0)) { 2254 return -1; 2255 } else if (a.VsrD(0) > b.VsrD(0)) { 2256 return 1; 2257 } else if (a.VsrD(1) < b.VsrD(1)) { 2258 return -1; 2259 } else if (a.VsrD(1) > b.VsrD(1)) { 2260 return 1; 2261 } else { 2262 return 0; 2263 } 2264 } 2265 2266 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2267 { 2268 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 2269 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 2270 (~a.VsrD(1) < b.VsrD(1)); 2271 } 2272 2273 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2274 { 2275 ppc_avr_t not_a; 2276 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 2277 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 2278 (~a.VsrD(1) < b.VsrD(1)); 2279 avr_qw_not(&not_a, a); 2280 return avr_qw_cmpu(not_a, b) < 0; 2281 } 2282 2283 #endif 2284 2285 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2286 { 2287 #ifdef CONFIG_INT128 2288 r->u128 = a->u128 + b->u128; 2289 #else 2290 avr_qw_add(r, *a, *b); 2291 #endif 2292 } 2293 2294 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2295 { 2296 #ifdef CONFIG_INT128 2297 r->u128 = a->u128 + b->u128 + (c->u128 & 1); 2298 #else 2299 2300 if (c->VsrD(1) & 1) { 2301 ppc_avr_t tmp; 2302 2303 tmp.VsrD(0) = 0; 2304 tmp.VsrD(1) = c->VsrD(1) & 1; 2305 avr_qw_add(&tmp, *a, tmp); 2306 avr_qw_add(r, tmp, *b); 2307 } else { 2308 avr_qw_add(r, *a, *b); 2309 } 2310 #endif 2311 } 2312 2313 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2314 { 2315 #ifdef CONFIG_INT128 2316 r->u128 = (~a->u128 < b->u128); 2317 #else 2318 ppc_avr_t not_a; 2319 2320 avr_qw_not(&not_a, *a); 2321 2322 r->VsrD(0) = 0; 2323 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0); 2324 #endif 2325 } 2326 2327 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2328 { 2329 #ifdef CONFIG_INT128 2330 int carry_out = (~a->u128 < b->u128); 2331 if (!carry_out && (c->u128 & 1)) { 2332 carry_out = ((a->u128 + b->u128 + 1) == 0) && 2333 ((a->u128 != 0) || (b->u128 != 0)); 2334 } 2335 r->u128 = carry_out; 2336 #else 2337 2338 int carry_in = c->VsrD(1) & 1; 2339 int carry_out = 0; 2340 ppc_avr_t tmp; 2341 2342 carry_out = avr_qw_addc(&tmp, *a, *b); 2343 2344 if (!carry_out && carry_in) { 2345 ppc_avr_t one = QW_ONE; 2346 carry_out = avr_qw_addc(&tmp, tmp, one); 2347 } 2348 r->VsrD(0) = 0; 2349 r->VsrD(1) = carry_out; 2350 #endif 2351 } 2352 2353 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2354 { 2355 #ifdef CONFIG_INT128 2356 r->u128 = a->u128 - b->u128; 2357 #else 2358 ppc_avr_t
tmp; 2359 ppc_avr_t one = QW_ONE; 2360 2361 avr_qw_not(&tmp, *b); 2362 avr_qw_add(&tmp, *a, tmp); 2363 avr_qw_add(r, tmp, one); 2364 #endif 2365 } 2366 2367 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2368 { 2369 #ifdef CONFIG_INT128 2370 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); 2371 #else 2372 ppc_avr_t tmp, sum; 2373 2374 avr_qw_not(&tmp, *b); 2375 avr_qw_add(&sum, *a, tmp); 2376 2377 tmp.VsrD(0) = 0; 2378 tmp.VsrD(1) = c->VsrD(1) & 1; 2379 avr_qw_add(r, sum, tmp); 2380 #endif 2381 } 2382 2383 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2384 { 2385 #ifdef CONFIG_INT128 2386 r->u128 = (~a->u128 < ~b->u128) || 2387 (a->u128 + ~b->u128 == (__uint128_t)-1); 2388 #else 2389 int carry = (avr_qw_cmpu(*a, *b) > 0); 2390 if (!carry) { 2391 ppc_avr_t tmp; 2392 avr_qw_not(&tmp, *b); 2393 avr_qw_add(&tmp, *a, tmp); 2394 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull)); 2395 } 2396 r->VsrD(0) = 0; 2397 r->VsrD(1) = carry; 2398 #endif 2399 } 2400 2401 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2402 { 2403 #ifdef CONFIG_INT128 2404 r->u128 = 2405 (~a->u128 < ~b->u128) || 2406 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); 2407 #else 2408 int carry_in = c->VsrD(1) & 1; 2409 int carry_out = (avr_qw_cmpu(*a, *b) > 0); 2410 if (!carry_out && carry_in) { 2411 ppc_avr_t tmp; 2412 avr_qw_not(&tmp, *b); 2413 avr_qw_add(&tmp, *a, tmp); 2414 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull)); 2415 } 2416 2417 r->VsrD(0) = 0; 2418 r->VsrD(1) = carry_out; 2419 #endif 2420 } 2421 2422 #define BCD_PLUS_PREF_1 0xC 2423 #define BCD_PLUS_PREF_2 0xF 2424 #define BCD_PLUS_ALT_1 0xA 2425 #define BCD_NEG_PREF 0xD 2426 #define BCD_NEG_ALT 0xB 2427 #define BCD_PLUS_ALT_2 0xE 2428 #define NATIONAL_PLUS 0x2B 2429 #define NATIONAL_NEG 0x2D 2430 2431 #if defined(HOST_WORDS_BIGENDIAN) 2432 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2433 #else 2434 #define BCD_DIG_BYTE(n) ((n) / 2) 2435 #endif 2436 2437 static int bcd_get_sgn(ppc_avr_t *bcd) 2438 { 2439 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) { 2440 case BCD_PLUS_PREF_1: 2441 case BCD_PLUS_PREF_2: 2442 case BCD_PLUS_ALT_1: 2443 case BCD_PLUS_ALT_2: 2444 { 2445 return 1; 2446 } 2447 2448 case BCD_NEG_PREF: 2449 case BCD_NEG_ALT: 2450 { 2451 return -1; 2452 } 2453 2454 default: 2455 { 2456 return 0; 2457 } 2458 } 2459 } 2460 2461 static int bcd_preferred_sgn(int sgn, int ps) 2462 { 2463 if (sgn >= 0) { 2464 return (ps == 0) ? 
BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2465 } else { 2466 return BCD_NEG_PREF; 2467 } 2468 } 2469 2470 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2471 { 2472 uint8_t result; 2473 if (n & 1) { 2474 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4; 2475 } else { 2476 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF; 2477 } 2478 2479 if (unlikely(result > 9)) { 2480 *invalid = true; 2481 } 2482 return result; 2483 } 2484 2485 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2486 { 2487 if (n & 1) { 2488 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F; 2489 bcd->u8[BCD_DIG_BYTE(n)] |= (digit << 4); 2490 } else { 2491 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0; 2492 bcd->u8[BCD_DIG_BYTE(n)] |= digit; 2493 } 2494 } 2495 2496 static bool bcd_is_valid(ppc_avr_t *bcd) 2497 { 2498 int i; 2499 int invalid = 0; 2500 2501 if (bcd_get_sgn(bcd) == 0) { 2502 return false; 2503 } 2504 2505 for (i = 1; i < 32; i++) { 2506 bcd_get_digit(bcd, i, &invalid); 2507 if (unlikely(invalid)) { 2508 return false; 2509 } 2510 } 2511 return true; 2512 } 2513 2514 static int bcd_cmp_zero(ppc_avr_t *bcd) 2515 { 2516 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) { 2517 return CRF_EQ; 2518 } else { 2519 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT; 2520 } 2521 } 2522 2523 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2524 { 2525 return reg->VsrH(7 - n); 2526 } 2527 2528 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2529 { 2530 reg->VsrH(7 - n) = val; 2531 } 2532 2533 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2534 { 2535 int i; 2536 int invalid = 0; 2537 for (i = 31; i > 0; i--) { 2538 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2539 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2540 if (unlikely(invalid)) { 2541 return 0; /* doesn't matter */ 2542 } else if (dig_a > dig_b) { 2543 return 1; 2544 } else if (dig_a < dig_b) { 2545 return -1; 2546 } 2547 } 2548 2549 return 0; 2550 } 2551 2552 static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2553 int *overflow) 2554 { 2555 int carry = 0; 2556 int i; 2557 for (i = 1; i <= 31; i++) { 2558 uint8_t digit = bcd_get_digit(a, i, invalid) + 2559 bcd_get_digit(b, i, invalid) + carry; 2560 if (digit > 9) { 2561 carry = 1; 2562 digit -= 10; 2563 } else { 2564 carry = 0; 2565 } 2566 2567 bcd_put_digit(t, digit, i); 2568 } 2569 2570 *overflow = carry; 2571 } 2572 2573 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2574 int *overflow) 2575 { 2576 int carry = 0; 2577 int i; 2578 2579 for (i = 1; i <= 31; i++) { 2580 uint8_t digit = bcd_get_digit(a, i, invalid) - 2581 bcd_get_digit(b, i, invalid) + carry; 2582 if (digit & 0x80) { 2583 carry = -1; 2584 digit += 10; 2585 } else { 2586 carry = 0; 2587 } 2588 2589 bcd_put_digit(t, digit, i); 2590 } 2591 2592 *overflow = carry; 2593 } 2594 2595 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2596 { 2597 2598 int sgna = bcd_get_sgn(a); 2599 int sgnb = bcd_get_sgn(b); 2600 int invalid = (sgna == 0) || (sgnb == 0); 2601 int overflow = 0; 2602 uint32_t cr = 0; 2603 ppc_avr_t result = { .u64 = { 0, 0 } }; 2604 2605 if (!invalid) { 2606 if (sgna == sgnb) { 2607 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps); 2608 bcd_add_mag(&result, a, b, &invalid, &overflow); 2609 cr = bcd_cmp_zero(&result); 2610 } else { 2611 int magnitude = bcd_cmp_mag(a, b); 2612 if (magnitude > 0) { 2613 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps); 2614 bcd_sub_mag(&result, a, b, &invalid, &overflow); 2615 cr = (sgna > 0) ? 
CRF_GT : CRF_LT; 2616 } else if (magnitude < 0) { 2617 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps); 2618 bcd_sub_mag(&result, b, a, &invalid, &overflow); 2619 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2620 } else { 2621 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(0, ps); 2622 cr = CRF_EQ; 2623 } 2624 } 2625 } 2626 2627 if (unlikely(invalid)) { 2628 result.VsrD(0) = result.VsrD(1) = -1; 2629 cr = CRF_SO; 2630 } else if (overflow) { 2631 cr |= CRF_SO; 2632 } 2633 2634 *r = result; 2635 2636 return cr; 2637 } 2638 2639 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2640 { 2641 ppc_avr_t bcopy = *b; 2642 int sgnb = bcd_get_sgn(b); 2643 if (sgnb < 0) { 2644 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2645 } else if (sgnb > 0) { 2646 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2647 } 2648 /* else invalid ... defer to bcdadd code for proper handling */ 2649 2650 return helper_bcdadd(r, a, &bcopy, ps); 2651 } 2652 2653 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2654 { 2655 int i; 2656 int cr = 0; 2657 uint16_t national = 0; 2658 uint16_t sgnb = get_national_digit(b, 0); 2659 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2660 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2661 2662 for (i = 1; i < 8; i++) { 2663 national = get_national_digit(b, i); 2664 if (unlikely(national < 0x30 || national > 0x39)) { 2665 invalid = 1; 2666 break; 2667 } 2668 2669 bcd_put_digit(&ret, national & 0xf, i); 2670 } 2671 2672 if (sgnb == NATIONAL_PLUS) { 2673 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2674 } else { 2675 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2676 } 2677 2678 cr = bcd_cmp_zero(&ret); 2679 2680 if (unlikely(invalid)) { 2681 cr = CRF_SO; 2682 } 2683 2684 *r = ret; 2685 2686 return cr; 2687 } 2688 2689 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2690 { 2691 int i; 2692 int cr = 0; 2693 int sgnb = bcd_get_sgn(b); 2694 int invalid = (sgnb == 0); 2695 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2696 2697 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0); 2698 2699 for (i = 1; i < 8; i++) { 2700 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2701 2702 if (unlikely(invalid)) { 2703 break; 2704 } 2705 } 2706 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2707 2708 cr = bcd_cmp_zero(b); 2709 2710 if (ox_flag) { 2711 cr |= CRF_SO; 2712 } 2713 2714 if (unlikely(invalid)) { 2715 cr = CRF_SO; 2716 } 2717 2718 *r = ret; 2719 2720 return cr; 2721 } 2722 2723 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2724 { 2725 int i; 2726 int cr = 0; 2727 int invalid = 0; 2728 int zone_digit = 0; 2729 int zone_lead = ps ? 0xF : 0x3; 2730 int digit = 0; 2731 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2732 int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4; 2733 2734 if (unlikely((sgnb < 0xA) && ps)) { 2735 invalid = 1; 2736 } 2737 2738 for (i = 0; i < 16; i++) { 2739 zone_digit = i ? 
b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead; 2740 digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF; 2741 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2742 invalid = 1; 2743 break; 2744 } 2745 2746 bcd_put_digit(&ret, digit, i + 1); 2747 } 2748 2749 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2750 (!ps && (sgnb & 0x4))) { 2751 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2752 } else { 2753 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2754 } 2755 2756 cr = bcd_cmp_zero(&ret); 2757 2758 if (unlikely(invalid)) { 2759 cr = CRF_SO; 2760 } 2761 2762 *r = ret; 2763 2764 return cr; 2765 } 2766 2767 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2768 { 2769 int i; 2770 int cr = 0; 2771 uint8_t digit = 0; 2772 int sgnb = bcd_get_sgn(b); 2773 int zone_lead = (ps) ? 0xF0 : 0x30; 2774 int invalid = (sgnb == 0); 2775 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2776 2777 int ox_flag = ((b->VsrD(0) >> 4) != 0); 2778 2779 for (i = 0; i < 16; i++) { 2780 digit = bcd_get_digit(b, i + 1, &invalid); 2781 2782 if (unlikely(invalid)) { 2783 break; 2784 } 2785 2786 ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit; 2787 } 2788 2789 if (ps) { 2790 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2791 } else { 2792 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1); 2793 } 2794 2795 cr = bcd_cmp_zero(b); 2796 2797 if (ox_flag) { 2798 cr |= CRF_SO; 2799 } 2800 2801 if (unlikely(invalid)) { 2802 cr = CRF_SO; 2803 } 2804 2805 *r = ret; 2806 2807 return cr; 2808 } 2809 2810 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2811 { 2812 int i; 2813 int cr = 0; 2814 uint64_t lo_value; 2815 uint64_t hi_value; 2816 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2817 2818 if (b->VsrSD(0) < 0) { 2819 lo_value = -b->VsrSD(1); 2820 hi_value = ~b->VsrD(0) + !lo_value; 2821 bcd_put_digit(&ret, 0xD, 0); 2822 } else { 2823 lo_value = b->VsrD(1); 2824 hi_value = b->VsrD(0); 2825 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); 2826 } 2827 2828 if (divu128(&lo_value, &hi_value, 1000000000000000ULL) || 2829 lo_value > 9999999999999999ULL) { 2830 cr = CRF_SO; 2831 } 2832 2833 for (i = 1; i < 16; hi_value /= 10, i++) { 2834 bcd_put_digit(&ret, hi_value % 10, i); 2835 } 2836 2837 for (; i < 32; lo_value /= 10, i++) { 2838 bcd_put_digit(&ret, lo_value % 10, i); 2839 } 2840 2841 cr |= bcd_cmp_zero(&ret); 2842 2843 *r = ret; 2844 2845 return cr; 2846 } 2847 2848 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2849 { 2850 uint8_t i; 2851 int cr; 2852 uint64_t carry; 2853 uint64_t unused; 2854 uint64_t lo_value; 2855 uint64_t hi_value = 0; 2856 int sgnb = bcd_get_sgn(b); 2857 int invalid = (sgnb == 0); 2858 2859 lo_value = bcd_get_digit(b, 31, &invalid); 2860 for (i = 30; i > 0; i--) { 2861 mulu64(&lo_value, &carry, lo_value, 10ULL); 2862 mulu64(&hi_value, &unused, hi_value, 10ULL); 2863 lo_value += bcd_get_digit(b, i, &invalid); 2864 hi_value += carry; 2865 2866 if (unlikely(invalid)) { 2867 break; 2868 } 2869 } 2870 2871 if (sgnb == -1) { 2872 r->VsrSD(1) = -lo_value; 2873 r->VsrSD(0) = ~hi_value + !r->VsrSD(1); 2874 } else { 2875 r->VsrSD(1) = lo_value; 2876 r->VsrSD(0) = hi_value; 2877 } 2878 2879 cr = bcd_cmp_zero(b); 2880 2881 if (unlikely(invalid)) { 2882 cr = CRF_SO; 2883 } 2884 2885 return cr; 2886 } 2887 2888 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2889 { 2890 int i; 2891 int invalid = 0; 2892 2893 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 2894 return CRF_SO; 2895 } 2896 2897 *r = *a; 2898 bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0); 2899 2900 for (i = 1; i < 
32; i++) { 2901 bcd_get_digit(a, i, &invalid); 2902 bcd_get_digit(b, i, &invalid); 2903 if (unlikely(invalid)) { 2904 return CRF_SO; 2905 } 2906 } 2907 2908 return bcd_cmp_zero(r); 2909 } 2910 2911 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2912 { 2913 int sgnb = bcd_get_sgn(b); 2914 2915 *r = *b; 2916 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 2917 2918 if (bcd_is_valid(b) == false) { 2919 return CRF_SO; 2920 } 2921 2922 return bcd_cmp_zero(r); 2923 } 2924 2925 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2926 { 2927 int cr; 2928 #if defined(HOST_WORDS_BIGENDIAN) 2929 int i = a->s8[7]; 2930 #else 2931 int i = a->s8[8]; 2932 #endif 2933 bool ox_flag = false; 2934 int sgnb = bcd_get_sgn(b); 2935 ppc_avr_t ret = *b; 2936 ret.VsrD(1) &= ~0xf; 2937 2938 if (bcd_is_valid(b) == false) { 2939 return CRF_SO; 2940 } 2941 2942 if (unlikely(i > 31)) { 2943 i = 31; 2944 } else if (unlikely(i < -31)) { 2945 i = -31; 2946 } 2947 2948 if (i > 0) { 2949 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2950 } else { 2951 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2952 } 2953 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2954 2955 *r = ret; 2956 2957 cr = bcd_cmp_zero(r); 2958 if (ox_flag) { 2959 cr |= CRF_SO; 2960 } 2961 2962 return cr; 2963 } 2964 2965 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2966 { 2967 int cr; 2968 int i; 2969 int invalid = 0; 2970 bool ox_flag = false; 2971 ppc_avr_t ret = *b; 2972 2973 for (i = 0; i < 32; i++) { 2974 bcd_get_digit(b, i, &invalid); 2975 2976 if (unlikely(invalid)) { 2977 return CRF_SO; 2978 } 2979 } 2980 2981 #if defined(HOST_WORDS_BIGENDIAN) 2982 i = a->s8[7]; 2983 #else 2984 i = a->s8[8]; 2985 #endif 2986 if (i >= 32) { 2987 ox_flag = true; 2988 ret.VsrD(1) = ret.VsrD(0) = 0; 2989 } else if (i <= -32) { 2990 ret.VsrD(1) = ret.VsrD(0) = 0; 2991 } else if (i > 0) { 2992 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2993 } else { 2994 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2995 } 2996 *r = ret; 2997 2998 cr = bcd_cmp_zero(r); 2999 if (ox_flag) { 3000 cr |= CRF_SO; 3001 } 3002 3003 return cr; 3004 } 3005 3006 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3007 { 3008 int cr; 3009 int unused = 0; 3010 int invalid = 0; 3011 bool ox_flag = false; 3012 int sgnb = bcd_get_sgn(b); 3013 ppc_avr_t ret = *b; 3014 ret.VsrD(1) &= ~0xf; 3015 3016 #if defined(HOST_WORDS_BIGENDIAN) 3017 int i = a->s8[7]; 3018 ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } }; 3019 #else 3020 int i = a->s8[8]; 3021 ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } }; 3022 #endif 3023 3024 if (bcd_is_valid(b) == false) { 3025 return CRF_SO; 3026 } 3027 3028 if (unlikely(i > 31)) { 3029 i = 31; 3030 } else if (unlikely(i < -31)) { 3031 i = -31; 3032 } 3033 3034 if (i > 0) { 3035 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 3036 } else { 3037 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 3038 3039 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 3040 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 3041 } 3042 } 3043 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 3044 3045 cr = bcd_cmp_zero(&ret); 3046 if (ox_flag) { 3047 cr |= CRF_SO; 3048 } 3049 *r = ret; 3050 3051 return cr; 3052 } 3053 3054 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3055 { 3056 uint64_t mask; 3057 uint32_t ox_flag = 0; 3058 #if defined(HOST_WORDS_BIGENDIAN) 3059 int i = a->s16[3] + 1; 3060 #else 3061 int i = a->s16[4] + 1; 3062 #endif 3063 
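    /*
     * 'i' is the number of low-order nibbles to preserve: the digit count
     * read from VRA (a->s16[3] on big-endian hosts, a->s16[4] on
     * little-endian), plus one so the sign nibble held in the low four bits
     * of the operand is kept as well.
     */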
ppc_avr_t ret = *b; 3064 3065 if (bcd_is_valid(b) == false) { 3066 return CRF_SO; 3067 } 3068 3069 if (i > 16 && i < 32) { 3070 mask = (uint64_t)-1 >> (128 - i * 4); 3071 if (ret.VsrD(0) & ~mask) { 3072 ox_flag = CRF_SO; 3073 } 3074 3075 ret.VsrD(0) &= mask; 3076 } else if (i >= 0 && i <= 16) { 3077 mask = (uint64_t)-1 >> (64 - i * 4); 3078 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 3079 ox_flag = CRF_SO; 3080 } 3081 3082 ret.VsrD(1) &= mask; 3083 ret.VsrD(0) = 0; 3084 } 3085 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 3086 *r = ret; 3087 3088 return bcd_cmp_zero(&ret) | ox_flag; 3089 } 3090 3091 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3092 { 3093 int i; 3094 uint64_t mask; 3095 uint32_t ox_flag = 0; 3096 int invalid = 0; 3097 ppc_avr_t ret = *b; 3098 3099 for (i = 0; i < 32; i++) { 3100 bcd_get_digit(b, i, &invalid); 3101 3102 if (unlikely(invalid)) { 3103 return CRF_SO; 3104 } 3105 } 3106 3107 #if defined(HOST_WORDS_BIGENDIAN) 3108 i = a->s16[3]; 3109 #else 3110 i = a->s16[4]; 3111 #endif 3112 if (i > 16 && i < 33) { 3113 mask = (uint64_t)-1 >> (128 - i * 4); 3114 if (ret.VsrD(0) & ~mask) { 3115 ox_flag = CRF_SO; 3116 } 3117 3118 ret.VsrD(0) &= mask; 3119 } else if (i > 0 && i <= 16) { 3120 mask = (uint64_t)-1 >> (64 - i * 4); 3121 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 3122 ox_flag = CRF_SO; 3123 } 3124 3125 ret.VsrD(1) &= mask; 3126 ret.VsrD(0) = 0; 3127 } else if (i == 0) { 3128 if (ret.VsrD(0) || ret.VsrD(1)) { 3129 ox_flag = CRF_SO; 3130 } 3131 ret.VsrD(0) = ret.VsrD(1) = 0; 3132 } 3133 3134 *r = ret; 3135 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) { 3136 return ox_flag | CRF_EQ; 3137 } 3138 3139 return ox_flag | CRF_GT; 3140 } 3141 3142 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 3143 { 3144 int i; 3145 VECTOR_FOR_INORDER_I(i, u8) { 3146 r->u8[i] = AES_sbox[a->u8[i]]; 3147 } 3148 } 3149 3150 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3151 { 3152 ppc_avr_t result; 3153 int i; 3154 3155 VECTOR_FOR_INORDER_I(i, u32) { 3156 result.VsrW(i) = b->VsrW(i) ^ 3157 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^ 3158 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^ 3159 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^ 3160 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]); 3161 } 3162 *r = result; 3163 } 3164 3165 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3166 { 3167 ppc_avr_t result; 3168 int i; 3169 3170 VECTOR_FOR_INORDER_I(i, u8) { 3171 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]); 3172 } 3173 *r = result; 3174 } 3175 3176 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3177 { 3178 /* This differs from what is written in ISA V2.07. The RTL is */ 3179 /* incorrect and will be fixed in V2.07B. 
*/ 3180 int i; 3181 ppc_avr_t tmp; 3182 3183 VECTOR_FOR_INORDER_I(i, u8) { 3184 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])]; 3185 } 3186 3187 VECTOR_FOR_INORDER_I(i, u32) { 3188 r->VsrW(i) = 3189 AES_imc[tmp.VsrB(4 * i + 0)][0] ^ 3190 AES_imc[tmp.VsrB(4 * i + 1)][1] ^ 3191 AES_imc[tmp.VsrB(4 * i + 2)][2] ^ 3192 AES_imc[tmp.VsrB(4 * i + 3)][3]; 3193 } 3194 } 3195 3196 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3197 { 3198 ppc_avr_t result; 3199 int i; 3200 3201 VECTOR_FOR_INORDER_I(i, u8) { 3202 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]); 3203 } 3204 *r = result; 3205 } 3206 3207 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3208 { 3209 int st = (st_six & 0x10) != 0; 3210 int six = st_six & 0xF; 3211 int i; 3212 3213 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 3214 if (st == 0) { 3215 if ((six & (0x8 >> i)) == 0) { 3216 r->VsrW(i) = ror32(a->VsrW(i), 7) ^ 3217 ror32(a->VsrW(i), 18) ^ 3218 (a->VsrW(i) >> 3); 3219 } else { /* six.bit[i] == 1 */ 3220 r->VsrW(i) = ror32(a->VsrW(i), 17) ^ 3221 ror32(a->VsrW(i), 19) ^ 3222 (a->VsrW(i) >> 10); 3223 } 3224 } else { /* st == 1 */ 3225 if ((six & (0x8 >> i)) == 0) { 3226 r->VsrW(i) = ror32(a->VsrW(i), 2) ^ 3227 ror32(a->VsrW(i), 13) ^ 3228 ror32(a->VsrW(i), 22); 3229 } else { /* six.bit[i] == 1 */ 3230 r->VsrW(i) = ror32(a->VsrW(i), 6) ^ 3231 ror32(a->VsrW(i), 11) ^ 3232 ror32(a->VsrW(i), 25); 3233 } 3234 } 3235 } 3236 } 3237 3238 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3239 { 3240 int st = (st_six & 0x10) != 0; 3241 int six = st_six & 0xF; 3242 int i; 3243 3244 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 3245 if (st == 0) { 3246 if ((six & (0x8 >> (2 * i))) == 0) { 3247 r->VsrD(i) = ror64(a->VsrD(i), 1) ^ 3248 ror64(a->VsrD(i), 8) ^ 3249 (a->VsrD(i) >> 7); 3250 } else { /* six.bit[2*i] == 1 */ 3251 r->VsrD(i) = ror64(a->VsrD(i), 19) ^ 3252 ror64(a->VsrD(i), 61) ^ 3253 (a->VsrD(i) >> 6); 3254 } 3255 } else { /* st == 1 */ 3256 if ((six & (0x8 >> (2 * i))) == 0) { 3257 r->VsrD(i) = ror64(a->VsrD(i), 28) ^ 3258 ror64(a->VsrD(i), 34) ^ 3259 ror64(a->VsrD(i), 39); 3260 } else { /* six.bit[2*i] == 1 */ 3261 r->VsrD(i) = ror64(a->VsrD(i), 14) ^ 3262 ror64(a->VsrD(i), 18) ^ 3263 ror64(a->VsrD(i), 41); 3264 } 3265 } 3266 } 3267 } 3268 3269 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 3270 { 3271 ppc_avr_t result; 3272 int i; 3273 3274 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 3275 int indexA = c->VsrB(i) >> 4; 3276 int indexB = c->VsrB(i) & 0xF; 3277 3278 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB); 3279 } 3280 *r = result; 3281 } 3282 3283 #undef VECTOR_FOR_INORDER_I 3284 3285 /*****************************************************************************/ 3286 /* SPE extension helpers */ 3287 /* Use a table to make this quicker */ 3288 static const uint8_t hbrev[16] = { 3289 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE, 3290 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF, 3291 }; 3292 3293 static inline uint8_t byte_reverse(uint8_t val) 3294 { 3295 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4); 3296 } 3297 3298 static inline uint32_t word_reverse(uint32_t val) 3299 { 3300 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) | 3301 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24); 3302 } 3303 3304 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */ 3305 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2) 3306 { 3307 uint32_t a, b, d, mask; 3308 3309 mask = 
UINT32_MAX >> (32 - MASKBITS); 3310 a = arg1 & mask; 3311 b = arg2 & mask; 3312 d = word_reverse(1 + word_reverse(a | ~b)); 3313 return (arg1 & ~mask) | (d & b); 3314 } 3315 3316 uint32_t helper_cntlsw32(uint32_t val) 3317 { 3318 if (val & 0x80000000) { 3319 return clz32(~val); 3320 } else { 3321 return clz32(val); 3322 } 3323 } 3324 3325 uint32_t helper_cntlzw32(uint32_t val) 3326 { 3327 return clz32(val); 3328 } 3329 3330 /* 440 specific */ 3331 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 3332 target_ulong low, uint32_t update_Rc) 3333 { 3334 target_ulong mask; 3335 int i; 3336 3337 i = 1; 3338 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3339 if ((high & mask) == 0) { 3340 if (update_Rc) { 3341 env->crf[0] = 0x4; 3342 } 3343 goto done; 3344 } 3345 i++; 3346 } 3347 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3348 if ((low & mask) == 0) { 3349 if (update_Rc) { 3350 env->crf[0] = 0x8; 3351 } 3352 goto done; 3353 } 3354 i++; 3355 } 3356 i = 8; 3357 if (update_Rc) { 3358 env->crf[0] = 0x2; 3359 } 3360 done: 3361 env->xer = (env->xer & ~0x7F) | i; 3362 if (update_Rc) { 3363 env->crf[0] |= xer_so; 3364 } 3365 return i; 3366 } 3367
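/*
 * Illustrative trace of helper_dlmzb() above, with hypothetical operand
 * values chosen only to show the flow of this implementation:
 *
 *   helper_dlmzb(env, 0x41420043, 0x44454647, 1)
 *
 * The first loop scans 'high' from its most significant byte: 0x41 and 0x42
 * are non-zero, so 'i' advances to 3; the third byte is zero, so CR field 0
 * is set to 0x4 (ORed with XER[SO]), the low seven bits of XER receive 3,
 * and the helper returns 3.  Had neither word contained a zero byte, 'i'
 * would have been forced to 8 and CR field 0 set to 0x2.
 */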