/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/*
 * Subtract 1 from each byte, AND with the inverse, and check whether the
 * MSB is set in each byte.
147 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80 148 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 149 */ 150 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 151 152 /* When you XOR the pattern and there is a match, that byte will be zero */ 153 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 154 155 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 156 { 157 return hasvalue(rb, ra) ? CRF_GT : 0; 158 } 159 160 #undef pattern 161 #undef haszero 162 #undef hasvalue 163 164 /* 165 * Return a random number. 166 */ 167 uint64_t helper_darn32(void) 168 { 169 Error *err = NULL; 170 uint32_t ret; 171 172 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 173 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 174 error_get_pretty(err)); 175 error_free(err); 176 return -1; 177 } 178 179 return ret; 180 } 181 182 uint64_t helper_darn64(void) 183 { 184 Error *err = NULL; 185 uint64_t ret; 186 187 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 188 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 189 error_get_pretty(err)); 190 error_free(err); 191 return -1; 192 } 193 194 return ret; 195 } 196 197 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 198 { 199 int i; 200 uint64_t ra = 0; 201 202 for (i = 0; i < 8; i++) { 203 int index = (rs >> (i * 8)) & 0xFF; 204 if (index < 64) { 205 if (rb & PPC_BIT(index)) { 206 ra |= 1 << i; 207 } 208 } 209 } 210 return ra; 211 } 212 213 #endif 214 215 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 216 { 217 target_ulong mask = 0xff; 218 target_ulong ra = 0; 219 int i; 220 221 for (i = 0; i < sizeof(target_ulong); i++) { 222 if ((rs & mask) == (rb & mask)) { 223 ra |= mask; 224 } 225 mask <<= 8; 226 } 227 return ra; 228 } 229 230 /* shift right arithmetic helper */ 231 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 232 target_ulong shift) 233 { 234 int32_t ret; 235 236 if (likely(!(shift & 0x20))) { 237 if (likely((uint32_t)shift != 0)) { 238 shift &= 0x1f; 239 ret = (int32_t)value >> shift; 240 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 241 env->ca32 = env->ca = 0; 242 } else { 243 env->ca32 = env->ca = 1; 244 } 245 } else { 246 ret = (int32_t)value; 247 env->ca32 = env->ca = 0; 248 } 249 } else { 250 ret = (int32_t)value >> 31; 251 env->ca32 = env->ca = (ret != 0); 252 } 253 return (target_long)ret; 254 } 255 256 #if defined(TARGET_PPC64) 257 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 258 target_ulong shift) 259 { 260 int64_t ret; 261 262 if (likely(!(shift & 0x40))) { 263 if (likely((uint64_t)shift != 0)) { 264 shift &= 0x3f; 265 ret = (int64_t)value >> shift; 266 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 267 env->ca32 = env->ca = 0; 268 } else { 269 env->ca32 = env->ca = 1; 270 } 271 } else { 272 ret = (int64_t)value; 273 env->ca32 = env->ca = 0; 274 } 275 } else { 276 ret = (int64_t)value >> 63; 277 env->ca32 = env->ca = (ret != 0); 278 } 279 return ret; 280 } 281 #endif 282 283 #if defined(TARGET_PPC64) 284 target_ulong helper_popcntb(target_ulong val) 285 { 286 /* Note that we don't fold past bytes */ 287 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 288 0x5555555555555555ULL); 289 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 290 0x3333333333333333ULL); 291 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 292 0x0f0f0f0f0f0f0f0fULL); 293 return val; 294 } 295 296 target_ulong helper_popcntw(target_ulong val) 297 { 298 /* Note that we don't fold past words. 
*/ 299 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 300 0x5555555555555555ULL); 301 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 302 0x3333333333333333ULL); 303 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 304 0x0f0f0f0f0f0f0f0fULL); 305 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 306 0x00ff00ff00ff00ffULL); 307 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 308 0x0000ffff0000ffffULL); 309 return val; 310 } 311 #else 312 target_ulong helper_popcntb(target_ulong val) 313 { 314 /* Note that we don't fold past bytes */ 315 val = (val & 0x55555555) + ((val >> 1) & 0x55555555); 316 val = (val & 0x33333333) + ((val >> 2) & 0x33333333); 317 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); 318 return val; 319 } 320 #endif 321 322 /*****************************************************************************/ 323 /* PowerPC 601 specific instructions (POWER bridge) */ 324 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2) 325 { 326 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 327 328 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 329 (int32_t)arg2 == 0) { 330 env->spr[SPR_MQ] = 0; 331 return INT32_MIN; 332 } else { 333 env->spr[SPR_MQ] = tmp % arg2; 334 return tmp / (int32_t)arg2; 335 } 336 } 337 338 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1, 339 target_ulong arg2) 340 { 341 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 342 343 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 344 (int32_t)arg2 == 0) { 345 env->so = env->ov = 1; 346 env->spr[SPR_MQ] = 0; 347 return INT32_MIN; 348 } else { 349 env->spr[SPR_MQ] = tmp % arg2; 350 tmp /= (int32_t)arg2; 351 if ((int32_t)tmp != tmp) { 352 env->so = env->ov = 1; 353 } else { 354 env->ov = 0; 355 } 356 return tmp; 357 } 358 } 359 360 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1, 361 target_ulong arg2) 362 { 363 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 364 (int32_t)arg2 == 0) { 365 env->spr[SPR_MQ] = 0; 366 return INT32_MIN; 367 } else { 368 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 369 return (int32_t)arg1 / (int32_t)arg2; 370 } 371 } 372 373 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1, 374 target_ulong arg2) 375 { 376 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 377 (int32_t)arg2 == 0) { 378 env->so = env->ov = 1; 379 env->spr[SPR_MQ] = 0; 380 return INT32_MIN; 381 } else { 382 env->ov = 0; 383 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 384 return (int32_t)arg1 / (int32_t)arg2; 385 } 386 } 387 388 /*****************************************************************************/ 389 /* 602 specific instructions */ 390 /* mfrom is the most crazy instruction ever seen, imho ! */ 391 /* Real implementation uses a ROM table. 
Do the same */
/*
 * Extremely decomposed:
 *     return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.inc.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                            \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                            \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)                  \
    static inline to_type cvt##from##to(from_type x, int *sat)          \
    {                                                                   \
        to_type r;                                                      \
                                                                        \
        if (x < (from_type)min) {                                       \
            r = min;                                                    \
            *sat = 1;                                                   \
        } else if (x > (from_type)max) {                                \
            r = max;                                                    \
            *sat = 1;                                                   \
        } else {                                                        \
            r = x;                                                      \
        }                                                               \
        return r;                                                       \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)                 \
    static inline to_type cvt##from##to(from_type x, int *sat)          \
    {                                                                   \
        to_type r;                                                      \
                                                                        \
        if (x > (from_type)max) {                                       \
            r = max;                                                    \
            *sat = 1;                                                   \
        } else {                                                        \
            r = x;                                                      \
        }                                                               \
        return r;                                                       \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        r->VsrB(i) = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        r->VsrB(i) = j++;
    }
}

void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
{
    env->vscr = vscr & ~(1u << VSCR_SAT);
    /* Which bit we set is completely arbitrary, but clear the rest.  */
    env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT);
    env->vscr_sat.u64[1] = 0;
    set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
}

uint32_t helper_mfvscr(CPUPPCState *env)
{
    uint32_t sat = (env->vscr_sat.u64[0] | env->vscr_sat.u64[1]) != 0;
    return env->vscr | (sat << VSCR_SAT);
}

static inline void set_vscr_sat(CPUPPCState *env)
{
    /*
     * The choice of non-zero value is arbitrary.
     */
    env->vscr_sat.u32[0] = 1;
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->VsrD(1) = res & 1;
    r->VsrD(0) = 0;
}

#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
VARITH_DO(muluwm, *, u32)
#undef VARITH_DO
#undef VARITH

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status);   \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
                                       type, &env->vec_status);         \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat,              \
                        ppc_avr_t *a, ppc_avr_t *b, uint32_t desc)      \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            VARITHSAT_CASE(optype, op, cvt, element);                   \
        }                                                               \
        if (sat) {                                                      \
            vscr_sat->u32[0] = 1;                                       \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)                  \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)              \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)                \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)              \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

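/*
 * Illustrative sketch (not compiled): how the SATCVT converters and the
 * VARITHSAT helpers above fit together.  Each element pair is widened to
 * the "optype", added or subtracted, and clamped back to the element
 * range; any clamping sets the sticky SAT bit that helper_mfvscr() later
 * reports.  The function name and operand values below are made up purely
 * for illustration.
 */
#if 0
static int8_t example_vaddsbs_lane(void)
{
    int sat = 0;
    /* One byte lane of vaddsbs: 112 + 117 = 229, which overflows int8_t. */
    int16_t wide = (int16_t)112 + (int16_t)117; /* optype arithmetic */
    return cvtshsb(wide, &sat);                 /* clamps to 127, sat = 1 */
}
#endif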
#define VAVG_DO(name, element, etype) \ 625 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 626 { \ 627 int i; \ 628 \ 629 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 630 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 631 r->element[i] = x >> 1; \ 632 } \ 633 } 634 635 #define VAVG(type, signed_element, signed_type, unsigned_element, \ 636 unsigned_type) \ 637 VAVG_DO(avgs##type, signed_element, signed_type) \ 638 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 639 VAVG(b, s8, int16_t, u8, uint16_t) 640 VAVG(h, s16, int32_t, u16, uint32_t) 641 VAVG(w, s32, int64_t, u32, uint64_t) 642 #undef VAVG_DO 643 #undef VAVG 644 645 #define VABSDU_DO(name, element) \ 646 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 647 { \ 648 int i; \ 649 \ 650 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 651 r->element[i] = (a->element[i] > b->element[i]) ? \ 652 (a->element[i] - b->element[i]) : \ 653 (b->element[i] - a->element[i]); \ 654 } \ 655 } 656 657 /* 658 * VABSDU - Vector absolute difference unsigned 659 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 660 * element - element type to access from vector 661 */ 662 #define VABSDU(type, element) \ 663 VABSDU_DO(absdu##type, element) 664 VABSDU(b, u8) 665 VABSDU(h, u16) 666 VABSDU(w, u32) 667 #undef VABSDU_DO 668 #undef VABSDU 669 670 #define VCF(suffix, cvt, element) \ 671 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 672 ppc_avr_t *b, uint32_t uim) \ 673 { \ 674 int i; \ 675 \ 676 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 677 float32 t = cvt(b->element[i], &env->vec_status); \ 678 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 679 } \ 680 } 681 VCF(ux, uint32_to_float32, u32) 682 VCF(sx, int32_to_float32, s32) 683 #undef VCF 684 685 #define VCMP_DO(suffix, compare, element, record) \ 686 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 687 ppc_avr_t *a, ppc_avr_t *b) \ 688 { \ 689 uint64_t ones = (uint64_t)-1; \ 690 uint64_t all = ones; \ 691 uint64_t none = 0; \ 692 int i; \ 693 \ 694 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 695 uint64_t result = (a->element[i] compare b->element[i] ? \ 696 ones : 0x0); \ 697 switch (sizeof(a->element[0])) { \ 698 case 8: \ 699 r->u64[i] = result; \ 700 break; \ 701 case 4: \ 702 r->u32[i] = result; \ 703 break; \ 704 case 2: \ 705 r->u16[i] = result; \ 706 break; \ 707 case 1: \ 708 r->u8[i] = result; \ 709 break; \ 710 } \ 711 all &= result; \ 712 none |= result; \ 713 } \ 714 if (record) { \ 715 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 716 } \ 717 } 718 #define VCMP(suffix, compare, element) \ 719 VCMP_DO(suffix, compare, element, 0) \ 720 VCMP_DO(suffix##_dot, compare, element, 1) 721 VCMP(equb, ==, u8) 722 VCMP(equh, ==, u16) 723 VCMP(equw, ==, u32) 724 VCMP(equd, ==, u64) 725 VCMP(gtub, >, u8) 726 VCMP(gtuh, >, u16) 727 VCMP(gtuw, >, u32) 728 VCMP(gtud, >, u64) 729 VCMP(gtsb, >, s8) 730 VCMP(gtsh, >, s16) 731 VCMP(gtsw, >, s32) 732 VCMP(gtsd, >, s64) 733 #undef VCMP_DO 734 #undef VCMP 735 736 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \ 737 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \ 738 ppc_avr_t *a, ppc_avr_t *b) \ 739 { \ 740 etype ones = (etype)-1; \ 741 etype all = ones; \ 742 etype result, none = 0; \ 743 int i; \ 744 \ 745 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 746 if (cmpzero) { \ 747 result = ((a->element[i] == 0) \ 748 || (b->element[i] == 0) \ 749 || (a->element[i] != b->element[i]) ? 
\ 750 ones : 0x0); \ 751 } else { \ 752 result = (a->element[i] != b->element[i]) ? ones : 0x0; \ 753 } \ 754 r->element[i] = result; \ 755 all &= result; \ 756 none |= result; \ 757 } \ 758 if (record) { \ 759 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 760 } \ 761 } 762 763 /* 764 * VCMPNEZ - Vector compare not equal to zero 765 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word) 766 * element - element type to access from vector 767 */ 768 #define VCMPNE(suffix, element, etype, cmpzero) \ 769 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \ 770 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1) 771 VCMPNE(zb, u8, uint8_t, 1) 772 VCMPNE(zh, u16, uint16_t, 1) 773 VCMPNE(zw, u32, uint32_t, 1) 774 VCMPNE(b, u8, uint8_t, 0) 775 VCMPNE(h, u16, uint16_t, 0) 776 VCMPNE(w, u32, uint32_t, 0) 777 #undef VCMPNE_DO 778 #undef VCMPNE 779 780 #define VCMPFP_DO(suffix, compare, order, record) \ 781 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 782 ppc_avr_t *a, ppc_avr_t *b) \ 783 { \ 784 uint32_t ones = (uint32_t)-1; \ 785 uint32_t all = ones; \ 786 uint32_t none = 0; \ 787 int i; \ 788 \ 789 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 790 uint32_t result; \ 791 int rel = float32_compare_quiet(a->f32[i], b->f32[i], \ 792 &env->vec_status); \ 793 if (rel == float_relation_unordered) { \ 794 result = 0; \ 795 } else if (rel compare order) { \ 796 result = ones; \ 797 } else { \ 798 result = 0; \ 799 } \ 800 r->u32[i] = result; \ 801 all &= result; \ 802 none |= result; \ 803 } \ 804 if (record) { \ 805 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 806 } \ 807 } 808 #define VCMPFP(suffix, compare, order) \ 809 VCMPFP_DO(suffix, compare, order, 0) \ 810 VCMPFP_DO(suffix##_dot, compare, order, 1) 811 VCMPFP(eqfp, ==, float_relation_equal) 812 VCMPFP(gefp, !=, float_relation_less) 813 VCMPFP(gtfp, ==, float_relation_greater) 814 #undef VCMPFP_DO 815 #undef VCMPFP 816 817 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 818 ppc_avr_t *a, ppc_avr_t *b, int record) 819 { 820 int i; 821 int all_in = 0; 822 823 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 824 int le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 825 &env->vec_status); 826 if (le_rel == float_relation_unordered) { 827 r->u32[i] = 0xc0000000; 828 all_in = 1; 829 } else { 830 float32 bneg = float32_chs(b->f32[i]); 831 int ge_rel = float32_compare_quiet(a->f32[i], bneg, 832 &env->vec_status); 833 int le = le_rel != float_relation_greater; 834 int ge = ge_rel != float_relation_less; 835 836 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 837 all_in |= (!le | !ge); 838 } 839 } 840 if (record) { 841 env->crf[6] = (all_in == 0) << 1; 842 } 843 } 844 845 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 846 { 847 vcmpbfp_internal(env, r, a, b, 0); 848 } 849 850 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 851 ppc_avr_t *b) 852 { 853 vcmpbfp_internal(env, r, a, b, 1); 854 } 855 856 #define VCT(suffix, satcvt, element) \ 857 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 858 ppc_avr_t *b, uint32_t uim) \ 859 { \ 860 int i; \ 861 int sat = 0; \ 862 float_status s = env->vec_status; \ 863 \ 864 set_float_rounding_mode(float_round_to_zero, &s); \ 865 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 866 if (float32_is_any_nan(b->f32[i])) { \ 867 r->element[i] = 0; \ 868 } else { \ 869 float64 t = float32_to_float64(b->f32[i], &s); \ 870 int64_t j; \ 871 \ 872 t = float64_scalbn(t, uim, &s); \ 873 j = float64_to_int64(t, &s); \ 874 
r->element[i] = satcvt(j, &sat); \ 875 } \ 876 } \ 877 if (sat) { \ 878 set_vscr_sat(env); \ 879 } \ 880 } 881 VCT(uxs, cvtsduw, u32) 882 VCT(sxs, cvtsdsw, s32) 883 #undef VCT 884 885 target_ulong helper_vclzlsbb(ppc_avr_t *r) 886 { 887 target_ulong count = 0; 888 int i; 889 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 890 if (r->VsrB(i) & 0x01) { 891 break; 892 } 893 count++; 894 } 895 return count; 896 } 897 898 target_ulong helper_vctzlsbb(ppc_avr_t *r) 899 { 900 target_ulong count = 0; 901 int i; 902 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 903 if (r->VsrB(i) & 0x01) { 904 break; 905 } 906 count++; 907 } 908 return count; 909 } 910 911 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 912 ppc_avr_t *b, ppc_avr_t *c) 913 { 914 int sat = 0; 915 int i; 916 917 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 918 int32_t prod = a->s16[i] * b->s16[i]; 919 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 920 921 r->s16[i] = cvtswsh(t, &sat); 922 } 923 924 if (sat) { 925 set_vscr_sat(env); 926 } 927 } 928 929 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 930 ppc_avr_t *b, ppc_avr_t *c) 931 { 932 int sat = 0; 933 int i; 934 935 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 936 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 937 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 938 r->s16[i] = cvtswsh(t, &sat); 939 } 940 941 if (sat) { 942 set_vscr_sat(env); 943 } 944 } 945 946 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 947 { 948 int i; 949 950 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 951 int32_t prod = a->s16[i] * b->s16[i]; 952 r->s16[i] = (int16_t) (prod + c->s16[i]); 953 } 954 } 955 956 #define VMRG_DO(name, element, access, ofs) \ 957 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 958 { \ 959 ppc_avr_t result; \ 960 int i, half = ARRAY_SIZE(r->element) / 2; \ 961 \ 962 for (i = 0; i < half; i++) { \ 963 result.access(i * 2 + 0) = a->access(i + ofs); \ 964 result.access(i * 2 + 1) = b->access(i + ofs); \ 965 } \ 966 *r = result; \ 967 } 968 969 #define VMRG(suffix, element, access) \ 970 VMRG_DO(mrgl##suffix, element, access, half) \ 971 VMRG_DO(mrgh##suffix, element, access, 0) 972 VMRG(b, u8, VsrB) 973 VMRG(h, u16, VsrH) 974 VMRG(w, u32, VsrW) 975 #undef VMRG_DO 976 #undef VMRG 977 978 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 979 ppc_avr_t *b, ppc_avr_t *c) 980 { 981 int32_t prod[16]; 982 int i; 983 984 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 985 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 986 } 987 988 VECTOR_FOR_INORDER_I(i, s32) { 989 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 990 prod[4 * i + 2] + prod[4 * i + 3]; 991 } 992 } 993 994 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 995 ppc_avr_t *b, ppc_avr_t *c) 996 { 997 int32_t prod[8]; 998 int i; 999 1000 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1001 prod[i] = a->s16[i] * b->s16[i]; 1002 } 1003 1004 VECTOR_FOR_INORDER_I(i, s32) { 1005 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1006 } 1007 } 1008 1009 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1010 ppc_avr_t *b, ppc_avr_t *c) 1011 { 1012 int32_t prod[8]; 1013 int i; 1014 int sat = 0; 1015 1016 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1017 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1018 } 1019 1020 VECTOR_FOR_INORDER_I(i, s32) { 1021 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1022 1023 r->u32[i] = cvtsdsw(t, &sat); 1024 } 1025 1026 if (sat) { 1027 set_vscr_sat(env); 1028 } 1029 } 1030 
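/*
 * Illustrative sketch (not compiled): per word lane, the multiply-sum
 * helpers above compute
 *     c->s32[i] + a->s16[2i] * b->s16[2i] + a->s16[2i+1] * b->s16[2i+1]
 * and vmsumshs additionally saturates the 64-bit sum to int32_t through
 * cvtsdsw(), setting SAT on overflow.  The function name and values below
 * are made up purely for illustration.
 */
#if 0
static int32_t example_vmsumshs_lane(void)
{
    int sat = 0;
    /* 0x7fffffff + 2 * (32767 * 32767) does not fit in an int32_t. */
    int64_t t = (int64_t)INT32_MAX
                + 32767 * 32767    /* prod[2i]     = 0x3fff0001 */
                + 32767 * 32767;   /* prod[2i + 1] = 0x3fff0001 */
    return cvtsdsw(t, &sat);       /* clamps to INT32_MAX, sat = 1 */
}
#endif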
1031 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1032 ppc_avr_t *b, ppc_avr_t *c) 1033 { 1034 uint16_t prod[16]; 1035 int i; 1036 1037 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1038 prod[i] = a->u8[i] * b->u8[i]; 1039 } 1040 1041 VECTOR_FOR_INORDER_I(i, u32) { 1042 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1043 prod[4 * i + 2] + prod[4 * i + 3]; 1044 } 1045 } 1046 1047 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1048 ppc_avr_t *b, ppc_avr_t *c) 1049 { 1050 uint32_t prod[8]; 1051 int i; 1052 1053 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1054 prod[i] = a->u16[i] * b->u16[i]; 1055 } 1056 1057 VECTOR_FOR_INORDER_I(i, u32) { 1058 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1059 } 1060 } 1061 1062 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1063 ppc_avr_t *b, ppc_avr_t *c) 1064 { 1065 uint32_t prod[8]; 1066 int i; 1067 int sat = 0; 1068 1069 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1070 prod[i] = a->u16[i] * b->u16[i]; 1071 } 1072 1073 VECTOR_FOR_INORDER_I(i, s32) { 1074 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1075 1076 r->u32[i] = cvtuduw(t, &sat); 1077 } 1078 1079 if (sat) { 1080 set_vscr_sat(env); 1081 } 1082 } 1083 1084 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 1085 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1086 { \ 1087 int i; \ 1088 \ 1089 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1090 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 1091 (cast)b->mul_access(i); \ 1092 } \ 1093 } 1094 1095 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 1096 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1097 { \ 1098 int i; \ 1099 \ 1100 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1101 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1102 (cast)b->mul_access(i + 1); \ 1103 } \ 1104 } 1105 1106 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1107 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \ 1108 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast) 1109 VMUL(sb, s8, VsrSB, VsrSH, int16_t) 1110 VMUL(sh, s16, VsrSH, VsrSW, int32_t) 1111 VMUL(sw, s32, VsrSW, VsrSD, int64_t) 1112 VMUL(ub, u8, VsrB, VsrH, uint16_t) 1113 VMUL(uh, u16, VsrH, VsrW, uint32_t) 1114 VMUL(uw, u32, VsrW, VsrD, uint64_t) 1115 #undef VMUL_DO_EVN 1116 #undef VMUL_DO_ODD 1117 #undef VMUL 1118 1119 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1120 ppc_avr_t *c) 1121 { 1122 ppc_avr_t result; 1123 int i; 1124 1125 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1126 int s = c->VsrB(i) & 0x1f; 1127 int index = s & 0xf; 1128 1129 if (s & 0x10) { 1130 result.VsrB(i) = b->VsrB(index); 1131 } else { 1132 result.VsrB(i) = a->VsrB(index); 1133 } 1134 } 1135 *r = result; 1136 } 1137 1138 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1139 ppc_avr_t *c) 1140 { 1141 ppc_avr_t result; 1142 int i; 1143 1144 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1145 int s = c->VsrB(i) & 0x1f; 1146 int index = 15 - (s & 0xf); 1147 1148 if (s & 0x10) { 1149 result.VsrB(i) = a->VsrB(index); 1150 } else { 1151 result.VsrB(i) = b->VsrB(index); 1152 } 1153 } 1154 *r = result; 1155 } 1156 1157 #if defined(HOST_WORDS_BIGENDIAN) 1158 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1159 #define VBPERMD_INDEX(i) (i) 1160 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1161 #define EXTRACT_BIT(avr, i, index) 
(extract64((avr)->u64[i], index, 1)) 1162 #else 1163 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1164 #define VBPERMD_INDEX(i) (1 - i) 1165 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1166 #define EXTRACT_BIT(avr, i, index) \ 1167 (extract64((avr)->u64[1 - i], 63 - index, 1)) 1168 #endif 1169 1170 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1171 { 1172 int i, j; 1173 ppc_avr_t result = { .u64 = { 0, 0 } }; 1174 VECTOR_FOR_INORDER_I(i, u64) { 1175 for (j = 0; j < 8; j++) { 1176 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1177 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1178 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1179 } 1180 } 1181 } 1182 *r = result; 1183 } 1184 1185 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1186 { 1187 int i; 1188 uint64_t perm = 0; 1189 1190 VECTOR_FOR_INORDER_I(i, u8) { 1191 int index = VBPERMQ_INDEX(b, i); 1192 1193 if (index < 128) { 1194 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1195 if (a->u64[VBPERMQ_DW(index)] & mask) { 1196 perm |= (0x8000 >> i); 1197 } 1198 } 1199 } 1200 1201 r->VsrD(0) = perm; 1202 r->VsrD(1) = 0; 1203 } 1204 1205 #undef VBPERMQ_INDEX 1206 #undef VBPERMQ_DW 1207 1208 static const uint64_t VGBBD_MASKS[256] = { 1209 0x0000000000000000ull, /* 00 */ 1210 0x0000000000000080ull, /* 01 */ 1211 0x0000000000008000ull, /* 02 */ 1212 0x0000000000008080ull, /* 03 */ 1213 0x0000000000800000ull, /* 04 */ 1214 0x0000000000800080ull, /* 05 */ 1215 0x0000000000808000ull, /* 06 */ 1216 0x0000000000808080ull, /* 07 */ 1217 0x0000000080000000ull, /* 08 */ 1218 0x0000000080000080ull, /* 09 */ 1219 0x0000000080008000ull, /* 0A */ 1220 0x0000000080008080ull, /* 0B */ 1221 0x0000000080800000ull, /* 0C */ 1222 0x0000000080800080ull, /* 0D */ 1223 0x0000000080808000ull, /* 0E */ 1224 0x0000000080808080ull, /* 0F */ 1225 0x0000008000000000ull, /* 10 */ 1226 0x0000008000000080ull, /* 11 */ 1227 0x0000008000008000ull, /* 12 */ 1228 0x0000008000008080ull, /* 13 */ 1229 0x0000008000800000ull, /* 14 */ 1230 0x0000008000800080ull, /* 15 */ 1231 0x0000008000808000ull, /* 16 */ 1232 0x0000008000808080ull, /* 17 */ 1233 0x0000008080000000ull, /* 18 */ 1234 0x0000008080000080ull, /* 19 */ 1235 0x0000008080008000ull, /* 1A */ 1236 0x0000008080008080ull, /* 1B */ 1237 0x0000008080800000ull, /* 1C */ 1238 0x0000008080800080ull, /* 1D */ 1239 0x0000008080808000ull, /* 1E */ 1240 0x0000008080808080ull, /* 1F */ 1241 0x0000800000000000ull, /* 20 */ 1242 0x0000800000000080ull, /* 21 */ 1243 0x0000800000008000ull, /* 22 */ 1244 0x0000800000008080ull, /* 23 */ 1245 0x0000800000800000ull, /* 24 */ 1246 0x0000800000800080ull, /* 25 */ 1247 0x0000800000808000ull, /* 26 */ 1248 0x0000800000808080ull, /* 27 */ 1249 0x0000800080000000ull, /* 28 */ 1250 0x0000800080000080ull, /* 29 */ 1251 0x0000800080008000ull, /* 2A */ 1252 0x0000800080008080ull, /* 2B */ 1253 0x0000800080800000ull, /* 2C */ 1254 0x0000800080800080ull, /* 2D */ 1255 0x0000800080808000ull, /* 2E */ 1256 0x0000800080808080ull, /* 2F */ 1257 0x0000808000000000ull, /* 30 */ 1258 0x0000808000000080ull, /* 31 */ 1259 0x0000808000008000ull, /* 32 */ 1260 0x0000808000008080ull, /* 33 */ 1261 0x0000808000800000ull, /* 34 */ 1262 0x0000808000800080ull, /* 35 */ 1263 0x0000808000808000ull, /* 36 */ 1264 0x0000808000808080ull, /* 37 */ 1265 0x0000808080000000ull, /* 38 */ 1266 0x0000808080000080ull, /* 39 */ 1267 0x0000808080008000ull, /* 3A */ 1268 0x0000808080008080ull, /* 3B */ 1269 0x0000808080800000ull, /* 3C */ 1270 0x0000808080800080ull, /* 3D */ 1271 
0x0000808080808000ull, /* 3E */ 1272 0x0000808080808080ull, /* 3F */ 1273 0x0080000000000000ull, /* 40 */ 1274 0x0080000000000080ull, /* 41 */ 1275 0x0080000000008000ull, /* 42 */ 1276 0x0080000000008080ull, /* 43 */ 1277 0x0080000000800000ull, /* 44 */ 1278 0x0080000000800080ull, /* 45 */ 1279 0x0080000000808000ull, /* 46 */ 1280 0x0080000000808080ull, /* 47 */ 1281 0x0080000080000000ull, /* 48 */ 1282 0x0080000080000080ull, /* 49 */ 1283 0x0080000080008000ull, /* 4A */ 1284 0x0080000080008080ull, /* 4B */ 1285 0x0080000080800000ull, /* 4C */ 1286 0x0080000080800080ull, /* 4D */ 1287 0x0080000080808000ull, /* 4E */ 1288 0x0080000080808080ull, /* 4F */ 1289 0x0080008000000000ull, /* 50 */ 1290 0x0080008000000080ull, /* 51 */ 1291 0x0080008000008000ull, /* 52 */ 1292 0x0080008000008080ull, /* 53 */ 1293 0x0080008000800000ull, /* 54 */ 1294 0x0080008000800080ull, /* 55 */ 1295 0x0080008000808000ull, /* 56 */ 1296 0x0080008000808080ull, /* 57 */ 1297 0x0080008080000000ull, /* 58 */ 1298 0x0080008080000080ull, /* 59 */ 1299 0x0080008080008000ull, /* 5A */ 1300 0x0080008080008080ull, /* 5B */ 1301 0x0080008080800000ull, /* 5C */ 1302 0x0080008080800080ull, /* 5D */ 1303 0x0080008080808000ull, /* 5E */ 1304 0x0080008080808080ull, /* 5F */ 1305 0x0080800000000000ull, /* 60 */ 1306 0x0080800000000080ull, /* 61 */ 1307 0x0080800000008000ull, /* 62 */ 1308 0x0080800000008080ull, /* 63 */ 1309 0x0080800000800000ull, /* 64 */ 1310 0x0080800000800080ull, /* 65 */ 1311 0x0080800000808000ull, /* 66 */ 1312 0x0080800000808080ull, /* 67 */ 1313 0x0080800080000000ull, /* 68 */ 1314 0x0080800080000080ull, /* 69 */ 1315 0x0080800080008000ull, /* 6A */ 1316 0x0080800080008080ull, /* 6B */ 1317 0x0080800080800000ull, /* 6C */ 1318 0x0080800080800080ull, /* 6D */ 1319 0x0080800080808000ull, /* 6E */ 1320 0x0080800080808080ull, /* 6F */ 1321 0x0080808000000000ull, /* 70 */ 1322 0x0080808000000080ull, /* 71 */ 1323 0x0080808000008000ull, /* 72 */ 1324 0x0080808000008080ull, /* 73 */ 1325 0x0080808000800000ull, /* 74 */ 1326 0x0080808000800080ull, /* 75 */ 1327 0x0080808000808000ull, /* 76 */ 1328 0x0080808000808080ull, /* 77 */ 1329 0x0080808080000000ull, /* 78 */ 1330 0x0080808080000080ull, /* 79 */ 1331 0x0080808080008000ull, /* 7A */ 1332 0x0080808080008080ull, /* 7B */ 1333 0x0080808080800000ull, /* 7C */ 1334 0x0080808080800080ull, /* 7D */ 1335 0x0080808080808000ull, /* 7E */ 1336 0x0080808080808080ull, /* 7F */ 1337 0x8000000000000000ull, /* 80 */ 1338 0x8000000000000080ull, /* 81 */ 1339 0x8000000000008000ull, /* 82 */ 1340 0x8000000000008080ull, /* 83 */ 1341 0x8000000000800000ull, /* 84 */ 1342 0x8000000000800080ull, /* 85 */ 1343 0x8000000000808000ull, /* 86 */ 1344 0x8000000000808080ull, /* 87 */ 1345 0x8000000080000000ull, /* 88 */ 1346 0x8000000080000080ull, /* 89 */ 1347 0x8000000080008000ull, /* 8A */ 1348 0x8000000080008080ull, /* 8B */ 1349 0x8000000080800000ull, /* 8C */ 1350 0x8000000080800080ull, /* 8D */ 1351 0x8000000080808000ull, /* 8E */ 1352 0x8000000080808080ull, /* 8F */ 1353 0x8000008000000000ull, /* 90 */ 1354 0x8000008000000080ull, /* 91 */ 1355 0x8000008000008000ull, /* 92 */ 1356 0x8000008000008080ull, /* 93 */ 1357 0x8000008000800000ull, /* 94 */ 1358 0x8000008000800080ull, /* 95 */ 1359 0x8000008000808000ull, /* 96 */ 1360 0x8000008000808080ull, /* 97 */ 1361 0x8000008080000000ull, /* 98 */ 1362 0x8000008080000080ull, /* 99 */ 1363 0x8000008080008000ull, /* 9A */ 1364 0x8000008080008080ull, /* 9B */ 1365 0x8000008080800000ull, /* 9C */ 1366 0x8000008080800080ull, /* 9D */ 1367 
0x8000008080808000ull, /* 9E */ 1368 0x8000008080808080ull, /* 9F */ 1369 0x8000800000000000ull, /* A0 */ 1370 0x8000800000000080ull, /* A1 */ 1371 0x8000800000008000ull, /* A2 */ 1372 0x8000800000008080ull, /* A3 */ 1373 0x8000800000800000ull, /* A4 */ 1374 0x8000800000800080ull, /* A5 */ 1375 0x8000800000808000ull, /* A6 */ 1376 0x8000800000808080ull, /* A7 */ 1377 0x8000800080000000ull, /* A8 */ 1378 0x8000800080000080ull, /* A9 */ 1379 0x8000800080008000ull, /* AA */ 1380 0x8000800080008080ull, /* AB */ 1381 0x8000800080800000ull, /* AC */ 1382 0x8000800080800080ull, /* AD */ 1383 0x8000800080808000ull, /* AE */ 1384 0x8000800080808080ull, /* AF */ 1385 0x8000808000000000ull, /* B0 */ 1386 0x8000808000000080ull, /* B1 */ 1387 0x8000808000008000ull, /* B2 */ 1388 0x8000808000008080ull, /* B3 */ 1389 0x8000808000800000ull, /* B4 */ 1390 0x8000808000800080ull, /* B5 */ 1391 0x8000808000808000ull, /* B6 */ 1392 0x8000808000808080ull, /* B7 */ 1393 0x8000808080000000ull, /* B8 */ 1394 0x8000808080000080ull, /* B9 */ 1395 0x8000808080008000ull, /* BA */ 1396 0x8000808080008080ull, /* BB */ 1397 0x8000808080800000ull, /* BC */ 1398 0x8000808080800080ull, /* BD */ 1399 0x8000808080808000ull, /* BE */ 1400 0x8000808080808080ull, /* BF */ 1401 0x8080000000000000ull, /* C0 */ 1402 0x8080000000000080ull, /* C1 */ 1403 0x8080000000008000ull, /* C2 */ 1404 0x8080000000008080ull, /* C3 */ 1405 0x8080000000800000ull, /* C4 */ 1406 0x8080000000800080ull, /* C5 */ 1407 0x8080000000808000ull, /* C6 */ 1408 0x8080000000808080ull, /* C7 */ 1409 0x8080000080000000ull, /* C8 */ 1410 0x8080000080000080ull, /* C9 */ 1411 0x8080000080008000ull, /* CA */ 1412 0x8080000080008080ull, /* CB */ 1413 0x8080000080800000ull, /* CC */ 1414 0x8080000080800080ull, /* CD */ 1415 0x8080000080808000ull, /* CE */ 1416 0x8080000080808080ull, /* CF */ 1417 0x8080008000000000ull, /* D0 */ 1418 0x8080008000000080ull, /* D1 */ 1419 0x8080008000008000ull, /* D2 */ 1420 0x8080008000008080ull, /* D3 */ 1421 0x8080008000800000ull, /* D4 */ 1422 0x8080008000800080ull, /* D5 */ 1423 0x8080008000808000ull, /* D6 */ 1424 0x8080008000808080ull, /* D7 */ 1425 0x8080008080000000ull, /* D8 */ 1426 0x8080008080000080ull, /* D9 */ 1427 0x8080008080008000ull, /* DA */ 1428 0x8080008080008080ull, /* DB */ 1429 0x8080008080800000ull, /* DC */ 1430 0x8080008080800080ull, /* DD */ 1431 0x8080008080808000ull, /* DE */ 1432 0x8080008080808080ull, /* DF */ 1433 0x8080800000000000ull, /* E0 */ 1434 0x8080800000000080ull, /* E1 */ 1435 0x8080800000008000ull, /* E2 */ 1436 0x8080800000008080ull, /* E3 */ 1437 0x8080800000800000ull, /* E4 */ 1438 0x8080800000800080ull, /* E5 */ 1439 0x8080800000808000ull, /* E6 */ 1440 0x8080800000808080ull, /* E7 */ 1441 0x8080800080000000ull, /* E8 */ 1442 0x8080800080000080ull, /* E9 */ 1443 0x8080800080008000ull, /* EA */ 1444 0x8080800080008080ull, /* EB */ 1445 0x8080800080800000ull, /* EC */ 1446 0x8080800080800080ull, /* ED */ 1447 0x8080800080808000ull, /* EE */ 1448 0x8080800080808080ull, /* EF */ 1449 0x8080808000000000ull, /* F0 */ 1450 0x8080808000000080ull, /* F1 */ 1451 0x8080808000008000ull, /* F2 */ 1452 0x8080808000008080ull, /* F3 */ 1453 0x8080808000800000ull, /* F4 */ 1454 0x8080808000800080ull, /* F5 */ 1455 0x8080808000808000ull, /* F6 */ 1456 0x8080808000808080ull, /* F7 */ 1457 0x8080808080000000ull, /* F8 */ 1458 0x8080808080000080ull, /* F9 */ 1459 0x8080808080008000ull, /* FA */ 1460 0x8080808080008080ull, /* FB */ 1461 0x8080808080800000ull, /* FC */ 1462 0x8080808080800080ull, /* FD */ 1463 
0x8080808080808000ull, /* FE */ 1464 0x8080808080808080ull, /* FF */ 1465 }; 1466 1467 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b) 1468 { 1469 int i; 1470 uint64_t t[2] = { 0, 0 }; 1471 1472 VECTOR_FOR_INORDER_I(i, u8) { 1473 #if defined(HOST_WORDS_BIGENDIAN) 1474 t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7); 1475 #else 1476 t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (7 - (i & 7)); 1477 #endif 1478 } 1479 1480 r->u64[0] = t[0]; 1481 r->u64[1] = t[1]; 1482 } 1483 1484 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1485 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1486 { \ 1487 int i, j; \ 1488 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ 1489 \ 1490 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1491 prod[i] = 0; \ 1492 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1493 if (a->srcfld[i] & (1ull << j)) { \ 1494 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1495 } \ 1496 } \ 1497 } \ 1498 \ 1499 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1500 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ 1501 } \ 1502 } 1503 1504 PMSUM(vpmsumb, u8, u16, uint16_t) 1505 PMSUM(vpmsumh, u16, u32, uint32_t) 1506 PMSUM(vpmsumw, u32, u64, uint64_t) 1507 1508 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1509 { 1510 1511 #ifdef CONFIG_INT128 1512 int i, j; 1513 __uint128_t prod[2]; 1514 1515 VECTOR_FOR_INORDER_I(i, u64) { 1516 prod[i] = 0; 1517 for (j = 0; j < 64; j++) { 1518 if (a->u64[i] & (1ull << j)) { 1519 prod[i] ^= (((__uint128_t)b->u64[i]) << j); 1520 } 1521 } 1522 } 1523 1524 r->u128 = prod[0] ^ prod[1]; 1525 1526 #else 1527 int i, j; 1528 ppc_avr_t prod[2]; 1529 1530 VECTOR_FOR_INORDER_I(i, u64) { 1531 prod[i].VsrD(1) = prod[i].VsrD(0) = 0; 1532 for (j = 0; j < 64; j++) { 1533 if (a->u64[i] & (1ull << j)) { 1534 ppc_avr_t bshift; 1535 if (j == 0) { 1536 bshift.VsrD(0) = 0; 1537 bshift.VsrD(1) = b->u64[i]; 1538 } else { 1539 bshift.VsrD(0) = b->u64[i] >> (64 - j); 1540 bshift.VsrD(1) = b->u64[i] << j; 1541 } 1542 prod[i].VsrD(1) ^= bshift.VsrD(1); 1543 prod[i].VsrD(0) ^= bshift.VsrD(0); 1544 } 1545 } 1546 } 1547 1548 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1); 1549 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0); 1550 #endif 1551 } 1552 1553 1554 #if defined(HOST_WORDS_BIGENDIAN) 1555 #define PKBIG 1 1556 #else 1557 #define PKBIG 0 1558 #endif 1559 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1560 { 1561 int i, j; 1562 ppc_avr_t result; 1563 #if defined(HOST_WORDS_BIGENDIAN) 1564 const ppc_avr_t *x[2] = { a, b }; 1565 #else 1566 const ppc_avr_t *x[2] = { b, a }; 1567 #endif 1568 1569 VECTOR_FOR_INORDER_I(i, u64) { 1570 VECTOR_FOR_INORDER_I(j, u32) { 1571 uint32_t e = x[i]->u32[j]; 1572 1573 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1574 ((e >> 6) & 0x3e0) | 1575 ((e >> 3) & 0x1f)); 1576 } 1577 } 1578 *r = result; 1579 } 1580 1581 #define VPK(suffix, from, to, cvt, dosat) \ 1582 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1583 ppc_avr_t *a, ppc_avr_t *b) \ 1584 { \ 1585 int i; \ 1586 int sat = 0; \ 1587 ppc_avr_t result; \ 1588 ppc_avr_t *a0 = PKBIG ? a : b; \ 1589 ppc_avr_t *a1 = PKBIG ? 
b : a; \ 1590 \ 1591 VECTOR_FOR_INORDER_I(i, from) { \ 1592 result.to[i] = cvt(a0->from[i], &sat); \ 1593 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1594 } \ 1595 *r = result; \ 1596 if (dosat && sat) { \ 1597 set_vscr_sat(env); \ 1598 } \ 1599 } 1600 #define I(x, y) (x) 1601 VPK(shss, s16, s8, cvtshsb, 1) 1602 VPK(shus, s16, u8, cvtshub, 1) 1603 VPK(swss, s32, s16, cvtswsh, 1) 1604 VPK(swus, s32, u16, cvtswuh, 1) 1605 VPK(sdss, s64, s32, cvtsdsw, 1) 1606 VPK(sdus, s64, u32, cvtsduw, 1) 1607 VPK(uhus, u16, u8, cvtuhub, 1) 1608 VPK(uwus, u32, u16, cvtuwuh, 1) 1609 VPK(udus, u64, u32, cvtuduw, 1) 1610 VPK(uhum, u16, u8, I, 0) 1611 VPK(uwum, u32, u16, I, 0) 1612 VPK(udum, u64, u32, I, 0) 1613 #undef I 1614 #undef VPK 1615 #undef PKBIG 1616 1617 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1618 { 1619 int i; 1620 1621 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1622 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1623 } 1624 } 1625 1626 #define VRFI(suffix, rounding) \ 1627 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1628 ppc_avr_t *b) \ 1629 { \ 1630 int i; \ 1631 float_status s = env->vec_status; \ 1632 \ 1633 set_float_rounding_mode(rounding, &s); \ 1634 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1635 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1636 } \ 1637 } 1638 VRFI(n, float_round_nearest_even) 1639 VRFI(m, float_round_down) 1640 VRFI(p, float_round_up) 1641 VRFI(z, float_round_to_zero) 1642 #undef VRFI 1643 1644 #define VROTATE(suffix, element, mask) \ 1645 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1646 { \ 1647 int i; \ 1648 \ 1649 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1650 unsigned int shift = b->element[i] & mask; \ 1651 r->element[i] = (a->element[i] << shift) | \ 1652 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \ 1653 } \ 1654 } 1655 VROTATE(b, u8, 0x7) 1656 VROTATE(h, u16, 0xF) 1657 VROTATE(w, u32, 0x1F) 1658 VROTATE(d, u64, 0x3F) 1659 #undef VROTATE 1660 1661 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1662 { 1663 int i; 1664 1665 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1666 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1667 1668 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1669 } 1670 } 1671 1672 #define VRLMI(name, size, element, insert) \ 1673 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1674 { \ 1675 int i; \ 1676 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1677 uint##size##_t src1 = a->element[i]; \ 1678 uint##size##_t src2 = b->element[i]; \ 1679 uint##size##_t src3 = r->element[i]; \ 1680 uint##size##_t begin, end, shift, mask, rot_val; \ 1681 \ 1682 shift = extract##size(src2, 0, 6); \ 1683 end = extract##size(src2, 8, 6); \ 1684 begin = extract##size(src2, 16, 6); \ 1685 rot_val = rol##size(src1, shift); \ 1686 mask = mask_u##size(begin, end); \ 1687 if (insert) { \ 1688 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1689 } else { \ 1690 r->element[i] = (rot_val & mask); \ 1691 } \ 1692 } \ 1693 } 1694 1695 VRLMI(vrldmi, 64, u64, 1); 1696 VRLMI(vrlwmi, 32, u32, 1); 1697 VRLMI(vrldnm, 64, u64, 0); 1698 VRLMI(vrlwnm, 32, u32, 0); 1699 1700 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1701 ppc_avr_t *c) 1702 { 1703 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]); 1704 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]); 1705 } 1706 1707 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1708 { 
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTU_X_DO(name, size, left)                            \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
{                                                               \
    int index;                                                  \
    if (left) {                                                 \
        index = (a & 0xf) * 8;                                  \
    } else {                                                    \
        index = ((15 - (a & 0xf) + 1) * 8) - size;              \
    }                                                           \
    return int128_getlo(int128_rshift(b->s128, index)) &        \
        MAKE_64BIT_MASK(0, size);                               \
}
#else
#define VEXTU_X_DO(name, size, left)                            \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
{                                                               \
    int index;                                                  \
    if (left) {                                                 \
        index = ((15 - (a & 0xf) + 1) * 8) - size;              \
    } else {                                                    \
        index = (a & 0xf) * 8;                                  \
    }                                                           \
    return int128_getlo(int128_rshift(b->s128, index)) &        \
        MAKE_64BIT_MASK(0, size);                               \
}
#endif

VEXTU_X_DO(vextublx, 8, 1)
VEXTU_X_DO(vextuhlx, 16, 1)
VEXTU_X_DO(vextuwlx, 32, 1)
VEXTU_X_DO(vextubrx, 8, 0)
VEXTU_X_DO(vextuhrx, 16, 0)
VEXTU_X_DO(vextuwrx, 32, 0)
#undef VEXTU_X_DO

/*
 * The specification says that the results are undefined if all of the
 * shift counts are not identical.  We check that they are identical, to
 * conform to what real hardware appears to do.
 */
#define VSHIFT(suffix, leftp)                                           \
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
    {                                                                   \
        int shift = b->VsrB(15) & 0x7;                                  \
        int doit = 1;                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
        }                                                               \
        if (doit) {                                                     \
            if (shift == 0) {                                           \
                *r = *a;                                                \
            } else if (leftp) {                                         \
                uint64_t carry = a->VsrD(1) >> (64 - shift);            \
                                                                        \
                r->VsrD(0) = (a->VsrD(0) << shift) | carry;             \
                r->VsrD(1) = a->VsrD(1) << shift;                       \
            } else {                                                    \
                uint64_t carry = a->VsrD(0) << (64 - shift);            \
                                                                        \
                r->VsrD(1) = (a->VsrD(1) >> shift) | carry;             \
                r->VsrD(0) = a->VsrD(0) >> shift;                       \
            }                                                           \
        }                                                               \
    }
VSHIFT(l, 1)
VSHIFT(r, 0)
#undef VSHIFT

void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->VsrB(i) & 0x7;             /* extract shift value */
        bytes = (a->VsrB(i) << 8) +           /* extract adjacent bytes */
                (((i + 1) < size) ? a->VsrB(i + 1) : 0);
        r->VsrB(i) = (bytes << shift) >> 8;   /* shift and store result */
    }
}

void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /*
     * Iterate in reverse order, as the destination and source registers
     * can be the same.  The register is modified in place, saving a
     * temporary; reverse order guarantees that no already-computed result
     * is fed back into the computation.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->VsrB(i) & 0x7;             /* extract shift value */
        bytes = ((i ?
a->VsrB(i - 1) : 0) << 8) + a->VsrB(i); 1823 /* extract adjacent bytes */ 1824 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */ 1825 } 1826 } 1827 1828 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1829 { 1830 int sh = shift & 0xf; 1831 int i; 1832 ppc_avr_t result; 1833 1834 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1835 int index = sh + i; 1836 if (index > 0xf) { 1837 result.VsrB(i) = b->VsrB(index - 0x10); 1838 } else { 1839 result.VsrB(i) = a->VsrB(index); 1840 } 1841 } 1842 *r = result; 1843 } 1844 1845 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1846 { 1847 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1848 1849 #if defined(HOST_WORDS_BIGENDIAN) 1850 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1851 memset(&r->u8[16 - sh], 0, sh); 1852 #else 1853 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1854 memset(&r->u8[0], 0, sh); 1855 #endif 1856 } 1857 1858 #if defined(HOST_WORDS_BIGENDIAN) 1859 #define VINSERT(suffix, element) \ 1860 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1861 { \ 1862 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \ 1863 sizeof(r->element[0])); \ 1864 } 1865 #else 1866 #define VINSERT(suffix, element) \ 1867 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1868 { \ 1869 uint32_t d = (16 - index) - sizeof(r->element[0]); \ 1870 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \ 1871 } 1872 #endif 1873 VINSERT(b, u8) 1874 VINSERT(h, u16) 1875 VINSERT(w, u32) 1876 VINSERT(d, u64) 1877 #undef VINSERT 1878 #if defined(HOST_WORDS_BIGENDIAN) 1879 #define VEXTRACT(suffix, element) \ 1880 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1881 { \ 1882 uint32_t es = sizeof(r->element[0]); \ 1883 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1884 memset(&r->u8[8], 0, 8); \ 1885 memset(&r->u8[0], 0, 8 - es); \ 1886 } 1887 #else 1888 #define VEXTRACT(suffix, element) \ 1889 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1890 { \ 1891 uint32_t es = sizeof(r->element[0]); \ 1892 uint32_t s = (16 - index) - es; \ 1893 memmove(&r->u8[8], &b->u8[s], es); \ 1894 memset(&r->u8[0], 0, 8); \ 1895 memset(&r->u8[8 + es], 0, 8 - es); \ 1896 } 1897 #endif 1898 VEXTRACT(ub, u8) 1899 VEXTRACT(uh, u16) 1900 VEXTRACT(uw, u32) 1901 VEXTRACT(d, u64) 1902 #undef VEXTRACT 1903 1904 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt, 1905 ppc_vsr_t *xb, uint32_t index) 1906 { 1907 ppc_vsr_t t = { }; 1908 size_t es = sizeof(uint32_t); 1909 uint32_t ext_index; 1910 int i; 1911 1912 ext_index = index; 1913 for (i = 0; i < es; i++, ext_index++) { 1914 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16); 1915 } 1916 1917 *xt = t; 1918 } 1919 1920 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt, 1921 ppc_vsr_t *xb, uint32_t index) 1922 { 1923 ppc_vsr_t t = *xt; 1924 size_t es = sizeof(uint32_t); 1925 int ins_index, i = 0; 1926 1927 ins_index = index; 1928 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1929 t.VsrB(ins_index) = xb->VsrB(8 - es + i); 1930 } 1931 1932 *xt = t; 1933 } 1934 1935 #define VEXT_SIGNED(name, element, cast) \ 1936 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1937 { \ 1938 int i; \ 1939 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1940 r->element[i] = (cast)b->element[i]; \ 1941 } \ 1942 } 1943 VEXT_SIGNED(vextsb2w, s32, int8_t) 1944 VEXT_SIGNED(vextsb2d, s64, int8_t) 1945 VEXT_SIGNED(vextsh2w, s32, int16_t) 1946 VEXT_SIGNED(vextsh2d, s64, int16_t) 1947 VEXT_SIGNED(vextsw2d, s64, int32_t) 1948 
#undef VEXT_SIGNED 1949 1950 #define VNEG(name, element) \ 1951 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1952 { \ 1953 int i; \ 1954 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1955 r->element[i] = -b->element[i]; \ 1956 } \ 1957 } 1958 VNEG(vnegw, s32) 1959 VNEG(vnegd, s64) 1960 #undef VNEG 1961 1962 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1963 { 1964 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1965 1966 #if defined(HOST_WORDS_BIGENDIAN) 1967 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1968 memset(&r->u8[0], 0, sh); 1969 #else 1970 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1971 memset(&r->u8[16 - sh], 0, sh); 1972 #endif 1973 } 1974 1975 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1976 { 1977 int i; 1978 1979 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1980 r->u32[i] = a->u32[i] >= b->u32[i]; 1981 } 1982 } 1983 1984 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1985 { 1986 int64_t t; 1987 int i, upper; 1988 ppc_avr_t result; 1989 int sat = 0; 1990 1991 upper = ARRAY_SIZE(r->s32) - 1; 1992 t = (int64_t)b->VsrSW(upper); 1993 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1994 t += a->VsrSW(i); 1995 result.VsrSW(i) = 0; 1996 } 1997 result.VsrSW(upper) = cvtsdsw(t, &sat); 1998 *r = result; 1999 2000 if (sat) { 2001 set_vscr_sat(env); 2002 } 2003 } 2004 2005 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2006 { 2007 int i, j, upper; 2008 ppc_avr_t result; 2009 int sat = 0; 2010 2011 upper = 1; 2012 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 2013 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 2014 2015 result.VsrD(i) = 0; 2016 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 2017 t += a->VsrSW(2 * i + j); 2018 } 2019 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 2020 } 2021 2022 *r = result; 2023 if (sat) { 2024 set_vscr_sat(env); 2025 } 2026 } 2027 2028 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2029 { 2030 int i, j; 2031 int sat = 0; 2032 2033 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2034 int64_t t = (int64_t)b->s32[i]; 2035 2036 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 2037 t += a->s8[4 * i + j]; 2038 } 2039 r->s32[i] = cvtsdsw(t, &sat); 2040 } 2041 2042 if (sat) { 2043 set_vscr_sat(env); 2044 } 2045 } 2046 2047 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2048 { 2049 int sat = 0; 2050 int i; 2051 2052 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2053 int64_t t = (int64_t)b->s32[i]; 2054 2055 t += a->s16[2 * i] + a->s16[2 * i + 1]; 2056 r->s32[i] = cvtsdsw(t, &sat); 2057 } 2058 2059 if (sat) { 2060 set_vscr_sat(env); 2061 } 2062 } 2063 2064 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2065 { 2066 int i, j; 2067 int sat = 0; 2068 2069 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2070 uint64_t t = (uint64_t)b->u32[i]; 2071 2072 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 2073 t += a->u8[4 * i + j]; 2074 } 2075 r->u32[i] = cvtuduw(t, &sat); 2076 } 2077 2078 if (sat) { 2079 set_vscr_sat(env); 2080 } 2081 } 2082 2083 #if defined(HOST_WORDS_BIGENDIAN) 2084 #define UPKHI 1 2085 #define UPKLO 0 2086 #else 2087 #define UPKHI 0 2088 #define UPKLO 1 2089 #endif 2090 #define VUPKPX(suffix, hi) \ 2091 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2092 { \ 2093 int i; \ 2094 ppc_avr_t result; \ 2095 \ 2096 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 2097 uint16_t e = b->u16[hi ? i : i + 4]; \ 2098 uint8_t a = (e >> 15) ? 
0xff : 0; \
2099 uint8_t r = (e >> 10) & 0x1f; \
2100 uint8_t g = (e >> 5) & 0x1f; \
2101 uint8_t b = e & 0x1f; \
2102 \
2103 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2104 } \
2105 *r = result; \
2106 }
2107 VUPKPX(lpx, UPKLO)
2108 VUPKPX(hpx, UPKHI)
2109 #undef VUPKPX
2110
2111 #define VUPK(suffix, unpacked, packee, hi) \
2112 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2113 { \
2114 int i; \
2115 ppc_avr_t result; \
2116 \
2117 if (hi) { \
2118 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2119 result.unpacked[i] = b->packee[i]; \
2120 } \
2121 } else { \
2122 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2123 i++) { \
2124 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2125 } \
2126 } \
2127 *r = result; \
2128 }
2129 VUPK(hsb, s16, s8, UPKHI)
2130 VUPK(hsh, s32, s16, UPKHI)
2131 VUPK(hsw, s64, s32, UPKHI)
2132 VUPK(lsb, s16, s8, UPKLO)
2133 VUPK(lsh, s32, s16, UPKLO)
2134 VUPK(lsw, s64, s32, UPKLO)
2135 #undef VUPK
2136 #undef UPKHI
2137 #undef UPKLO
2138
2139 #define VGENERIC_DO(name, element) \
2140 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
2141 { \
2142 int i; \
2143 \
2144 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2145 r->element[i] = name(b->element[i]); \
2146 } \
2147 }
2148
2149 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2150 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2151 #define clzw(v) clz32((v))
2152 #define clzd(v) clz64((v))
2153
2154 VGENERIC_DO(clzb, u8)
2155 VGENERIC_DO(clzh, u16)
2156 VGENERIC_DO(clzw, u32)
2157 VGENERIC_DO(clzd, u64)
2158
2159 #undef clzb
2160 #undef clzh
2161 #undef clzw
2162 #undef clzd
2163
2164 #define ctzb(v) ((v) ? ctz32(v) : 8)
2165 #define ctzh(v) ((v) ? ctz32(v) : 16)
2166 #define ctzw(v) ctz32((v))
2167 #define ctzd(v) ctz64((v))
2168
2169 VGENERIC_DO(ctzb, u8)
2170 VGENERIC_DO(ctzh, u16)
2171 VGENERIC_DO(ctzw, u32)
2172 VGENERIC_DO(ctzd, u64)
2173
2174 #undef ctzb
2175 #undef ctzh
2176 #undef ctzw
2177 #undef ctzd
2178
2179 #define popcntb(v) ctpop8(v)
2180 #define popcnth(v) ctpop16(v)
2181 #define popcntw(v) ctpop32(v)
2182 #define popcntd(v) ctpop64(v)
2183
2184 VGENERIC_DO(popcntb, u8)
2185 VGENERIC_DO(popcnth, u16)
2186 VGENERIC_DO(popcntw, u32)
2187 VGENERIC_DO(popcntd, u64)
2188
2189 #undef popcntb
2190 #undef popcnth
2191 #undef popcntw
2192 #undef popcntd
2193
2194 #undef VGENERIC_DO
2195
2196 #if defined(HOST_WORDS_BIGENDIAN)
2197 #define QW_ONE { .u64 = { 0, 1 } }
2198 #else
2199 #define QW_ONE { .u64 = { 1, 0 } }
2200 #endif
2201
2202 #ifndef CONFIG_INT128
2203
2204 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2205 {
2206 t->u64[0] = ~a.u64[0];
2207 t->u64[1] = ~a.u64[1];
2208 }
2209
2210 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2211 {
2212 if (a.VsrD(0) < b.VsrD(0)) {
2213 return -1;
2214 } else if (a.VsrD(0) > b.VsrD(0)) {
2215 return 1;
2216 } else if (a.VsrD(1) < b.VsrD(1)) {
2217 return -1;
2218 } else if (a.VsrD(1) > b.VsrD(1)) {
2219 return 1;
2220 } else {
2221 return 0;
2222 }
2223 }
2224
2225 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2226 {
2227 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2228 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2229 (~a.VsrD(1) < b.VsrD(1));
2230 }
2231
2232 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2233 {
2234 ppc_avr_t not_a;
2235 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2236 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2237 (~a.VsrD(1) < b.VsrD(1));
2238 avr_qw_not(&not_a, a);
2239 return avr_qw_cmpu(not_a, b) < 0;
2240 }
2241
2242 #endif
2243
2244 void
helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2245 {
2246 #ifdef CONFIG_INT128
2247 r->u128 = a->u128 + b->u128;
2248 #else
2249 avr_qw_add(r, *a, *b);
2250 #endif
2251 }
2252
2253 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2254 {
2255 #ifdef CONFIG_INT128
2256 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2257 #else
2258
2259 if (c->VsrD(1) & 1) {
2260 ppc_avr_t tmp;
2261
2262 tmp.VsrD(0) = 0;
2263 tmp.VsrD(1) = c->VsrD(1) & 1;
2264 avr_qw_add(&tmp, *a, tmp);
2265 avr_qw_add(r, tmp, *b);
2266 } else {
2267 avr_qw_add(r, *a, *b);
2268 }
2269 #endif
2270 }
2271
2272 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2273 {
2274 #ifdef CONFIG_INT128
2275 r->u128 = (~a->u128 < b->u128);
2276 #else
2277 ppc_avr_t not_a;
2278
2279 avr_qw_not(&not_a, *a);
2280
2281 r->VsrD(0) = 0;
2282 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
2283 #endif
2284 }
2285
2286 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2287 {
2288 #ifdef CONFIG_INT128
2289 int carry_out = (~a->u128 < b->u128);
2290 if (!carry_out && (c->u128 & 1)) {
2291 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2292 ((a->u128 != 0) || (b->u128 != 0));
2293 }
2294 r->u128 = carry_out;
2295 #else
2296
2297 int carry_in = c->VsrD(1) & 1;
2298 int carry_out = 0;
2299 ppc_avr_t tmp;
2300
2301 carry_out = avr_qw_addc(&tmp, *a, *b);
2302
2303 if (!carry_out && carry_in) {
2304 ppc_avr_t one = QW_ONE;
2305 carry_out = avr_qw_addc(&tmp, tmp, one);
2306 }
2307 r->VsrD(0) = 0;
2308 r->VsrD(1) = carry_out;
2309 #endif
2310 }
2311
2312 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2313 {
2314 #ifdef CONFIG_INT128
2315 r->u128 = a->u128 - b->u128;
2316 #else
2317 ppc_avr_t tmp;
2318 ppc_avr_t one = QW_ONE;
2319
2320 avr_qw_not(&tmp, *b);
2321 avr_qw_add(&tmp, *a, tmp);
2322 avr_qw_add(r, tmp, one);
2323 #endif
2324 }
2325
2326 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2327 {
2328 #ifdef CONFIG_INT128
2329 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2330 #else
2331 ppc_avr_t tmp, sum;
2332
2333 avr_qw_not(&tmp, *b);
2334 avr_qw_add(&sum, *a, tmp);
2335
2336 tmp.VsrD(0) = 0;
2337 tmp.VsrD(1) = c->VsrD(1) & 1;
2338 avr_qw_add(r, sum, tmp);
2339 #endif
2340 }
2341
2342 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2343 {
2344 #ifdef CONFIG_INT128
2345 r->u128 = (~a->u128 < ~b->u128) ||
2346 (a->u128 + ~b->u128 == (__uint128_t)-1);
2347 #else
2348 int carry = (avr_qw_cmpu(*a, *b) > 0);
2349 if (!carry) {
2350 ppc_avr_t tmp;
2351 avr_qw_not(&tmp, *b);
2352 avr_qw_add(&tmp, *a, tmp);
2353 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2354 }
2355 r->VsrD(0) = 0;
2356 r->VsrD(1) = carry;
2357 #endif
2358 }
2359
2360 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2361 {
2362 #ifdef CONFIG_INT128
2363 r->u128 =
2364 (~a->u128 < ~b->u128) ||
2365 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2366 #else
2367 int carry_in = c->VsrD(1) & 1;
2368 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2369 if (!carry_out && carry_in) {
2370 ppc_avr_t tmp;
2371 avr_qw_not(&tmp, *b);
2372 avr_qw_add(&tmp, *a, tmp);
2373 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2374 }
2375
2376 r->VsrD(0) = 0;
2377 r->VsrD(1) = carry_out;
2378 #endif
2379 }
2380
2381 #define BCD_PLUS_PREF_1 0xC
2382 #define BCD_PLUS_PREF_2 0xF
2383 #define BCD_PLUS_ALT_1 0xA
2384 #define BCD_NEG_PREF 0xD
2385 #define BCD_NEG_ALT 0xB
2386 #define BCD_PLUS_ALT_2 0xE
2387 #define
NATIONAL_PLUS 0x2B 2388 #define NATIONAL_NEG 0x2D 2389 2390 #if defined(HOST_WORDS_BIGENDIAN) 2391 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2392 #else 2393 #define BCD_DIG_BYTE(n) ((n) / 2) 2394 #endif 2395 2396 static int bcd_get_sgn(ppc_avr_t *bcd) 2397 { 2398 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) { 2399 case BCD_PLUS_PREF_1: 2400 case BCD_PLUS_PREF_2: 2401 case BCD_PLUS_ALT_1: 2402 case BCD_PLUS_ALT_2: 2403 { 2404 return 1; 2405 } 2406 2407 case BCD_NEG_PREF: 2408 case BCD_NEG_ALT: 2409 { 2410 return -1; 2411 } 2412 2413 default: 2414 { 2415 return 0; 2416 } 2417 } 2418 } 2419 2420 static int bcd_preferred_sgn(int sgn, int ps) 2421 { 2422 if (sgn >= 0) { 2423 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2424 } else { 2425 return BCD_NEG_PREF; 2426 } 2427 } 2428 2429 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2430 { 2431 uint8_t result; 2432 if (n & 1) { 2433 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4; 2434 } else { 2435 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF; 2436 } 2437 2438 if (unlikely(result > 9)) { 2439 *invalid = true; 2440 } 2441 return result; 2442 } 2443 2444 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2445 { 2446 if (n & 1) { 2447 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F; 2448 bcd->u8[BCD_DIG_BYTE(n)] |= (digit << 4); 2449 } else { 2450 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0; 2451 bcd->u8[BCD_DIG_BYTE(n)] |= digit; 2452 } 2453 } 2454 2455 static bool bcd_is_valid(ppc_avr_t *bcd) 2456 { 2457 int i; 2458 int invalid = 0; 2459 2460 if (bcd_get_sgn(bcd) == 0) { 2461 return false; 2462 } 2463 2464 for (i = 1; i < 32; i++) { 2465 bcd_get_digit(bcd, i, &invalid); 2466 if (unlikely(invalid)) { 2467 return false; 2468 } 2469 } 2470 return true; 2471 } 2472 2473 static int bcd_cmp_zero(ppc_avr_t *bcd) 2474 { 2475 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) { 2476 return CRF_EQ; 2477 } else { 2478 return (bcd_get_sgn(bcd) == 1) ? 
CRF_GT : CRF_LT; 2479 } 2480 } 2481 2482 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2483 { 2484 return reg->VsrH(7 - n); 2485 } 2486 2487 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2488 { 2489 reg->VsrH(7 - n) = val; 2490 } 2491 2492 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2493 { 2494 int i; 2495 int invalid = 0; 2496 for (i = 31; i > 0; i--) { 2497 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2498 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2499 if (unlikely(invalid)) { 2500 return 0; /* doesn't matter */ 2501 } else if (dig_a > dig_b) { 2502 return 1; 2503 } else if (dig_a < dig_b) { 2504 return -1; 2505 } 2506 } 2507 2508 return 0; 2509 } 2510 2511 static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2512 int *overflow) 2513 { 2514 int carry = 0; 2515 int i; 2516 for (i = 1; i <= 31; i++) { 2517 uint8_t digit = bcd_get_digit(a, i, invalid) + 2518 bcd_get_digit(b, i, invalid) + carry; 2519 if (digit > 9) { 2520 carry = 1; 2521 digit -= 10; 2522 } else { 2523 carry = 0; 2524 } 2525 2526 bcd_put_digit(t, digit, i); 2527 } 2528 2529 *overflow = carry; 2530 } 2531 2532 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2533 int *overflow) 2534 { 2535 int carry = 0; 2536 int i; 2537 2538 for (i = 1; i <= 31; i++) { 2539 uint8_t digit = bcd_get_digit(a, i, invalid) - 2540 bcd_get_digit(b, i, invalid) + carry; 2541 if (digit & 0x80) { 2542 carry = -1; 2543 digit += 10; 2544 } else { 2545 carry = 0; 2546 } 2547 2548 bcd_put_digit(t, digit, i); 2549 } 2550 2551 *overflow = carry; 2552 } 2553 2554 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2555 { 2556 2557 int sgna = bcd_get_sgn(a); 2558 int sgnb = bcd_get_sgn(b); 2559 int invalid = (sgna == 0) || (sgnb == 0); 2560 int overflow = 0; 2561 uint32_t cr = 0; 2562 ppc_avr_t result = { .u64 = { 0, 0 } }; 2563 2564 if (!invalid) { 2565 if (sgna == sgnb) { 2566 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps); 2567 bcd_add_mag(&result, a, b, &invalid, &overflow); 2568 cr = bcd_cmp_zero(&result); 2569 } else { 2570 int magnitude = bcd_cmp_mag(a, b); 2571 if (magnitude > 0) { 2572 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps); 2573 bcd_sub_mag(&result, a, b, &invalid, &overflow); 2574 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2575 } else if (magnitude < 0) { 2576 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps); 2577 bcd_sub_mag(&result, b, a, &invalid, &overflow); 2578 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2579 } else { 2580 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(0, ps); 2581 cr = CRF_EQ; 2582 } 2583 } 2584 } 2585 2586 if (unlikely(invalid)) { 2587 result.VsrD(0) = result.VsrD(1) = -1; 2588 cr = CRF_SO; 2589 } else if (overflow) { 2590 cr |= CRF_SO; 2591 } 2592 2593 *r = result; 2594 2595 return cr; 2596 } 2597 2598 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2599 { 2600 ppc_avr_t bcopy = *b; 2601 int sgnb = bcd_get_sgn(b); 2602 if (sgnb < 0) { 2603 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2604 } else if (sgnb > 0) { 2605 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2606 } 2607 /* else invalid ... 
defer to bcdadd code for proper handling */ 2608 2609 return helper_bcdadd(r, a, &bcopy, ps); 2610 } 2611 2612 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2613 { 2614 int i; 2615 int cr = 0; 2616 uint16_t national = 0; 2617 uint16_t sgnb = get_national_digit(b, 0); 2618 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2619 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2620 2621 for (i = 1; i < 8; i++) { 2622 national = get_national_digit(b, i); 2623 if (unlikely(national < 0x30 || national > 0x39)) { 2624 invalid = 1; 2625 break; 2626 } 2627 2628 bcd_put_digit(&ret, national & 0xf, i); 2629 } 2630 2631 if (sgnb == NATIONAL_PLUS) { 2632 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2633 } else { 2634 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2635 } 2636 2637 cr = bcd_cmp_zero(&ret); 2638 2639 if (unlikely(invalid)) { 2640 cr = CRF_SO; 2641 } 2642 2643 *r = ret; 2644 2645 return cr; 2646 } 2647 2648 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2649 { 2650 int i; 2651 int cr = 0; 2652 int sgnb = bcd_get_sgn(b); 2653 int invalid = (sgnb == 0); 2654 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2655 2656 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0); 2657 2658 for (i = 1; i < 8; i++) { 2659 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2660 2661 if (unlikely(invalid)) { 2662 break; 2663 } 2664 } 2665 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2666 2667 cr = bcd_cmp_zero(b); 2668 2669 if (ox_flag) { 2670 cr |= CRF_SO; 2671 } 2672 2673 if (unlikely(invalid)) { 2674 cr = CRF_SO; 2675 } 2676 2677 *r = ret; 2678 2679 return cr; 2680 } 2681 2682 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2683 { 2684 int i; 2685 int cr = 0; 2686 int invalid = 0; 2687 int zone_digit = 0; 2688 int zone_lead = ps ? 0xF : 0x3; 2689 int digit = 0; 2690 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2691 int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4; 2692 2693 if (unlikely((sgnb < 0xA) && ps)) { 2694 invalid = 1; 2695 } 2696 2697 for (i = 0; i < 16; i++) { 2698 zone_digit = i ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead; 2699 digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF; 2700 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2701 invalid = 1; 2702 break; 2703 } 2704 2705 bcd_put_digit(&ret, digit, i + 1); 2706 } 2707 2708 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2709 (!ps && (sgnb & 0x4))) { 2710 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2711 } else { 2712 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2713 } 2714 2715 cr = bcd_cmp_zero(&ret); 2716 2717 if (unlikely(invalid)) { 2718 cr = CRF_SO; 2719 } 2720 2721 *r = ret; 2722 2723 return cr; 2724 } 2725 2726 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2727 { 2728 int i; 2729 int cr = 0; 2730 uint8_t digit = 0; 2731 int sgnb = bcd_get_sgn(b); 2732 int zone_lead = (ps) ? 0xF0 : 0x30; 2733 int invalid = (sgnb == 0); 2734 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2735 2736 int ox_flag = ((b->VsrD(0) >> 4) != 0); 2737 2738 for (i = 0; i < 16; i++) { 2739 digit = bcd_get_digit(b, i + 1, &invalid); 2740 2741 if (unlikely(invalid)) { 2742 break; 2743 } 2744 2745 ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit; 2746 } 2747 2748 if (ps) { 2749 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2750 } else { 2751 bcd_put_digit(&ret, (sgnb == 1) ? 
0x3 : 0x7, 1); 2752 } 2753 2754 cr = bcd_cmp_zero(b); 2755 2756 if (ox_flag) { 2757 cr |= CRF_SO; 2758 } 2759 2760 if (unlikely(invalid)) { 2761 cr = CRF_SO; 2762 } 2763 2764 *r = ret; 2765 2766 return cr; 2767 } 2768 2769 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2770 { 2771 int i; 2772 int cr = 0; 2773 uint64_t lo_value; 2774 uint64_t hi_value; 2775 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2776 2777 if (b->VsrSD(0) < 0) { 2778 lo_value = -b->VsrSD(1); 2779 hi_value = ~b->VsrD(0) + !lo_value; 2780 bcd_put_digit(&ret, 0xD, 0); 2781 } else { 2782 lo_value = b->VsrD(1); 2783 hi_value = b->VsrD(0); 2784 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); 2785 } 2786 2787 if (divu128(&lo_value, &hi_value, 1000000000000000ULL) || 2788 lo_value > 9999999999999999ULL) { 2789 cr = CRF_SO; 2790 } 2791 2792 for (i = 1; i < 16; hi_value /= 10, i++) { 2793 bcd_put_digit(&ret, hi_value % 10, i); 2794 } 2795 2796 for (; i < 32; lo_value /= 10, i++) { 2797 bcd_put_digit(&ret, lo_value % 10, i); 2798 } 2799 2800 cr |= bcd_cmp_zero(&ret); 2801 2802 *r = ret; 2803 2804 return cr; 2805 } 2806 2807 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2808 { 2809 uint8_t i; 2810 int cr; 2811 uint64_t carry; 2812 uint64_t unused; 2813 uint64_t lo_value; 2814 uint64_t hi_value = 0; 2815 int sgnb = bcd_get_sgn(b); 2816 int invalid = (sgnb == 0); 2817 2818 lo_value = bcd_get_digit(b, 31, &invalid); 2819 for (i = 30; i > 0; i--) { 2820 mulu64(&lo_value, &carry, lo_value, 10ULL); 2821 mulu64(&hi_value, &unused, hi_value, 10ULL); 2822 lo_value += bcd_get_digit(b, i, &invalid); 2823 hi_value += carry; 2824 2825 if (unlikely(invalid)) { 2826 break; 2827 } 2828 } 2829 2830 if (sgnb == -1) { 2831 r->VsrSD(1) = -lo_value; 2832 r->VsrSD(0) = ~hi_value + !r->VsrSD(1); 2833 } else { 2834 r->VsrSD(1) = lo_value; 2835 r->VsrSD(0) = hi_value; 2836 } 2837 2838 cr = bcd_cmp_zero(b); 2839 2840 if (unlikely(invalid)) { 2841 cr = CRF_SO; 2842 } 2843 2844 return cr; 2845 } 2846 2847 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2848 { 2849 int i; 2850 int invalid = 0; 2851 2852 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 2853 return CRF_SO; 2854 } 2855 2856 *r = *a; 2857 bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0); 2858 2859 for (i = 1; i < 32; i++) { 2860 bcd_get_digit(a, i, &invalid); 2861 bcd_get_digit(b, i, &invalid); 2862 if (unlikely(invalid)) { 2863 return CRF_SO; 2864 } 2865 } 2866 2867 return bcd_cmp_zero(r); 2868 } 2869 2870 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2871 { 2872 int sgnb = bcd_get_sgn(b); 2873 2874 *r = *b; 2875 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 2876 2877 if (bcd_is_valid(b) == false) { 2878 return CRF_SO; 2879 } 2880 2881 return bcd_cmp_zero(r); 2882 } 2883 2884 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2885 { 2886 int cr; 2887 #if defined(HOST_WORDS_BIGENDIAN) 2888 int i = a->s8[7]; 2889 #else 2890 int i = a->s8[8]; 2891 #endif 2892 bool ox_flag = false; 2893 int sgnb = bcd_get_sgn(b); 2894 ppc_avr_t ret = *b; 2895 ret.VsrD(1) &= ~0xf; 2896 2897 if (bcd_is_valid(b) == false) { 2898 return CRF_SO; 2899 } 2900 2901 if (unlikely(i > 31)) { 2902 i = 31; 2903 } else if (unlikely(i < -31)) { 2904 i = -31; 2905 } 2906 2907 if (i > 0) { 2908 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2909 } else { 2910 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2911 } 2912 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2913 2914 *r = ret; 2915 2916 cr = 
bcd_cmp_zero(r); 2917 if (ox_flag) { 2918 cr |= CRF_SO; 2919 } 2920 2921 return cr; 2922 } 2923 2924 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2925 { 2926 int cr; 2927 int i; 2928 int invalid = 0; 2929 bool ox_flag = false; 2930 ppc_avr_t ret = *b; 2931 2932 for (i = 0; i < 32; i++) { 2933 bcd_get_digit(b, i, &invalid); 2934 2935 if (unlikely(invalid)) { 2936 return CRF_SO; 2937 } 2938 } 2939 2940 #if defined(HOST_WORDS_BIGENDIAN) 2941 i = a->s8[7]; 2942 #else 2943 i = a->s8[8]; 2944 #endif 2945 if (i >= 32) { 2946 ox_flag = true; 2947 ret.VsrD(1) = ret.VsrD(0) = 0; 2948 } else if (i <= -32) { 2949 ret.VsrD(1) = ret.VsrD(0) = 0; 2950 } else if (i > 0) { 2951 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2952 } else { 2953 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2954 } 2955 *r = ret; 2956 2957 cr = bcd_cmp_zero(r); 2958 if (ox_flag) { 2959 cr |= CRF_SO; 2960 } 2961 2962 return cr; 2963 } 2964 2965 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2966 { 2967 int cr; 2968 int unused = 0; 2969 int invalid = 0; 2970 bool ox_flag = false; 2971 int sgnb = bcd_get_sgn(b); 2972 ppc_avr_t ret = *b; 2973 ret.VsrD(1) &= ~0xf; 2974 2975 #if defined(HOST_WORDS_BIGENDIAN) 2976 int i = a->s8[7]; 2977 ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } }; 2978 #else 2979 int i = a->s8[8]; 2980 ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } }; 2981 #endif 2982 2983 if (bcd_is_valid(b) == false) { 2984 return CRF_SO; 2985 } 2986 2987 if (unlikely(i > 31)) { 2988 i = 31; 2989 } else if (unlikely(i < -31)) { 2990 i = -31; 2991 } 2992 2993 if (i > 0) { 2994 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2995 } else { 2996 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2997 2998 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 2999 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 3000 } 3001 } 3002 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 3003 3004 cr = bcd_cmp_zero(&ret); 3005 if (ox_flag) { 3006 cr |= CRF_SO; 3007 } 3008 *r = ret; 3009 3010 return cr; 3011 } 3012 3013 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3014 { 3015 uint64_t mask; 3016 uint32_t ox_flag = 0; 3017 #if defined(HOST_WORDS_BIGENDIAN) 3018 int i = a->s16[3] + 1; 3019 #else 3020 int i = a->s16[4] + 1; 3021 #endif 3022 ppc_avr_t ret = *b; 3023 3024 if (bcd_is_valid(b) == false) { 3025 return CRF_SO; 3026 } 3027 3028 if (i > 16 && i < 32) { 3029 mask = (uint64_t)-1 >> (128 - i * 4); 3030 if (ret.VsrD(0) & ~mask) { 3031 ox_flag = CRF_SO; 3032 } 3033 3034 ret.VsrD(0) &= mask; 3035 } else if (i >= 0 && i <= 16) { 3036 mask = (uint64_t)-1 >> (64 - i * 4); 3037 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 3038 ox_flag = CRF_SO; 3039 } 3040 3041 ret.VsrD(1) &= mask; 3042 ret.VsrD(0) = 0; 3043 } 3044 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 3045 *r = ret; 3046 3047 return bcd_cmp_zero(&ret) | ox_flag; 3048 } 3049 3050 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3051 { 3052 int i; 3053 uint64_t mask; 3054 uint32_t ox_flag = 0; 3055 int invalid = 0; 3056 ppc_avr_t ret = *b; 3057 3058 for (i = 0; i < 32; i++) { 3059 bcd_get_digit(b, i, &invalid); 3060 3061 if (unlikely(invalid)) { 3062 return CRF_SO; 3063 } 3064 } 3065 3066 #if defined(HOST_WORDS_BIGENDIAN) 3067 i = a->s16[3]; 3068 #else 3069 i = a->s16[4]; 3070 #endif 3071 if (i > 16 && i < 33) { 3072 mask = (uint64_t)-1 >> (128 - i * 4); 3073 if (ret.VsrD(0) & ~mask) { 3074 ox_flag = CRF_SO; 3075 } 3076 3077 ret.VsrD(0) &= mask; 3078 } 
else if (i > 0 && i <= 16) { 3079 mask = (uint64_t)-1 >> (64 - i * 4); 3080 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 3081 ox_flag = CRF_SO; 3082 } 3083 3084 ret.VsrD(1) &= mask; 3085 ret.VsrD(0) = 0; 3086 } else if (i == 0) { 3087 if (ret.VsrD(0) || ret.VsrD(1)) { 3088 ox_flag = CRF_SO; 3089 } 3090 ret.VsrD(0) = ret.VsrD(1) = 0; 3091 } 3092 3093 *r = ret; 3094 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) { 3095 return ox_flag | CRF_EQ; 3096 } 3097 3098 return ox_flag | CRF_GT; 3099 } 3100 3101 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 3102 { 3103 int i; 3104 VECTOR_FOR_INORDER_I(i, u8) { 3105 r->u8[i] = AES_sbox[a->u8[i]]; 3106 } 3107 } 3108 3109 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3110 { 3111 ppc_avr_t result; 3112 int i; 3113 3114 VECTOR_FOR_INORDER_I(i, u32) { 3115 result.VsrW(i) = b->VsrW(i) ^ 3116 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^ 3117 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^ 3118 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^ 3119 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]); 3120 } 3121 *r = result; 3122 } 3123 3124 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3125 { 3126 ppc_avr_t result; 3127 int i; 3128 3129 VECTOR_FOR_INORDER_I(i, u8) { 3130 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]); 3131 } 3132 *r = result; 3133 } 3134 3135 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3136 { 3137 /* This differs from what is written in ISA V2.07. The RTL is */ 3138 /* incorrect and will be fixed in V2.07B. */ 3139 int i; 3140 ppc_avr_t tmp; 3141 3142 VECTOR_FOR_INORDER_I(i, u8) { 3143 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])]; 3144 } 3145 3146 VECTOR_FOR_INORDER_I(i, u32) { 3147 r->VsrW(i) = 3148 AES_imc[tmp.VsrB(4 * i + 0)][0] ^ 3149 AES_imc[tmp.VsrB(4 * i + 1)][1] ^ 3150 AES_imc[tmp.VsrB(4 * i + 2)][2] ^ 3151 AES_imc[tmp.VsrB(4 * i + 3)][3]; 3152 } 3153 } 3154 3155 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3156 { 3157 ppc_avr_t result; 3158 int i; 3159 3160 VECTOR_FOR_INORDER_I(i, u8) { 3161 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]); 3162 } 3163 *r = result; 3164 } 3165 3166 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3167 { 3168 int st = (st_six & 0x10) != 0; 3169 int six = st_six & 0xF; 3170 int i; 3171 3172 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 3173 if (st == 0) { 3174 if ((six & (0x8 >> i)) == 0) { 3175 r->VsrW(i) = ror32(a->VsrW(i), 7) ^ 3176 ror32(a->VsrW(i), 18) ^ 3177 (a->VsrW(i) >> 3); 3178 } else { /* six.bit[i] == 1 */ 3179 r->VsrW(i) = ror32(a->VsrW(i), 17) ^ 3180 ror32(a->VsrW(i), 19) ^ 3181 (a->VsrW(i) >> 10); 3182 } 3183 } else { /* st == 1 */ 3184 if ((six & (0x8 >> i)) == 0) { 3185 r->VsrW(i) = ror32(a->VsrW(i), 2) ^ 3186 ror32(a->VsrW(i), 13) ^ 3187 ror32(a->VsrW(i), 22); 3188 } else { /* six.bit[i] == 1 */ 3189 r->VsrW(i) = ror32(a->VsrW(i), 6) ^ 3190 ror32(a->VsrW(i), 11) ^ 3191 ror32(a->VsrW(i), 25); 3192 } 3193 } 3194 } 3195 } 3196 3197 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3198 { 3199 int st = (st_six & 0x10) != 0; 3200 int six = st_six & 0xF; 3201 int i; 3202 3203 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 3204 if (st == 0) { 3205 if ((six & (0x8 >> (2 * i))) == 0) { 3206 r->VsrD(i) = ror64(a->VsrD(i), 1) ^ 3207 ror64(a->VsrD(i), 8) ^ 3208 (a->VsrD(i) >> 7); 3209 } else { /* six.bit[2*i] == 1 */ 3210 r->VsrD(i) = ror64(a->VsrD(i), 19) ^ 3211 ror64(a->VsrD(i), 61) ^ 3212 (a->VsrD(i) >> 6); 3213 } 3214 } else { /* st == 1 */ 3215 if ((six & (0x8 >> (2 * 
i))) == 0) { 3216 r->VsrD(i) = ror64(a->VsrD(i), 28) ^ 3217 ror64(a->VsrD(i), 34) ^ 3218 ror64(a->VsrD(i), 39); 3219 } else { /* six.bit[2*i] == 1 */ 3220 r->VsrD(i) = ror64(a->VsrD(i), 14) ^ 3221 ror64(a->VsrD(i), 18) ^ 3222 ror64(a->VsrD(i), 41); 3223 } 3224 } 3225 } 3226 } 3227 3228 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 3229 { 3230 ppc_avr_t result; 3231 int i; 3232 3233 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 3234 int indexA = c->VsrB(i) >> 4; 3235 int indexB = c->VsrB(i) & 0xF; 3236 3237 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB); 3238 } 3239 *r = result; 3240 } 3241 3242 #undef VECTOR_FOR_INORDER_I 3243 3244 /*****************************************************************************/ 3245 /* SPE extension helpers */ 3246 /* Use a table to make this quicker */ 3247 static const uint8_t hbrev[16] = { 3248 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE, 3249 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF, 3250 }; 3251 3252 static inline uint8_t byte_reverse(uint8_t val) 3253 { 3254 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4); 3255 } 3256 3257 static inline uint32_t word_reverse(uint32_t val) 3258 { 3259 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) | 3260 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24); 3261 } 3262 3263 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */ 3264 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2) 3265 { 3266 uint32_t a, b, d, mask; 3267 3268 mask = UINT32_MAX >> (32 - MASKBITS); 3269 a = arg1 & mask; 3270 b = arg2 & mask; 3271 d = word_reverse(1 + word_reverse(a | ~b)); 3272 return (arg1 & ~mask) | (d & b); 3273 } 3274 3275 uint32_t helper_cntlsw32(uint32_t val) 3276 { 3277 if (val & 0x80000000) { 3278 return clz32(~val); 3279 } else { 3280 return clz32(val); 3281 } 3282 } 3283 3284 uint32_t helper_cntlzw32(uint32_t val) 3285 { 3286 return clz32(val); 3287 } 3288 3289 /* 440 specific */ 3290 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 3291 target_ulong low, uint32_t update_Rc) 3292 { 3293 target_ulong mask; 3294 int i; 3295 3296 i = 1; 3297 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3298 if ((high & mask) == 0) { 3299 if (update_Rc) { 3300 env->crf[0] = 0x4; 3301 } 3302 goto done; 3303 } 3304 i++; 3305 } 3306 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3307 if ((low & mask) == 0) { 3308 if (update_Rc) { 3309 env->crf[0] = 0x8; 3310 } 3311 goto done; 3312 } 3313 i++; 3314 } 3315 i = 8; 3316 if (update_Rc) { 3317 env->crf[0] = 0x2; 3318 } 3319 done: 3320 env->xer = (env->xer & ~0x7F) | i; 3321 if (update_Rc) { 3322 env->crf[0] |= xer_so; 3323 } 3324 return i; 3325 } 3326
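/*
 * A minimal sketch of the signed packed-decimal layout assumed by the
 * bcd_* helpers above.  Digit 0, addressed through BCD_DIG_BYTE(0), is
 * the sign nibble (BCD_PLUS_PREF_1, BCD_NEG_PREF, ...); digits 1..31 run
 * from the least to the most significant decimal digit, two digits per
 * byte with odd-numbered digits in the high nibble.  +123 with the
 * preferred plus sign is therefore 0x123C in the low doubleword, and
 * bcd_cmp_zero() maps it to CRF_GT (CRF_LT for 0x123D, CRF_EQ when all
 * 31 digits are zero).  The fragment below is illustrative only and is
 * kept out of the build with #if 0: the function name and operand values
 * are invented for the example, and in practice these helpers are reached
 * from generated code rather than called directly.
 */
#if 0
static uint32_t bcd_add_example(void)
{
    ppc_avr_t a, b, r;

    a.VsrD(0) = 0;
    a.VsrD(1) = 0x123C;                 /* +123, preferred plus sign 0xC */
    b.VsrD(0) = 0;
    b.VsrD(1) = 0x001C;                 /* +1 */

    /* r becomes 0x124C (+124); the returned CR field is CRF_GT */
    return helper_bcdadd(&r, &a, &b, 0);
}
#endif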