1 /* 2 * PowerPC integer and vector emulation helpers for QEMU. 3 * 4 * Copyright (c) 2003-2007 Jocelyn Mayer 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 #include "qemu/osdep.h" 20 #include "cpu.h" 21 #include "internal.h" 22 #include "qemu/host-utils.h" 23 #include "exec/helper-proto.h" 24 #include "crypto/aes.h" 25 #include "fpu/softfloat.h" 26 27 #include "helper_regs.h" 28 /*****************************************************************************/ 29 /* Fixed point operations helpers */ 30 31 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov) 32 { 33 if (unlikely(ov)) { 34 env->so = env->ov = 1; 35 } else { 36 env->ov = 0; 37 } 38 } 39 40 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, 41 uint32_t oe) 42 { 43 uint64_t rt = 0; 44 int overflow = 0; 45 46 uint64_t dividend = (uint64_t)ra << 32; 47 uint64_t divisor = (uint32_t)rb; 48 49 if (unlikely(divisor == 0)) { 50 overflow = 1; 51 } else { 52 rt = dividend / divisor; 53 overflow = rt > UINT32_MAX; 54 } 55 56 if (unlikely(overflow)) { 57 rt = 0; /* Undefined */ 58 } 59 60 if (oe) { 61 helper_update_ov_legacy(env, overflow); 62 } 63 64 return (target_ulong)rt; 65 } 66 67 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, 68 uint32_t oe) 69 { 70 int64_t rt = 0; 71 int overflow = 0; 72 73 
int64_t dividend = (int64_t)ra << 32; 74 int64_t divisor = (int64_t)((int32_t)rb); 75 76 if (unlikely((divisor == 0) || 77 ((divisor == -1ull) && (dividend == INT64_MIN)))) { 78 overflow = 1; 79 } else { 80 rt = dividend / divisor; 81 overflow = rt != (int32_t)rt; 82 } 83 84 if (unlikely(overflow)) { 85 rt = 0; /* Undefined */ 86 } 87 88 if (oe) { 89 helper_update_ov_legacy(env, overflow); 90 } 91 92 return (target_ulong)rt; 93 } 94 95 #if defined(TARGET_PPC64) 96 97 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) 98 { 99 uint64_t rt = 0; 100 int overflow = 0; 101 102 overflow = divu128(&rt, &ra, rb); 103 104 if (unlikely(overflow)) { 105 rt = 0; /* Undefined */ 106 } 107 108 if (oe) { 109 helper_update_ov_legacy(env, overflow); 110 } 111 112 return rt; 113 } 114 115 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) 116 { 117 int64_t rt = 0; 118 int64_t ra = (int64_t)rau; 119 int64_t rb = (int64_t)rbu; 120 int overflow = divs128(&rt, &ra, rb); 121 122 if (unlikely(overflow)) { 123 rt = 0; /* Undefined */ 124 } 125 126 if (oe) { 127 helper_update_ov_legacy(env, overflow); 128 } 129 130 return rt; 131 } 132 133 #endif 134 135 136 #if defined(TARGET_PPC64) 137 /* if x = 0xab, returns 0xababababababababa */ 138 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff)) 139 140 /* substract 1 from each byte, and with inverse, check if MSB is set at each 141 * byte. 142 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80 143 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 144 */ 145 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 146 147 /* When you XOR the pattern and there is a match, that byte will be zero */ 148 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 149 150 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 151 { 152 return hasvalue(rb, ra) ? CRF_GT : 0; 153 } 154 155 #undef pattern 156 #undef haszero 157 #undef hasvalue 158 159 /* Return invalid random number. 
160 * 161 * FIXME: Add rng backend or other mechanism to get cryptographically suitable 162 * random number 163 */ 164 target_ulong helper_darn32(void) 165 { 166 return -1; 167 } 168 169 target_ulong helper_darn64(void) 170 { 171 return -1; 172 } 173 174 #endif 175 176 #if defined(TARGET_PPC64) 177 178 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 179 { 180 int i; 181 uint64_t ra = 0; 182 183 for (i = 0; i < 8; i++) { 184 int index = (rs >> (i*8)) & 0xFF; 185 if (index < 64) { 186 if (rb & PPC_BIT(index)) { 187 ra |= 1 << i; 188 } 189 } 190 } 191 return ra; 192 } 193 194 #endif 195 196 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 197 { 198 target_ulong mask = 0xff; 199 target_ulong ra = 0; 200 int i; 201 202 for (i = 0; i < sizeof(target_ulong); i++) { 203 if ((rs & mask) == (rb & mask)) { 204 ra |= mask; 205 } 206 mask <<= 8; 207 } 208 return ra; 209 } 210 211 /* shift right arithmetic helper */ 212 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 213 target_ulong shift) 214 { 215 int32_t ret; 216 217 if (likely(!(shift & 0x20))) { 218 if (likely((uint32_t)shift != 0)) { 219 shift &= 0x1f; 220 ret = (int32_t)value >> shift; 221 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 222 env->ca32 = env->ca = 0; 223 } else { 224 env->ca32 = env->ca = 1; 225 } 226 } else { 227 ret = (int32_t)value; 228 env->ca32 = env->ca = 0; 229 } 230 } else { 231 ret = (int32_t)value >> 31; 232 env->ca32 = env->ca = (ret != 0); 233 } 234 return (target_long)ret; 235 } 236 237 #if defined(TARGET_PPC64) 238 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 239 target_ulong shift) 240 { 241 int64_t ret; 242 243 if (likely(!(shift & 0x40))) { 244 if (likely((uint64_t)shift != 0)) { 245 shift &= 0x3f; 246 ret = (int64_t)value >> shift; 247 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 248 env->ca32 = env->ca = 0; 249 } else { 250 env->ca32 = env->ca = 1; 251 } 252 } else { 253 ret = (int64_t)value; 254 env->ca32 = 
env->ca = 0; 255 } 256 } else { 257 ret = (int64_t)value >> 63; 258 env->ca32 = env->ca = (ret != 0); 259 } 260 return ret; 261 } 262 #endif 263 264 #if defined(TARGET_PPC64) 265 target_ulong helper_popcntb(target_ulong val) 266 { 267 /* Note that we don't fold past bytes */ 268 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 269 0x5555555555555555ULL); 270 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 271 0x3333333333333333ULL); 272 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 273 0x0f0f0f0f0f0f0f0fULL); 274 return val; 275 } 276 277 target_ulong helper_popcntw(target_ulong val) 278 { 279 /* Note that we don't fold past words. */ 280 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 281 0x5555555555555555ULL); 282 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 283 0x3333333333333333ULL); 284 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 285 0x0f0f0f0f0f0f0f0fULL); 286 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 287 0x00ff00ff00ff00ffULL); 288 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 289 0x0000ffff0000ffffULL); 290 return val; 291 } 292 #else 293 target_ulong helper_popcntb(target_ulong val) 294 { 295 /* Note that we don't fold past bytes */ 296 val = (val & 0x55555555) + ((val >> 1) & 0x55555555); 297 val = (val & 0x33333333) + ((val >> 2) & 0x33333333); 298 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); 299 return val; 300 } 301 #endif 302 303 /*****************************************************************************/ 304 /* PowerPC 601 specific instructions (POWER bridge) */ 305 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2) 306 { 307 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 308 309 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 310 (int32_t)arg2 == 0) { 311 env->spr[SPR_MQ] = 0; 312 return INT32_MIN; 313 } else { 314 env->spr[SPR_MQ] = tmp % arg2; 315 return tmp / (int32_t)arg2; 316 } 317 } 318 319 target_ulong helper_divo(CPUPPCState 
*env, target_ulong arg1, 320 target_ulong arg2) 321 { 322 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 323 324 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 325 (int32_t)arg2 == 0) { 326 env->so = env->ov = 1; 327 env->spr[SPR_MQ] = 0; 328 return INT32_MIN; 329 } else { 330 env->spr[SPR_MQ] = tmp % arg2; 331 tmp /= (int32_t)arg2; 332 if ((int32_t)tmp != tmp) { 333 env->so = env->ov = 1; 334 } else { 335 env->ov = 0; 336 } 337 return tmp; 338 } 339 } 340 341 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1, 342 target_ulong arg2) 343 { 344 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 345 (int32_t)arg2 == 0) { 346 env->spr[SPR_MQ] = 0; 347 return INT32_MIN; 348 } else { 349 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 350 return (int32_t)arg1 / (int32_t)arg2; 351 } 352 } 353 354 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1, 355 target_ulong arg2) 356 { 357 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 358 (int32_t)arg2 == 0) { 359 env->so = env->ov = 1; 360 env->spr[SPR_MQ] = 0; 361 return INT32_MIN; 362 } else { 363 env->ov = 0; 364 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 365 return (int32_t)arg1 / (int32_t)arg2; 366 } 367 } 368 369 /*****************************************************************************/ 370 /* 602 specific instructions */ 371 /* mfrom is the most crazy instruction ever seen, imho ! */ 372 /* Real implementation uses a ROM table. 
Do the same */ 373 /* Extremely decomposed: 374 * -arg / 256 375 * return 256 * log10(10 + 1.0) + 0.5 376 */ 377 #if !defined(CONFIG_USER_ONLY) 378 target_ulong helper_602_mfrom(target_ulong arg) 379 { 380 if (likely(arg < 602)) { 381 #include "mfrom_table.inc.c" 382 return mfrom_ROM_table[arg]; 383 } else { 384 return 0; 385 } 386 } 387 #endif 388 389 /*****************************************************************************/ 390 /* Altivec extension helpers */ 391 #if defined(HOST_WORDS_BIGENDIAN) 392 #define VECTOR_FOR_INORDER_I(index, element) \ 393 for (index = 0; index < ARRAY_SIZE(r->element); index++) 394 #else 395 #define VECTOR_FOR_INORDER_I(index, element) \ 396 for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--) 397 #endif 398 399 /* Saturating arithmetic helpers. */ 400 #define SATCVT(from, to, from_type, to_type, min, max) \ 401 static inline to_type cvt##from##to(from_type x, int *sat) \ 402 { \ 403 to_type r; \ 404 \ 405 if (x < (from_type)min) { \ 406 r = min; \ 407 *sat = 1; \ 408 } else if (x > (from_type)max) { \ 409 r = max; \ 410 *sat = 1; \ 411 } else { \ 412 r = x; \ 413 } \ 414 return r; \ 415 } 416 #define SATCVTU(from, to, from_type, to_type, min, max) \ 417 static inline to_type cvt##from##to(from_type x, int *sat) \ 418 { \ 419 to_type r; \ 420 \ 421 if (x > (from_type)max) { \ 422 r = max; \ 423 *sat = 1; \ 424 } else { \ 425 r = x; \ 426 } \ 427 return r; \ 428 } 429 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX) 430 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX) 431 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX) 432 433 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX) 434 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX) 435 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX) 436 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX) 437 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX) 438 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX) 439 #undef SATCVT 440 #undef SATCVTU 441 442 void helper_lvsl(ppc_avr_t 
*r, target_ulong sh) 443 { 444 int i, j = (sh & 0xf); 445 446 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 447 r->VsrB(i) = j++; 448 } 449 } 450 451 void helper_lvsr(ppc_avr_t *r, target_ulong sh) 452 { 453 int i, j = 0x10 - (sh & 0xf); 454 455 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 456 r->VsrB(i) = j++; 457 } 458 } 459 460 void helper_mtvscr(CPUPPCState *env, uint32_t vscr) 461 { 462 env->vscr = vscr & ~(1u << VSCR_SAT); 463 /* Which bit we set is completely arbitrary, but clear the rest. */ 464 env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT); 465 env->vscr_sat.u64[1] = 0; 466 set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status); 467 } 468 469 uint32_t helper_mfvscr(CPUPPCState *env) 470 { 471 uint32_t sat = (env->vscr_sat.u64[0] | env->vscr_sat.u64[1]) != 0; 472 return env->vscr | (sat << VSCR_SAT); 473 } 474 475 static inline void set_vscr_sat(CPUPPCState *env) 476 { 477 /* The choice of non-zero value is arbitrary. */ 478 env->vscr_sat.u32[0] = 1; 479 } 480 481 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 482 { 483 int i; 484 485 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 486 r->u32[i] = ~a->u32[i] < b->u32[i]; 487 } 488 } 489 490 /* vprtybw */ 491 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b) 492 { 493 int i; 494 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 495 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16); 496 res ^= res >> 8; 497 r->u32[i] = res & 1; 498 } 499 } 500 501 /* vprtybd */ 502 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b) 503 { 504 int i; 505 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 506 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32); 507 res ^= res >> 16; 508 res ^= res >> 8; 509 r->u64[i] = res & 1; 510 } 511 } 512 513 /* vprtybq */ 514 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b) 515 { 516 uint64_t res = b->u64[0] ^ b->u64[1]; 517 res ^= res >> 32; 518 res ^= res >> 16; 519 res ^= res >> 8; 520 r->VsrD(1) = res & 1; 521 r->VsrD(0) = 0; 522 } 523 524 #define VARITH_DO(name, op, element) \ 525 void 
helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 526 { \ 527 int i; \ 528 \ 529 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 530 r->element[i] = a->element[i] op b->element[i]; \ 531 } \ 532 } 533 VARITH_DO(muluwm, *, u32) 534 #undef VARITH_DO 535 #undef VARITH 536 537 #define VARITHFP(suffix, func) \ 538 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 539 ppc_avr_t *b) \ 540 { \ 541 int i; \ 542 \ 543 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 544 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 545 } \ 546 } 547 VARITHFP(addfp, float32_add) 548 VARITHFP(subfp, float32_sub) 549 VARITHFP(minfp, float32_min) 550 VARITHFP(maxfp, float32_max) 551 #undef VARITHFP 552 553 #define VARITHFPFMA(suffix, type) \ 554 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 555 ppc_avr_t *b, ppc_avr_t *c) \ 556 { \ 557 int i; \ 558 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 559 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 560 type, &env->vec_status); \ 561 } \ 562 } 563 VARITHFPFMA(maddfp, 0); 564 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 565 #undef VARITHFPFMA 566 567 #define VARITHSAT_CASE(type, op, cvt, element) \ 568 { \ 569 type result = (type)a->element[i] op (type)b->element[i]; \ 570 r->element[i] = cvt(result, &sat); \ 571 } 572 573 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 574 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 575 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 576 { \ 577 int sat = 0; \ 578 int i; \ 579 \ 580 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 581 VARITHSAT_CASE(optype, op, cvt, element); \ 582 } \ 583 if (sat) { \ 584 vscr_sat->u32[0] = 1; \ 585 } \ 586 } 587 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 588 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 589 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 590 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 591 
VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 592 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 593 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 594 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 595 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 596 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 597 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 598 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 599 #undef VARITHSAT_CASE 600 #undef VARITHSAT_DO 601 #undef VARITHSAT_SIGNED 602 #undef VARITHSAT_UNSIGNED 603 604 #define VAVG_DO(name, element, etype) \ 605 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 606 { \ 607 int i; \ 608 \ 609 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 610 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 611 r->element[i] = x >> 1; \ 612 } \ 613 } 614 615 #define VAVG(type, signed_element, signed_type, unsigned_element, \ 616 unsigned_type) \ 617 VAVG_DO(avgs##type, signed_element, signed_type) \ 618 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 619 VAVG(b, s8, int16_t, u8, uint16_t) 620 VAVG(h, s16, int32_t, u16, uint32_t) 621 VAVG(w, s32, int64_t, u32, uint64_t) 622 #undef VAVG_DO 623 #undef VAVG 624 625 #define VABSDU_DO(name, element) \ 626 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 627 { \ 628 int i; \ 629 \ 630 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 631 r->element[i] = (a->element[i] > b->element[i]) ? 
\ 632 (a->element[i] - b->element[i]) : \ 633 (b->element[i] - a->element[i]); \ 634 } \ 635 } 636 637 /* VABSDU - Vector absolute difference unsigned 638 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 639 * element - element type to access from vector 640 */ 641 #define VABSDU(type, element) \ 642 VABSDU_DO(absdu##type, element) 643 VABSDU(b, u8) 644 VABSDU(h, u16) 645 VABSDU(w, u32) 646 #undef VABSDU_DO 647 #undef VABSDU 648 649 #define VCF(suffix, cvt, element) \ 650 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 651 ppc_avr_t *b, uint32_t uim) \ 652 { \ 653 int i; \ 654 \ 655 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 656 float32 t = cvt(b->element[i], &env->vec_status); \ 657 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 658 } \ 659 } 660 VCF(ux, uint32_to_float32, u32) 661 VCF(sx, int32_to_float32, s32) 662 #undef VCF 663 664 #define VCMP_DO(suffix, compare, element, record) \ 665 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 666 ppc_avr_t *a, ppc_avr_t *b) \ 667 { \ 668 uint64_t ones = (uint64_t)-1; \ 669 uint64_t all = ones; \ 670 uint64_t none = 0; \ 671 int i; \ 672 \ 673 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 674 uint64_t result = (a->element[i] compare b->element[i] ? 
\ 675 ones : 0x0); \ 676 switch (sizeof(a->element[0])) { \ 677 case 8: \ 678 r->u64[i] = result; \ 679 break; \ 680 case 4: \ 681 r->u32[i] = result; \ 682 break; \ 683 case 2: \ 684 r->u16[i] = result; \ 685 break; \ 686 case 1: \ 687 r->u8[i] = result; \ 688 break; \ 689 } \ 690 all &= result; \ 691 none |= result; \ 692 } \ 693 if (record) { \ 694 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 695 } \ 696 } 697 #define VCMP(suffix, compare, element) \ 698 VCMP_DO(suffix, compare, element, 0) \ 699 VCMP_DO(suffix##_dot, compare, element, 1) 700 VCMP(equb, ==, u8) 701 VCMP(equh, ==, u16) 702 VCMP(equw, ==, u32) 703 VCMP(equd, ==, u64) 704 VCMP(gtub, >, u8) 705 VCMP(gtuh, >, u16) 706 VCMP(gtuw, >, u32) 707 VCMP(gtud, >, u64) 708 VCMP(gtsb, >, s8) 709 VCMP(gtsh, >, s16) 710 VCMP(gtsw, >, s32) 711 VCMP(gtsd, >, s64) 712 #undef VCMP_DO 713 #undef VCMP 714 715 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \ 716 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \ 717 ppc_avr_t *a, ppc_avr_t *b) \ 718 { \ 719 etype ones = (etype)-1; \ 720 etype all = ones; \ 721 etype result, none = 0; \ 722 int i; \ 723 \ 724 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 725 if (cmpzero) { \ 726 result = ((a->element[i] == 0) \ 727 || (b->element[i] == 0) \ 728 || (a->element[i] != b->element[i]) ? \ 729 ones : 0x0); \ 730 } else { \ 731 result = (a->element[i] != b->element[i]) ? 
ones : 0x0; \ 732 } \ 733 r->element[i] = result; \ 734 all &= result; \ 735 none |= result; \ 736 } \ 737 if (record) { \ 738 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 739 } \ 740 } 741 742 /* VCMPNEZ - Vector compare not equal to zero 743 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word) 744 * element - element type to access from vector 745 */ 746 #define VCMPNE(suffix, element, etype, cmpzero) \ 747 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \ 748 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1) 749 VCMPNE(zb, u8, uint8_t, 1) 750 VCMPNE(zh, u16, uint16_t, 1) 751 VCMPNE(zw, u32, uint32_t, 1) 752 VCMPNE(b, u8, uint8_t, 0) 753 VCMPNE(h, u16, uint16_t, 0) 754 VCMPNE(w, u32, uint32_t, 0) 755 #undef VCMPNE_DO 756 #undef VCMPNE 757 758 #define VCMPFP_DO(suffix, compare, order, record) \ 759 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 760 ppc_avr_t *a, ppc_avr_t *b) \ 761 { \ 762 uint32_t ones = (uint32_t)-1; \ 763 uint32_t all = ones; \ 764 uint32_t none = 0; \ 765 int i; \ 766 \ 767 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 768 uint32_t result; \ 769 int rel = float32_compare_quiet(a->f32[i], b->f32[i], \ 770 &env->vec_status); \ 771 if (rel == float_relation_unordered) { \ 772 result = 0; \ 773 } else if (rel compare order) { \ 774 result = ones; \ 775 } else { \ 776 result = 0; \ 777 } \ 778 r->u32[i] = result; \ 779 all &= result; \ 780 none |= result; \ 781 } \ 782 if (record) { \ 783 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 784 } \ 785 } 786 #define VCMPFP(suffix, compare, order) \ 787 VCMPFP_DO(suffix, compare, order, 0) \ 788 VCMPFP_DO(suffix##_dot, compare, order, 1) 789 VCMPFP(eqfp, ==, float_relation_equal) 790 VCMPFP(gefp, !=, float_relation_less) 791 VCMPFP(gtfp, ==, float_relation_greater) 792 #undef VCMPFP_DO 793 #undef VCMPFP 794 795 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 796 ppc_avr_t *a, ppc_avr_t *b, int record) 797 { 798 int i; 799 int all_in = 0; 
800 801 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 802 int le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 803 &env->vec_status); 804 if (le_rel == float_relation_unordered) { 805 r->u32[i] = 0xc0000000; 806 all_in = 1; 807 } else { 808 float32 bneg = float32_chs(b->f32[i]); 809 int ge_rel = float32_compare_quiet(a->f32[i], bneg, 810 &env->vec_status); 811 int le = le_rel != float_relation_greater; 812 int ge = ge_rel != float_relation_less; 813 814 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 815 all_in |= (!le | !ge); 816 } 817 } 818 if (record) { 819 env->crf[6] = (all_in == 0) << 1; 820 } 821 } 822 823 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 824 { 825 vcmpbfp_internal(env, r, a, b, 0); 826 } 827 828 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 829 ppc_avr_t *b) 830 { 831 vcmpbfp_internal(env, r, a, b, 1); 832 } 833 834 #define VCT(suffix, satcvt, element) \ 835 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 836 ppc_avr_t *b, uint32_t uim) \ 837 { \ 838 int i; \ 839 int sat = 0; \ 840 float_status s = env->vec_status; \ 841 \ 842 set_float_rounding_mode(float_round_to_zero, &s); \ 843 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 844 if (float32_is_any_nan(b->f32[i])) { \ 845 r->element[i] = 0; \ 846 } else { \ 847 float64 t = float32_to_float64(b->f32[i], &s); \ 848 int64_t j; \ 849 \ 850 t = float64_scalbn(t, uim, &s); \ 851 j = float64_to_int64(t, &s); \ 852 r->element[i] = satcvt(j, &sat); \ 853 } \ 854 } \ 855 if (sat) { \ 856 set_vscr_sat(env); \ 857 } \ 858 } 859 VCT(uxs, cvtsduw, u32) 860 VCT(sxs, cvtsdsw, s32) 861 #undef VCT 862 863 target_ulong helper_vclzlsbb(ppc_avr_t *r) 864 { 865 target_ulong count = 0; 866 int i; 867 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 868 if (r->VsrB(i) & 0x01) { 869 break; 870 } 871 count++; 872 } 873 return count; 874 } 875 876 target_ulong helper_vctzlsbb(ppc_avr_t *r) 877 { 878 target_ulong count = 0; 879 int i; 880 for (i = ARRAY_SIZE(r->u8) 
- 1; i >= 0; i--) { 881 if (r->VsrB(i) & 0x01) { 882 break; 883 } 884 count++; 885 } 886 return count; 887 } 888 889 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 890 ppc_avr_t *b, ppc_avr_t *c) 891 { 892 int sat = 0; 893 int i; 894 895 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 896 int32_t prod = a->s16[i] * b->s16[i]; 897 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 898 899 r->s16[i] = cvtswsh(t, &sat); 900 } 901 902 if (sat) { 903 set_vscr_sat(env); 904 } 905 } 906 907 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 908 ppc_avr_t *b, ppc_avr_t *c) 909 { 910 int sat = 0; 911 int i; 912 913 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 914 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 915 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 916 r->s16[i] = cvtswsh(t, &sat); 917 } 918 919 if (sat) { 920 set_vscr_sat(env); 921 } 922 } 923 924 #define VMINMAX_DO(name, compare, element) \ 925 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 926 { \ 927 int i; \ 928 \ 929 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 930 if (a->element[i] compare b->element[i]) { \ 931 r->element[i] = b->element[i]; \ 932 } else { \ 933 r->element[i] = a->element[i]; \ 934 } \ 935 } \ 936 } 937 #define VMINMAX(suffix, element) \ 938 VMINMAX_DO(min##suffix, >, element) \ 939 VMINMAX_DO(max##suffix, <, element) 940 VMINMAX(sb, s8) 941 VMINMAX(sh, s16) 942 VMINMAX(sw, s32) 943 VMINMAX(sd, s64) 944 VMINMAX(ub, u8) 945 VMINMAX(uh, u16) 946 VMINMAX(uw, u32) 947 VMINMAX(ud, u64) 948 #undef VMINMAX_DO 949 #undef VMINMAX 950 951 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 952 { 953 int i; 954 955 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 956 int32_t prod = a->s16[i] * b->s16[i]; 957 r->s16[i] = (int16_t) (prod + c->s16[i]); 958 } 959 } 960 961 #define VMRG_DO(name, element, access, ofs) \ 962 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 963 { \ 964 ppc_avr_t result; \ 965 int i, half 
= ARRAY_SIZE(r->element) / 2; \ 966 \ 967 for (i = 0; i < half; i++) { \ 968 result.access(i * 2 + 0) = a->access(i + ofs); \ 969 result.access(i * 2 + 1) = b->access(i + ofs); \ 970 } \ 971 *r = result; \ 972 } 973 974 #define VMRG(suffix, element, access) \ 975 VMRG_DO(mrgl##suffix, element, access, half) \ 976 VMRG_DO(mrgh##suffix, element, access, 0) 977 VMRG(b, u8, VsrB) 978 VMRG(h, u16, VsrH) 979 VMRG(w, u32, VsrW) 980 #undef VMRG_DO 981 #undef VMRG 982 983 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 984 ppc_avr_t *b, ppc_avr_t *c) 985 { 986 int32_t prod[16]; 987 int i; 988 989 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 990 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 991 } 992 993 VECTOR_FOR_INORDER_I(i, s32) { 994 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 995 prod[4 * i + 2] + prod[4 * i + 3]; 996 } 997 } 998 999 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1000 ppc_avr_t *b, ppc_avr_t *c) 1001 { 1002 int32_t prod[8]; 1003 int i; 1004 1005 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1006 prod[i] = a->s16[i] * b->s16[i]; 1007 } 1008 1009 VECTOR_FOR_INORDER_I(i, s32) { 1010 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1011 } 1012 } 1013 1014 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1015 ppc_avr_t *b, ppc_avr_t *c) 1016 { 1017 int32_t prod[8]; 1018 int i; 1019 int sat = 0; 1020 1021 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1022 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1023 } 1024 1025 VECTOR_FOR_INORDER_I(i, s32) { 1026 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1027 1028 r->u32[i] = cvtsdsw(t, &sat); 1029 } 1030 1031 if (sat) { 1032 set_vscr_sat(env); 1033 } 1034 } 1035 1036 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1037 ppc_avr_t *b, ppc_avr_t *c) 1038 { 1039 uint16_t prod[16]; 1040 int i; 1041 1042 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1043 prod[i] = a->u8[i] * b->u8[i]; 1044 } 1045 1046 VECTOR_FOR_INORDER_I(i, u32) { 
1047 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1048 prod[4 * i + 2] + prod[4 * i + 3]; 1049 } 1050 } 1051 1052 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1053 ppc_avr_t *b, ppc_avr_t *c) 1054 { 1055 uint32_t prod[8]; 1056 int i; 1057 1058 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1059 prod[i] = a->u16[i] * b->u16[i]; 1060 } 1061 1062 VECTOR_FOR_INORDER_I(i, u32) { 1063 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1064 } 1065 } 1066 1067 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1068 ppc_avr_t *b, ppc_avr_t *c) 1069 { 1070 uint32_t prod[8]; 1071 int i; 1072 int sat = 0; 1073 1074 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1075 prod[i] = a->u16[i] * b->u16[i]; 1076 } 1077 1078 VECTOR_FOR_INORDER_I(i, s32) { 1079 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1080 1081 r->u32[i] = cvtuduw(t, &sat); 1082 } 1083 1084 if (sat) { 1085 set_vscr_sat(env); 1086 } 1087 } 1088 1089 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 1090 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1091 { \ 1092 int i; \ 1093 \ 1094 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1095 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 1096 (cast)b->mul_access(i); \ 1097 } \ 1098 } 1099 1100 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 1101 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1102 { \ 1103 int i; \ 1104 \ 1105 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1106 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1107 (cast)b->mul_access(i + 1); \ 1108 } \ 1109 } 1110 1111 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1112 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \ 1113 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast) 1114 VMUL(sb, s8, VsrSB, VsrSH, int16_t) 1115 VMUL(sh, s16, VsrSH, VsrSW, int32_t) 1116 VMUL(sw, s32, 
VsrSW, VsrSD, int64_t) 1117 VMUL(ub, u8, VsrB, VsrH, uint16_t) 1118 VMUL(uh, u16, VsrH, VsrW, uint32_t) 1119 VMUL(uw, u32, VsrW, VsrD, uint64_t) 1120 #undef VMUL_DO_EVN 1121 #undef VMUL_DO_ODD 1122 #undef VMUL 1123 1124 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1125 ppc_avr_t *c) 1126 { 1127 ppc_avr_t result; 1128 int i; 1129 1130 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1131 int s = c->VsrB(i) & 0x1f; 1132 int index = s & 0xf; 1133 1134 if (s & 0x10) { 1135 result.VsrB(i) = b->VsrB(index); 1136 } else { 1137 result.VsrB(i) = a->VsrB(index); 1138 } 1139 } 1140 *r = result; 1141 } 1142 1143 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1144 ppc_avr_t *c) 1145 { 1146 ppc_avr_t result; 1147 int i; 1148 1149 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1150 int s = c->VsrB(i) & 0x1f; 1151 int index = 15 - (s & 0xf); 1152 1153 if (s & 0x10) { 1154 result.VsrB(i) = a->VsrB(index); 1155 } else { 1156 result.VsrB(i) = b->VsrB(index); 1157 } 1158 } 1159 *r = result; 1160 } 1161 1162 #if defined(HOST_WORDS_BIGENDIAN) 1163 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1164 #define VBPERMD_INDEX(i) (i) 1165 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1166 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) 1167 #else 1168 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)]) 1169 #define VBPERMD_INDEX(i) (1 - i) 1170 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1171 #define EXTRACT_BIT(avr, i, index) \ 1172 (extract64((avr)->u64[1 - i], 63 - index, 1)) 1173 #endif 1174 1175 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1176 { 1177 int i, j; 1178 ppc_avr_t result = { .u64 = { 0, 0 } }; 1179 VECTOR_FOR_INORDER_I(i, u64) { 1180 for (j = 0; j < 8; j++) { 1181 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1182 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1183 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1184 } 1185 } 1186 } 1187 *r = result; 1188 } 1189 1190 void 
helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1191 { 1192 int i; 1193 uint64_t perm = 0; 1194 1195 VECTOR_FOR_INORDER_I(i, u8) { 1196 int index = VBPERMQ_INDEX(b, i); 1197 1198 if (index < 128) { 1199 uint64_t mask = (1ull << (63-(index & 0x3F))); 1200 if (a->u64[VBPERMQ_DW(index)] & mask) { 1201 perm |= (0x8000 >> i); 1202 } 1203 } 1204 } 1205 1206 r->VsrD(0) = perm; 1207 r->VsrD(1) = 0; 1208 } 1209 1210 #undef VBPERMQ_INDEX 1211 #undef VBPERMQ_DW 1212 1213 static const uint64_t VGBBD_MASKS[256] = { 1214 0x0000000000000000ull, /* 00 */ 1215 0x0000000000000080ull, /* 01 */ 1216 0x0000000000008000ull, /* 02 */ 1217 0x0000000000008080ull, /* 03 */ 1218 0x0000000000800000ull, /* 04 */ 1219 0x0000000000800080ull, /* 05 */ 1220 0x0000000000808000ull, /* 06 */ 1221 0x0000000000808080ull, /* 07 */ 1222 0x0000000080000000ull, /* 08 */ 1223 0x0000000080000080ull, /* 09 */ 1224 0x0000000080008000ull, /* 0A */ 1225 0x0000000080008080ull, /* 0B */ 1226 0x0000000080800000ull, /* 0C */ 1227 0x0000000080800080ull, /* 0D */ 1228 0x0000000080808000ull, /* 0E */ 1229 0x0000000080808080ull, /* 0F */ 1230 0x0000008000000000ull, /* 10 */ 1231 0x0000008000000080ull, /* 11 */ 1232 0x0000008000008000ull, /* 12 */ 1233 0x0000008000008080ull, /* 13 */ 1234 0x0000008000800000ull, /* 14 */ 1235 0x0000008000800080ull, /* 15 */ 1236 0x0000008000808000ull, /* 16 */ 1237 0x0000008000808080ull, /* 17 */ 1238 0x0000008080000000ull, /* 18 */ 1239 0x0000008080000080ull, /* 19 */ 1240 0x0000008080008000ull, /* 1A */ 1241 0x0000008080008080ull, /* 1B */ 1242 0x0000008080800000ull, /* 1C */ 1243 0x0000008080800080ull, /* 1D */ 1244 0x0000008080808000ull, /* 1E */ 1245 0x0000008080808080ull, /* 1F */ 1246 0x0000800000000000ull, /* 20 */ 1247 0x0000800000000080ull, /* 21 */ 1248 0x0000800000008000ull, /* 22 */ 1249 0x0000800000008080ull, /* 23 */ 1250 0x0000800000800000ull, /* 24 */ 1251 0x0000800000800080ull, /* 25 */ 1252 0x0000800000808000ull, /* 26 */ 1253 0x0000800000808080ull, /* 27 */ 
1254 0x0000800080000000ull, /* 28 */ 1255 0x0000800080000080ull, /* 29 */ 1256 0x0000800080008000ull, /* 2A */ 1257 0x0000800080008080ull, /* 2B */ 1258 0x0000800080800000ull, /* 2C */ 1259 0x0000800080800080ull, /* 2D */ 1260 0x0000800080808000ull, /* 2E */ 1261 0x0000800080808080ull, /* 2F */ 1262 0x0000808000000000ull, /* 30 */ 1263 0x0000808000000080ull, /* 31 */ 1264 0x0000808000008000ull, /* 32 */ 1265 0x0000808000008080ull, /* 33 */ 1266 0x0000808000800000ull, /* 34 */ 1267 0x0000808000800080ull, /* 35 */ 1268 0x0000808000808000ull, /* 36 */ 1269 0x0000808000808080ull, /* 37 */ 1270 0x0000808080000000ull, /* 38 */ 1271 0x0000808080000080ull, /* 39 */ 1272 0x0000808080008000ull, /* 3A */ 1273 0x0000808080008080ull, /* 3B */ 1274 0x0000808080800000ull, /* 3C */ 1275 0x0000808080800080ull, /* 3D */ 1276 0x0000808080808000ull, /* 3E */ 1277 0x0000808080808080ull, /* 3F */ 1278 0x0080000000000000ull, /* 40 */ 1279 0x0080000000000080ull, /* 41 */ 1280 0x0080000000008000ull, /* 42 */ 1281 0x0080000000008080ull, /* 43 */ 1282 0x0080000000800000ull, /* 44 */ 1283 0x0080000000800080ull, /* 45 */ 1284 0x0080000000808000ull, /* 46 */ 1285 0x0080000000808080ull, /* 47 */ 1286 0x0080000080000000ull, /* 48 */ 1287 0x0080000080000080ull, /* 49 */ 1288 0x0080000080008000ull, /* 4A */ 1289 0x0080000080008080ull, /* 4B */ 1290 0x0080000080800000ull, /* 4C */ 1291 0x0080000080800080ull, /* 4D */ 1292 0x0080000080808000ull, /* 4E */ 1293 0x0080000080808080ull, /* 4F */ 1294 0x0080008000000000ull, /* 50 */ 1295 0x0080008000000080ull, /* 51 */ 1296 0x0080008000008000ull, /* 52 */ 1297 0x0080008000008080ull, /* 53 */ 1298 0x0080008000800000ull, /* 54 */ 1299 0x0080008000800080ull, /* 55 */ 1300 0x0080008000808000ull, /* 56 */ 1301 0x0080008000808080ull, /* 57 */ 1302 0x0080008080000000ull, /* 58 */ 1303 0x0080008080000080ull, /* 59 */ 1304 0x0080008080008000ull, /* 5A */ 1305 0x0080008080008080ull, /* 5B */ 1306 0x0080008080800000ull, /* 5C */ 1307 0x0080008080800080ull, /* 5D */ 
1308 0x0080008080808000ull, /* 5E */ 1309 0x0080008080808080ull, /* 5F */ 1310 0x0080800000000000ull, /* 60 */ 1311 0x0080800000000080ull, /* 61 */ 1312 0x0080800000008000ull, /* 62 */ 1313 0x0080800000008080ull, /* 63 */ 1314 0x0080800000800000ull, /* 64 */ 1315 0x0080800000800080ull, /* 65 */ 1316 0x0080800000808000ull, /* 66 */ 1317 0x0080800000808080ull, /* 67 */ 1318 0x0080800080000000ull, /* 68 */ 1319 0x0080800080000080ull, /* 69 */ 1320 0x0080800080008000ull, /* 6A */ 1321 0x0080800080008080ull, /* 6B */ 1322 0x0080800080800000ull, /* 6C */ 1323 0x0080800080800080ull, /* 6D */ 1324 0x0080800080808000ull, /* 6E */ 1325 0x0080800080808080ull, /* 6F */ 1326 0x0080808000000000ull, /* 70 */ 1327 0x0080808000000080ull, /* 71 */ 1328 0x0080808000008000ull, /* 72 */ 1329 0x0080808000008080ull, /* 73 */ 1330 0x0080808000800000ull, /* 74 */ 1331 0x0080808000800080ull, /* 75 */ 1332 0x0080808000808000ull, /* 76 */ 1333 0x0080808000808080ull, /* 77 */ 1334 0x0080808080000000ull, /* 78 */ 1335 0x0080808080000080ull, /* 79 */ 1336 0x0080808080008000ull, /* 7A */ 1337 0x0080808080008080ull, /* 7B */ 1338 0x0080808080800000ull, /* 7C */ 1339 0x0080808080800080ull, /* 7D */ 1340 0x0080808080808000ull, /* 7E */ 1341 0x0080808080808080ull, /* 7F */ 1342 0x8000000000000000ull, /* 80 */ 1343 0x8000000000000080ull, /* 81 */ 1344 0x8000000000008000ull, /* 82 */ 1345 0x8000000000008080ull, /* 83 */ 1346 0x8000000000800000ull, /* 84 */ 1347 0x8000000000800080ull, /* 85 */ 1348 0x8000000000808000ull, /* 86 */ 1349 0x8000000000808080ull, /* 87 */ 1350 0x8000000080000000ull, /* 88 */ 1351 0x8000000080000080ull, /* 89 */ 1352 0x8000000080008000ull, /* 8A */ 1353 0x8000000080008080ull, /* 8B */ 1354 0x8000000080800000ull, /* 8C */ 1355 0x8000000080800080ull, /* 8D */ 1356 0x8000000080808000ull, /* 8E */ 1357 0x8000000080808080ull, /* 8F */ 1358 0x8000008000000000ull, /* 90 */ 1359 0x8000008000000080ull, /* 91 */ 1360 0x8000008000008000ull, /* 92 */ 1361 0x8000008000008080ull, /* 93 */ 
1362 0x8000008000800000ull, /* 94 */ 1363 0x8000008000800080ull, /* 95 */ 1364 0x8000008000808000ull, /* 96 */ 1365 0x8000008000808080ull, /* 97 */ 1366 0x8000008080000000ull, /* 98 */ 1367 0x8000008080000080ull, /* 99 */ 1368 0x8000008080008000ull, /* 9A */ 1369 0x8000008080008080ull, /* 9B */ 1370 0x8000008080800000ull, /* 9C */ 1371 0x8000008080800080ull, /* 9D */ 1372 0x8000008080808000ull, /* 9E */ 1373 0x8000008080808080ull, /* 9F */ 1374 0x8000800000000000ull, /* A0 */ 1375 0x8000800000000080ull, /* A1 */ 1376 0x8000800000008000ull, /* A2 */ 1377 0x8000800000008080ull, /* A3 */ 1378 0x8000800000800000ull, /* A4 */ 1379 0x8000800000800080ull, /* A5 */ 1380 0x8000800000808000ull, /* A6 */ 1381 0x8000800000808080ull, /* A7 */ 1382 0x8000800080000000ull, /* A8 */ 1383 0x8000800080000080ull, /* A9 */ 1384 0x8000800080008000ull, /* AA */ 1385 0x8000800080008080ull, /* AB */ 1386 0x8000800080800000ull, /* AC */ 1387 0x8000800080800080ull, /* AD */ 1388 0x8000800080808000ull, /* AE */ 1389 0x8000800080808080ull, /* AF */ 1390 0x8000808000000000ull, /* B0 */ 1391 0x8000808000000080ull, /* B1 */ 1392 0x8000808000008000ull, /* B2 */ 1393 0x8000808000008080ull, /* B3 */ 1394 0x8000808000800000ull, /* B4 */ 1395 0x8000808000800080ull, /* B5 */ 1396 0x8000808000808000ull, /* B6 */ 1397 0x8000808000808080ull, /* B7 */ 1398 0x8000808080000000ull, /* B8 */ 1399 0x8000808080000080ull, /* B9 */ 1400 0x8000808080008000ull, /* BA */ 1401 0x8000808080008080ull, /* BB */ 1402 0x8000808080800000ull, /* BC */ 1403 0x8000808080800080ull, /* BD */ 1404 0x8000808080808000ull, /* BE */ 1405 0x8000808080808080ull, /* BF */ 1406 0x8080000000000000ull, /* C0 */ 1407 0x8080000000000080ull, /* C1 */ 1408 0x8080000000008000ull, /* C2 */ 1409 0x8080000000008080ull, /* C3 */ 1410 0x8080000000800000ull, /* C4 */ 1411 0x8080000000800080ull, /* C5 */ 1412 0x8080000000808000ull, /* C6 */ 1413 0x8080000000808080ull, /* C7 */ 1414 0x8080000080000000ull, /* C8 */ 1415 0x8080000080000080ull, /* C9 */ 
1416 0x8080000080008000ull, /* CA */ 1417 0x8080000080008080ull, /* CB */ 1418 0x8080000080800000ull, /* CC */ 1419 0x8080000080800080ull, /* CD */ 1420 0x8080000080808000ull, /* CE */ 1421 0x8080000080808080ull, /* CF */ 1422 0x8080008000000000ull, /* D0 */ 1423 0x8080008000000080ull, /* D1 */ 1424 0x8080008000008000ull, /* D2 */ 1425 0x8080008000008080ull, /* D3 */ 1426 0x8080008000800000ull, /* D4 */ 1427 0x8080008000800080ull, /* D5 */ 1428 0x8080008000808000ull, /* D6 */ 1429 0x8080008000808080ull, /* D7 */ 1430 0x8080008080000000ull, /* D8 */ 1431 0x8080008080000080ull, /* D9 */ 1432 0x8080008080008000ull, /* DA */ 1433 0x8080008080008080ull, /* DB */ 1434 0x8080008080800000ull, /* DC */ 1435 0x8080008080800080ull, /* DD */ 1436 0x8080008080808000ull, /* DE */ 1437 0x8080008080808080ull, /* DF */ 1438 0x8080800000000000ull, /* E0 */ 1439 0x8080800000000080ull, /* E1 */ 1440 0x8080800000008000ull, /* E2 */ 1441 0x8080800000008080ull, /* E3 */ 1442 0x8080800000800000ull, /* E4 */ 1443 0x8080800000800080ull, /* E5 */ 1444 0x8080800000808000ull, /* E6 */ 1445 0x8080800000808080ull, /* E7 */ 1446 0x8080800080000000ull, /* E8 */ 1447 0x8080800080000080ull, /* E9 */ 1448 0x8080800080008000ull, /* EA */ 1449 0x8080800080008080ull, /* EB */ 1450 0x8080800080800000ull, /* EC */ 1451 0x8080800080800080ull, /* ED */ 1452 0x8080800080808000ull, /* EE */ 1453 0x8080800080808080ull, /* EF */ 1454 0x8080808000000000ull, /* F0 */ 1455 0x8080808000000080ull, /* F1 */ 1456 0x8080808000008000ull, /* F2 */ 1457 0x8080808000008080ull, /* F3 */ 1458 0x8080808000800000ull, /* F4 */ 1459 0x8080808000800080ull, /* F5 */ 1460 0x8080808000808000ull, /* F6 */ 1461 0x8080808000808080ull, /* F7 */ 1462 0x8080808080000000ull, /* F8 */ 1463 0x8080808080000080ull, /* F9 */ 1464 0x8080808080008000ull, /* FA */ 1465 0x8080808080008080ull, /* FB */ 1466 0x8080808080800000ull, /* FC */ 1467 0x8080808080800080ull, /* FD */ 1468 0x8080808080808000ull, /* FE */ 1469 0x8080808080808080ull, /* FF */ 
};

/*
 * vgbbd (Vector Gather Bits by Bytes by Doubleword): an 8x8 bit-matrix
 * transpose of each doubleword.  VGBBD_MASKS[src byte] puts the byte's
 * bits in the MSB of each byte column; shifting by the byte's position
 * selects the destination bit column.
 */
void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    uint64_t t[2] = { 0, 0 };

    VECTOR_FOR_INORDER_I(i, u8) {
#if defined(HOST_WORDS_BIGENDIAN)
        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
#else
        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
#endif
    }

    r->u64[0] = t[0];
    r->u64[1] = t[1];
}

/*
 * Polynomial (carry-less) multiply-sum: XOR the two carry-less products
 * of each adjacent source-element pair into one double-width element.
 */
#define PMSUM(name, srcfld, trgfld, trgtyp)                             \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i, j;                                                           \
    trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])];                \
                                                                        \
    VECTOR_FOR_INORDER_I(i, srcfld) {                                   \
        prod[i] = 0;                                                    \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {                \
            if (a->srcfld[i] & (1ull<<j)) {                             \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);                 \
            }                                                           \
        }                                                               \
    }                                                                   \
                                                                        \
    VECTOR_FOR_INORDER_I(i, trgfld) {                                   \
        r->trgfld[i] = prod[2*i] ^ prod[2*i+1];                         \
    }                                                                   \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

/* vpmsumd: 64x64 -> 128-bit carry-less multiply-sum of both doublewords. */
void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{

#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull<<j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    /* No __int128: accumulate the 128-bit product in two 64-bit halves. */
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull<<j)) {
                ppc_avr_t bshift;
                if (j == 0) {
                    /* Avoid the shift-by-64 of the j != 0 path. */
                    bshift.VsrD(0) = 0;
                    bshift.VsrD(1) = b->u64[i];
                } else {
                    bshift.VsrD(0) = b->u64[i] >> (64 - j);
                    bshift.VsrD(1) = b->u64[i] << j;
                }
                prod[i].VsrD(1) ^= bshift.VsrD(1);
                prod[i].VsrD(0) ^= bshift.VsrD(0);
            }
        }
    }

    r->VsrD(1) =
                 prod[0].VsrD(1) ^ prod[1].VsrD(1);
    r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
#endif
}


#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
/*
 * vpkpx: pack eight 32-bit pixels into eight 16-bit 1:5:5:5 pixels,
 * keeping the most-significant bits of each 8-bit channel.
 */
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
                                 ((e >> 6) & 0x3e0) |
                                 ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

/*
 * Vector pack: narrow the elements of a and b into one result using cvt
 * per element; cvt may saturate and reports that through *sat.
 */
#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ?
b : a; \ 1595 \ 1596 VECTOR_FOR_INORDER_I(i, from) { \ 1597 result.to[i] = cvt(a0->from[i], &sat); \ 1598 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \ 1599 } \ 1600 *r = result; \ 1601 if (dosat && sat) { \ 1602 set_vscr_sat(env); \ 1603 } \ 1604 } 1605 #define I(x, y) (x) 1606 VPK(shss, s16, s8, cvtshsb, 1) 1607 VPK(shus, s16, u8, cvtshub, 1) 1608 VPK(swss, s32, s16, cvtswsh, 1) 1609 VPK(swus, s32, u16, cvtswuh, 1) 1610 VPK(sdss, s64, s32, cvtsdsw, 1) 1611 VPK(sdus, s64, u32, cvtsduw, 1) 1612 VPK(uhus, u16, u8, cvtuhub, 1) 1613 VPK(uwus, u32, u16, cvtuwuh, 1) 1614 VPK(udus, u64, u32, cvtuduw, 1) 1615 VPK(uhum, u16, u8, I, 0) 1616 VPK(uwum, u32, u16, I, 0) 1617 VPK(udum, u64, u32, I, 0) 1618 #undef I 1619 #undef VPK 1620 #undef PKBIG 1621 1622 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1623 { 1624 int i; 1625 1626 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1627 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1628 } 1629 } 1630 1631 #define VRFI(suffix, rounding) \ 1632 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1633 ppc_avr_t *b) \ 1634 { \ 1635 int i; \ 1636 float_status s = env->vec_status; \ 1637 \ 1638 set_float_rounding_mode(rounding, &s); \ 1639 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1640 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1641 } \ 1642 } 1643 VRFI(n, float_round_nearest_even) 1644 VRFI(m, float_round_down) 1645 VRFI(p, float_round_up) 1646 VRFI(z, float_round_to_zero) 1647 #undef VRFI 1648 1649 #define VROTATE(suffix, element, mask) \ 1650 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1651 { \ 1652 int i; \ 1653 \ 1654 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1655 unsigned int shift = b->element[i] & mask; \ 1656 r->element[i] = (a->element[i] << shift) | \ 1657 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \ 1658 } \ 1659 } 1660 VROTATE(b, u8, 0x7) 1661 VROTATE(h, u16, 0xF) 1662 VROTATE(w, u32, 0x1F) 1663 VROTATE(d, u64, 
        0x3F)
#undef VROTATE

/* vrsqrtefp: per-element reciprocal square-root estimate (exact 1/sqrt). */
void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        float32 t = float32_sqrt(b->f32[i], &env->vec_status);

        r->f32[i] = float32_div(float32_one, t, &env->vec_status);
    }
}

/*
 * vrl[wd]mi / vrl[wd]nm: rotate src1 left by the shift field of src2 and
 * AND with the mask described by src2's begin/end fields.  The "mi"
 * (mask-insert) forms merge the unmasked bits from the old destination.
 */
#define VRLMI(name, size, element, insert)                            \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)          \
{                                                                     \
    int i;                                                            \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                    \
        uint##size##_t src1 = a->element[i];                          \
        uint##size##_t src2 = b->element[i];                          \
        uint##size##_t src3 = r->element[i];                          \
        uint##size##_t begin, end, shift, mask, rot_val;              \
                                                                      \
        shift = extract##size(src2, 0, 6);                            \
        end   = extract##size(src2, 8, 6);                            \
        begin = extract##size(src2, 16, 6);                           \
        rot_val = rol##size(src1, shift);                             \
        mask = mask_u##size(begin, end);                              \
        if (insert) {                                                 \
            r->element[i] = (rot_val & mask) | (src3 & ~mask);        \
        } else {                                                      \
            r->element[i] = (rot_val & mask);                         \
        }                                                             \
    }                                                                 \
}

VRLMI(vrldmi, 64, u64, 1);
VRLMI(vrlwmi, 32, u32, 1);
VRLMI(vrldnm, 64, u64, 0);
VRLMI(vrlwnm, 32, u32, 0);

/* vsel: bitwise select — where c is set take b, where clear take a. */
void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                 ppc_avr_t *c)
{
    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
}

/* vexptefp: per-element 2**x estimate. */
void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
    }
}

/* vlogefp: per-element log2(x) estimate. */
void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
    }
}

/*
 * vextu[bhw][lr]x: extract an unsigned element of the given bit size from
 * byte offset (a & 0xf) of b, counting from the left or the right end.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTU_X_DO(name, size,
                       left)                                            \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)          \
{                                                                       \
    int index;                                                          \
    if (left) {                                                         \
        index = (a & 0xf) * 8;                                          \
    } else {                                                            \
        index = ((15 - (a & 0xf) + 1) * 8) - size;                      \
    }                                                                   \
    return int128_getlo(int128_rshift(b->s128, index)) &                \
        MAKE_64BIT_MASK(0, size);                                       \
}
#else
/* LE host: lane order in s128 is reversed, so left/right indices swap. */
#define VEXTU_X_DO(name, size, left)                                    \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)          \
{                                                                       \
    int index;                                                          \
    if (left) {                                                         \
        index = ((15 - (a & 0xf) + 1) * 8) - size;                      \
    } else {                                                            \
        index = (a & 0xf) * 8;                                          \
    }                                                                   \
    return int128_getlo(int128_rshift(b->s128, index)) &                \
        MAKE_64BIT_MASK(0, size);                                       \
}
#endif

VEXTU_X_DO(vextublx,  8, 1)
VEXTU_X_DO(vextuhlx, 16, 1)
VEXTU_X_DO(vextuwlx, 32, 1)
VEXTU_X_DO(vextubrx,  8, 0)
VEXTU_X_DO(vextuhrx, 16, 0)
VEXTU_X_DO(vextuwrx, 32, 0)
#undef VEXTU_X_DO

/* The specification says that the results are undefined if all of the
 * shift counts are not identical.  We check to make sure that they are
 * to conform to what real hardware appears to do.
 */
/*
 * vsl / vsr: whole-128-bit shift by the 3-bit count in the last byte of
 * b; a no-op when the per-byte counts disagree (see note above).
 */
#define VSHIFT(suffix, leftp)                                           \
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
    {                                                                   \
        int shift = b->VsrB(15) & 0x7;                                  \
        int doit = 1;                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
        }                                                               \
        if (doit) {                                                     \
            if (shift == 0) {                                           \
                /* shift == 0 handled apart: 64 - 0 would over-shift */ \
                *r = *a;                                                \
            } else if (leftp) {                                         \
                uint64_t carry = a->VsrD(1) >> (64 - shift);            \
                                                                        \
                r->VsrD(0) = (a->VsrD(0) << shift) | carry;             \
                r->VsrD(1) = a->VsrD(1) << shift;                       \
            } else {                                                    \
                uint64_t carry = a->VsrD(0) << (64 - shift);            \
                                                                        \
                r->VsrD(1) = (a->VsrD(1) >> shift) | carry;             \
                r->VsrD(0) = a->VsrD(0) >> shift;                       \
            }                                                           \
        }                                                               \
    }
VSHIFT(l, 1)
VSHIFT(r, 0)
#undef VSHIFT

/* vsl[bhwd]: per-element logical shift left. */
#define VSL(suffix, element, mask)                                      \
    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] << shift;                     \
        }                                                               \
    }
VSL(b, u8, 0x7)
VSL(h, u16, 0x0F)
VSL(w, u32, 0x1F)
VSL(d, u64, 0x3F)
#undef VSL

/*
 * vslv: shift each byte left by its own 3-bit count, taking the vacated
 * bits from the next-higher-index byte (zero past the end).
 */
void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->u8[i] & 0x7;              /* extract shift value */
        bytes = (a->u8[i] << 8) +            /* extract adjacent bytes */
                (((i + 1) < size) ? a->u8[i + 1] : 0);
        r->u8[i] = (bytes << shift) >> 8;    /* shift and store result */
    }
}

/* vsrv: per-byte shift right, pulling bits from the next-lower byte. */
void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /* Use reverse order, as destination and source register can be same. Its
     * being modified in place saving temporary, reverse order will guarantee
     * that computed result is not fed back.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->u8[i] & 0x7;               /* extract shift value */
        bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
                                              /* extract adjacent bytes */
        r->u8[i] = (bytes >> shift) & 0xFF;   /* shift and store result */
    }
}

/* vsldoi: take 16 bytes of the concatenation a:b starting at 'shift'. */
void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.VsrB(i) = b->VsrB(index - 0x10);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

/* vslo: shift left by octets; the count sits in bits 1:4 of b's last byte. */
void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16-sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

/*
 * vinsert[bhwd]: insert the element-sized field of b that ends at its
 * byte 7 (MSB numbering) into r at byte offset 'index'.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])],           \
               sizeof(r->element[0]));                                      \
    }
#else
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        uint32_t d = (16 - index) - sizeof(r->element[0]);                  \
        memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0]));               \
    }
#endif
VINSERT(b, u8)
VINSERT(h, u16)
VINSERT(w, u32)
VINSERT(d, u64)
#undef VINSERT
/*
 * vextract*: extract the element at byte offset 'index' of b into the
 * low half of doubleword 0 of r, zeroing everything else.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
        memset(&r->u8[8], 0, 8);                                             \
        memset(&r->u8[0], 0, 8 - es);                                        \
    }
#else
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        uint32_t s = (16 - index) - es;                                      \
        memmove(&r->u8[8], &b->u8[s], es);                                   \
        memset(&r->u8[0], 0, 8);                                             \
        memset(&r->u8[8 + es], 0, 8 - es);                                   \
    }
#endif
VEXTRACT(ub, u8)
VEXTRACT(uh, u16)
VEXTRACT(uw, u32)
VEXTRACT(d, u64)
#undef VEXTRACT

/*
 * xxextractuw: extract the word at byte 'index' of VSR[xbn] into word
 * element 1 of VSR[xtn], zero elsewhere.  The byte index wraps mod 16.
 */
void helper_xxextractuw(CPUPPCState *env, target_ulong xtn,
                        target_ulong xbn, uint32_t index)
{
    ppc_vsr_t xt, xb;
    size_t es = sizeof(uint32_t);
    uint32_t ext_index;
    int i;

    getVSR(xbn, &xb, env);
    memset(&xt, 0, sizeof(xt));

    ext_index = index;
    for (i = 0; i < es; i++, ext_index++) {
        xt.VsrB(8 - es + i) = xb.VsrB(ext_index % 16);
    }

    putVSR(xtn, &xt, env);
}

/* xxinsertw: insert the word from bytes 4..7 of VSR[xbn] at byte 'index'. */
void helper_xxinsertw(CPUPPCState *env, target_ulong xtn,
                      target_ulong xbn, uint32_t index)
{
    ppc_vsr_t xt, xb;
    size_t es = sizeof(uint32_t);
    int ins_index, i = 0;

    getVSR(xbn, &xb, env);
    getVSR(xtn, &xt, env);

    ins_index = index;
    /* Stops at byte 15, so an out-of-range index only writes what fits. */
    for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
        xt.VsrB(ins_index) = xb.VsrB(8 - es + i);
    }

    putVSR(xtn, &xt, env);
}

/* vexts*2*: sign-extend each narrow element into a wider element slot. */
#define VEXT_SIGNED(name, element, cast)                            \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
{                                                                   \
    int i;                                                          \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
        r->element[i] = (cast)b->element[i];                        \
    }                                                               \
}
VEXT_SIGNED(vextsb2w, s32, int8_t)
VEXT_SIGNED(vextsb2d, s64, int8_t)
VEXT_SIGNED(vextsh2w, s32, int16_t)
VEXT_SIGNED(vextsh2d, s64, int16_t)
VEXT_SIGNED(vextsw2d, s64, int32_t)
#undef VEXT_SIGNED

/* vneg[wd]: per-element two's-complement negation. */
#define VNEG(name, element)                                         \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
{                                                                   \
    int i;                                                          \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
        r->element[i] = -b->element[i];                             \
    }                                                               \
}
VNEG(vnegw, s32)
VNEG(vnegd, s64)
#undef VNEG

/* vsr(a)[bhwd]: per-element shift right; arithmetic for the s* forms. */
#define VSR(suffix, element, mask)                                      \
    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = a->element[i] >> shift;                     \
        }                                                               \
    }
VSR(ab, s8, 0x7)
VSR(ah, s16, 0xF)
VSR(aw, s32, 0x1F)
VSR(ad, s64, 0x3F)
VSR(b, u8, 0x7)
VSR(h, u16, 0xF)
VSR(w, u32, 0x1F)
VSR(d, u64, 0x3F)
#undef VSR

/* vsro: shift right by octets; count in bits 1:4 of b's last byte. */
void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

/* vsubcuw: per-word carry-out of a - b (1 when no borrow occurs). */
void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = a->u32[i] >= b->u32[i];
    }
}

/*
 * vsumsws: sum all signed words of a plus the last word of b into the
 * last result word (saturating); other words are zeroed.
 */
void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

    upper = ARRAY_SIZE(r->s32) - 1;
    t = (int64_t)b->VsrSW(upper);
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->VsrSW(i);
        result.VsrSW(i) = 0;
    }
    result.VsrSW(upper) = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        set_vscr_sat(env);
    }
}

/*
 * vsum2sws: per doubleword, sum two signed words of a plus the odd word
 * of b, saturating into the odd result word; even words become zero.
 */
void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

    upper = 1;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->VsrSW(upper + i * 2);

result.VsrW(i) = 0; 2061 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 2062 t += a->VsrSW(2 * i + j); 2063 } 2064 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 2065 } 2066 2067 *r = result; 2068 if (sat) { 2069 set_vscr_sat(env); 2070 } 2071 } 2072 2073 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2074 { 2075 int i, j; 2076 int sat = 0; 2077 2078 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2079 int64_t t = (int64_t)b->s32[i]; 2080 2081 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 2082 t += a->s8[4 * i + j]; 2083 } 2084 r->s32[i] = cvtsdsw(t, &sat); 2085 } 2086 2087 if (sat) { 2088 set_vscr_sat(env); 2089 } 2090 } 2091 2092 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2093 { 2094 int sat = 0; 2095 int i; 2096 2097 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2098 int64_t t = (int64_t)b->s32[i]; 2099 2100 t += a->s16[2 * i] + a->s16[2 * i + 1]; 2101 r->s32[i] = cvtsdsw(t, &sat); 2102 } 2103 2104 if (sat) { 2105 set_vscr_sat(env); 2106 } 2107 } 2108 2109 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2110 { 2111 int i, j; 2112 int sat = 0; 2113 2114 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2115 uint64_t t = (uint64_t)b->u32[i]; 2116 2117 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 2118 t += a->u8[4 * i + j]; 2119 } 2120 r->u32[i] = cvtuduw(t, &sat); 2121 } 2122 2123 if (sat) { 2124 set_vscr_sat(env); 2125 } 2126 } 2127 2128 #if defined(HOST_WORDS_BIGENDIAN) 2129 #define UPKHI 1 2130 #define UPKLO 0 2131 #else 2132 #define UPKHI 0 2133 #define UPKLO 1 2134 #endif 2135 #define VUPKPX(suffix, hi) \ 2136 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2137 { \ 2138 int i; \ 2139 ppc_avr_t result; \ 2140 \ 2141 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 2142 uint16_t e = b->u16[hi ? i : i+4]; \ 2143 uint8_t a = (e >> 15) ? 
                                    0xff : 0;                   \
            uint8_t r = (e >> 10) & 0x1f;                       \
            uint8_t g = (e >> 5) & 0x1f;                        \
            uint8_t b = e & 0x1f;                               \
                                                                \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
        }                                                       \
        *r = result;                                            \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

/* vupk{h,l}s[bhw]: sign-extend the high or low half of b's elements. */
#define VUPK(suffix, unpacked, packee, hi)                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        if (hi) {                                                       \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
                result.unpacked[i] = b->packee[i];                      \
            }                                                           \
        } else {                                                        \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                                 \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(hsw, s64, s32, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
VUPK(lsw, s64, s32, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

/* Apply the scalar bit-op 'name' to every element of b. */
#define VGENERIC_DO(name, element)                                      \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = name(b->element[i]);                        \
        }                                                               \
    }

/* clz for sub-word widths: shift the value to the top of 32 bits first. */
#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
#define clzw(v) clz32((v))
#define clzd(v) clz64((v))

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)
VGENERIC_DO(clzw, u32)
VGENERIC_DO(clzd, u64)

#undef clzb
#undef clzh
#undef clzw
#undef clzd

/* ctz for sub-word widths: 0 input yields the element width. */
#define ctzb(v) ((v) ? ctz32(v) : 8)
#define ctzh(v) ((v) ?
                       ctz32(v) : 16)
#define ctzw(v) ctz32((v))
#define ctzd(v) ctz64((v))

VGENERIC_DO(ctzb, u8)
VGENERIC_DO(ctzh, u16)
VGENERIC_DO(ctzw, u32)
VGENERIC_DO(ctzd, u64)

#undef ctzb
#undef ctzh
#undef ctzw
#undef ctzd

#define popcntb(v) ctpop8(v)
#define popcnth(v) ctpop16(v)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)

VGENERIC_DO(popcntb, u8)
VGENERIC_DO(popcnth, u16)
VGENERIC_DO(popcntw, u32)
VGENERIC_DO(popcntd, u64)

#undef popcntb
#undef popcnth
#undef popcntw
#undef popcntd

#undef VGENERIC_DO

/* 128-bit constant 1 in host lane order. */
#if defined(HOST_WORDS_BIGENDIAN)
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif

#ifndef CONFIG_INT128

/* Software quadword arithmetic used when __int128 is unavailable. */

static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
{
    t->u64[0] = ~a.u64[0];
    t->u64[1] = ~a.u64[1];
}

/* Unsigned 128-bit compare: returns -1, 0 or 1. */
static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
{
    if (a.VsrD(0) < b.VsrD(0)) {
        return -1;
    } else if (a.VsrD(0) > b.VsrD(0)) {
        return 1;
    } else if (a.VsrD(1) < b.VsrD(1)) {
        return -1;
    } else if (a.VsrD(1) > b.VsrD(1)) {
        return 1;
    } else {
        return 0;
    }
}

/* 128-bit add; inter-half carry detected via ~lo(a) < lo(b). */
static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
                 (~a.VsrD(1) < b.VsrD(1));
}

/* 128-bit add that also returns the carry out of bit 127. */
static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    ppc_avr_t not_a;
    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
                 (~a.VsrD(1) < b.VsrD(1));
    /* Carry out iff b > ~a, i.e. a + b overflows 128 bits. */
    avr_qw_not(&not_a, a);
    return avr_qw_cmpu(not_a, b) < 0;
}

#endif

/* vadduqm: 128-bit modulo addition. */
void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128;
#else
    avr_qw_add(r, *a, *b);
#endif
}

/* vaddeuqm: 128-bit add with carry-in taken from bit 0 of c. */
void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else

    if (c->VsrD(1) & 1) {
        ppc_avr_t tmp;

        tmp.VsrD(0) = 0;
        tmp.VsrD(1) = c->VsrD(1) & 1;
        avr_qw_add(&tmp, *a, tmp);
        avr_qw_add(r, tmp, *b);
    } else {
        avr_qw_add(r, *a, *b);
    }
#endif
}

/* vaddcuq: carry-out of the 128-bit addition a + b. */
void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < b->u128);
#else
    ppc_avr_t not_a;

    avr_qw_not(&not_a, *a);

    r->VsrD(0) = 0;
    r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
#endif
}

/* vaddecuq: carry-out of the 128-bit addition a + b + (c & 1). */
void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    int carry_out = (~a->u128 < b->u128);
    if (!carry_out && (c->u128 & 1)) {
        /* The carry-in can only propagate out when a + b is all-ones. */
        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
                    ((a->u128 != 0) || (b->u128 != 0));
    }
    r->u128 = carry_out;
#else

    int carry_in = c->VsrD(1) & 1;
    int carry_out = 0;
    ppc_avr_t tmp;

    carry_out = avr_qw_addc(&tmp, *a, *b);

    if (!carry_out && carry_in) {
        ppc_avr_t one = QW_ONE;
        carry_out = avr_qw_addc(&tmp, tmp, one);
    }
    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
#endif
}

/* vsubuqm: 128-bit modulo subtraction, computed as a + ~b + 1. */
void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 - b->u128;
#else
    ppc_avr_t tmp;
    ppc_avr_t one = QW_ONE;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&tmp, *a, tmp);
    avr_qw_add(r, tmp, one);
#endif
}

/* vsubeuqm: extended 128-bit subtraction, a + ~b + carry-in. */
void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
#else
    ppc_avr_t tmp, sum;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&sum, *a, tmp);

    tmp.VsrD(0) = 0;
    tmp.VsrD(1) = c->VsrD(1) & 1;
    avr_qw_add(r, sum, tmp);
#endif
}

/* vsubcuq: carry-out of a + ~b + 1, i.e. 1 when a >= b. */
void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < ~b->u128) ||
              (a->u128 + ~b->u128 == (__uint128_t)-1);
#else
    int carry = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry) {
        /* a <= b: carry only when a + ~b is all-ones (a == b). */
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
    }
    r->VsrD(0) = 0;
    r->VsrD(1) = carry;
#endif
}

/* vsubecuq: carry-out of a + ~b + carry-in. */
void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 =
        (~a->u128 < ~b->u128) ||
        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
#else
    int carry_in = c->VsrD(1) & 1;
    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry_out && carry_in) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
    }

    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
#endif
}

/* BCD sign nibbles: Power ISA preferred codes plus accepted alternates. */
#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

/* Byte holding BCD digit n: two digits per byte, sign nibble is digit 0. */
#if defined(HOST_WORDS_BIGENDIAN)
#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
#else
#define BCD_DIG_BYTE(n) ((n) / 2)
#endif

/* Returns 1 for a plus sign nibble, -1 for minus, 0 when invalid. */
static int bcd_get_sgn(ppc_avr_t *bcd)
{
    switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
    case BCD_PLUS_PREF_1:
    case BCD_PLUS_PREF_2:
    case BCD_PLUS_ALT_1:
    case BCD_PLUS_ALT_2:
    {
        return 1;
    }

    case BCD_NEG_PREF:
    case BCD_NEG_ALT:
    {
        return -1;
    }

    default:
    {
        return 0;
    }
    }
}

/* Preferred sign nibble for the given sign; ps selects 0xC vs 0xF plus. */
static int bcd_preferred_sgn(int
ps) 2466 { 2467 if (sgn >= 0) { 2468 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2469 } else { 2470 return BCD_NEG_PREF; 2471 } 2472 } 2473 2474 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2475 { 2476 uint8_t result; 2477 if (n & 1) { 2478 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4; 2479 } else { 2480 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF; 2481 } 2482 2483 if (unlikely(result > 9)) { 2484 *invalid = true; 2485 } 2486 return result; 2487 } 2488 2489 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2490 { 2491 if (n & 1) { 2492 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F; 2493 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4); 2494 } else { 2495 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0; 2496 bcd->u8[BCD_DIG_BYTE(n)] |= digit; 2497 } 2498 } 2499 2500 static bool bcd_is_valid(ppc_avr_t *bcd) 2501 { 2502 int i; 2503 int invalid = 0; 2504 2505 if (bcd_get_sgn(bcd) == 0) { 2506 return false; 2507 } 2508 2509 for (i = 1; i < 32; i++) { 2510 bcd_get_digit(bcd, i, &invalid); 2511 if (unlikely(invalid)) { 2512 return false; 2513 } 2514 } 2515 return true; 2516 } 2517 2518 static int bcd_cmp_zero(ppc_avr_t *bcd) 2519 { 2520 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) { 2521 return CRF_EQ; 2522 } else { 2523 return (bcd_get_sgn(bcd) == 1) ? 
CRF_GT : CRF_LT; 2524 } 2525 } 2526 2527 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2528 { 2529 return reg->VsrH(7 - n); 2530 } 2531 2532 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2533 { 2534 reg->VsrH(7 - n) = val; 2535 } 2536 2537 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2538 { 2539 int i; 2540 int invalid = 0; 2541 for (i = 31; i > 0; i--) { 2542 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2543 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2544 if (unlikely(invalid)) { 2545 return 0; /* doesn't matter */ 2546 } else if (dig_a > dig_b) { 2547 return 1; 2548 } else if (dig_a < dig_b) { 2549 return -1; 2550 } 2551 } 2552 2553 return 0; 2554 } 2555 2556 static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2557 int *overflow) 2558 { 2559 int carry = 0; 2560 int i; 2561 for (i = 1; i <= 31; i++) { 2562 uint8_t digit = bcd_get_digit(a, i, invalid) + 2563 bcd_get_digit(b, i, invalid) + carry; 2564 if (digit > 9) { 2565 carry = 1; 2566 digit -= 10; 2567 } else { 2568 carry = 0; 2569 } 2570 2571 bcd_put_digit(t, digit, i); 2572 } 2573 2574 *overflow = carry; 2575 } 2576 2577 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2578 int *overflow) 2579 { 2580 int carry = 0; 2581 int i; 2582 2583 for (i = 1; i <= 31; i++) { 2584 uint8_t digit = bcd_get_digit(a, i, invalid) - 2585 bcd_get_digit(b, i, invalid) + carry; 2586 if (digit & 0x80) { 2587 carry = -1; 2588 digit += 10; 2589 } else { 2590 carry = 0; 2591 } 2592 2593 bcd_put_digit(t, digit, i); 2594 } 2595 2596 *overflow = carry; 2597 } 2598 2599 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2600 { 2601 2602 int sgna = bcd_get_sgn(a); 2603 int sgnb = bcd_get_sgn(b); 2604 int invalid = (sgna == 0) || (sgnb == 0); 2605 int overflow = 0; 2606 uint32_t cr = 0; 2607 ppc_avr_t result = { .u64 = { 0, 0 } }; 2608 2609 if (!invalid) { 2610 if (sgna == sgnb) { 2611 result.u8[BCD_DIG_BYTE(0)] 
= bcd_preferred_sgn(sgna, ps); 2612 bcd_add_mag(&result, a, b, &invalid, &overflow); 2613 cr = bcd_cmp_zero(&result); 2614 } else { 2615 int magnitude = bcd_cmp_mag(a, b); 2616 if (magnitude > 0) { 2617 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps); 2618 bcd_sub_mag(&result, a, b, &invalid, &overflow); 2619 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2620 } else if (magnitude < 0) { 2621 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps); 2622 bcd_sub_mag(&result, b, a, &invalid, &overflow); 2623 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2624 } else { 2625 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(0, ps); 2626 cr = CRF_EQ; 2627 } 2628 } 2629 } 2630 2631 if (unlikely(invalid)) { 2632 result.VsrD(0) = result.VsrD(1) = -1; 2633 cr = CRF_SO; 2634 } else if (overflow) { 2635 cr |= CRF_SO; 2636 } 2637 2638 *r = result; 2639 2640 return cr; 2641 } 2642 2643 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2644 { 2645 ppc_avr_t bcopy = *b; 2646 int sgnb = bcd_get_sgn(b); 2647 if (sgnb < 0) { 2648 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2649 } else if (sgnb > 0) { 2650 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2651 } 2652 /* else invalid ... defer to bcdadd code for proper handling */ 2653 2654 return helper_bcdadd(r, a, &bcopy, ps); 2655 } 2656 2657 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2658 { 2659 int i; 2660 int cr = 0; 2661 uint16_t national = 0; 2662 uint16_t sgnb = get_national_digit(b, 0); 2663 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2664 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2665 2666 for (i = 1; i < 8; i++) { 2667 national = get_national_digit(b, i); 2668 if (unlikely(national < 0x30 || national > 0x39)) { 2669 invalid = 1; 2670 break; 2671 } 2672 2673 bcd_put_digit(&ret, national & 0xf, i); 2674 } 2675 2676 if (sgnb == NATIONAL_PLUS) { 2677 bcd_put_digit(&ret, (ps == 0) ? 
BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2678 } else { 2679 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2680 } 2681 2682 cr = bcd_cmp_zero(&ret); 2683 2684 if (unlikely(invalid)) { 2685 cr = CRF_SO; 2686 } 2687 2688 *r = ret; 2689 2690 return cr; 2691 } 2692 2693 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2694 { 2695 int i; 2696 int cr = 0; 2697 int sgnb = bcd_get_sgn(b); 2698 int invalid = (sgnb == 0); 2699 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2700 2701 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0); 2702 2703 for (i = 1; i < 8; i++) { 2704 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2705 2706 if (unlikely(invalid)) { 2707 break; 2708 } 2709 } 2710 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2711 2712 cr = bcd_cmp_zero(b); 2713 2714 if (ox_flag) { 2715 cr |= CRF_SO; 2716 } 2717 2718 if (unlikely(invalid)) { 2719 cr = CRF_SO; 2720 } 2721 2722 *r = ret; 2723 2724 return cr; 2725 } 2726 2727 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2728 { 2729 int i; 2730 int cr = 0; 2731 int invalid = 0; 2732 int zone_digit = 0; 2733 int zone_lead = ps ? 0xF : 0x3; 2734 int digit = 0; 2735 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2736 int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4; 2737 2738 if (unlikely((sgnb < 0xA) && ps)) { 2739 invalid = 1; 2740 } 2741 2742 for (i = 0; i < 16; i++) { 2743 zone_digit = i ? 
b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead; 2744 digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF; 2745 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2746 invalid = 1; 2747 break; 2748 } 2749 2750 bcd_put_digit(&ret, digit, i + 1); 2751 } 2752 2753 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2754 (!ps && (sgnb & 0x4))) { 2755 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2756 } else { 2757 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2758 } 2759 2760 cr = bcd_cmp_zero(&ret); 2761 2762 if (unlikely(invalid)) { 2763 cr = CRF_SO; 2764 } 2765 2766 *r = ret; 2767 2768 return cr; 2769 } 2770 2771 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2772 { 2773 int i; 2774 int cr = 0; 2775 uint8_t digit = 0; 2776 int sgnb = bcd_get_sgn(b); 2777 int zone_lead = (ps) ? 0xF0 : 0x30; 2778 int invalid = (sgnb == 0); 2779 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2780 2781 int ox_flag = ((b->VsrD(0) >> 4) != 0); 2782 2783 for (i = 0; i < 16; i++) { 2784 digit = bcd_get_digit(b, i + 1, &invalid); 2785 2786 if (unlikely(invalid)) { 2787 break; 2788 } 2789 2790 ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit; 2791 } 2792 2793 if (ps) { 2794 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2795 } else { 2796 bcd_put_digit(&ret, (sgnb == 1) ? 
0x3 : 0x7, 1); 2797 } 2798 2799 cr = bcd_cmp_zero(b); 2800 2801 if (ox_flag) { 2802 cr |= CRF_SO; 2803 } 2804 2805 if (unlikely(invalid)) { 2806 cr = CRF_SO; 2807 } 2808 2809 *r = ret; 2810 2811 return cr; 2812 } 2813 2814 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2815 { 2816 int i; 2817 int cr = 0; 2818 uint64_t lo_value; 2819 uint64_t hi_value; 2820 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2821 2822 if (b->VsrSD(0) < 0) { 2823 lo_value = -b->VsrSD(1); 2824 hi_value = ~b->VsrD(0) + !lo_value; 2825 bcd_put_digit(&ret, 0xD, 0); 2826 } else { 2827 lo_value = b->VsrD(1); 2828 hi_value = b->VsrD(0); 2829 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); 2830 } 2831 2832 if (divu128(&lo_value, &hi_value, 1000000000000000ULL) || 2833 lo_value > 9999999999999999ULL) { 2834 cr = CRF_SO; 2835 } 2836 2837 for (i = 1; i < 16; hi_value /= 10, i++) { 2838 bcd_put_digit(&ret, hi_value % 10, i); 2839 } 2840 2841 for (; i < 32; lo_value /= 10, i++) { 2842 bcd_put_digit(&ret, lo_value % 10, i); 2843 } 2844 2845 cr |= bcd_cmp_zero(&ret); 2846 2847 *r = ret; 2848 2849 return cr; 2850 } 2851 2852 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2853 { 2854 uint8_t i; 2855 int cr; 2856 uint64_t carry; 2857 uint64_t unused; 2858 uint64_t lo_value; 2859 uint64_t hi_value = 0; 2860 int sgnb = bcd_get_sgn(b); 2861 int invalid = (sgnb == 0); 2862 2863 lo_value = bcd_get_digit(b, 31, &invalid); 2864 for (i = 30; i > 0; i--) { 2865 mulu64(&lo_value, &carry, lo_value, 10ULL); 2866 mulu64(&hi_value, &unused, hi_value, 10ULL); 2867 lo_value += bcd_get_digit(b, i, &invalid); 2868 hi_value += carry; 2869 2870 if (unlikely(invalid)) { 2871 break; 2872 } 2873 } 2874 2875 if (sgnb == -1) { 2876 r->VsrSD(1) = -lo_value; 2877 r->VsrSD(0) = ~hi_value + !r->VsrSD(1); 2878 } else { 2879 r->VsrSD(1) = lo_value; 2880 r->VsrSD(0) = hi_value; 2881 } 2882 2883 cr = bcd_cmp_zero(b); 2884 2885 if (unlikely(invalid)) { 2886 cr = CRF_SO; 2887 } 2888 2889 return cr; 2890 } 2891 
/*
 * bcdcpsgn.: copy a's digits with b's sign nibble.  Either operand having
 * an invalid sign or digit reports SO and leaves r as a's digits with b's
 * sign already installed.  (ps is unused here.)
 */
uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0);

    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}

/* bcdsetsgn.: rewrite b's sign nibble with the preferred code for its
 * current sign; invalid input reports SO. */
uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}

/*
 * bcds.: decimal shift.  The signed shift count (in digits) is taken from
 * one byte of a (memory-order dependent index); positive shifts left and
 * can set SO on overflow, negative shifts right.  The sign nibble is
 * cleared before shifting and re-installed afterwards.
 */
uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s8[7];
#else
    int i = a->s8[8];
#endif
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;        /* strip the sign nibble before shifting */

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    /* clamp the count to the 31-digit width */
    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

/*
 * bcdus.: unsigned decimal shift over all 32 nibbles (no sign nibble).
 * Counts >= 32 zero the result (left shifts also flag SO); counts
 * <= -32 just zero it.
 */
uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

#if defined(HOST_WORDS_BIGENDIAN)
    i = a->s8[7];
#else
    i = a->s8[8];
#endif
    if (i >= 32) {
        ox_flag = true;
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i <= -32) {
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

/*
 * bcdsr.: decimal shift and round.  Like bcds, but a right shift rounds:
 * if the last digit shifted out is >= 5, one is added to the result
 * (bcd_one is a vector holding digit value 1 at digit position 1).
 */
uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;        /* strip the sign nibble before shifting */

#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s8[7];
    ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } };
#else
    int i = a->s8[8];
    ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } };
#endif

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);

        /* round half up: nibble 0 now holds the last digit shifted out */
        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}

/*
 * bcdtrunc.: truncate a signed packed decimal to i digits, where i comes
 * from a halfword of a (+1 to account for the sign nibble).  Digits
 * discarded nonzero => SO.  Counts outside 0..31 leave b unchanged.
 */
uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s16[3] + 1;
#else
    int i = a->s16[4] + 1;
#endif
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        /* keep i nibbles total: mask the high doubleword only */
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i >= 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}

/*
 * bcdutrunc.: unsigned truncate to i digits (no sign nibble, so all 32
 * positions are digits and i == 0 zeroes the whole value).  CR is EQ for
 * a zero result, GT otherwise, OR'd with SO if nonzero digits were lost.
 */
uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

#if defined(HOST_WORDS_BIGENDIAN)
    i = a->s16[3];
#else
    i = a->s16[4];
#endif
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    } else if (i == 0) {
        if (ret.VsrD(0) || ret.VsrD(1)) {
            ox_flag = CRF_SO;
        }
        ret.VsrD(0) = ret.VsrD(1) = 0;
    }

    *r = ret;
    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}

/* vsbox: apply the AES S-box to each byte of a. */
void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

/* vcipher: one AES encryption round (SubBytes/ShiftRows/MixColumns via
 * the combined Te lookup tables) followed by AddRoundKey with b. */
void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.VsrW(i) = b->VsrW(i) ^
            (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}

/* vcipherlast: final AES encryption round (no MixColumns): S-box over
 * shifted bytes, then AddRoundKey with b. */
void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
    }
    *r = result;
}

/* vncipher: one AES decryption round (inverse S-box + round key, then
 * InvMixColumns via the imc tables). */
void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07. The RTL is */
    /* incorrect and will be fixed in V2.07B. */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->VsrW(i) =
            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
            AES_imc[tmp.VsrB(4 * i + 3)][3];
    }
}

/* vncipherlast: final AES decryption round (no InvMixColumns). */
void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
    }
    *r = result;
}

/*
 * vshasigmaw: per-word SHA-256 sigma functions.  Bit 4 of st_six selects
 * lower-case sigma (st == 0) vs upper-case Sigma (st == 1); one bit of
 * the low nibble per word selects sigma0 vs sigma1.  The rotate/shift
 * constants match FIPS 180-4.
 */
void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}

/*
 * vshasigmad: per-doubleword SHA-512 sigma functions; selection as in
 * vshasigmaw but using every other bit (2*i) of the low nibble.
 */
void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}

/* vpermxor: each result byte is a->byte[high nibble of c] XOR
 * b->byte[low nibble of c]. */
void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
/* hbrev[n] is the 4-bit bit-reversal of n */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Reverse the bit order of one byte via the nibble table. */
static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

/* Reverse the bit order of a 32-bit word. */
static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
/*
 * brinc: bit-reversed increment (SPE).  Increments the bit-reversed image
 * of the masked low bits of arg1 within the mask given by arg2, keeping
 * arg1's bits outside the mask.
 */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

/* Count leading sign bits of a 32-bit value (leading ones if negative,
 * leading zeros otherwise). */
uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

/* Count leading zeros of a 32-bit value. */
uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
/*
 * dlmzb: determine the byte index (1-based) of the first zero byte in the
 * 8-byte string {high, low}; returns 8 if none.  The count is written to
 * the low bits of XER and, when update_Rc, CR0 encodes where (or whether)
 * the zero byte was found, OR'd with XER[SO].
 */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}