/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/* subtract 1 from each byte, and with inverse, check if MSB is set at each
 * byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n)  (haszero((x) ^ pattern(n)))
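/*
 * Illustrative walk-through (on a 16-bit miniature of the target_ulong
 * pattern): to test whether 0x2a00 contains the byte 0x2a, hasvalue()
 * first XORs with pattern(0x2a), zeroing the matching byte:
 *     0x2a00 ^ 0x2a2a = 0x002a
 * and haszero() then flags the zero byte via the borrow trick:
 *     (0x002a - 0x0101) & ~0x002a & 0x8080
 *   = 0xff29 & 0xffd5 & 0x8080 = 0x8000   (non-zero => match found)
 */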
uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

/* Return invalid random number.
 *
 * FIXME: Add rng backend or other mechanism to get cryptographically suitable
 * random number
 */
target_ulong helper_darn32(void)
{
    return -1;
}

target_ulong helper_darn64(void)
{
    return -1;
}

#endif

#if defined(TARGET_PPC64)

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & PPC_BIT(index)) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca32 = env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca32 = env->ca = (ret != 0);
    }
    return ret;
}
#endif
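/*
 * Illustrative example: sraw of 0xfffffff5 (-11) by 2 yields 0xfffffffd
 * (-3).  The bits shifted out, value & ((1 << 2) - 1) = 0x1, are non-zero
 * and the result is negative, so CA/CA32 are set: the carry records that
 * the truncating shift rounded toward minus infinity.
 */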
#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words.  */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}
#endif
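/*
 * Illustrative folding for a single byte, 0xB4 (0b10110100, four bits set):
 *   bit pairs:  (0xB4 & 0x55) + ((0xB4 >> 1) & 0x55) = 0x14 + 0x50 = 0x64
 *   nibbles:    (0x64 & 0x33) + ((0x64 >> 2) & 0x33) = 0x20 + 0x11 = 0x31
 *   byte:       (0x31 & 0x0f) + ((0x31 >> 4) & 0x0f) = 0x01 + 0x03 = 0x04
 * popcntb stops after the third step, leaving one count per byte;
 * popcntw folds twice more to get one count per 32-bit word.
 */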
/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}
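/*
 * Illustrative example: the POWER-bridge "div" divides the 64-bit value
 * (arg1 << 32) | MQ by arg2.  With arg1 = 0, MQ = 100 and arg2 = 7,
 * tmp = 100, so 14 is returned and MQ is left holding the remainder 2.
 */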
/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table.  Do the same */
/* Extremely decomposed:
 *                      -arg / 256
 * return 256 * log10(10          + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.inc.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
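/*
 * Illustrative use of the generated converters: cvtsdsw(0x123456789, &sat)
 * exceeds INT32_MAX, so it returns INT32_MAX and sets *sat; callers below
 * accumulate "sat" over all elements and raise VSCR[SAT] once at the end.
 */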
void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        r->VsrB(i) = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        r->VsrB(i) = j++;
    }
}
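/*
 * Illustrative example: for sh = 3, lvsl produces the byte sequence
 * 0x03 0x04 ... 0x12 and lvsr produces 0x0d 0x0e ... 0x1c.  Feeding such
 * a vector into vperm is the classic AltiVec idiom for realigning
 * unaligned data.
 */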
void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
    env->vscr = r->VsrW(3);
    set_flush_to_zero(vscr_nj, &env->vec_status);
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->VsrD(1) = res & 1;
    r->VsrD(0) = 0;
}
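/*
 * Illustrative example: the folds above stop at a shift of 8, so only the
 * least-significant bit of each byte contributes.  For the word
 * 0x01000000, those bits are 1, 0, 0, 0, and vprtybw stores parity 1:
 * these are byte-parity instructions, not whole-element parity.
 */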
#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
#define VARITH(suffix, element)                 \
    VARITH_DO(add##suffix, +, element)          \
    VARITH_DO(sub##suffix, -, element)
VARITH(ubm, u8)
VARITH(uhm, u16)
VARITH(uwm, u32)
VARITH(udm, u64)
VARITH_DO(muluwm, *, u32)
#undef VARITH_DO
#undef VARITH

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status);   \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
                                       type, &env->vec_status);         \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,   \
                        ppc_avr_t *b)                                   \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            switch (sizeof(r->element[0])) {                            \
            case 1:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 2:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 4:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG
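/*
 * Illustrative example: vavgub of 1 and 2 computes (1 + 2 + 1) >> 1 = 2,
 * i.e. the average rounded up; the widened etype keeps the intermediate
 * sum from overflowing the element type.
 */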
#define VABSDU_DO(name, element)                                        \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)           \
{                                                                       \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        r->element[i] = (a->element[i] > b->element[i]) ?               \
            (a->element[i] - b->element[i]) :                           \
            (b->element[i] - a->element[i]);                            \
    }                                                                   \
}

/* VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element)                   \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU

#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f32[i] = float32_scalbn(t, -uim, &env->vec_status);      \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP
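/*
 * Illustrative example of the record forms: if every element compares
 * true, "all" stays all-ones and CR6 is set to 0b1000; if no element
 * compares true, "none" stays zero and CR6 is set to 0b0010; any mixed
 * outcome leaves CR6 as 0b0000.
 */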
#define VCMPNE_DO(suffix, element, etype, cmpzero, record)              \
    void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r,          \
                               ppc_avr_t *a, ppc_avr_t *b)              \
    {                                                                   \
        etype ones = (etype)-1;                                         \
        etype all = ones;                                               \
        etype result, none = 0;                                         \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (cmpzero) {                                              \
                result = ((a->element[i] == 0)                          \
                          || (b->element[i] == 0)                       \
                          || (a->element[i] != b->element[i]) ?         \
                          ones : 0x0);                                  \
            } else {                                                    \
                result = (a->element[i] != b->element[i]) ? ones : 0x0; \
            }                                                           \
            r->element[i] = result;                                     \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }

/* VCMPNEZ - Vector compare not equal to zero
 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VCMPNE(suffix, element, etype, cmpzero)         \
    VCMPNE_DO(suffix, element, etype, cmpzero, 0)       \
    VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
VCMPNE(zb, u8, uint8_t, 1)
VCMPNE(zh, u16, uint16_t, 1)
VCMPNE(zw, u32, uint32_t, 1)
VCMPNE(b, u8, uint8_t, 0)
VCMPNE(h, u16, uint16_t, 0)
VCMPNE(w, u32, uint32_t, 0)
#undef VCMPNE_DO
#undef VCMPNE

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f32[i], b->f32[i],       \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        int le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
                                           &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f32[i]);
            int ge_rel = float32_compare_quiet(a->f32[i], bneg,
                                               &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}
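/*
 * Illustrative example: vcmpbfp tests each element against the bounds
 * [-b, b].  For a = 3.0, b = 2.0, a <= b fails (bit 31 set) while
 * a >= -b holds (bit 30 clear), giving 0x80000000; since the element is
 * out of bounds, the record form leaves the "all within bounds" bit of
 * CR6 clear.
 */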
#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            if (float32_is_any_nan(b->f32[i])) {                        \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f32[i], &s);          \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
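/*
 * Illustrative example of the Q15 rounding in vmhraddshs: with
 * a = 0x4000 (0.5) and b = 0x2000 (0.25), prod = 0x08000000; adding the
 * rounding constant 0x4000 and shifting right by 15 gives 0x1000 (0.125),
 * to which c is then added with saturation.
 */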
#define VMINMAX_DO(name, compare, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (a->element[i] compare b->element[i]) {                  \
                r->element[i] = b->element[i];                          \
            } else {                                                    \
                r->element[i] = a->element[i];                          \
            }                                                           \
        }                                                               \
    }
#define VMINMAX(suffix, element)                \
    VMINMAX_DO(min##suffix, >, element)         \
    VMINMAX_DO(max##suffix, <, element)
VMINMAX(sb, s8)
VMINMAX(sh, s16)
VMINMAX(sw, s32)
VMINMAX(sd, s64)
VMINMAX(ub, u8)
VMINMAX(uh, u16)
VMINMAX(uw, u32)
VMINMAX(ud, u64)
#undef VMINMAX_DO
#undef VMINMAX

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, access, ofs)                             \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i, half = ARRAY_SIZE(r->element) / 2;                       \
                                                                        \
        for (i = 0; i < half; i++) {                                    \
            result.access(i * 2 + 0) = a->access(i + ofs);              \
            result.access(i * 2 + 1) = b->access(i + ofs);              \
        }                                                               \
        *r = result;                                                    \
    }

#define VMRG(suffix, element, access)                   \
    VMRG_DO(mrgl##suffix, element, access, half)        \
    VMRG_DO(mrgh##suffix, element, access, 0)
VMRG(b, u8, VsrB)
VMRG(h, u16, VsrH)
VMRG(w, u32, VsrW)
#undef VMRG_DO
#undef VMRG

void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
                                     (cast)b->mul_access(i);            \
        }                                                               \
    }

#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) *       \
                                     (cast)b->mul_access(i + 1);        \
        }                                                               \
    }

#define VMUL(suffix, mul_element, mul_access, prod_access, cast)          \
    VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \
    VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
VMUL(sb, s8, VsrSB, VsrSH, int16_t)
VMUL(sh, s16, VsrSH, VsrSW, int32_t)
VMUL(sw, s32, VsrSW, VsrSD, int64_t)
VMUL(ub, u8, VsrB, VsrH, uint16_t)
VMUL(uh, u16, VsrH, VsrW, uint32_t)
VMUL(uw, u32, VsrW, VsrD, uint64_t)
#undef VMUL_DO_EVN
#undef VMUL_DO_ODD
#undef VMUL

void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = s & 0xf;

        if (s & 0x10) {
            result.VsrB(i) = b->VsrB(index);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                   ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = 15 - (s & 0xf);

        if (s & 0x10) {
            result.VsrB(i) = a->VsrB(index);
        } else {
            result.VsrB(i) = b->VsrB(index);
        }
    }
    *r = result;
}
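/*
 * Illustrative example: each vperm selector byte picks one byte of the
 * 32-byte concatenation a:b.  A selector byte of 0x13 has bit 4 set and
 * low nibble 3, so it selects byte 3 of b; a selector byte of 0x05
 * selects byte 5 of a.
 */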
#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->u64[1 - i], 63 - index, 1))
#endif

void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}

void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63 - (index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->VsrD(0) = perm;
    r->VsrD(1) = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

static const uint64_t VGBBD_MASKS[256] = {
    0x0000000000000000ull, /* 00 */
    0x0000000000000080ull, /* 01 */
    0x0000000000008000ull, /* 02 */
    0x0000000000008080ull, /* 03 */
    0x0000000000800000ull, /* 04 */
    0x0000000000800080ull, /* 05 */
    0x0000000000808000ull, /* 06 */
    0x0000000000808080ull, /* 07 */
    0x0000000080000000ull, /* 08 */
    0x0000000080000080ull, /* 09 */
    0x0000000080008000ull, /* 0A */
    0x0000000080008080ull, /* 0B */
    0x0000000080800000ull, /* 0C */
    0x0000000080800080ull, /* 0D */
    0x0000000080808000ull, /* 0E */
    0x0000000080808080ull, /* 0F */
    0x0000008000000000ull, /* 10 */
    0x0000008000000080ull, /* 11 */
    0x0000008000008000ull, /* 12 */
    0x0000008000008080ull, /* 13 */
    0x0000008000800000ull, /* 14 */
    0x0000008000800080ull, /* 15 */
    0x0000008000808000ull, /* 16 */
    0x0000008000808080ull, /* 17 */
    0x0000008080000000ull, /* 18 */
    0x0000008080000080ull, /* 19 */
    0x0000008080008000ull, /* 1A */
    0x0000008080008080ull, /* 1B */
    0x0000008080800000ull, /* 1C */
    0x0000008080800080ull, /* 1D */
    0x0000008080808000ull, /* 1E */
    0x0000008080808080ull, /* 1F */
    0x0000800000000000ull, /* 20 */
    0x0000800000000080ull, /* 21 */
    0x0000800000008000ull, /* 22 */
    0x0000800000008080ull, /* 23 */
    0x0000800000800000ull, /* 24 */
    0x0000800000800080ull, /* 25 */
    0x0000800000808000ull, /* 26 */
    0x0000800000808080ull, /* 27 */
    0x0000800080000000ull, /* 28 */
    0x0000800080000080ull, /* 29 */
    0x0000800080008000ull, /* 2A */
    0x0000800080008080ull, /* 2B */
    0x0000800080800000ull, /* 2C */
    0x0000800080800080ull, /* 2D */
    0x0000800080808000ull, /* 2E */
    0x0000800080808080ull, /* 2F */
    0x0000808000000000ull, /* 30 */
    0x0000808000000080ull, /* 31 */
    0x0000808000008000ull, /* 32 */
    0x0000808000008080ull, /* 33 */
    0x0000808000800000ull, /* 34 */
    0x0000808000800080ull, /* 35 */
    0x0000808000808000ull, /* 36 */
    0x0000808000808080ull, /* 37 */
    0x0000808080000000ull, /* 38 */
    0x0000808080000080ull, /* 39 */
    0x0000808080008000ull, /* 3A */
    0x0000808080008080ull, /* 3B */
    0x0000808080800000ull, /* 3C */
    0x0000808080800080ull, /* 3D */
    0x0000808080808000ull, /* 3E */
    0x0000808080808080ull, /* 3F */
    0x0080000000000000ull, /* 40 */
    0x0080000000000080ull, /* 41 */
    0x0080000000008000ull, /* 42 */
    0x0080000000008080ull, /* 43 */
    0x0080000000800000ull, /* 44 */
    0x0080000000800080ull, /* 45 */
    0x0080000000808000ull, /* 46 */
    0x0080000000808080ull, /* 47 */
    0x0080000080000000ull, /* 48 */
    0x0080000080000080ull, /* 49 */
    0x0080000080008000ull, /* 4A */
    0x0080000080008080ull, /* 4B */
    0x0080000080800000ull, /* 4C */
    0x0080000080800080ull, /* 4D */
    0x0080000080808000ull, /* 4E */
    0x0080000080808080ull, /* 4F */
    0x0080008000000000ull, /* 50 */
    0x0080008000000080ull, /* 51 */
    0x0080008000008000ull, /* 52 */
    0x0080008000008080ull, /* 53 */
    0x0080008000800000ull, /* 54 */
    0x0080008000800080ull, /* 55 */
    0x0080008000808000ull, /* 56 */
    0x0080008000808080ull, /* 57 */
    0x0080008080000000ull, /* 58 */
    0x0080008080000080ull, /* 59 */
    0x0080008080008000ull, /* 5A */
    0x0080008080008080ull, /* 5B */
    0x0080008080800000ull, /* 5C */
    0x0080008080800080ull, /* 5D */
    0x0080008080808000ull, /* 5E */
    0x0080008080808080ull, /* 5F */
    0x0080800000000000ull, /* 60 */
    0x0080800000000080ull, /* 61 */
    0x0080800000008000ull, /* 62 */
    0x0080800000008080ull, /* 63 */
    0x0080800000800000ull, /* 64 */
    0x0080800000800080ull, /* 65 */
    0x0080800000808000ull, /* 66 */
    0x0080800000808080ull, /* 67 */
    0x0080800080000000ull, /* 68 */
    0x0080800080000080ull, /* 69 */
    0x0080800080008000ull, /* 6A */
    0x0080800080008080ull, /* 6B */
    0x0080800080800000ull, /* 6C */
    0x0080800080800080ull, /* 6D */
    0x0080800080808000ull, /* 6E */
    0x0080800080808080ull, /* 6F */
    0x0080808000000000ull, /* 70 */
    0x0080808000000080ull, /* 71 */
    0x0080808000008000ull, /* 72 */
    0x0080808000008080ull, /* 73 */
    0x0080808000800000ull, /* 74 */
    0x0080808000800080ull, /* 75 */
    0x0080808000808000ull, /* 76 */
    0x0080808000808080ull, /* 77 */
    0x0080808080000000ull, /* 78 */
    0x0080808080000080ull, /* 79 */
    0x0080808080008000ull, /* 7A */
    0x0080808080008080ull, /* 7B */
    0x0080808080800000ull, /* 7C */
    0x0080808080800080ull, /* 7D */
    0x0080808080808000ull, /* 7E */
    0x0080808080808080ull, /* 7F */
    0x8000000000000000ull, /* 80 */
    0x8000000000000080ull, /* 81 */
    0x8000000000008000ull, /* 82 */
    0x8000000000008080ull, /* 83 */
    0x8000000000800000ull, /* 84 */
    0x8000000000800080ull, /* 85 */
    0x8000000000808000ull, /* 86 */
    0x8000000000808080ull, /* 87 */
    0x8000000080000000ull, /* 88 */
    0x8000000080000080ull, /* 89 */
    0x8000000080008000ull, /* 8A */
    0x8000000080008080ull, /* 8B */
    0x8000000080800000ull, /* 8C */
    0x8000000080800080ull, /* 8D */
    0x8000000080808000ull, /* 8E */
    0x8000000080808080ull, /* 8F */
    0x8000008000000000ull, /* 90 */
    0x8000008000000080ull, /* 91 */
    0x8000008000008000ull, /* 92 */
    0x8000008000008080ull, /* 93 */
    0x8000008000800000ull, /* 94 */
    0x8000008000800080ull, /* 95 */
    0x8000008000808000ull, /* 96 */
    0x8000008000808080ull, /* 97 */
    0x8000008080000000ull, /* 98 */
    0x8000008080000080ull, /* 99 */
    0x8000008080008000ull, /* 9A */
    0x8000008080008080ull, /* 9B */
    0x8000008080800000ull, /* 9C */
    0x8000008080800080ull, /* 9D */
    0x8000008080808000ull, /* 9E */
    0x8000008080808080ull, /* 9F */
    0x8000800000000000ull, /* A0 */
    0x8000800000000080ull, /* A1 */
    0x8000800000008000ull, /* A2 */
    0x8000800000008080ull, /* A3 */
    0x8000800000800000ull, /* A4 */
    0x8000800000800080ull, /* A5 */
    0x8000800000808000ull, /* A6 */
    0x8000800000808080ull, /* A7 */
    0x8000800080000000ull, /* A8 */
    0x8000800080000080ull, /* A9 */
    0x8000800080008000ull, /* AA */
    0x8000800080008080ull, /* AB */
    0x8000800080800000ull, /* AC */
    0x8000800080800080ull, /* AD */
    0x8000800080808000ull, /* AE */
    0x8000800080808080ull, /* AF */
    0x8000808000000000ull, /* B0 */
    0x8000808000000080ull, /* B1 */
    0x8000808000008000ull, /* B2 */
    0x8000808000008080ull, /* B3 */
    0x8000808000800000ull, /* B4 */
    0x8000808000800080ull, /* B5 */
    0x8000808000808000ull, /* B6 */
    0x8000808000808080ull, /* B7 */
    0x8000808080000000ull, /* B8 */
    0x8000808080000080ull, /* B9 */
    0x8000808080008000ull, /* BA */
    0x8000808080008080ull, /* BB */
    0x8000808080800000ull, /* BC */
    0x8000808080800080ull, /* BD */
    0x8000808080808000ull, /* BE */
    0x8000808080808080ull, /* BF */
    0x8080000000000000ull, /* C0 */
    0x8080000000000080ull, /* C1 */
    0x8080000000008000ull, /* C2 */
    0x8080000000008080ull, /* C3 */
    0x8080000000800000ull, /* C4 */
    0x8080000000800080ull, /* C5 */
    0x8080000000808000ull, /* C6 */
    0x8080000000808080ull, /* C7 */
    0x8080000080000000ull, /* C8 */
    0x8080000080000080ull, /* C9 */
    0x8080000080008000ull, /* CA */
    0x8080000080008080ull, /* CB */
    0x8080000080800000ull, /* CC */
    0x8080000080800080ull, /* CD */
    0x8080000080808000ull, /* CE */
    0x8080000080808080ull, /* CF */
    0x8080008000000000ull, /* D0 */
    0x8080008000000080ull, /* D1 */
    0x8080008000008000ull, /* D2 */
    0x8080008000008080ull, /* D3 */
    0x8080008000800000ull, /* D4 */
    0x8080008000800080ull, /* D5 */
    0x8080008000808000ull, /* D6 */
    0x8080008000808080ull, /* D7 */
    0x8080008080000000ull, /* D8 */
    0x8080008080000080ull, /* D9 */
    0x8080008080008000ull, /* DA */
    0x8080008080008080ull, /* DB */
    0x8080008080800000ull, /* DC */
    0x8080008080800080ull, /* DD */
    0x8080008080808000ull, /* DE */
    0x8080008080808080ull, /* DF */
    0x8080800000000000ull, /* E0 */
    0x8080800000000080ull, /* E1 */
    0x8080800000008000ull, /* E2 */
    0x8080800000008080ull, /* E3 */
    0x8080800000800000ull, /* E4 */
    0x8080800000800080ull, /* E5 */
    0x8080800000808000ull, /* E6 */
    0x8080800000808080ull, /* E7 */
    0x8080800080000000ull, /* E8 */
    0x8080800080000080ull, /* E9 */
    0x8080800080008000ull, /* EA */
    0x8080800080008080ull, /* EB */
    0x8080800080800000ull, /* EC */
    0x8080800080800080ull, /* ED */
    0x8080800080808000ull, /* EE */
    0x8080800080808080ull, /* EF */
    0x8080808000000000ull, /* F0 */
    0x8080808000000080ull, /* F1 */
    0x8080808000008000ull, /* F2 */
    0x8080808000008080ull, /* F3 */
    0x8080808000800000ull, /* F4 */
    0x8080808000800080ull, /* F5 */
    0x8080808000808000ull, /* F6 */
    0x8080808000808080ull, /* F7 */
    0x8080808080000000ull, /* F8 */
    0x8080808080000080ull, /* F9 */
    0x8080808080008000ull, /* FA */
    0x8080808080008080ull, /* FB */
    0x8080808080800000ull, /* FC */
    0x8080808080800080ull, /* FD */
    0x8080808080808000ull, /* FE */
    0x8080808080808080ull, /* FF */
};

void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    uint64_t t[2] = { 0, 0 };

    VECTOR_FOR_INORDER_I(i, u8) {
#if defined(HOST_WORDS_BIGENDIAN)
        t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
#else
        t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (7 - (i & 7));
#endif
    }

    r->u64[0] = t[0];
    r->u64[1] = t[1];
}
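/*
 * Illustrative note on vgbbd: each doubleword is in effect transposed as
 * an 8x8 bit matrix.  VGBBD_MASKS[x] scatters the eight bits of byte x
 * onto the most-significant bit of each result byte, and the per-byte
 * shift above then moves them into the column corresponding to the
 * source byte, so, roughly, bit j of input byte i ends up as bit i of
 * output byte j.
 */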
#define PMSUM(name, srcfld, trgfld, trgtyp)                             \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i, j;                                                           \
    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])];              \
                                                                        \
    VECTOR_FOR_INORDER_I(i, srcfld) {                                   \
        prod[i] = 0;                                                    \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {                \
            if (a->srcfld[i] & (1ull << j)) {                           \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);                 \
            }                                                           \
        }                                                               \
    }                                                                   \
                                                                        \
    VECTOR_FOR_INORDER_I(i, trgfld) {                                   \
        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1];                   \
    }                                                                   \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{

#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                ppc_avr_t bshift;
                if (j == 0) {
                    bshift.VsrD(0) = 0;
                    bshift.VsrD(1) = b->u64[i];
                } else {
                    bshift.VsrD(0) = b->u64[i] >> (64 - j);
                    bshift.VsrD(1) = b->u64[i] << j;
                }
                prod[i].VsrD(1) ^= bshift.VsrD(1);
                prod[i].VsrD(0) ^= bshift.VsrD(0);
            }
        }
    }

    r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
    r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
#endif
}
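/*
 * Illustrative example of the carry-less (GF(2) polynomial) multiply used
 * above: 0b101 "times" 0b110 is (0b110 << 2) ^ (0b110 << 0)
 * = 0b11000 ^ 0b110 = 0b11110; partial products are XORed rather than
 * added with carries, and vpmsum* finally XORs each pair of neighbouring
 * products into one wider element.
 */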
#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
                                     ((e >> 6) & 0x3e0) |
                                     ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(sdss, s64, s32, cvtsdsw, 1)
VPK(sdus, s64, u32, cvtsduw, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(udus, u64, u32, cvtuduw, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
VPK(udum, u64, u32, I, 0)
#undef I
#undef VPK
#undef PKBIG

void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
    }
}

#define VRFI(suffix, rounding)                                          \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *b)                              \
    {                                                                   \
        int i;                                                          \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(rounding, &s);                          \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = float32_round_to_int(b->f32[i], &s);            \
        }                                                               \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI

#define VROTATE(suffix, element, mask)                                  \
    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = (a->element[i] << shift) |                  \
                (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
        }                                                               \
    }
VROTATE(b, u8, 0x7)
VROTATE(h, u16, 0xF)
VROTATE(w, u32, 0x1F)
VROTATE(d, u64, 0x3F)
#undef VROTATE
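/*
 * Illustrative example: vrlw rotating 0x80000001 left by 1 gives
 * (0x80000001 << 1) | (0x80000001 >> 31) = 0x00000002 | 0x1 = 0x00000003;
 * masking the count keeps the rotate within the element width.
 */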
void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        float32 t = float32_sqrt(b->f32[i], &env->vec_status);

        r->f32[i] = float32_div(float32_one, t, &env->vec_status);
    }
}

#define VRLMI(name, size, element, insert)                              \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i;                                                              \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        uint##size##_t src1 = a->element[i];                            \
        uint##size##_t src2 = b->element[i];                            \
        uint##size##_t src3 = r->element[i];                            \
        uint##size##_t begin, end, shift, mask, rot_val;                \
                                                                        \
        shift = extract##size(src2, 0, 6);                              \
        end   = extract##size(src2, 8, 6);                              \
        begin = extract##size(src2, 16, 6);                             \
        rot_val = rol##size(src1, shift);                               \
        mask = mask_u##size(begin, end);                                \
        if (insert) {                                                   \
            r->element[i] = (rot_val & mask) | (src3 & ~mask);          \
        } else {                                                        \
            r->element[i] = (rot_val & mask);                           \
        }                                                               \
    }                                                                   \
}

VRLMI(vrldmi, 64, u64, 1);
VRLMI(vrlwmi, 32, u32, 1);
VRLMI(vrldnm, 64, u64, 0);
VRLMI(vrlwnm, 32, u32, 0);

void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                 ppc_avr_t *c)
{
    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
}

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTU_X_DO(name, size, left)                            \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
{                                                               \
    int index;                                                  \
    if (left) {                                                 \
        index = (a & 0xf) * 8;                                  \
    } else {                                                    \
        index = ((15 - (a & 0xf) + 1) * 8) - size;              \
    }                                                           \
    return int128_getlo(int128_rshift(b->s128, index)) &        \
        MAKE_64BIT_MASK(0, size);                               \
}
#else
#define VEXTU_X_DO(name, size, left)                            \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
{                                                               \
    int index;                                                  \
    if (left) {                                                 \
        index = ((15 - (a & 0xf) + 1) * 8) - size;              \
    } else {                                                    \
        index = (a & 0xf) * 8;                                  \
    }                                                           \
    return int128_getlo(int128_rshift(b->s128, index)) &        \
        MAKE_64BIT_MASK(0, size);                               \
}
#endif

VEXTU_X_DO(vextublx,  8, 1)
VEXTU_X_DO(vextuhlx, 16, 1)
VEXTU_X_DO(vextuwlx, 32, 1)
VEXTU_X_DO(vextubrx,  8, 0)
VEXTU_X_DO(vextuhrx, 16, 0)
VEXTU_X_DO(vextuwrx, 32, 0)
#undef VEXTU_X_DO

/* The specification says that the results are undefined if all of the
 * shift counts are not identical.  We check to make sure that they are,
 * to conform to what real hardware appears to do.  */
#define VSHIFT(suffix, leftp)                                           \
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
    {                                                                   \
        int shift = b->VsrB(15) & 0x7;                                  \
        int doit = 1;                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
        }                                                               \
        if (doit) {                                                     \
            if (shift == 0) {                                           \
                *r = *a;                                                \
            } else if (leftp) {                                         \
                uint64_t carry = a->VsrD(1) >> (64 - shift);            \
                                                                        \
                r->VsrD(0) = (a->VsrD(0) << shift) | carry;             \
                r->VsrD(1) = a->VsrD(1) << shift;                       \
            } else {                                                    \
                uint64_t carry = a->VsrD(0) << (64 - shift);            \
                                                                        \
                r->VsrD(1) = (a->VsrD(1) >> shift) | carry;             \
                r->VsrD(0) = a->VsrD(0) >> shift;                       \
            }                                                           \
        }                                                               \
    }
VSHIFT(l, 1)
VSHIFT(r, 0)
#undef VSHIFT

#define VSL(suffix, element, mask)                                      \
    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] << shift;                     \
        }                                                               \
    }
VSL(b, u8, 0x7)
VSL(h, u16, 0x0F)
VSL(w, u32, 0x1F)
VSL(d, u64, 0x3F)
#undef VSL

void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->u8[i] & 0x7;             /* extract shift value */
        bytes = (a->u8[i] << 8) +           /* extract adjacent bytes */
            (((i + 1) < size) ? a->u8[i + 1] : 0);
        r->u8[i] = (bytes << shift) >> 8;   /* shift and store result */
    }
}

void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /* Iterate in reverse order, as the destination and source registers
     * can be the same.  The bytes are modified in place (saving a
     * temporary), so the reverse order guarantees that a computed result
     * is never fed back into the computation.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->u8[i] & 0x7;             /* extract shift value */
        bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
                                            /* extract adjacent bytes */
        r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */
    }
}
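/*
 * Illustrative example for vslv (mirrored by vsrv): with a->u8[i] = 0x81,
 * a->u8[i + 1] = 0xc0 and a shift count of 1, bytes = 0x81c0 and
 * (0x81c0 << 1) >> 8 truncates to 0x03: the bit shifted out of byte i is
 * discarded and the top bit of byte i + 1 is shifted in from below.
 */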
void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.VsrB(i) = b->VsrB(index - 0x10);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

/* Experimental testing shows that hardware masks the immediate.  */
#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
#define VSPLT(suffix, element, access)                                    \
    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
    {                                                                     \
        uint32_t s = b->access(SPLAT_ELEMENT(element));                   \
        int i;                                                            \
                                                                          \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                    \
            r->access(i) = s;                                             \
        }                                                                 \
    }
VSPLT(b, u8, VsrB)
VSPLT(h, u16, VsrH)
VSPLT(w, u32, VsrW)
#undef VSPLT
#undef SPLAT_ELEMENT
#undef _SPLAT_MASKED
#if defined(HOST_WORDS_BIGENDIAN)
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])],           \
               sizeof(r->element[0]));                                      \
    }
#else
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        uint32_t d = (16 - index) - sizeof(r->element[0]);                  \
        memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0]));               \
    }
#endif
VINSERT(b, u8)
VINSERT(h, u16)
VINSERT(w, u32)
VINSERT(d, u64)
#undef VINSERT
#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
        memset(&r->u8[8], 0, 8);                                             \
        memset(&r->u8[0], 0, 8 - es);                                        \
    }
#else
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        uint32_t s = (16 - index) - es;                                      \
        memmove(&r->u8[8], &b->u8[s], es);                                   \
        memset(&r->u8[0], 0, 8);                                             \
        memset(&r->u8[8 + es], 0, 8 - es);                                   \
    }
#endif
VEXTRACT(ub, u8)
VEXTRACT(uh, u16)
VEXTRACT(uw, u32)
VEXTRACT(d, u64)
#undef VEXTRACT

void helper_xxextractuw(CPUPPCState *env, target_ulong xtn,
                        target_ulong xbn, uint32_t index)
{
    ppc_vsr_t xt, xb;
    size_t es = sizeof(uint32_t);
    uint32_t ext_index;
    int i;

    getVSR(xbn, &xb, env);
    memset(&xt, 0, sizeof(xt));

    ext_index = index;
    for (i = 0; i < es; i++, ext_index++) {
        xt.VsrB(8 - es + i) = xb.VsrB(ext_index % 16);
    }

    putVSR(xtn, &xt, env);
}

void helper_xxinsertw(CPUPPCState *env, target_ulong xtn,
                      target_ulong xbn, uint32_t index)
{
    ppc_vsr_t xt, xb;
    size_t es = sizeof(uint32_t);
    int ins_index, i = 0;

    getVSR(xbn, &xb, env);
    getVSR(xtn, &xt, env);

    ins_index = index;
    for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
        xt.VsrB(ins_index) = xb.VsrB(8 - es + i);
    }

    putVSR(xtn, &xt, env);
}
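/*
 * Illustrative example: xxextractuw with index = 14 copies the source
 * bytes at element indices 14, 15, 0 and 1 (the "% 16" wraps around the
 * register) into the word at VsrB(4)..VsrB(7) of the target, with the
 * rest of the target cleared.
 */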
void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

    upper = ARRAY_SIZE(r->s32) - 1;
    t = (int64_t)b->VsrSW(upper);
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->VsrSW(i);
        result.VsrSW(i) = 0;
    }
    result.VsrSW(upper) = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

    upper = 1;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->VsrSW(upper + i * 2);

        /* Zero the whole doubleword so the word not written by
         * cvtsdsw() below is well defined. */
        result.VsrD(i) = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->VsrSW(2 * i + j);
        }
        result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
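/*
 * vupkhpx/vupklpx expand 16-bit 1/5/5/5 pixels into 32-bit 8/8/8/8
 * pixels: the single alpha bit is replicated into a full byte and each
 * 5-bit colour channel is zero-extended rather than rescaled, so e.g.
 * the pixel 0xFFFF unpacks to 0xFF1F1F1F.
 */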
#if defined(HOST_WORDS_BIGENDIAN)
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
#define VUPKPX(suffix, hi)                                      \
void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)            \
{                                                               \
    int i;                                                      \
    ppc_avr_t result;                                           \
                                                                \
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                  \
        uint16_t e = b->u16[hi ? i : i + 4];                    \
        uint8_t a = (e >> 15) ? 0xff : 0;                       \
        uint8_t r = (e >> 10) & 0x1f;                           \
        uint8_t g = (e >> 5) & 0x1f;                            \
        uint8_t b = e & 0x1f;                                   \
                                                                \
        result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;   \
    }                                                           \
    *r = result;                                                \
}
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

#define VUPK(suffix, unpacked, packee, hi)                      \
void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)            \
{                                                               \
    int i;                                                      \
    ppc_avr_t result;                                           \
                                                                \
    if (hi) {                                                   \
        for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {         \
            result.unpacked[i] = b->packee[i];                  \
        }                                                       \
    } else {                                                    \
        for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
             i++) {                                             \
            result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
        }                                                       \
    }                                                           \
    *r = result;                                                \
}
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(hsw, s64, s32, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
VUPK(lsw, s64, s32, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

#define VGENERIC_DO(name, element)                              \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                 \
{                                                               \
    int i;                                                      \
                                                                \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {              \
        r->element[i] = name(b->element[i]);                    \
    }                                                           \
}

#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
#define clzw(v) clz32((v))
#define clzd(v) clz64((v))

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)
VGENERIC_DO(clzw, u32)
VGENERIC_DO(clzd, u64)

#undef clzb
#undef clzh
#undef clzw
#undef clzd

#define ctzb(v) ((v) ? ctz32(v) : 8)
#define ctzh(v) ((v) ? ctz32(v) : 16)
#define ctzw(v) ctz32((v))
#define ctzd(v) ctz64((v))

VGENERIC_DO(ctzb, u8)
VGENERIC_DO(ctzh, u16)
VGENERIC_DO(ctzw, u32)
VGENERIC_DO(ctzd, u64)

#undef ctzb
#undef ctzh
#undef ctzw
#undef ctzd

#define popcntb(v) ctpop8(v)
#define popcnth(v) ctpop16(v)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)

VGENERIC_DO(popcntb, u8)
VGENERIC_DO(popcnth, u16)
VGENERIC_DO(popcntw, u32)
VGENERIC_DO(popcntd, u64)

#undef popcntb
#undef popcnth
#undef popcntw
#undef popcntd

#undef VGENERIC_DO

#if defined(HOST_WORDS_BIGENDIAN)
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif

#ifndef CONFIG_INT128

static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
{
    t->u64[0] = ~a.u64[0];
    t->u64[1] = ~a.u64[1];
}

static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
{
    if (a.VsrD(0) < b.VsrD(0)) {
        return -1;
    } else if (a.VsrD(0) > b.VsrD(0)) {
        return 1;
    } else if (a.VsrD(1) < b.VsrD(1)) {
        return -1;
    } else if (a.VsrD(1) > b.VsrD(1)) {
        return 1;
    } else {
        return 0;
    }
}

static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
                 (~a.VsrD(1) < b.VsrD(1));
}

static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    ppc_avr_t not_a;
    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
                 (~a.VsrD(1) < b.VsrD(1));
    avr_qw_not(&not_a, a);
    return avr_qw_cmpu(not_a, b) < 0;
}

#endif
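/*
 * Note on the carry detection used above and below: unsigned a + b
 * carries out of 64 (or 128) bits exactly when b > ~a, i.e. when
 * b > (2^64 - 1) - a.  For example, a = UINT64_MAX and b = 1 gives
 * ~a == 0 < b, so a carry is generated.
 */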
void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128;
#else
    avr_qw_add(r, *a, *b);
#endif
}

void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else

    if (c->VsrD(1) & 1) {
        ppc_avr_t tmp;

        tmp.VsrD(0) = 0;
        tmp.VsrD(1) = c->VsrD(1) & 1;
        avr_qw_add(&tmp, *a, tmp);
        avr_qw_add(r, tmp, *b);
    } else {
        avr_qw_add(r, *a, *b);
    }
#endif
}

void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < b->u128);
#else
    ppc_avr_t not_a;

    avr_qw_not(&not_a, *a);

    r->VsrD(0) = 0;
    r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
#endif
}

void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    int carry_out = (~a->u128 < b->u128);
    if (!carry_out && (c->u128 & 1)) {
        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
                    ((a->u128 != 0) || (b->u128 != 0));
    }
    r->u128 = carry_out;
#else

    int carry_in = c->VsrD(1) & 1;
    int carry_out = 0;
    ppc_avr_t tmp;

    carry_out = avr_qw_addc(&tmp, *a, *b);

    if (!carry_out && carry_in) {
        ppc_avr_t one = QW_ONE;
        carry_out = avr_qw_addc(&tmp, tmp, one);
    }
    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
#endif
}

void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 - b->u128;
#else
    ppc_avr_t tmp;
    ppc_avr_t one = QW_ONE;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&tmp, *a, tmp);
    avr_qw_add(r, tmp, one);
#endif
}

void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
#else
    ppc_avr_t tmp, sum;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&sum, *a, tmp);

    tmp.VsrD(0) = 0;
    tmp.VsrD(1) = c->VsrD(1) & 1;
    avr_qw_add(r, sum, tmp);
#endif
}

void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < ~b->u128) ||
              (a->u128 + ~b->u128 == (__uint128_t)-1);
#else
    int carry = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
    }
    r->VsrD(0) = 0;
    r->VsrD(1) = carry;
#endif
}

void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 =
        (~a->u128 < ~b->u128) ||
        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
#else
    int carry_in = c->VsrD(1) & 1;
    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry_out && carry_in) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
    }

    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
#endif
}
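/*
 * Packed BCD operands hold 31 decimal digits plus a sign code in the
 * low nibble of the least significant byte.  In the helpers below,
 * digit 0 denotes that sign nibble and digits 1 (least significant)
 * through 31 (most significant) denote the value; BCD_DIG_BYTE() maps
 * a digit index to its hosting byte for either host endianness.
 */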
#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

#if defined(HOST_WORDS_BIGENDIAN)
#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
#else
#define BCD_DIG_BYTE(n) ((n) / 2)
#endif

static int bcd_get_sgn(ppc_avr_t *bcd)
{
    switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
    case BCD_PLUS_PREF_1:
    case BCD_PLUS_PREF_2:
    case BCD_PLUS_ALT_1:
    case BCD_PLUS_ALT_2:
        return 1;

    case BCD_NEG_PREF:
    case BCD_NEG_ALT:
        return -1;

    default:
        return 0;
    }
}

static int bcd_preferred_sgn(int sgn, int ps)
{
    if (sgn >= 0) {
        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
    } else {
        return BCD_NEG_PREF;
    }
}

static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
{
    uint8_t result;
    if (n & 1) {
        result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
    } else {
        result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
    }

    if (unlikely(result > 9)) {
        *invalid = true;
    }
    return result;
}

static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
{
    if (n & 1) {
        bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
        bcd->u8[BCD_DIG_BYTE(n)] |= (digit << 4);
    } else {
        bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
        bcd->u8[BCD_DIG_BYTE(n)] |= digit;
    }
}

static bool bcd_is_valid(ppc_avr_t *bcd)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(bcd) == 0) {
        return false;
    }

    for (i = 1; i < 32; i++) {
        bcd_get_digit(bcd, i, &invalid);
        if (unlikely(invalid)) {
            return false;
        }
    }
    return true;
}

static int bcd_cmp_zero(ppc_avr_t *bcd)
{
    if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
        return CRF_EQ;
    } else {
        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
    }
}
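/*
 * National decimal operands hold seven digits as the code points
 * 0x0030..0x0039 in halfwords 0..6 (most significant digit first),
 * with the sign code, NATIONAL_PLUS or NATIONAL_NEG, in the least
 * significant halfword, which is what the accessors below address.
 */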
static uint16_t get_national_digit(ppc_avr_t *reg, int n)
{
    return reg->VsrH(7 - n);
}

static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
{
    reg->VsrH(7 - n) = val;
}

static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;
    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}

static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                        int *overflow)
{
    int carry = 0;
    int i;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
                        bcd_get_digit(b, i, invalid) + carry;
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
}

static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                        int *overflow)
{
    int carry = 0;
    int i;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
                        bcd_get_digit(b, i, invalid) + carry;
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
}

uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = bcd_cmp_zero(&result);
        } else {
            int magnitude = bcd_cmp_mag(a, b);
            if (magnitude > 0) {
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
                bcd_sub_mag(&result, a, b, &invalid, &overflow);
                cr = (sgna > 0) ? CRF_GT : CRF_LT;
            } else if (magnitude < 0) {
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
                bcd_sub_mag(&result, b, a, &invalid, &overflow);
                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
            } else {
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(0, ps);
                cr = CRF_EQ;
            }
        }
    }

    if (unlikely(invalid)) {
        result.VsrD(0) = result.VsrD(1) = -1;
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    }

    *r = result;

    return cr;
}
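/*
 * bcdadd and bcdsub report their result in the usual BCD convention:
 * CRF_LT for a negative result, CRF_GT for a positive one, CRF_EQ for
 * zero, with CRF_SO or'd in on overflow and returned alone when an
 * operand is not valid BCD.
 */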
uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    /* The national form only holds 7 digits; any nonzero digit above
     * them overflows. */
    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        zone_digit = i ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
        digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    /* With ps set, sign codes 0xB and 0xD indicate negative;
     * otherwise bit 2 of the sign zone selects a negative value. */
    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
        (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
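/*
 * bcdctz below is the reverse conversion to zoned format: each digit
 * gets the zone nibble prepended (0x3, or 0xF when ps is set) and the
 * sign is folded into the zone of the least significant digit, 0xC/0xD
 * for positive/negative with ps set and 0x3/0x7 otherwise.
 */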
uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->VsrD(0) >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint64_t lo_value;
    uint64_t hi_value;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->VsrSD(0) < 0) {
        lo_value = -b->VsrSD(1);
        hi_value = ~b->VsrD(0) + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);
    } else {
        lo_value = b->VsrD(1);
        hi_value = b->VsrD(0);
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
    }

    if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
        lo_value > 9999999999999999ULL) {
        cr = CRF_SO;
    }

    for (i = 1; i < 16; hi_value /= 10, i++) {
        bcd_put_digit(&ret, hi_value % 10, i);
    }

    for (; i < 32; lo_value /= 10, i++) {
        bcd_put_digit(&ret, lo_value % 10, i);
    }

    cr |= bcd_cmp_zero(&ret);

    *r = ret;

    return cr;
}

uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->VsrSD(1) = -lo_value;
        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
    } else {
        r->VsrSD(1) = lo_value;
        r->VsrSD(0) = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0);

    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}
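/*
 * For the BCD shift helpers below, the shift count is the signed byte
 * covering bits 56:63 of register a (the same physical byte on either
 * host endianness): positive counts shift left, negative counts shift
 * right, one digit (four bits) per unit.  bcdsr additionally rounds a
 * right-shifted value up when the most significant discarded digit is
 * 5 or more.
 */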
uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s8[7];
#else
    int i = a->s8[8];
#endif
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

#if defined(HOST_WORDS_BIGENDIAN)
    i = a->s8[7];
#else
    i = a->s8[8];
#endif
    if (i >= 32) {
        ox_flag = true;
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i <= -32) {
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s8[7];
    ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } };
#else
    int i = a->s8[8];
    ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } };
#endif

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);

        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}

uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s16[3] + 1;
#else
    int i = a->s16[4] + 1;
#endif
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i >= 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}
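/*
 * The truncation masks in bcdtrunc above and bcdutrunc below keep the
 * low i nibbles of the 128-bit operand, one 64-bit half at a time (for
 * bcdtrunc, i includes the sign nibble).  E.g. i = 20 gives
 * 128 - i * 4 = 48, so the high doubleword is masked with 0xffff and
 * retains its four lowest nibbles.
 */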
uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

#if defined(HOST_WORDS_BIGENDIAN)
    i = a->s16[3];
#else
    i = a->s16[4];
#endif
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    } else if (i == 0) {
        if (ret.VsrD(0) || ret.VsrD(1)) {
            ox_flag = CRF_SO;
        }
        ret.VsrD(0) = ret.VsrD(1) = 0;
    }

    *r = ret;
    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}

void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.VsrW(i) = b->VsrW(i) ^
            (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}

void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
    }
    *r = result;
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /*
     * This differs from what is written in ISA V2.07.  The RTL is
     * incorrect and will be fixed in V2.07B.
     */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->VsrW(i) =
            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
            AES_imc[tmp.VsrB(4 * i + 3)][3];
    }
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
    }
    *r = result;
}

void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}
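/*
 * helper_vshasigmad is the SHA-512 counterpart: st selects between the
 * lower-case sigma functions of the message schedule and the
 * upper-case Sigma functions of the compression loop, and the relevant
 * six bit picks sigma-0 versus sigma-1 for each doubleword; the
 * rotation and shift amounts follow FIPS 180-4.
 */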
void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}

void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}