1 /* 2 * PowerPC integer and vector emulation helpers for QEMU. 3 * 4 * Copyright (c) 2003-2007 Jocelyn Mayer 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 #include "qemu/osdep.h" 20 #include "cpu.h" 21 #include "internal.h" 22 #include "exec/exec-all.h" 23 #include "qemu/host-utils.h" 24 #include "exec/helper-proto.h" 25 #include "crypto/aes.h" 26 27 #include "helper_regs.h" 28 /*****************************************************************************/ 29 /* Fixed point operations helpers */ 30 31 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, 32 uint32_t oe) 33 { 34 uint64_t rt = 0; 35 int overflow = 0; 36 37 uint64_t dividend = (uint64_t)ra << 32; 38 uint64_t divisor = (uint32_t)rb; 39 40 if (unlikely(divisor == 0)) { 41 overflow = 1; 42 } else { 43 rt = dividend / divisor; 44 overflow = rt > UINT32_MAX; 45 } 46 47 if (unlikely(overflow)) { 48 rt = 0; /* Undefined */ 49 } 50 51 if (oe) { 52 if (unlikely(overflow)) { 53 env->so = env->ov = 1; 54 } else { 55 env->ov = 0; 56 } 57 } 58 59 return (target_ulong)rt; 60 } 61 62 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, 63 uint32_t oe) 64 { 65 int64_t rt = 0; 66 int overflow = 0; 67 68 int64_t dividend = (int64_t)ra << 32; 69 int64_t divisor = (int64_t)((int32_t)rb); 70 71 if (unlikely((divisor == 0) || 72 ((divisor == -1ull) && (dividend == INT64_MIN)))) { 73 overflow = 1; 74 } else { 75 rt = dividend / divisor; 76 overflow = rt != (int32_t)rt; 77 } 78 79 if (unlikely(overflow)) { 80 rt = 0; /* Undefined */ 81 } 82 83 if (oe) { 84 if (unlikely(overflow)) { 85 env->so = env->ov = 1; 86 } else { 87 env->ov = 0; 88 } 89 } 90 91 return (target_ulong)rt; 92 } 93 94 #if defined(TARGET_PPC64) 95 96 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) 97 { 98 uint64_t rt = 0; 99 int overflow = 0; 100 101 overflow = divu128(&rt, &ra, rb); 102 103 if (unlikely(overflow)) { 104 rt = 0; /* Undefined */ 105 } 106 107 if (oe) { 108 if (unlikely(overflow)) { 109 env->so = env->ov = 1; 110 } else { 111 env->ov = 0; 112 } 113 } 114 115 return rt; 116 } 117 118 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) 119 { 120 int64_t rt = 0; 121 int64_t ra = (int64_t)rau; 122 int64_t rb = (int64_t)rbu; 123 int overflow = divs128(&rt, &ra, rb); 124 125 if (unlikely(overflow)) { 126 rt = 0; /* Undefined */ 127 } 128 129 if (oe) { 130 131 if (unlikely(overflow)) { 132 env->so = env->ov = 1; 133 } else { 134 env->ov = 0; 135 } 136 } 137 138 return rt; 139 } 140 141 #endif 142 143 144 target_ulong helper_cntlzw(target_ulong t) 145 { 146 return clz32(t); 147 } 148 149 target_ulong helper_cnttzw(target_ulong t) 150 { 151 return ctz32(t); 152 } 153 154 #if defined(TARGET_PPC64) 155 /* if x = 0xab, returns 0xababababababababa */ 156 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff)) 157 158 /* substract 1 from each byte, and with inverse, check if MSB is set at each 159 * byte. 160 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80 161 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 162 */ 163 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 164 165 /* When you XOR the pattern and there is a match, that byte will be zero */ 166 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 167 168 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 169 { 170 return hasvalue(rb, ra) ? 1 << CRF_GT : 0; 171 } 172 173 #undef pattern 174 #undef haszero 175 #undef hasvalue 176 177 target_ulong helper_cntlzd(target_ulong t) 178 { 179 return clz64(t); 180 } 181 182 target_ulong helper_cnttzd(target_ulong t) 183 { 184 return ctz64(t); 185 } 186 187 /* Return invalid random number. 188 * 189 * FIXME: Add rng backend or other mechanism to get cryptographically suitable 190 * random number 191 */ 192 target_ulong helper_darn32(void) 193 { 194 return -1; 195 } 196 197 target_ulong helper_darn64(void) 198 { 199 return -1; 200 } 201 202 #endif 203 204 #if defined(TARGET_PPC64) 205 206 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 207 { 208 int i; 209 uint64_t ra = 0; 210 211 for (i = 0; i < 8; i++) { 212 int index = (rs >> (i*8)) & 0xFF; 213 if (index < 64) { 214 if (rb & (1ull << (63-index))) { 215 ra |= 1 << i; 216 } 217 } 218 } 219 return ra; 220 } 221 222 #endif 223 224 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 225 { 226 target_ulong mask = 0xff; 227 target_ulong ra = 0; 228 int i; 229 230 for (i = 0; i < sizeof(target_ulong); i++) { 231 if ((rs & mask) == (rb & mask)) { 232 ra |= mask; 233 } 234 mask <<= 8; 235 } 236 return ra; 237 } 238 239 /* shift right arithmetic helper */ 240 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 241 target_ulong shift) 242 { 243 int32_t ret; 244 245 if (likely(!(shift & 0x20))) { 246 if (likely((uint32_t)shift != 0)) { 247 shift &= 0x1f; 248 ret = (int32_t)value >> shift; 249 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 250 env->ca = 0; 251 } else { 252 env->ca = 1; 253 } 254 } else { 255 ret = (int32_t)value; 256 env->ca = 0; 257 } 258 } else { 259 ret = (int32_t)value >> 31; 260 env->ca = (ret != 0); 261 } 262 return (target_long)ret; 263 } 264 265 #if defined(TARGET_PPC64) 266 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 267 target_ulong shift) 268 { 269 int64_t ret; 270 271 if (likely(!(shift & 0x40))) { 272 if (likely((uint64_t)shift != 0)) { 273 shift &= 0x3f; 274 ret = (int64_t)value >> shift; 275 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 276 env->ca = 0; 277 } else { 278 env->ca = 1; 279 } 280 } else { 281 ret = (int64_t)value; 282 env->ca = 0; 283 } 284 } else { 285 ret = (int64_t)value >> 63; 286 env->ca = (ret != 0); 287 } 288 return ret; 289 } 290 #endif 291 292 #if defined(TARGET_PPC64) 293 target_ulong helper_popcntb(target_ulong val) 294 { 295 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 296 0x5555555555555555ULL); 297 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 298 0x3333333333333333ULL); 299 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 300 0x0f0f0f0f0f0f0f0fULL); 301 return val; 302 } 303 304 target_ulong helper_popcntw(target_ulong val) 305 { 306 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 307 0x5555555555555555ULL); 308 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 309 0x3333333333333333ULL); 310 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 311 0x0f0f0f0f0f0f0f0fULL); 312 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 313 0x00ff00ff00ff00ffULL); 314 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 315 0x0000ffff0000ffffULL); 316 return val; 317 } 318 319 target_ulong helper_popcntd(target_ulong val) 320 { 321 return ctpop64(val); 322 } 323 #else 324 target_ulong helper_popcntb(target_ulong val) 325 { 326 val = (val & 0x55555555) + ((val >> 1) & 0x55555555); 327 val = (val & 0x33333333) + ((val >> 2) & 0x33333333); 328 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); 329 return val; 330 } 331 332 target_ulong helper_popcntw(target_ulong val) 333 { 334 val = (val & 0x55555555) + ((val >> 1) & 0x55555555); 335 val = (val & 0x33333333) + ((val >> 2) & 0x33333333); 336 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); 337 val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff); 338 val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff); 339 return val; 340 } 341 #endif 342 343 /*****************************************************************************/ 344 /* PowerPC 601 specific instructions (POWER bridge) */ 345 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2) 346 { 347 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 348 349 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 350 (int32_t)arg2 == 0) { 351 env->spr[SPR_MQ] = 0; 352 return INT32_MIN; 353 } else { 354 env->spr[SPR_MQ] = tmp % arg2; 355 return tmp / (int32_t)arg2; 356 } 357 } 358 359 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1, 360 target_ulong arg2) 361 { 362 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 363 364 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 365 (int32_t)arg2 == 0) { 366 env->so = env->ov = 1; 367 env->spr[SPR_MQ] = 0; 368 return INT32_MIN; 369 } else { 370 env->spr[SPR_MQ] = tmp % arg2; 371 tmp /= (int32_t)arg2; 372 if ((int32_t)tmp != tmp) { 373 env->so = env->ov = 1; 374 } else { 375 env->ov = 0; 376 } 377 return tmp; 378 } 379 } 380 381 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1, 382 target_ulong arg2) 383 { 384 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 385 (int32_t)arg2 == 0) { 386 env->spr[SPR_MQ] = 0; 387 return INT32_MIN; 388 } else { 389 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 390 return (int32_t)arg1 / (int32_t)arg2; 391 } 392 } 393 394 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1, 395 target_ulong arg2) 396 { 397 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 398 (int32_t)arg2 == 0) { 399 env->so = env->ov = 1; 400 env->spr[SPR_MQ] = 0; 401 return INT32_MIN; 402 } else { 403 env->ov = 0; 404 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 405 return (int32_t)arg1 / (int32_t)arg2; 406 } 407 } 408 409 /*****************************************************************************/ 410 /* 602 specific instructions */ 411 /* mfrom is the most crazy instruction ever seen, imho ! */ 412 /* Real implementation uses a ROM table. Do the same */ 413 /* Extremely decomposed: 414 * -arg / 256 415 * return 256 * log10(10 + 1.0) + 0.5 416 */ 417 #if !defined(CONFIG_USER_ONLY) 418 target_ulong helper_602_mfrom(target_ulong arg) 419 { 420 if (likely(arg < 602)) { 421 #include "mfrom_table.c" 422 return mfrom_ROM_table[arg]; 423 } else { 424 return 0; 425 } 426 } 427 #endif 428 429 /*****************************************************************************/ 430 /* Altivec extension helpers */ 431 #if defined(HOST_WORDS_BIGENDIAN) 432 #define HI_IDX 0 433 #define LO_IDX 1 434 #define AVRB(i) u8[i] 435 #define AVRW(i) u32[i] 436 #else 437 #define HI_IDX 1 438 #define LO_IDX 0 439 #define AVRB(i) u8[15-(i)] 440 #define AVRW(i) u32[3-(i)] 441 #endif 442 443 #if defined(HOST_WORDS_BIGENDIAN) 444 #define VECTOR_FOR_INORDER_I(index, element) \ 445 for (index = 0; index < ARRAY_SIZE(r->element); index++) 446 #else 447 #define VECTOR_FOR_INORDER_I(index, element) \ 448 for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--) 449 #endif 450 451 /* Saturating arithmetic helpers. */ 452 #define SATCVT(from, to, from_type, to_type, min, max) \ 453 static inline to_type cvt##from##to(from_type x, int *sat) \ 454 { \ 455 to_type r; \ 456 \ 457 if (x < (from_type)min) { \ 458 r = min; \ 459 *sat = 1; \ 460 } else if (x > (from_type)max) { \ 461 r = max; \ 462 *sat = 1; \ 463 } else { \ 464 r = x; \ 465 } \ 466 return r; \ 467 } 468 #define SATCVTU(from, to, from_type, to_type, min, max) \ 469 static inline to_type cvt##from##to(from_type x, int *sat) \ 470 { \ 471 to_type r; \ 472 \ 473 if (x > (from_type)max) { \ 474 r = max; \ 475 *sat = 1; \ 476 } else { \ 477 r = x; \ 478 } \ 479 return r; \ 480 } 481 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX) 482 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX) 483 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX) 484 485 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX) 486 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX) 487 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX) 488 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX) 489 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX) 490 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX) 491 #undef SATCVT 492 #undef SATCVTU 493 494 void helper_lvsl(ppc_avr_t *r, target_ulong sh) 495 { 496 int i, j = (sh & 0xf); 497 498 VECTOR_FOR_INORDER_I(i, u8) { 499 r->u8[i] = j++; 500 } 501 } 502 503 void helper_lvsr(ppc_avr_t *r, target_ulong sh) 504 { 505 int i, j = 0x10 - (sh & 0xf); 506 507 VECTOR_FOR_INORDER_I(i, u8) { 508 r->u8[i] = j++; 509 } 510 } 511 512 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r) 513 { 514 #if defined(HOST_WORDS_BIGENDIAN) 515 env->vscr = r->u32[3]; 516 #else 517 env->vscr = r->u32[0]; 518 #endif 519 set_flush_to_zero(vscr_nj, &env->vec_status); 520 } 521 522 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 523 { 524 int i; 525 526 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 527 r->u32[i] = ~a->u32[i] < b->u32[i]; 528 } 529 } 530 531 /* vprtybw */ 532 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b) 533 { 534 int i; 535 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 536 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16); 537 res ^= res >> 8; 538 r->u32[i] = res & 1; 539 } 540 } 541 542 /* vprtybd */ 543 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b) 544 { 545 int i; 546 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 547 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32); 548 res ^= res >> 16; 549 res ^= res >> 8; 550 r->u64[i] = res & 1; 551 } 552 } 553 554 /* vprtybq */ 555 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b) 556 { 557 uint64_t res = b->u64[0] ^ b->u64[1]; 558 res ^= res >> 32; 559 res ^= res >> 16; 560 res ^= res >> 8; 561 r->u64[LO_IDX] = res & 1; 562 r->u64[HI_IDX] = 0; 563 } 564 565 #define VARITH_DO(name, op, element) \ 566 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 567 { \ 568 int i; \ 569 \ 570 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 571 r->element[i] = a->element[i] op b->element[i]; \ 572 } \ 573 } 574 #define VARITH(suffix, element) \ 575 VARITH_DO(add##suffix, +, element) \ 576 VARITH_DO(sub##suffix, -, element) 577 VARITH(ubm, u8) 578 VARITH(uhm, u16) 579 VARITH(uwm, u32) 580 VARITH(udm, u64) 581 VARITH_DO(muluwm, *, u32) 582 #undef VARITH_DO 583 #undef VARITH 584 585 #define VARITHFP(suffix, func) \ 586 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 587 ppc_avr_t *b) \ 588 { \ 589 int i; \ 590 \ 591 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ 592 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \ 593 } \ 594 } 595 VARITHFP(addfp, float32_add) 596 VARITHFP(subfp, float32_sub) 597 VARITHFP(minfp, float32_min) 598 VARITHFP(maxfp, float32_max) 599 #undef VARITHFP 600 601 #define VARITHFPFMA(suffix, type) \ 602 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 603 ppc_avr_t *b, ppc_avr_t *c) \ 604 { \ 605 int i; \ 606 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ 607 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \ 608 type, &env->vec_status); \ 609 } \ 610 } 611 VARITHFPFMA(maddfp, 0); 612 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 613 #undef VARITHFPFMA 614 615 #define VARITHSAT_CASE(type, op, cvt, element) \ 616 { \ 617 type result = (type)a->element[i] op (type)b->element[i]; \ 618 r->element[i] = cvt(result, &sat); \ 619 } 620 621 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 622 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 623 ppc_avr_t *b) \ 624 { \ 625 int sat = 0; \ 626 int i; \ 627 \ 628 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 629 switch (sizeof(r->element[0])) { \ 630 case 1: \ 631 VARITHSAT_CASE(optype, op, cvt, element); \ 632 break; \ 633 case 2: \ 634 VARITHSAT_CASE(optype, op, cvt, element); \ 635 break; \ 636 case 4: \ 637 VARITHSAT_CASE(optype, op, cvt, element); \ 638 break; \ 639 } \ 640 } \ 641 if (sat) { \ 642 env->vscr |= (1 << VSCR_SAT); \ 643 } \ 644 } 645 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 646 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 647 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 648 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 649 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 650 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 651 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 652 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 653 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 654 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 655 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 656 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 657 #undef VARITHSAT_CASE 658 #undef VARITHSAT_DO 659 #undef VARITHSAT_SIGNED 660 #undef VARITHSAT_UNSIGNED 661 662 #define VAVG_DO(name, element, etype) \ 663 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 664 { \ 665 int i; \ 666 \ 667 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 668 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 669 r->element[i] = x >> 1; \ 670 } \ 671 } 672 673 #define VAVG(type, signed_element, signed_type, unsigned_element, \ 674 unsigned_type) \ 675 VAVG_DO(avgs##type, signed_element, signed_type) \ 676 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 677 VAVG(b, s8, int16_t, u8, uint16_t) 678 VAVG(h, s16, int32_t, u16, uint32_t) 679 VAVG(w, s32, int64_t, u32, uint64_t) 680 #undef VAVG_DO 681 #undef VAVG 682 683 #define VABSDU_DO(name, element) \ 684 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 685 { \ 686 int i; \ 687 \ 688 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 689 r->element[i] = (a->element[i] > b->element[i]) ? \ 690 (a->element[i] - b->element[i]) : \ 691 (b->element[i] - a->element[i]); \ 692 } \ 693 } 694 695 /* VABSDU - Vector absolute difference unsigned 696 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 697 * element - element type to access from vector 698 */ 699 #define VABSDU(type, element) \ 700 VABSDU_DO(absdu##type, element) 701 VABSDU(b, u8) 702 VABSDU(h, u16) 703 VABSDU(w, u32) 704 #undef VABSDU_DO 705 #undef VABSDU 706 707 #define VCF(suffix, cvt, element) \ 708 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 709 ppc_avr_t *b, uint32_t uim) \ 710 { \ 711 int i; \ 712 \ 713 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ 714 float32 t = cvt(b->element[i], &env->vec_status); \ 715 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \ 716 } \ 717 } 718 VCF(ux, uint32_to_float32, u32) 719 VCF(sx, int32_to_float32, s32) 720 #undef VCF 721 722 #define VCMP_DO(suffix, compare, element, record) \ 723 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 724 ppc_avr_t *a, ppc_avr_t *b) \ 725 { \ 726 uint64_t ones = (uint64_t)-1; \ 727 uint64_t all = ones; \ 728 uint64_t none = 0; \ 729 int i; \ 730 \ 731 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 732 uint64_t result = (a->element[i] compare b->element[i] ? \ 733 ones : 0x0); \ 734 switch (sizeof(a->element[0])) { \ 735 case 8: \ 736 r->u64[i] = result; \ 737 break; \ 738 case 4: \ 739 r->u32[i] = result; \ 740 break; \ 741 case 2: \ 742 r->u16[i] = result; \ 743 break; \ 744 case 1: \ 745 r->u8[i] = result; \ 746 break; \ 747 } \ 748 all &= result; \ 749 none |= result; \ 750 } \ 751 if (record) { \ 752 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 753 } \ 754 } 755 #define VCMP(suffix, compare, element) \ 756 VCMP_DO(suffix, compare, element, 0) \ 757 VCMP_DO(suffix##_dot, compare, element, 1) 758 VCMP(equb, ==, u8) 759 VCMP(equh, ==, u16) 760 VCMP(equw, ==, u32) 761 VCMP(equd, ==, u64) 762 VCMP(gtub, >, u8) 763 VCMP(gtuh, >, u16) 764 VCMP(gtuw, >, u32) 765 VCMP(gtud, >, u64) 766 VCMP(gtsb, >, s8) 767 VCMP(gtsh, >, s16) 768 VCMP(gtsw, >, s32) 769 VCMP(gtsd, >, s64) 770 #undef VCMP_DO 771 #undef VCMP 772 773 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \ 774 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \ 775 ppc_avr_t *a, ppc_avr_t *b) \ 776 { \ 777 etype ones = (etype)-1; \ 778 etype all = ones; \ 779 etype result, none = 0; \ 780 int i; \ 781 \ 782 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 783 if (cmpzero) { \ 784 result = ((a->element[i] == 0) \ 785 || (b->element[i] == 0) \ 786 || (a->element[i] != b->element[i]) ? \ 787 ones : 0x0); \ 788 } else { \ 789 result = (a->element[i] != b->element[i]) ? ones : 0x0; \ 790 } \ 791 r->element[i] = result; \ 792 all &= result; \ 793 none |= result; \ 794 } \ 795 if (record) { \ 796 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 797 } \ 798 } 799 800 /* VCMPNEZ - Vector compare not equal to zero 801 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word) 802 * element - element type to access from vector 803 */ 804 #define VCMPNE(suffix, element, etype, cmpzero) \ 805 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \ 806 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1) 807 VCMPNE(zb, u8, uint8_t, 1) 808 VCMPNE(zh, u16, uint16_t, 1) 809 VCMPNE(zw, u32, uint32_t, 1) 810 VCMPNE(b, u8, uint8_t, 0) 811 VCMPNE(h, u16, uint16_t, 0) 812 VCMPNE(w, u32, uint32_t, 0) 813 #undef VCMPNE_DO 814 #undef VCMPNE 815 816 #define VCMPFP_DO(suffix, compare, order, record) \ 817 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 818 ppc_avr_t *a, ppc_avr_t *b) \ 819 { \ 820 uint32_t ones = (uint32_t)-1; \ 821 uint32_t all = ones; \ 822 uint32_t none = 0; \ 823 int i; \ 824 \ 825 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ 826 uint32_t result; \ 827 int rel = float32_compare_quiet(a->f[i], b->f[i], \ 828 &env->vec_status); \ 829 if (rel == float_relation_unordered) { \ 830 result = 0; \ 831 } else if (rel compare order) { \ 832 result = ones; \ 833 } else { \ 834 result = 0; \ 835 } \ 836 r->u32[i] = result; \ 837 all &= result; \ 838 none |= result; \ 839 } \ 840 if (record) { \ 841 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 842 } \ 843 } 844 #define VCMPFP(suffix, compare, order) \ 845 VCMPFP_DO(suffix, compare, order, 0) \ 846 VCMPFP_DO(suffix##_dot, compare, order, 1) 847 VCMPFP(eqfp, ==, float_relation_equal) 848 VCMPFP(gefp, !=, float_relation_less) 849 VCMPFP(gtfp, ==, float_relation_greater) 850 #undef VCMPFP_DO 851 #undef VCMPFP 852 853 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 854 ppc_avr_t *a, ppc_avr_t *b, int record) 855 { 856 int i; 857 int all_in = 0; 858 859 for (i = 0; i < ARRAY_SIZE(r->f); i++) { 860 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status); 861 if (le_rel == float_relation_unordered) { 862 r->u32[i] = 0xc0000000; 863 all_in = 1; 864 } else { 865 float32 bneg = float32_chs(b->f[i]); 866 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status); 867 int le = le_rel != float_relation_greater; 868 int ge = ge_rel != float_relation_less; 869 870 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 871 all_in |= (!le | !ge); 872 } 873 } 874 if (record) { 875 env->crf[6] = (all_in == 0) << 1; 876 } 877 } 878 879 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 880 { 881 vcmpbfp_internal(env, r, a, b, 0); 882 } 883 884 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 885 ppc_avr_t *b) 886 { 887 vcmpbfp_internal(env, r, a, b, 1); 888 } 889 890 #define VCT(suffix, satcvt, element) \ 891 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 892 ppc_avr_t *b, uint32_t uim) \ 893 { \ 894 int i; \ 895 int sat = 0; \ 896 float_status s = env->vec_status; \ 897 \ 898 set_float_rounding_mode(float_round_to_zero, &s); \ 899 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ 900 if (float32_is_any_nan(b->f[i])) { \ 901 r->element[i] = 0; \ 902 } else { \ 903 float64 t = float32_to_float64(b->f[i], &s); \ 904 int64_t j; \ 905 \ 906 t = float64_scalbn(t, uim, &s); \ 907 j = float64_to_int64(t, &s); \ 908 r->element[i] = satcvt(j, &sat); \ 909 } \ 910 } \ 911 if (sat) { \ 912 env->vscr |= (1 << VSCR_SAT); \ 913 } \ 914 } 915 VCT(uxs, cvtsduw, u32) 916 VCT(sxs, cvtsdsw, s32) 917 #undef VCT 918 919 target_ulong helper_vclzlsbb(ppc_avr_t *r) 920 { 921 target_ulong count = 0; 922 int i; 923 VECTOR_FOR_INORDER_I(i, u8) { 924 if (r->u8[i] & 0x01) { 925 break; 926 } 927 count++; 928 } 929 return count; 930 } 931 932 target_ulong helper_vctzlsbb(ppc_avr_t *r) 933 { 934 target_ulong count = 0; 935 int i; 936 #if defined(HOST_WORDS_BIGENDIAN) 937 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 938 #else 939 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 940 #endif 941 if (r->u8[i] & 0x01) { 942 break; 943 } 944 count++; 945 } 946 return count; 947 } 948 949 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 950 ppc_avr_t *b, ppc_avr_t *c) 951 { 952 int sat = 0; 953 int i; 954 955 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 956 int32_t prod = a->s16[i] * b->s16[i]; 957 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 958 959 r->s16[i] = cvtswsh(t, &sat); 960 } 961 962 if (sat) { 963 env->vscr |= (1 << VSCR_SAT); 964 } 965 } 966 967 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 968 ppc_avr_t *b, ppc_avr_t *c) 969 { 970 int sat = 0; 971 int i; 972 973 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 974 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 975 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 976 r->s16[i] = cvtswsh(t, &sat); 977 } 978 979 if (sat) { 980 env->vscr |= (1 << VSCR_SAT); 981 } 982 } 983 984 #define VMINMAX_DO(name, compare, element) \ 985 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 986 { \ 987 int i; \ 988 \ 989 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 990 if (a->element[i] compare b->element[i]) { \ 991 r->element[i] = b->element[i]; \ 992 } else { \ 993 r->element[i] = a->element[i]; \ 994 } \ 995 } \ 996 } 997 #define VMINMAX(suffix, element) \ 998 VMINMAX_DO(min##suffix, >, element) \ 999 VMINMAX_DO(max##suffix, <, element) 1000 VMINMAX(sb, s8) 1001 VMINMAX(sh, s16) 1002 VMINMAX(sw, s32) 1003 VMINMAX(sd, s64) 1004 VMINMAX(ub, u8) 1005 VMINMAX(uh, u16) 1006 VMINMAX(uw, u32) 1007 VMINMAX(ud, u64) 1008 #undef VMINMAX_DO 1009 #undef VMINMAX 1010 1011 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1012 { 1013 int i; 1014 1015 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1016 int32_t prod = a->s16[i] * b->s16[i]; 1017 r->s16[i] = (int16_t) (prod + c->s16[i]); 1018 } 1019 } 1020 1021 #define VMRG_DO(name, element, highp) \ 1022 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1023 { \ 1024 ppc_avr_t result; \ 1025 int i; \ 1026 size_t n_elems = ARRAY_SIZE(r->element); \ 1027 \ 1028 for (i = 0; i < n_elems / 2; i++) { \ 1029 if (highp) { \ 1030 result.element[i*2+HI_IDX] = a->element[i]; \ 1031 result.element[i*2+LO_IDX] = b->element[i]; \ 1032 } else { \ 1033 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \ 1034 b->element[n_elems - i - 1]; \ 1035 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \ 1036 a->element[n_elems - i - 1]; \ 1037 } \ 1038 } \ 1039 *r = result; \ 1040 } 1041 #if defined(HOST_WORDS_BIGENDIAN) 1042 #define MRGHI 0 1043 #define MRGLO 1 1044 #else 1045 #define MRGHI 1 1046 #define MRGLO 0 1047 #endif 1048 #define VMRG(suffix, element) \ 1049 VMRG_DO(mrgl##suffix, element, MRGHI) \ 1050 VMRG_DO(mrgh##suffix, element, MRGLO) 1051 VMRG(b, u8) 1052 VMRG(h, u16) 1053 VMRG(w, u32) 1054 #undef VMRG_DO 1055 #undef VMRG 1056 #undef MRGHI 1057 #undef MRGLO 1058 1059 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1060 ppc_avr_t *b, ppc_avr_t *c) 1061 { 1062 int32_t prod[16]; 1063 int i; 1064 1065 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 1066 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 1067 } 1068 1069 VECTOR_FOR_INORDER_I(i, s32) { 1070 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 1071 prod[4 * i + 2] + prod[4 * i + 3]; 1072 } 1073 } 1074 1075 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1076 ppc_avr_t *b, ppc_avr_t *c) 1077 { 1078 int32_t prod[8]; 1079 int i; 1080 1081 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1082 prod[i] = a->s16[i] * b->s16[i]; 1083 } 1084 1085 VECTOR_FOR_INORDER_I(i, s32) { 1086 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1087 } 1088 } 1089 1090 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1091 ppc_avr_t *b, ppc_avr_t *c) 1092 { 1093 int32_t prod[8]; 1094 int i; 1095 int sat = 0; 1096 1097 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1098 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1099 } 1100 1101 VECTOR_FOR_INORDER_I(i, s32) { 1102 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1103 1104 r->u32[i] = cvtsdsw(t, &sat); 1105 } 1106 1107 if (sat) { 1108 env->vscr |= (1 << VSCR_SAT); 1109 } 1110 } 1111 1112 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1113 ppc_avr_t *b, ppc_avr_t *c) 1114 { 1115 uint16_t prod[16]; 1116 int i; 1117 1118 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1119 prod[i] = a->u8[i] * b->u8[i]; 1120 } 1121 1122 VECTOR_FOR_INORDER_I(i, u32) { 1123 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1124 prod[4 * i + 2] + prod[4 * i + 3]; 1125 } 1126 } 1127 1128 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1129 ppc_avr_t *b, ppc_avr_t *c) 1130 { 1131 uint32_t prod[8]; 1132 int i; 1133 1134 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1135 prod[i] = a->u16[i] * b->u16[i]; 1136 } 1137 1138 VECTOR_FOR_INORDER_I(i, u32) { 1139 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1140 } 1141 } 1142 1143 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1144 ppc_avr_t *b, ppc_avr_t *c) 1145 { 1146 uint32_t prod[8]; 1147 int i; 1148 int sat = 0; 1149 1150 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1151 prod[i] = a->u16[i] * b->u16[i]; 1152 } 1153 1154 VECTOR_FOR_INORDER_I(i, s32) { 1155 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1156 1157 r->u32[i] = cvtuduw(t, &sat); 1158 } 1159 1160 if (sat) { 1161 env->vscr |= (1 << VSCR_SAT); 1162 } 1163 } 1164 1165 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \ 1166 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1167 { \ 1168 int i; \ 1169 \ 1170 VECTOR_FOR_INORDER_I(i, prod_element) { \ 1171 if (evenp) { \ 1172 r->prod_element[i] = \ 1173 (cast)a->mul_element[i * 2 + HI_IDX] * \ 1174 (cast)b->mul_element[i * 2 + HI_IDX]; \ 1175 } else { \ 1176 r->prod_element[i] = \ 1177 (cast)a->mul_element[i * 2 + LO_IDX] * \ 1178 (cast)b->mul_element[i * 2 + LO_IDX]; \ 1179 } \ 1180 } \ 1181 } 1182 #define VMUL(suffix, mul_element, prod_element, cast) \ 1183 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \ 1184 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0) 1185 VMUL(sb, s8, s16, int16_t) 1186 VMUL(sh, s16, s32, int32_t) 1187 VMUL(sw, s32, s64, int64_t) 1188 VMUL(ub, u8, u16, uint16_t) 1189 VMUL(uh, u16, u32, uint32_t) 1190 VMUL(uw, u32, u64, uint64_t) 1191 #undef VMUL_DO 1192 #undef VMUL 1193 1194 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1195 ppc_avr_t *c) 1196 { 1197 ppc_avr_t result; 1198 int i; 1199 1200 VECTOR_FOR_INORDER_I(i, u8) { 1201 int s = c->u8[i] & 0x1f; 1202 #if defined(HOST_WORDS_BIGENDIAN) 1203 int index = s & 0xf; 1204 #else 1205 int index = 15 - (s & 0xf); 1206 #endif 1207 1208 if (s & 0x10) { 1209 result.u8[i] = b->u8[index]; 1210 } else { 1211 result.u8[i] = a->u8[index]; 1212 } 1213 } 1214 *r = result; 1215 } 1216 1217 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1218 ppc_avr_t *c) 1219 { 1220 ppc_avr_t result; 1221 int i; 1222 1223 VECTOR_FOR_INORDER_I(i, u8) { 1224 int s = c->u8[i] & 0x1f; 1225 #if defined(HOST_WORDS_BIGENDIAN) 1226 int index = 15 - (s & 0xf); 1227 #else 1228 int index = s & 0xf; 1229 #endif 1230 1231 if (s & 0x10) { 1232 result.u8[i] = a->u8[index]; 1233 } else { 1234 result.u8[i] = b->u8[index]; 1235 } 1236 } 1237 *r = result; 1238 } 1239 1240 #if defined(HOST_WORDS_BIGENDIAN) 1241 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1242 #define VBPERMD_INDEX(i) (i) 1243 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1244 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) 1245 #else 1246 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)]) 1247 #define VBPERMD_INDEX(i) (1 - i) 1248 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1249 #define EXTRACT_BIT(avr, i, index) \ 1250 (extract64((avr)->u64[1 - i], 63 - index, 1)) 1251 #endif 1252 1253 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1254 { 1255 int i, j; 1256 ppc_avr_t result = { .u64 = { 0, 0 } }; 1257 VECTOR_FOR_INORDER_I(i, u64) { 1258 for (j = 0; j < 8; j++) { 1259 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1260 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1261 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1262 } 1263 } 1264 } 1265 *r = result; 1266 } 1267 1268 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1269 { 1270 int i; 1271 uint64_t perm = 0; 1272 1273 VECTOR_FOR_INORDER_I(i, u8) { 1274 int index = VBPERMQ_INDEX(b, i); 1275 1276 if (index < 128) { 1277 uint64_t mask = (1ull << (63-(index & 0x3F))); 1278 if (a->u64[VBPERMQ_DW(index)] & mask) { 1279 perm |= (0x8000 >> i); 1280 } 1281 } 1282 } 1283 1284 r->u64[HI_IDX] = perm; 1285 r->u64[LO_IDX] = 0; 1286 } 1287 1288 #undef VBPERMQ_INDEX 1289 #undef VBPERMQ_DW 1290 1291 static const uint64_t VGBBD_MASKS[256] = { 1292 0x0000000000000000ull, /* 00 */ 1293 0x0000000000000080ull, /* 01 */ 1294 0x0000000000008000ull, /* 02 */ 1295 0x0000000000008080ull, /* 03 */ 1296 0x0000000000800000ull, /* 04 */ 1297 0x0000000000800080ull, /* 05 */ 1298 0x0000000000808000ull, /* 06 */ 1299 0x0000000000808080ull, /* 07 */ 1300 0x0000000080000000ull, /* 08 */ 1301 0x0000000080000080ull, /* 09 */ 1302 0x0000000080008000ull, /* 0A */ 1303 0x0000000080008080ull, /* 0B */ 1304 0x0000000080800000ull, /* 0C */ 1305 0x0000000080800080ull, /* 0D */ 1306 0x0000000080808000ull, /* 0E */ 1307 0x0000000080808080ull, /* 0F */ 1308 0x0000008000000000ull, /* 10 */ 1309 0x0000008000000080ull, /* 11 */ 1310 0x0000008000008000ull, /* 12 */ 1311 0x0000008000008080ull, /* 13 */ 1312 0x0000008000800000ull, /* 14 */ 1313 0x0000008000800080ull, /* 15 */ 1314 0x0000008000808000ull, /* 16 */ 1315 0x0000008000808080ull, /* 17 */ 1316 0x0000008080000000ull, /* 18 */ 1317 0x0000008080000080ull, /* 19 */ 1318 0x0000008080008000ull, /* 1A */ 1319 0x0000008080008080ull, /* 1B */ 1320 0x0000008080800000ull, /* 1C */ 1321 0x0000008080800080ull, /* 1D */ 1322 0x0000008080808000ull, /* 1E */ 1323 0x0000008080808080ull, /* 1F */ 1324 0x0000800000000000ull, /* 20 */ 1325 0x0000800000000080ull, /* 21 */ 1326 0x0000800000008000ull, /* 22 */ 1327 0x0000800000008080ull, /* 23 */ 1328 0x0000800000800000ull, /* 24 */ 1329 0x0000800000800080ull, /* 25 */ 1330 0x0000800000808000ull, /* 26 */ 1331 0x0000800000808080ull, /* 27 */ 1332 0x0000800080000000ull, /* 28 */ 1333 0x0000800080000080ull, /* 29 */ 1334 0x0000800080008000ull, /* 2A */ 1335 0x0000800080008080ull, /* 2B */ 1336 0x0000800080800000ull, /* 2C */ 1337 0x0000800080800080ull, /* 2D */ 1338 0x0000800080808000ull, /* 2E */ 1339 0x0000800080808080ull, /* 2F */ 1340 0x0000808000000000ull, /* 30 */ 1341 0x0000808000000080ull, /* 31 */ 1342 0x0000808000008000ull, /* 32 */ 1343 0x0000808000008080ull, /* 33 */ 1344 0x0000808000800000ull, /* 34 */ 1345 0x0000808000800080ull, /* 35 */ 1346 0x0000808000808000ull, /* 36 */ 1347 0x0000808000808080ull, /* 37 */ 1348 0x0000808080000000ull, /* 38 */ 1349 0x0000808080000080ull, /* 39 */ 1350 0x0000808080008000ull, /* 3A */ 1351 0x0000808080008080ull, /* 3B */ 1352 0x0000808080800000ull, /* 3C */ 1353 0x0000808080800080ull, /* 3D */ 1354 0x0000808080808000ull, /* 3E */ 1355 0x0000808080808080ull, /* 3F */ 1356 0x0080000000000000ull, /* 40 */ 1357 0x0080000000000080ull, /* 41 */ 1358 0x0080000000008000ull, /* 42 */ 1359 0x0080000000008080ull, /* 43 */ 1360 0x0080000000800000ull, /* 44 */ 1361 0x0080000000800080ull, /* 45 */ 1362 0x0080000000808000ull, /* 46 */ 1363 0x0080000000808080ull, /* 47 */ 1364 0x0080000080000000ull, /* 48 */ 1365 0x0080000080000080ull, /* 49 */ 1366 0x0080000080008000ull, /* 4A */ 1367 0x0080000080008080ull, /* 4B */ 1368 0x0080000080800000ull, /* 4C */ 1369 0x0080000080800080ull, /* 4D */ 1370 0x0080000080808000ull, /* 4E */ 1371 0x0080000080808080ull, /* 4F */ 1372 0x0080008000000000ull, /* 50 */ 1373 0x0080008000000080ull, /* 51 */ 1374 0x0080008000008000ull, /* 52 */ 1375 0x0080008000008080ull, /* 53 */ 1376 0x0080008000800000ull, /* 54 */ 1377 0x0080008000800080ull, /* 55 */ 1378 0x0080008000808000ull, /* 56 */ 1379 0x0080008000808080ull, /* 57 */ 1380 0x0080008080000000ull, /* 58 */ 1381 0x0080008080000080ull, /* 59 */ 1382 0x0080008080008000ull, /* 5A */ 1383 0x0080008080008080ull, /* 5B */ 1384 0x0080008080800000ull, /* 5C */ 1385 0x0080008080800080ull, /* 5D */ 1386 0x0080008080808000ull, /* 5E */ 1387 0x0080008080808080ull, /* 5F */ 1388 0x0080800000000000ull, /* 60 */ 1389 0x0080800000000080ull, /* 61 */ 1390 0x0080800000008000ull, /* 62 */ 1391 0x0080800000008080ull, /* 63 */ 1392 0x0080800000800000ull, /* 64 */ 1393 0x0080800000800080ull, /* 65 */ 1394 0x0080800000808000ull, /* 66 */ 1395 0x0080800000808080ull, /* 67 */ 1396 0x0080800080000000ull, /* 68 */ 1397 0x0080800080000080ull, /* 69 */ 1398 0x0080800080008000ull, /* 6A */ 1399 0x0080800080008080ull, /* 6B */ 1400 0x0080800080800000ull, /* 6C */ 1401 0x0080800080800080ull, /* 6D */ 1402 0x0080800080808000ull, /* 6E */ 1403 0x0080800080808080ull, /* 6F */ 1404 0x0080808000000000ull, /* 70 */ 1405 0x0080808000000080ull, /* 71 */ 1406 0x0080808000008000ull, /* 72 */ 1407 0x0080808000008080ull, /* 73 */ 1408 0x0080808000800000ull, /* 74 */ 1409 0x0080808000800080ull, /* 75 */ 1410 0x0080808000808000ull, /* 76 */ 1411 0x0080808000808080ull, /* 77 */ 1412 0x0080808080000000ull, /* 78 */ 1413 0x0080808080000080ull, /* 79 */ 1414 0x0080808080008000ull, /* 7A */ 1415 0x0080808080008080ull, /* 7B */ 1416 0x0080808080800000ull, /* 7C */ 1417 0x0080808080800080ull, /* 7D */ 1418 0x0080808080808000ull, /* 7E */ 1419 0x0080808080808080ull, /* 7F */ 1420 0x8000000000000000ull, /* 80 */ 1421 0x8000000000000080ull, /* 81 */ 1422 0x8000000000008000ull, /* 82 */ 1423 0x8000000000008080ull, /* 83 */ 1424 0x8000000000800000ull, /* 84 */ 1425 0x8000000000800080ull, /* 85 */ 1426 0x8000000000808000ull, /* 86 */ 1427 0x8000000000808080ull, /* 87 */ 1428 0x8000000080000000ull, /* 88 */ 1429 0x8000000080000080ull, /* 89 */ 1430 0x8000000080008000ull, /* 8A */ 1431 0x8000000080008080ull, /* 8B */ 1432 0x8000000080800000ull, /* 8C */ 1433 0x8000000080800080ull, /* 8D */ 1434 0x8000000080808000ull, /* 8E */ 1435 0x8000000080808080ull, /* 8F */ 1436 0x8000008000000000ull, /* 90 */ 1437 0x8000008000000080ull, /* 91 */ 1438 0x8000008000008000ull, /* 92 */ 1439 0x8000008000008080ull, /* 93 */ 1440 0x8000008000800000ull, /* 94 */ 1441 0x8000008000800080ull, /* 95 */ 1442 0x8000008000808000ull, /* 96 */ 1443 0x8000008000808080ull, /* 97 */ 1444 0x8000008080000000ull, /* 98 */ 1445 0x8000008080000080ull, /* 99 */ 1446 0x8000008080008000ull, /* 9A */ 1447 0x8000008080008080ull, /* 9B */ 1448 0x8000008080800000ull, /* 9C */ 1449 0x8000008080800080ull, /* 9D */ 1450 0x8000008080808000ull, /* 9E */ 1451 0x8000008080808080ull, /* 9F */ 1452 0x8000800000000000ull, /* A0 */ 1453 0x8000800000000080ull, /* A1 */ 1454 0x8000800000008000ull, /* A2 */ 1455 0x8000800000008080ull, /* A3 */ 1456 0x8000800000800000ull, /* A4 */ 1457 0x8000800000800080ull, /* A5 */ 1458 0x8000800000808000ull, /* A6 */ 1459 0x8000800000808080ull, /* A7 */ 1460 0x8000800080000000ull, /* A8 */ 1461 0x8000800080000080ull, /* A9 */ 1462 0x8000800080008000ull, /* AA */ 1463 0x8000800080008080ull, /* AB */ 1464 0x8000800080800000ull, /* AC */ 1465 0x8000800080800080ull, /* AD */ 1466 0x8000800080808000ull, /* AE */ 1467 0x8000800080808080ull, /* AF */ 1468 0x8000808000000000ull, /* B0 */ 1469 0x8000808000000080ull, /* B1 */ 1470 0x8000808000008000ull, /* B2 */ 1471 0x8000808000008080ull, /* B3 */ 1472 0x8000808000800000ull, /* B4 */ 1473 0x8000808000800080ull, /* B5 */ 1474 0x8000808000808000ull, /* B6 */ 1475 0x8000808000808080ull, /* B7 */ 1476 0x8000808080000000ull, /* B8 */ 1477 0x8000808080000080ull, /* B9 */ 1478 0x8000808080008000ull, /* BA */ 1479 0x8000808080008080ull, /* BB */ 1480 0x8000808080800000ull, /* BC */ 1481 0x8000808080800080ull, /* BD */ 1482 0x8000808080808000ull, /* BE */ 1483 0x8000808080808080ull, /* BF */ 1484 0x8080000000000000ull, /* C0 */ 1485 0x8080000000000080ull, /* C1 */ 1486 0x8080000000008000ull, /* C2 */ 1487 0x8080000000008080ull, /* C3 */ 1488 0x8080000000800000ull, /* C4 */ 1489 0x8080000000800080ull, /* C5 */ 1490 0x8080000000808000ull, /* C6 */ 1491 0x8080000000808080ull, /* C7 */ 1492 0x8080000080000000ull, /* C8 */ 1493 0x8080000080000080ull, /* C9 */ 1494 0x8080000080008000ull, /* CA */ 1495 0x8080000080008080ull, /* CB */ 1496 0x8080000080800000ull, /* CC */ 1497 0x8080000080800080ull, /* CD */ 1498 0x8080000080808000ull, /* CE */ 1499 0x8080000080808080ull, /* CF */ 1500 0x8080008000000000ull, /* D0 */ 1501 0x8080008000000080ull, /* D1 */ 1502 0x8080008000008000ull, /* D2 */ 1503 0x8080008000008080ull, /* D3 */ 1504 0x8080008000800000ull, /* D4 */ 1505 0x8080008000800080ull, /* D5 */ 1506 0x8080008000808000ull, /* D6 */ 1507 0x8080008000808080ull, /* D7 */ 1508 0x8080008080000000ull, /* D8 */ 1509 0x8080008080000080ull, /* D9 */ 1510 0x8080008080008000ull, /* DA */ 1511 0x8080008080008080ull, /* DB */ 1512 0x8080008080800000ull, /* DC */ 1513 0x8080008080800080ull, /* DD */ 1514 0x8080008080808000ull, /* DE */ 1515 0x8080008080808080ull, /* DF */ 1516 0x8080800000000000ull, /* E0 */ 1517 0x8080800000000080ull, /* E1 */ 1518 0x8080800000008000ull, /* E2 */ 1519 0x8080800000008080ull, /* E3 */ 1520 0x8080800000800000ull, /* E4 */ 1521 0x8080800000800080ull, /* E5 */ 1522 0x8080800000808000ull, /* E6 */ 1523 0x8080800000808080ull, /* E7 */ 1524 0x8080800080000000ull, /* E8 */ 1525 0x8080800080000080ull, /* E9 */ 1526 0x8080800080008000ull, /* EA */ 1527 0x8080800080008080ull, /* EB */ 1528 0x8080800080800000ull, /* EC */ 1529 0x8080800080800080ull, /* ED */ 1530 0x8080800080808000ull, /* EE */ 1531 0x8080800080808080ull, /* EF */ 1532 0x8080808000000000ull, /* F0 */ 1533 0x8080808000000080ull, /* F1 */ 1534 0x8080808000008000ull, /* F2 */ 1535 0x8080808000008080ull, /* F3 */ 1536 0x8080808000800000ull, /* F4 */ 1537 0x8080808000800080ull, /* F5 */ 1538 0x8080808000808000ull, /* F6 */ 1539 0x8080808000808080ull, /* F7 */ 1540 0x8080808080000000ull, /* F8 */ 1541 0x8080808080000080ull, /* F9 */ 1542 0x8080808080008000ull, /* FA */ 1543 0x8080808080008080ull, /* FB */ 1544 0x8080808080800000ull, /* FC */ 1545 0x8080808080800080ull, /* FD */ 1546 0x8080808080808000ull, /* FE */ 1547 0x8080808080808080ull, /* FF */ 1548 }; 1549 1550 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b) 1551 { 1552 int i; 1553 uint64_t t[2] = { 0, 0 }; 1554 1555 VECTOR_FOR_INORDER_I(i, u8) { 1556 #if defined(HOST_WORDS_BIGENDIAN) 1557 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7); 1558 #else 1559 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7)); 1560 #endif 1561 } 1562 1563 r->u64[0] = t[0]; 1564 r->u64[1] = t[1]; 1565 } 1566 1567 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1568 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1569 { \ 1570 int i, j; \ 1571 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \ 1572 \ 1573 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1574 prod[i] = 0; \ 1575 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1576 if (a->srcfld[i] & (1ull<<j)) { \ 1577 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1578 } \ 1579 } \ 1580 } \ 1581 \ 1582 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1583 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \ 1584 } \ 1585 } 1586 1587 PMSUM(vpmsumb, u8, u16, uint16_t) 1588 PMSUM(vpmsumh, u16, u32, uint32_t) 1589 PMSUM(vpmsumw, u32, u64, uint64_t) 1590 1591 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1592 { 1593 1594 #ifdef CONFIG_INT128 1595 int i, j; 1596 __uint128_t prod[2]; 1597 1598 VECTOR_FOR_INORDER_I(i, u64) { 1599 prod[i] = 0; 1600 for (j = 0; j < 64; j++) { 1601 if (a->u64[i] & (1ull<<j)) { 1602 prod[i] ^= (((__uint128_t)b->u64[i]) << j); 1603 } 1604 } 1605 } 1606 1607 r->u128 = prod[0] ^ prod[1]; 1608 1609 #else 1610 int i, j; 1611 ppc_avr_t prod[2]; 1612 1613 VECTOR_FOR_INORDER_I(i, u64) { 1614 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0; 1615 for (j = 0; j < 64; j++) { 1616 if (a->u64[i] & (1ull<<j)) { 1617 ppc_avr_t bshift; 1618 if (j == 0) { 1619 bshift.u64[HI_IDX] = 0; 1620 bshift.u64[LO_IDX] = b->u64[i]; 1621 } else { 1622 bshift.u64[HI_IDX] = b->u64[i] >> (64-j); 1623 bshift.u64[LO_IDX] = b->u64[i] << j; 1624 } 1625 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX]; 1626 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX]; 1627 } 1628 } 1629 } 1630 1631 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX]; 1632 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX]; 1633 #endif 1634 } 1635 1636 1637 #if defined(HOST_WORDS_BIGENDIAN) 1638 #define PKBIG 1 1639 #else 1640 #define PKBIG 0 1641 #endif 1642 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1643 { 1644 int i, j; 1645 ppc_avr_t result; 1646 #if defined(HOST_WORDS_BIGENDIAN) 1647 const ppc_avr_t *x[2] = { a, b }; 1648 #else 1649 const ppc_avr_t *x[2] = { b, a }; 1650 #endif 1651 1652 VECTOR_FOR_INORDER_I(i, u64) { 1653 VECTOR_FOR_INORDER_I(j, u32) { 1654 uint32_t e = x[i]->u32[j]; 1655 1656 result.u16[4*i+j] = (((e >> 9) & 0xfc00) | 1657 ((e >> 6) & 0x3e0) | 1658 ((e >> 3) & 0x1f)); 1659 } 1660 } 1661 *r = result; 1662 } 1663 1664 #define VPK(suffix, from, to, cvt, dosat) \ 1665 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1666 ppc_avr_t *a, ppc_avr_t *b) \ 1667 { \ 1668 int i; \ 1669 int sat = 0; \ 1670 ppc_avr_t result; \ 1671 ppc_avr_t *a0 = PKBIG ? a : b; \ 1672 ppc_avr_t *a1 = PKBIG ? b : a; \ 1673 \ 1674 VECTOR_FOR_INORDER_I(i, from) { \ 1675 result.to[i] = cvt(a0->from[i], &sat); \ 1676 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \ 1677 } \ 1678 *r = result; \ 1679 if (dosat && sat) { \ 1680 env->vscr |= (1 << VSCR_SAT); \ 1681 } \ 1682 } 1683 #define I(x, y) (x) 1684 VPK(shss, s16, s8, cvtshsb, 1) 1685 VPK(shus, s16, u8, cvtshub, 1) 1686 VPK(swss, s32, s16, cvtswsh, 1) 1687 VPK(swus, s32, u16, cvtswuh, 1) 1688 VPK(sdss, s64, s32, cvtsdsw, 1) 1689 VPK(sdus, s64, u32, cvtsduw, 1) 1690 VPK(uhus, u16, u8, cvtuhub, 1) 1691 VPK(uwus, u32, u16, cvtuwuh, 1) 1692 VPK(udus, u64, u32, cvtuduw, 1) 1693 VPK(uhum, u16, u8, I, 0) 1694 VPK(uwum, u32, u16, I, 0) 1695 VPK(udum, u64, u32, I, 0) 1696 #undef I 1697 #undef VPK 1698 #undef PKBIG 1699 1700 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1701 { 1702 int i; 1703 1704 for (i = 0; i < ARRAY_SIZE(r->f); i++) { 1705 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status); 1706 } 1707 } 1708 1709 #define VRFI(suffix, rounding) \ 1710 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1711 ppc_avr_t *b) \ 1712 { \ 1713 int i; \ 1714 float_status s = env->vec_status; \ 1715 \ 1716 set_float_rounding_mode(rounding, &s); \ 1717 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ 1718 r->f[i] = float32_round_to_int (b->f[i], &s); \ 1719 } \ 1720 } 1721 VRFI(n, float_round_nearest_even) 1722 VRFI(m, float_round_down) 1723 VRFI(p, float_round_up) 1724 VRFI(z, float_round_to_zero) 1725 #undef VRFI 1726 1727 #define VROTATE(suffix, element, mask) \ 1728 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1729 { \ 1730 int i; \ 1731 \ 1732 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1733 unsigned int shift = b->element[i] & mask; \ 1734 r->element[i] = (a->element[i] << shift) | \ 1735 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \ 1736 } \ 1737 } 1738 VROTATE(b, u8, 0x7) 1739 VROTATE(h, u16, 0xF) 1740 VROTATE(w, u32, 0x1F) 1741 VROTATE(d, u64, 0x3F) 1742 #undef VROTATE 1743 1744 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1745 { 1746 int i; 1747 1748 for (i = 0; i < ARRAY_SIZE(r->f); i++) { 1749 float32 t = float32_sqrt(b->f[i], &env->vec_status); 1750 1751 r->f[i] = float32_div(float32_one, t, &env->vec_status); 1752 } 1753 } 1754 1755 #define VRLMI(name, size, element, insert) \ 1756 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1757 { \ 1758 int i; \ 1759 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1760 uint##size##_t src1 = a->element[i]; \ 1761 uint##size##_t src2 = b->element[i]; \ 1762 uint##size##_t src3 = r->element[i]; \ 1763 uint##size##_t begin, end, shift, mask, rot_val; \ 1764 \ 1765 shift = extract##size(src2, 0, 6); \ 1766 end = extract##size(src2, 8, 6); \ 1767 begin = extract##size(src2, 16, 6); \ 1768 rot_val = rol##size(src1, shift); \ 1769 mask = mask_u##size(begin, end); \ 1770 if (insert) { \ 1771 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1772 } else { \ 1773 r->element[i] = (rot_val & mask); \ 1774 } \ 1775 } \ 1776 } 1777 1778 VRLMI(vrldmi, 64, u64, 1); 1779 VRLMI(vrlwmi, 32, u32, 1); 1780 VRLMI(vrldnm, 64, u64, 0); 1781 VRLMI(vrlwnm, 32, u32, 0); 1782 1783 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1784 ppc_avr_t *c) 1785 { 1786 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]); 1787 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]); 1788 } 1789 1790 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1791 { 1792 int i; 1793 1794 for (i = 0; i < ARRAY_SIZE(r->f); i++) { 1795 r->f[i] = float32_exp2(b->f[i], &env->vec_status); 1796 } 1797 } 1798 1799 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1800 { 1801 int i; 1802 1803 for (i = 0; i < ARRAY_SIZE(r->f); i++) { 1804 r->f[i] = float32_log2(b->f[i], &env->vec_status); 1805 } 1806 } 1807 1808 /* The specification says that the results are undefined if all of the 1809 * shift counts are not identical. We check to make sure that they are 1810 * to conform to what real hardware appears to do. */ 1811 #define VSHIFT(suffix, leftp) \ 1812 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1813 { \ 1814 int shift = b->u8[LO_IDX*15] & 0x7; \ 1815 int doit = 1; \ 1816 int i; \ 1817 \ 1818 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \ 1819 doit = doit && ((b->u8[i] & 0x7) == shift); \ 1820 } \ 1821 if (doit) { \ 1822 if (shift == 0) { \ 1823 *r = *a; \ 1824 } else if (leftp) { \ 1825 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \ 1826 \ 1827 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \ 1828 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \ 1829 } else { \ 1830 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \ 1831 \ 1832 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \ 1833 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \ 1834 } \ 1835 } \ 1836 } 1837 VSHIFT(l, 1) 1838 VSHIFT(r, 0) 1839 #undef VSHIFT 1840 1841 #define VSL(suffix, element, mask) \ 1842 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1843 { \ 1844 int i; \ 1845 \ 1846 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1847 unsigned int shift = b->element[i] & mask; \ 1848 \ 1849 r->element[i] = a->element[i] << shift; \ 1850 } \ 1851 } 1852 VSL(b, u8, 0x7) 1853 VSL(h, u16, 0x0F) 1854 VSL(w, u32, 0x1F) 1855 VSL(d, u64, 0x3F) 1856 #undef VSL 1857 1858 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1859 { 1860 int i; 1861 unsigned int shift, bytes, size; 1862 1863 size = ARRAY_SIZE(r->u8); 1864 for (i = 0; i < size; i++) { 1865 shift = b->u8[i] & 0x7; /* extract shift value */ 1866 bytes = (a->u8[i] << 8) + /* extract adjacent bytes */ 1867 (((i + 1) < size) ? a->u8[i + 1] : 0); 1868 r->u8[i] = (bytes << shift) >> 8; /* shift and store result */ 1869 } 1870 } 1871 1872 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1873 { 1874 int i; 1875 unsigned int shift, bytes; 1876 1877 /* Use reverse order, as destination and source register can be same. Its 1878 * being modified in place saving temporary, reverse order will guarantee 1879 * that computed result is not fed back. 1880 */ 1881 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1882 shift = b->u8[i] & 0x7; /* extract shift value */ 1883 bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i]; 1884 /* extract adjacent bytes */ 1885 r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */ 1886 } 1887 } 1888 1889 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1890 { 1891 int sh = shift & 0xf; 1892 int i; 1893 ppc_avr_t result; 1894 1895 #if defined(HOST_WORDS_BIGENDIAN) 1896 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1897 int index = sh + i; 1898 if (index > 0xf) { 1899 result.u8[i] = b->u8[index - 0x10]; 1900 } else { 1901 result.u8[i] = a->u8[index]; 1902 } 1903 } 1904 #else 1905 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1906 int index = (16 - sh) + i; 1907 if (index > 0xf) { 1908 result.u8[i] = a->u8[index - 0x10]; 1909 } else { 1910 result.u8[i] = b->u8[index]; 1911 } 1912 } 1913 #endif 1914 *r = result; 1915 } 1916 1917 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1918 { 1919 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf; 1920 1921 #if defined(HOST_WORDS_BIGENDIAN) 1922 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1923 memset(&r->u8[16-sh], 0, sh); 1924 #else 1925 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1926 memset(&r->u8[0], 0, sh); 1927 #endif 1928 } 1929 1930 /* Experimental testing shows that hardware masks the immediate. */ 1931 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1)) 1932 #if defined(HOST_WORDS_BIGENDIAN) 1933 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element) 1934 #else 1935 #define SPLAT_ELEMENT(element) \ 1936 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element)) 1937 #endif 1938 #define VSPLT(suffix, element) \ 1939 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \ 1940 { \ 1941 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \ 1942 int i; \ 1943 \ 1944 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1945 r->element[i] = s; \ 1946 } \ 1947 } 1948 VSPLT(b, u8) 1949 VSPLT(h, u16) 1950 VSPLT(w, u32) 1951 #undef VSPLT 1952 #undef SPLAT_ELEMENT 1953 #undef _SPLAT_MASKED 1954 #if defined(HOST_WORDS_BIGENDIAN) 1955 #define VINSERT(suffix, element) \ 1956 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1957 { \ 1958 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element)], \ 1959 sizeof(r->element[0])); \ 1960 } 1961 #else 1962 #define VINSERT(suffix, element) \ 1963 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1964 { \ 1965 uint32_t d = (16 - index) - sizeof(r->element[0]); \ 1966 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \ 1967 } 1968 #endif 1969 VINSERT(b, u8) 1970 VINSERT(h, u16) 1971 VINSERT(w, u32) 1972 VINSERT(d, u64) 1973 #undef VINSERT 1974 #if defined(HOST_WORDS_BIGENDIAN) 1975 #define VEXTRACT(suffix, element) \ 1976 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1977 { \ 1978 uint32_t es = sizeof(r->element[0]); \ 1979 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1980 memset(&r->u8[8], 0, 8); \ 1981 memset(&r->u8[0], 0, 8 - es); \ 1982 } 1983 #else 1984 #define VEXTRACT(suffix, element) \ 1985 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1986 { \ 1987 uint32_t es = sizeof(r->element[0]); \ 1988 uint32_t s = (16 - index) - es; \ 1989 memmove(&r->u8[8], &b->u8[s], es); \ 1990 memset(&r->u8[0], 0, 8); \ 1991 memset(&r->u8[8 + es], 0, 8 - es); \ 1992 } 1993 #endif 1994 VEXTRACT(ub, u8) 1995 VEXTRACT(uh, u16) 1996 VEXTRACT(uw, u32) 1997 VEXTRACT(d, u64) 1998 #undef VEXTRACT 1999 2000 #define VEXT_SIGNED(name, element, mask, cast, recast) \ 2001 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 2002 { \ 2003 int i; \ 2004 VECTOR_FOR_INORDER_I(i, element) { \ 2005 r->element[i] = (recast)((cast)(b->element[i] & mask)); \ 2006 } \ 2007 } 2008 VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t) 2009 VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t) 2010 VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t) 2011 VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t) 2012 VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t) 2013 #undef VEXT_SIGNED 2014 2015 #define VNEG(name, element) \ 2016 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 2017 { \ 2018 int i; \ 2019 VECTOR_FOR_INORDER_I(i, element) { \ 2020 r->element[i] = -b->element[i]; \ 2021 } \ 2022 } 2023 VNEG(vnegw, s32) 2024 VNEG(vnegd, s64) 2025 #undef VNEG 2026 2027 #define VSPLTI(suffix, element, splat_type) \ 2028 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \ 2029 { \ 2030 splat_type x = (int8_t)(splat << 3) >> 3; \ 2031 int i; \ 2032 \ 2033 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 2034 r->element[i] = x; \ 2035 } \ 2036 } 2037 VSPLTI(b, s8, int8_t) 2038 VSPLTI(h, s16, int16_t) 2039 VSPLTI(w, s32, int32_t) 2040 #undef VSPLTI 2041 2042 #define VSR(suffix, element, mask) \ 2043 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 2044 { \ 2045 int i; \ 2046 \ 2047 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 2048 unsigned int shift = b->element[i] & mask; \ 2049 r->element[i] = a->element[i] >> shift; \ 2050 } \ 2051 } 2052 VSR(ab, s8, 0x7) 2053 VSR(ah, s16, 0xF) 2054 VSR(aw, s32, 0x1F) 2055 VSR(ad, s64, 0x3F) 2056 VSR(b, u8, 0x7) 2057 VSR(h, u16, 0xF) 2058 VSR(w, u32, 0x1F) 2059 VSR(d, u64, 0x3F) 2060 #undef VSR 2061 2062 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2063 { 2064 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf; 2065 2066 #if defined(HOST_WORDS_BIGENDIAN) 2067 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 2068 memset(&r->u8[0], 0, sh); 2069 #else 2070 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 2071 memset(&r->u8[16 - sh], 0, sh); 2072 #endif 2073 } 2074 2075 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2076 { 2077 int i; 2078 2079 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2080 r->u32[i] = a->u32[i] >= b->u32[i]; 2081 } 2082 } 2083 2084 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2085 { 2086 int64_t t; 2087 int i, upper; 2088 ppc_avr_t result; 2089 int sat = 0; 2090 2091 #if defined(HOST_WORDS_BIGENDIAN) 2092 upper = ARRAY_SIZE(r->s32)-1; 2093 #else 2094 upper = 0; 2095 #endif 2096 t = (int64_t)b->s32[upper]; 2097 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2098 t += a->s32[i]; 2099 result.s32[i] = 0; 2100 } 2101 result.s32[upper] = cvtsdsw(t, &sat); 2102 *r = result; 2103 2104 if (sat) { 2105 env->vscr |= (1 << VSCR_SAT); 2106 } 2107 } 2108 2109 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2110 { 2111 int i, j, upper; 2112 ppc_avr_t result; 2113 int sat = 0; 2114 2115 #if defined(HOST_WORDS_BIGENDIAN) 2116 upper = 1; 2117 #else 2118 upper = 0; 2119 #endif 2120 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 2121 int64_t t = (int64_t)b->s32[upper + i * 2]; 2122 2123 result.u64[i] = 0; 2124 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 2125 t += a->s32[2 * i + j]; 2126 } 2127 result.s32[upper + i * 2] = cvtsdsw(t, &sat); 2128 } 2129 2130 *r = result; 2131 if (sat) { 2132 env->vscr |= (1 << VSCR_SAT); 2133 } 2134 } 2135 2136 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2137 { 2138 int i, j; 2139 int sat = 0; 2140 2141 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2142 int64_t t = (int64_t)b->s32[i]; 2143 2144 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 2145 t += a->s8[4 * i + j]; 2146 } 2147 r->s32[i] = cvtsdsw(t, &sat); 2148 } 2149 2150 if (sat) { 2151 env->vscr |= (1 << VSCR_SAT); 2152 } 2153 } 2154 2155 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2156 { 2157 int sat = 0; 2158 int i; 2159 2160 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2161 int64_t t = (int64_t)b->s32[i]; 2162 2163 t += a->s16[2 * i] + a->s16[2 * i + 1]; 2164 r->s32[i] = cvtsdsw(t, &sat); 2165 } 2166 2167 if (sat) { 2168 env->vscr |= (1 << VSCR_SAT); 2169 } 2170 } 2171 2172 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2173 { 2174 int i, j; 2175 int sat = 0; 2176 2177 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2178 uint64_t t = (uint64_t)b->u32[i]; 2179 2180 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 2181 t += a->u8[4 * i + j]; 2182 } 2183 r->u32[i] = cvtuduw(t, &sat); 2184 } 2185 2186 if (sat) { 2187 env->vscr |= (1 << VSCR_SAT); 2188 } 2189 } 2190 2191 #if defined(HOST_WORDS_BIGENDIAN) 2192 #define UPKHI 1 2193 #define UPKLO 0 2194 #else 2195 #define UPKHI 0 2196 #define UPKLO 1 2197 #endif 2198 #define VUPKPX(suffix, hi) \ 2199 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2200 { \ 2201 int i; \ 2202 ppc_avr_t result; \ 2203 \ 2204 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 2205 uint16_t e = b->u16[hi ? i : i+4]; \ 2206 uint8_t a = (e >> 15) ? 0xff : 0; \ 2207 uint8_t r = (e >> 10) & 0x1f; \ 2208 uint8_t g = (e >> 5) & 0x1f; \ 2209 uint8_t b = e & 0x1f; \ 2210 \ 2211 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 2212 } \ 2213 *r = result; \ 2214 } 2215 VUPKPX(lpx, UPKLO) 2216 VUPKPX(hpx, UPKHI) 2217 #undef VUPKPX 2218 2219 #define VUPK(suffix, unpacked, packee, hi) \ 2220 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2221 { \ 2222 int i; \ 2223 ppc_avr_t result; \ 2224 \ 2225 if (hi) { \ 2226 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 2227 result.unpacked[i] = b->packee[i]; \ 2228 } \ 2229 } else { \ 2230 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 2231 i++) { \ 2232 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 2233 } \ 2234 } \ 2235 *r = result; \ 2236 } 2237 VUPK(hsb, s16, s8, UPKHI) 2238 VUPK(hsh, s32, s16, UPKHI) 2239 VUPK(hsw, s64, s32, UPKHI) 2240 VUPK(lsb, s16, s8, UPKLO) 2241 VUPK(lsh, s32, s16, UPKLO) 2242 VUPK(lsw, s64, s32, UPKLO) 2243 #undef VUPK 2244 #undef UPKHI 2245 #undef UPKLO 2246 2247 #define VGENERIC_DO(name, element) \ 2248 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 2249 { \ 2250 int i; \ 2251 \ 2252 VECTOR_FOR_INORDER_I(i, element) { \ 2253 r->element[i] = name(b->element[i]); \ 2254 } \ 2255 } 2256 2257 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 2258 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 2259 #define clzw(v) clz32((v)) 2260 #define clzd(v) clz64((v)) 2261 2262 VGENERIC_DO(clzb, u8) 2263 VGENERIC_DO(clzh, u16) 2264 VGENERIC_DO(clzw, u32) 2265 VGENERIC_DO(clzd, u64) 2266 2267 #undef clzb 2268 #undef clzh 2269 #undef clzw 2270 #undef clzd 2271 2272 #define ctzb(v) ((v) ? ctz32(v) : 8) 2273 #define ctzh(v) ((v) ? ctz32(v) : 16) 2274 #define ctzw(v) ctz32((v)) 2275 #define ctzd(v) ctz64((v)) 2276 2277 VGENERIC_DO(ctzb, u8) 2278 VGENERIC_DO(ctzh, u16) 2279 VGENERIC_DO(ctzw, u32) 2280 VGENERIC_DO(ctzd, u64) 2281 2282 #undef ctzb 2283 #undef ctzh 2284 #undef ctzw 2285 #undef ctzd 2286 2287 #define popcntb(v) ctpop8(v) 2288 #define popcnth(v) ctpop16(v) 2289 #define popcntw(v) ctpop32(v) 2290 #define popcntd(v) ctpop64(v) 2291 2292 VGENERIC_DO(popcntb, u8) 2293 VGENERIC_DO(popcnth, u16) 2294 VGENERIC_DO(popcntw, u32) 2295 VGENERIC_DO(popcntd, u64) 2296 2297 #undef popcntb 2298 #undef popcnth 2299 #undef popcntw 2300 #undef popcntd 2301 2302 #undef VGENERIC_DO 2303 2304 #if defined(HOST_WORDS_BIGENDIAN) 2305 #define QW_ONE { .u64 = { 0, 1 } } 2306 #else 2307 #define QW_ONE { .u64 = { 1, 0 } } 2308 #endif 2309 2310 #ifndef CONFIG_INT128 2311 2312 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) 2313 { 2314 t->u64[0] = ~a.u64[0]; 2315 t->u64[1] = ~a.u64[1]; 2316 } 2317 2318 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) 2319 { 2320 if (a.u64[HI_IDX] < b.u64[HI_IDX]) { 2321 return -1; 2322 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) { 2323 return 1; 2324 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) { 2325 return -1; 2326 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) { 2327 return 1; 2328 } else { 2329 return 0; 2330 } 2331 } 2332 2333 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2334 { 2335 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX]; 2336 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] + 2337 (~a.u64[LO_IDX] < b.u64[LO_IDX]); 2338 } 2339 2340 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2341 { 2342 ppc_avr_t not_a; 2343 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX]; 2344 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] + 2345 (~a.u64[LO_IDX] < b.u64[LO_IDX]); 2346 avr_qw_not(¬_a, a); 2347 return avr_qw_cmpu(not_a, b) < 0; 2348 } 2349 2350 #endif 2351 2352 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2353 { 2354 #ifdef CONFIG_INT128 2355 r->u128 = a->u128 + b->u128; 2356 #else 2357 avr_qw_add(r, *a, *b); 2358 #endif 2359 } 2360 2361 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2362 { 2363 #ifdef CONFIG_INT128 2364 r->u128 = a->u128 + b->u128 + (c->u128 & 1); 2365 #else 2366 2367 if (c->u64[LO_IDX] & 1) { 2368 ppc_avr_t tmp; 2369 2370 tmp.u64[HI_IDX] = 0; 2371 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1; 2372 avr_qw_add(&tmp, *a, tmp); 2373 avr_qw_add(r, tmp, *b); 2374 } else { 2375 avr_qw_add(r, *a, *b); 2376 } 2377 #endif 2378 } 2379 2380 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2381 { 2382 #ifdef CONFIG_INT128 2383 r->u128 = (~a->u128 < b->u128); 2384 #else 2385 ppc_avr_t not_a; 2386 2387 avr_qw_not(¬_a, *a); 2388 2389 r->u64[HI_IDX] = 0; 2390 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0); 2391 #endif 2392 } 2393 2394 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2395 { 2396 #ifdef CONFIG_INT128 2397 int carry_out = (~a->u128 < b->u128); 2398 if (!carry_out && (c->u128 & 1)) { 2399 carry_out = ((a->u128 + b->u128 + 1) == 0) && 2400 ((a->u128 != 0) || (b->u128 != 0)); 2401 } 2402 r->u128 = carry_out; 2403 #else 2404 2405 int carry_in = c->u64[LO_IDX] & 1; 2406 int carry_out = 0; 2407 ppc_avr_t tmp; 2408 2409 carry_out = avr_qw_addc(&tmp, *a, *b); 2410 2411 if (!carry_out && carry_in) { 2412 ppc_avr_t one = QW_ONE; 2413 carry_out = avr_qw_addc(&tmp, tmp, one); 2414 } 2415 r->u64[HI_IDX] = 0; 2416 r->u64[LO_IDX] = carry_out; 2417 #endif 2418 } 2419 2420 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2421 { 2422 #ifdef CONFIG_INT128 2423 r->u128 = a->u128 - b->u128; 2424 #else 2425 ppc_avr_t tmp; 2426 ppc_avr_t one = QW_ONE; 2427 2428 avr_qw_not(&tmp, *b); 2429 avr_qw_add(&tmp, *a, tmp); 2430 avr_qw_add(r, tmp, one); 2431 #endif 2432 } 2433 2434 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2435 { 2436 #ifdef CONFIG_INT128 2437 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); 2438 #else 2439 ppc_avr_t tmp, sum; 2440 2441 avr_qw_not(&tmp, *b); 2442 avr_qw_add(&sum, *a, tmp); 2443 2444 tmp.u64[HI_IDX] = 0; 2445 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1; 2446 avr_qw_add(r, sum, tmp); 2447 #endif 2448 } 2449 2450 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2451 { 2452 #ifdef CONFIG_INT128 2453 r->u128 = (~a->u128 < ~b->u128) || 2454 (a->u128 + ~b->u128 == (__uint128_t)-1); 2455 #else 2456 int carry = (avr_qw_cmpu(*a, *b) > 0); 2457 if (!carry) { 2458 ppc_avr_t tmp; 2459 avr_qw_not(&tmp, *b); 2460 avr_qw_add(&tmp, *a, tmp); 2461 carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull)); 2462 } 2463 r->u64[HI_IDX] = 0; 2464 r->u64[LO_IDX] = carry; 2465 #endif 2466 } 2467 2468 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2469 { 2470 #ifdef CONFIG_INT128 2471 r->u128 = 2472 (~a->u128 < ~b->u128) || 2473 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); 2474 #else 2475 int carry_in = c->u64[LO_IDX] & 1; 2476 int carry_out = (avr_qw_cmpu(*a, *b) > 0); 2477 if (!carry_out && carry_in) { 2478 ppc_avr_t tmp; 2479 avr_qw_not(&tmp, *b); 2480 avr_qw_add(&tmp, *a, tmp); 2481 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull)); 2482 } 2483 2484 r->u64[HI_IDX] = 0; 2485 r->u64[LO_IDX] = carry_out; 2486 #endif 2487 } 2488 2489 #define BCD_PLUS_PREF_1 0xC 2490 #define BCD_PLUS_PREF_2 0xF 2491 #define BCD_PLUS_ALT_1 0xA 2492 #define BCD_NEG_PREF 0xD 2493 #define BCD_NEG_ALT 0xB 2494 #define BCD_PLUS_ALT_2 0xE 2495 #define NATIONAL_PLUS 0x2B 2496 #define NATIONAL_NEG 0x2D 2497 2498 #if defined(HOST_WORDS_BIGENDIAN) 2499 #define BCD_DIG_BYTE(n) (15 - (n/2)) 2500 #else 2501 #define BCD_DIG_BYTE(n) (n/2) 2502 #endif 2503 2504 static int bcd_get_sgn(ppc_avr_t *bcd) 2505 { 2506 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) { 2507 case BCD_PLUS_PREF_1: 2508 case BCD_PLUS_PREF_2: 2509 case BCD_PLUS_ALT_1: 2510 case BCD_PLUS_ALT_2: 2511 { 2512 return 1; 2513 } 2514 2515 case BCD_NEG_PREF: 2516 case BCD_NEG_ALT: 2517 { 2518 return -1; 2519 } 2520 2521 default: 2522 { 2523 return 0; 2524 } 2525 } 2526 } 2527 2528 static int bcd_preferred_sgn(int sgn, int ps) 2529 { 2530 if (sgn >= 0) { 2531 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2532 } else { 2533 return BCD_NEG_PREF; 2534 } 2535 } 2536 2537 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2538 { 2539 uint8_t result; 2540 if (n & 1) { 2541 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4; 2542 } else { 2543 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF; 2544 } 2545 2546 if (unlikely(result > 9)) { 2547 *invalid = true; 2548 } 2549 return result; 2550 } 2551 2552 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2553 { 2554 if (n & 1) { 2555 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F; 2556 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4); 2557 } else { 2558 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0; 2559 bcd->u8[BCD_DIG_BYTE(n)] |= digit; 2560 } 2561 } 2562 2563 static int bcd_cmp_zero(ppc_avr_t *bcd) 2564 { 2565 if (bcd->u64[HI_IDX] == 0 && (bcd->u64[LO_IDX] >> 4) == 0) { 2566 return 1 << CRF_EQ; 2567 } else { 2568 return (bcd_get_sgn(bcd) == 1) ? 1 << CRF_GT : 1 << CRF_LT; 2569 } 2570 } 2571 2572 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2573 { 2574 #if defined(HOST_WORDS_BIGENDIAN) 2575 return reg->u16[7 - n]; 2576 #else 2577 return reg->u16[n]; 2578 #endif 2579 } 2580 2581 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2582 { 2583 #if defined(HOST_WORDS_BIGENDIAN) 2584 reg->u16[7 - n] = val; 2585 #else 2586 reg->u16[n] = val; 2587 #endif 2588 } 2589 2590 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2591 { 2592 int i; 2593 int invalid = 0; 2594 for (i = 31; i > 0; i--) { 2595 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2596 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2597 if (unlikely(invalid)) { 2598 return 0; /* doesn't matter */ 2599 } else if (dig_a > dig_b) { 2600 return 1; 2601 } else if (dig_a < dig_b) { 2602 return -1; 2603 } 2604 } 2605 2606 return 0; 2607 } 2608 2609 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2610 int *overflow) 2611 { 2612 int carry = 0; 2613 int i; 2614 int is_zero = 1; 2615 for (i = 1; i <= 31; i++) { 2616 uint8_t digit = bcd_get_digit(a, i, invalid) + 2617 bcd_get_digit(b, i, invalid) + carry; 2618 is_zero &= (digit == 0); 2619 if (digit > 9) { 2620 carry = 1; 2621 digit -= 10; 2622 } else { 2623 carry = 0; 2624 } 2625 2626 bcd_put_digit(t, digit, i); 2627 2628 if (unlikely(*invalid)) { 2629 return -1; 2630 } 2631 } 2632 2633 *overflow = carry; 2634 return is_zero; 2635 } 2636 2637 static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2638 int *overflow) 2639 { 2640 int carry = 0; 2641 int i; 2642 int is_zero = 1; 2643 for (i = 1; i <= 31; i++) { 2644 uint8_t digit = bcd_get_digit(a, i, invalid) - 2645 bcd_get_digit(b, i, invalid) + carry; 2646 is_zero &= (digit == 0); 2647 if (digit & 0x80) { 2648 carry = -1; 2649 digit += 10; 2650 } else { 2651 carry = 0; 2652 } 2653 2654 bcd_put_digit(t, digit, i); 2655 2656 if (unlikely(*invalid)) { 2657 return -1; 2658 } 2659 } 2660 2661 *overflow = carry; 2662 return is_zero; 2663 } 2664 2665 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2666 { 2667 2668 int sgna = bcd_get_sgn(a); 2669 int sgnb = bcd_get_sgn(b); 2670 int invalid = (sgna == 0) || (sgnb == 0); 2671 int overflow = 0; 2672 int zero = 0; 2673 uint32_t cr = 0; 2674 ppc_avr_t result = { .u64 = { 0, 0 } }; 2675 2676 if (!invalid) { 2677 if (sgna == sgnb) { 2678 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps); 2679 zero = bcd_add_mag(&result, a, b, &invalid, &overflow); 2680 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT; 2681 } else if (bcd_cmp_mag(a, b) > 0) { 2682 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps); 2683 zero = bcd_sub_mag(&result, a, b, &invalid, &overflow); 2684 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT; 2685 } else { 2686 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps); 2687 zero = bcd_sub_mag(&result, b, a, &invalid, &overflow); 2688 cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT; 2689 } 2690 } 2691 2692 if (unlikely(invalid)) { 2693 result.u64[HI_IDX] = result.u64[LO_IDX] = -1; 2694 cr = 1 << CRF_SO; 2695 } else if (overflow) { 2696 cr |= 1 << CRF_SO; 2697 } else if (zero) { 2698 cr = 1 << CRF_EQ; 2699 } 2700 2701 *r = result; 2702 2703 return cr; 2704 } 2705 2706 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2707 { 2708 ppc_avr_t bcopy = *b; 2709 int sgnb = bcd_get_sgn(b); 2710 if (sgnb < 0) { 2711 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2712 } else if (sgnb > 0) { 2713 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2714 } 2715 /* else invalid ... defer to bcdadd code for proper handling */ 2716 2717 return helper_bcdadd(r, a, &bcopy, ps); 2718 } 2719 2720 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2721 { 2722 int i; 2723 int cr = 0; 2724 uint16_t national = 0; 2725 uint16_t sgnb = get_national_digit(b, 0); 2726 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2727 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2728 2729 for (i = 1; i < 8; i++) { 2730 national = get_national_digit(b, i); 2731 if (unlikely(national < 0x30 || national > 0x39)) { 2732 invalid = 1; 2733 break; 2734 } 2735 2736 bcd_put_digit(&ret, national & 0xf, i); 2737 } 2738 2739 if (sgnb == NATIONAL_PLUS) { 2740 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2741 } else { 2742 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2743 } 2744 2745 cr = bcd_cmp_zero(&ret); 2746 2747 if (unlikely(invalid)) { 2748 cr = 1 << CRF_SO; 2749 } 2750 2751 *r = ret; 2752 2753 return cr; 2754 } 2755 2756 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2757 { 2758 int i; 2759 int cr = 0; 2760 int sgnb = bcd_get_sgn(b); 2761 int invalid = (sgnb == 0); 2762 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2763 2764 int ox_flag = (b->u64[HI_IDX] != 0) || ((b->u64[LO_IDX] >> 32) != 0); 2765 2766 for (i = 1; i < 8; i++) { 2767 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2768 2769 if (unlikely(invalid)) { 2770 break; 2771 } 2772 } 2773 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2774 2775 cr = bcd_cmp_zero(b); 2776 2777 if (ox_flag) { 2778 cr |= 1 << CRF_SO; 2779 } 2780 2781 if (unlikely(invalid)) { 2782 cr = 1 << CRF_SO; 2783 } 2784 2785 *r = ret; 2786 2787 return cr; 2788 } 2789 2790 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2791 { 2792 int i; 2793 int cr = 0; 2794 int invalid = 0; 2795 int zone_digit = 0; 2796 int zone_lead = ps ? 0xF : 0x3; 2797 int digit = 0; 2798 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2799 int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4; 2800 2801 if (unlikely((sgnb < 0xA) && ps)) { 2802 invalid = 1; 2803 } 2804 2805 for (i = 0; i < 16; i++) { 2806 zone_digit = (i * 2) ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead; 2807 digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF; 2808 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2809 invalid = 1; 2810 break; 2811 } 2812 2813 bcd_put_digit(&ret, digit, i + 1); 2814 } 2815 2816 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2817 (!ps && (sgnb & 0x4))) { 2818 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2819 } else { 2820 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2821 } 2822 2823 cr = bcd_cmp_zero(&ret); 2824 2825 if (unlikely(invalid)) { 2826 cr = 1 << CRF_SO; 2827 } 2828 2829 *r = ret; 2830 2831 return cr; 2832 } 2833 2834 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2835 { 2836 int i; 2837 int cr = 0; 2838 uint8_t digit = 0; 2839 int sgnb = bcd_get_sgn(b); 2840 int zone_lead = (ps) ? 0xF0 : 0x30; 2841 int invalid = (sgnb == 0); 2842 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2843 2844 int ox_flag = ((b->u64[HI_IDX] >> 4) != 0); 2845 2846 for (i = 0; i < 16; i++) { 2847 digit = bcd_get_digit(b, i + 1, &invalid); 2848 2849 if (unlikely(invalid)) { 2850 break; 2851 } 2852 2853 ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit; 2854 } 2855 2856 if (ps) { 2857 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2858 } else { 2859 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1); 2860 } 2861 2862 cr = bcd_cmp_zero(b); 2863 2864 if (ox_flag) { 2865 cr |= 1 << CRF_SO; 2866 } 2867 2868 if (unlikely(invalid)) { 2869 cr = 1 << CRF_SO; 2870 } 2871 2872 *r = ret; 2873 2874 return cr; 2875 } 2876 2877 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 2878 { 2879 int i; 2880 VECTOR_FOR_INORDER_I(i, u8) { 2881 r->u8[i] = AES_sbox[a->u8[i]]; 2882 } 2883 } 2884 2885 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2886 { 2887 ppc_avr_t result; 2888 int i; 2889 2890 VECTOR_FOR_INORDER_I(i, u32) { 2891 result.AVRW(i) = b->AVRW(i) ^ 2892 (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^ 2893 AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^ 2894 AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^ 2895 AES_Te3[a->AVRB(AES_shifts[4*i + 3])]); 2896 } 2897 *r = result; 2898 } 2899 2900 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2901 { 2902 ppc_avr_t result; 2903 int i; 2904 2905 VECTOR_FOR_INORDER_I(i, u8) { 2906 result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]); 2907 } 2908 *r = result; 2909 } 2910 2911 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2912 { 2913 /* This differs from what is written in ISA V2.07. The RTL is */ 2914 /* incorrect and will be fixed in V2.07B. */ 2915 int i; 2916 ppc_avr_t tmp; 2917 2918 VECTOR_FOR_INORDER_I(i, u8) { 2919 tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])]; 2920 } 2921 2922 VECTOR_FOR_INORDER_I(i, u32) { 2923 r->AVRW(i) = 2924 AES_imc[tmp.AVRB(4*i + 0)][0] ^ 2925 AES_imc[tmp.AVRB(4*i + 1)][1] ^ 2926 AES_imc[tmp.AVRB(4*i + 2)][2] ^ 2927 AES_imc[tmp.AVRB(4*i + 3)][3]; 2928 } 2929 } 2930 2931 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2932 { 2933 ppc_avr_t result; 2934 int i; 2935 2936 VECTOR_FOR_INORDER_I(i, u8) { 2937 result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]); 2938 } 2939 *r = result; 2940 } 2941 2942 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n))) 2943 #if defined(HOST_WORDS_BIGENDIAN) 2944 #define EL_IDX(i) (i) 2945 #else 2946 #define EL_IDX(i) (3 - (i)) 2947 #endif 2948 2949 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 2950 { 2951 int st = (st_six & 0x10) != 0; 2952 int six = st_six & 0xF; 2953 int i; 2954 2955 VECTOR_FOR_INORDER_I(i, u32) { 2956 if (st == 0) { 2957 if ((six & (0x8 >> i)) == 0) { 2958 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^ 2959 ROTRu32(a->u32[EL_IDX(i)], 18) ^ 2960 (a->u32[EL_IDX(i)] >> 3); 2961 } else { /* six.bit[i] == 1 */ 2962 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^ 2963 ROTRu32(a->u32[EL_IDX(i)], 19) ^ 2964 (a->u32[EL_IDX(i)] >> 10); 2965 } 2966 } else { /* st == 1 */ 2967 if ((six & (0x8 >> i)) == 0) { 2968 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^ 2969 ROTRu32(a->u32[EL_IDX(i)], 13) ^ 2970 ROTRu32(a->u32[EL_IDX(i)], 22); 2971 } else { /* six.bit[i] == 1 */ 2972 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^ 2973 ROTRu32(a->u32[EL_IDX(i)], 11) ^ 2974 ROTRu32(a->u32[EL_IDX(i)], 25); 2975 } 2976 } 2977 } 2978 } 2979 2980 #undef ROTRu32 2981 #undef EL_IDX 2982 2983 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n))) 2984 #if defined(HOST_WORDS_BIGENDIAN) 2985 #define EL_IDX(i) (i) 2986 #else 2987 #define EL_IDX(i) (1 - (i)) 2988 #endif 2989 2990 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 2991 { 2992 int st = (st_six & 0x10) != 0; 2993 int six = st_six & 0xF; 2994 int i; 2995 2996 VECTOR_FOR_INORDER_I(i, u64) { 2997 if (st == 0) { 2998 if ((six & (0x8 >> (2*i))) == 0) { 2999 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^ 3000 ROTRu64(a->u64[EL_IDX(i)], 8) ^ 3001 (a->u64[EL_IDX(i)] >> 7); 3002 } else { /* six.bit[2*i] == 1 */ 3003 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^ 3004 ROTRu64(a->u64[EL_IDX(i)], 61) ^ 3005 (a->u64[EL_IDX(i)] >> 6); 3006 } 3007 } else { /* st == 1 */ 3008 if ((six & (0x8 >> (2*i))) == 0) { 3009 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^ 3010 ROTRu64(a->u64[EL_IDX(i)], 34) ^ 3011 ROTRu64(a->u64[EL_IDX(i)], 39); 3012 } else { /* six.bit[2*i] == 1 */ 3013 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^ 3014 ROTRu64(a->u64[EL_IDX(i)], 18) ^ 3015 ROTRu64(a->u64[EL_IDX(i)], 41); 3016 } 3017 } 3018 } 3019 } 3020 3021 #undef ROTRu64 3022 #undef EL_IDX 3023 3024 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 3025 { 3026 ppc_avr_t result; 3027 int i; 3028 3029 VECTOR_FOR_INORDER_I(i, u8) { 3030 int indexA = c->u8[i] >> 4; 3031 int indexB = c->u8[i] & 0xF; 3032 #if defined(HOST_WORDS_BIGENDIAN) 3033 result.u8[i] = a->u8[indexA] ^ b->u8[indexB]; 3034 #else 3035 result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB]; 3036 #endif 3037 } 3038 *r = result; 3039 } 3040 3041 #undef VECTOR_FOR_INORDER_I 3042 #undef HI_IDX 3043 #undef LO_IDX 3044 3045 /*****************************************************************************/ 3046 /* SPE extension helpers */ 3047 /* Use a table to make this quicker */ 3048 static const uint8_t hbrev[16] = { 3049 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE, 3050 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF, 3051 }; 3052 3053 static inline uint8_t byte_reverse(uint8_t val) 3054 { 3055 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4); 3056 } 3057 3058 static inline uint32_t word_reverse(uint32_t val) 3059 { 3060 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) | 3061 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24); 3062 } 3063 3064 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */ 3065 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2) 3066 { 3067 uint32_t a, b, d, mask; 3068 3069 mask = UINT32_MAX >> (32 - MASKBITS); 3070 a = arg1 & mask; 3071 b = arg2 & mask; 3072 d = word_reverse(1 + word_reverse(a | ~b)); 3073 return (arg1 & ~mask) | (d & b); 3074 } 3075 3076 uint32_t helper_cntlsw32(uint32_t val) 3077 { 3078 if (val & 0x80000000) { 3079 return clz32(~val); 3080 } else { 3081 return clz32(val); 3082 } 3083 } 3084 3085 uint32_t helper_cntlzw32(uint32_t val) 3086 { 3087 return clz32(val); 3088 } 3089 3090 /* 440 specific */ 3091 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 3092 target_ulong low, uint32_t update_Rc) 3093 { 3094 target_ulong mask; 3095 int i; 3096 3097 i = 1; 3098 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3099 if ((high & mask) == 0) { 3100 if (update_Rc) { 3101 env->crf[0] = 0x4; 3102 } 3103 goto done; 3104 } 3105 i++; 3106 } 3107 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3108 if ((low & mask) == 0) { 3109 if (update_Rc) { 3110 env->crf[0] = 0x8; 3111 } 3112 goto done; 3113 } 3114 i++; 3115 } 3116 i = 8; 3117 if (update_Rc) { 3118 env->crf[0] = 0x2; 3119 } 3120 done: 3121 env->xer = (env->xer & ~0x7F) | i; 3122 if (update_Rc) { 3123 env->crf[0] |= xer_so; 3124 } 3125 return i; 3126 } 3127