/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

/* Divide Word Extended Unsigned: the dividend is (ra << 32), the divisor rb */
target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

/* Divide Doubleword Extended Unsigned: the 128-bit dividend is (ra << 64) */
uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/* subtract 1 from each byte, and with inverse, check if MSB is set at each
 * byte.
 * i.e.
((0x00 - 0x01) & ~(0x00)) & 0x80 143 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 144 */ 145 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 146 147 /* When you XOR the pattern and there is a match, that byte will be zero */ 148 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 149 150 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 151 { 152 return hasvalue(rb, ra) ? CRF_GT : 0; 153 } 154 155 #undef pattern 156 #undef haszero 157 #undef hasvalue 158 159 /* Return invalid random number. 160 * 161 * FIXME: Add rng backend or other mechanism to get cryptographically suitable 162 * random number 163 */ 164 target_ulong helper_darn32(void) 165 { 166 return -1; 167 } 168 169 target_ulong helper_darn64(void) 170 { 171 return -1; 172 } 173 174 #endif 175 176 #if defined(TARGET_PPC64) 177 178 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 179 { 180 int i; 181 uint64_t ra = 0; 182 183 for (i = 0; i < 8; i++) { 184 int index = (rs >> (i*8)) & 0xFF; 185 if (index < 64) { 186 if (rb & PPC_BIT(index)) { 187 ra |= 1 << i; 188 } 189 } 190 } 191 return ra; 192 } 193 194 #endif 195 196 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 197 { 198 target_ulong mask = 0xff; 199 target_ulong ra = 0; 200 int i; 201 202 for (i = 0; i < sizeof(target_ulong); i++) { 203 if ((rs & mask) == (rb & mask)) { 204 ra |= mask; 205 } 206 mask <<= 8; 207 } 208 return ra; 209 } 210 211 /* shift right arithmetic helper */ 212 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 213 target_ulong shift) 214 { 215 int32_t ret; 216 217 if (likely(!(shift & 0x20))) { 218 if (likely((uint32_t)shift != 0)) { 219 shift &= 0x1f; 220 ret = (int32_t)value >> shift; 221 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 222 env->ca32 = env->ca = 0; 223 } else { 224 env->ca32 = env->ca = 1; 225 } 226 } else { 227 ret = (int32_t)value; 228 env->ca32 = env->ca = 0; 229 } 230 } else { 231 ret = (int32_t)value >> 31; 232 env->ca32 = env->ca = (ret != 0); 233 } 234 return (target_long)ret; 235 } 236 237 #if defined(TARGET_PPC64) 238 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 239 target_ulong shift) 240 { 241 int64_t ret; 242 243 if (likely(!(shift & 0x40))) { 244 if (likely((uint64_t)shift != 0)) { 245 shift &= 0x3f; 246 ret = (int64_t)value >> shift; 247 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 248 env->ca32 = env->ca = 0; 249 } else { 250 env->ca32 = env->ca = 1; 251 } 252 } else { 253 ret = (int64_t)value; 254 env->ca32 = env->ca = 0; 255 } 256 } else { 257 ret = (int64_t)value >> 63; 258 env->ca32 = env->ca = (ret != 0); 259 } 260 return ret; 261 } 262 #endif 263 264 #if defined(TARGET_PPC64) 265 target_ulong helper_popcntb(target_ulong val) 266 { 267 /* Note that we don't fold past bytes */ 268 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 269 0x5555555555555555ULL); 270 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 271 0x3333333333333333ULL); 272 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 273 0x0f0f0f0f0f0f0f0fULL); 274 return val; 275 } 276 277 target_ulong helper_popcntw(target_ulong val) 278 { 279 /* Note that we don't fold past words. 
*/ 280 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 281 0x5555555555555555ULL); 282 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 283 0x3333333333333333ULL); 284 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 285 0x0f0f0f0f0f0f0f0fULL); 286 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 287 0x00ff00ff00ff00ffULL); 288 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 289 0x0000ffff0000ffffULL); 290 return val; 291 } 292 #else 293 target_ulong helper_popcntb(target_ulong val) 294 { 295 /* Note that we don't fold past bytes */ 296 val = (val & 0x55555555) + ((val >> 1) & 0x55555555); 297 val = (val & 0x33333333) + ((val >> 2) & 0x33333333); 298 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); 299 return val; 300 } 301 #endif 302 303 /*****************************************************************************/ 304 /* PowerPC 601 specific instructions (POWER bridge) */ 305 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2) 306 { 307 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 308 309 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 310 (int32_t)arg2 == 0) { 311 env->spr[SPR_MQ] = 0; 312 return INT32_MIN; 313 } else { 314 env->spr[SPR_MQ] = tmp % arg2; 315 return tmp / (int32_t)arg2; 316 } 317 } 318 319 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1, 320 target_ulong arg2) 321 { 322 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 323 324 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 325 (int32_t)arg2 == 0) { 326 env->so = env->ov = 1; 327 env->spr[SPR_MQ] = 0; 328 return INT32_MIN; 329 } else { 330 env->spr[SPR_MQ] = tmp % arg2; 331 tmp /= (int32_t)arg2; 332 if ((int32_t)tmp != tmp) { 333 env->so = env->ov = 1; 334 } else { 335 env->ov = 0; 336 } 337 return tmp; 338 } 339 } 340 341 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1, 342 target_ulong arg2) 343 { 344 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 345 (int32_t)arg2 == 0) { 346 env->spr[SPR_MQ] = 0; 347 return INT32_MIN; 348 } else { 349 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 350 return (int32_t)arg1 / (int32_t)arg2; 351 } 352 } 353 354 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1, 355 target_ulong arg2) 356 { 357 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 358 (int32_t)arg2 == 0) { 359 env->so = env->ov = 1; 360 env->spr[SPR_MQ] = 0; 361 return INT32_MIN; 362 } else { 363 env->ov = 0; 364 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 365 return (int32_t)arg1 / (int32_t)arg2; 366 } 367 } 368 369 /*****************************************************************************/ 370 /* 602 specific instructions */ 371 /* mfrom is the most crazy instruction ever seen, imho ! */ 372 /* Real implementation uses a ROM table. 
Do the same */
/* Extremely decomposed:
 * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.inc.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#else
#define HI_IDX 1
#define LO_IDX 0
#endif

#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
#if defined(HOST_WORDS_BIGENDIAN)
    env->vscr = r->u32[3];
#else
    env->vscr = r->u32[0];
#endif
    set_flush_to_zero(vscr_nj, &env->vec_status);
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res
>> 8; 517 r->u64[LO_IDX] = res & 1; 518 r->u64[HI_IDX] = 0; 519 } 520 521 #define VARITH_DO(name, op, element) \ 522 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 523 { \ 524 int i; \ 525 \ 526 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 527 r->element[i] = a->element[i] op b->element[i]; \ 528 } \ 529 } 530 #define VARITH(suffix, element) \ 531 VARITH_DO(add##suffix, +, element) \ 532 VARITH_DO(sub##suffix, -, element) 533 VARITH(ubm, u8) 534 VARITH(uhm, u16) 535 VARITH(uwm, u32) 536 VARITH(udm, u64) 537 VARITH_DO(muluwm, *, u32) 538 #undef VARITH_DO 539 #undef VARITH 540 541 #define VARITHFP(suffix, func) \ 542 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 543 ppc_avr_t *b) \ 544 { \ 545 int i; \ 546 \ 547 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 548 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 549 } \ 550 } 551 VARITHFP(addfp, float32_add) 552 VARITHFP(subfp, float32_sub) 553 VARITHFP(minfp, float32_min) 554 VARITHFP(maxfp, float32_max) 555 #undef VARITHFP 556 557 #define VARITHFPFMA(suffix, type) \ 558 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 559 ppc_avr_t *b, ppc_avr_t *c) \ 560 { \ 561 int i; \ 562 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 563 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 564 type, &env->vec_status); \ 565 } \ 566 } 567 VARITHFPFMA(maddfp, 0); 568 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 569 #undef VARITHFPFMA 570 571 #define VARITHSAT_CASE(type, op, cvt, element) \ 572 { \ 573 type result = (type)a->element[i] op (type)b->element[i]; \ 574 r->element[i] = cvt(result, &sat); \ 575 } 576 577 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 578 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 579 ppc_avr_t *b) \ 580 { \ 581 int sat = 0; \ 582 int i; \ 583 \ 584 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 585 switch (sizeof(r->element[0])) { \ 586 case 1: \ 587 VARITHSAT_CASE(optype, op, cvt, element); \ 588 break; \ 589 case 2: \ 590 VARITHSAT_CASE(optype, op, cvt, element); \ 591 break; \ 592 case 4: \ 593 VARITHSAT_CASE(optype, op, cvt, element); \ 594 break; \ 595 } \ 596 } \ 597 if (sat) { \ 598 env->vscr |= (1 << VSCR_SAT); \ 599 } \ 600 } 601 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 602 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 603 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 604 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 605 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 606 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 607 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 608 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 609 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 610 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 611 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 612 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 613 #undef VARITHSAT_CASE 614 #undef VARITHSAT_DO 615 #undef VARITHSAT_SIGNED 616 #undef VARITHSAT_UNSIGNED 617 618 #define VAVG_DO(name, element, etype) \ 619 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 620 { \ 621 int i; \ 622 \ 623 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 624 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 625 r->element[i] = x >> 1; \ 626 } \ 627 } 628 629 #define VAVG(type, signed_element, signed_type, unsigned_element, \ 630 unsigned_type) \ 631 VAVG_DO(avgs##type, signed_element, signed_type) \ 632 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 
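
/*
 * A worked example of the widened average used by VAVG_DO above, assuming
 * the unsigned byte variant (vavgub): the sum is formed in a type twice as
 * wide as the element, so it cannot wrap before the rounding shift.
 *
 *     a->u8[i] = 0xFF, b->u8[i] = 0xFE
 *     (uint16_t)0xFF + (uint16_t)0xFE + 1 = 0x1FE
 *     0x1FE >> 1 = 0xFF    (average rounded up, still fits in a byte)
 */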
633 VAVG(b, s8, int16_t, u8, uint16_t) 634 VAVG(h, s16, int32_t, u16, uint32_t) 635 VAVG(w, s32, int64_t, u32, uint64_t) 636 #undef VAVG_DO 637 #undef VAVG 638 639 #define VABSDU_DO(name, element) \ 640 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 641 { \ 642 int i; \ 643 \ 644 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 645 r->element[i] = (a->element[i] > b->element[i]) ? \ 646 (a->element[i] - b->element[i]) : \ 647 (b->element[i] - a->element[i]); \ 648 } \ 649 } 650 651 /* VABSDU - Vector absolute difference unsigned 652 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 653 * element - element type to access from vector 654 */ 655 #define VABSDU(type, element) \ 656 VABSDU_DO(absdu##type, element) 657 VABSDU(b, u8) 658 VABSDU(h, u16) 659 VABSDU(w, u32) 660 #undef VABSDU_DO 661 #undef VABSDU 662 663 #define VCF(suffix, cvt, element) \ 664 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 665 ppc_avr_t *b, uint32_t uim) \ 666 { \ 667 int i; \ 668 \ 669 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 670 float32 t = cvt(b->element[i], &env->vec_status); \ 671 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 672 } \ 673 } 674 VCF(ux, uint32_to_float32, u32) 675 VCF(sx, int32_to_float32, s32) 676 #undef VCF 677 678 #define VCMP_DO(suffix, compare, element, record) \ 679 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 680 ppc_avr_t *a, ppc_avr_t *b) \ 681 { \ 682 uint64_t ones = (uint64_t)-1; \ 683 uint64_t all = ones; \ 684 uint64_t none = 0; \ 685 int i; \ 686 \ 687 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 688 uint64_t result = (a->element[i] compare b->element[i] ? \ 689 ones : 0x0); \ 690 switch (sizeof(a->element[0])) { \ 691 case 8: \ 692 r->u64[i] = result; \ 693 break; \ 694 case 4: \ 695 r->u32[i] = result; \ 696 break; \ 697 case 2: \ 698 r->u16[i] = result; \ 699 break; \ 700 case 1: \ 701 r->u8[i] = result; \ 702 break; \ 703 } \ 704 all &= result; \ 705 none |= result; \ 706 } \ 707 if (record) { \ 708 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 709 } \ 710 } 711 #define VCMP(suffix, compare, element) \ 712 VCMP_DO(suffix, compare, element, 0) \ 713 VCMP_DO(suffix##_dot, compare, element, 1) 714 VCMP(equb, ==, u8) 715 VCMP(equh, ==, u16) 716 VCMP(equw, ==, u32) 717 VCMP(equd, ==, u64) 718 VCMP(gtub, >, u8) 719 VCMP(gtuh, >, u16) 720 VCMP(gtuw, >, u32) 721 VCMP(gtud, >, u64) 722 VCMP(gtsb, >, s8) 723 VCMP(gtsh, >, s16) 724 VCMP(gtsw, >, s32) 725 VCMP(gtsd, >, s64) 726 #undef VCMP_DO 727 #undef VCMP 728 729 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \ 730 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \ 731 ppc_avr_t *a, ppc_avr_t *b) \ 732 { \ 733 etype ones = (etype)-1; \ 734 etype all = ones; \ 735 etype result, none = 0; \ 736 int i; \ 737 \ 738 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 739 if (cmpzero) { \ 740 result = ((a->element[i] == 0) \ 741 || (b->element[i] == 0) \ 742 || (a->element[i] != b->element[i]) ? \ 743 ones : 0x0); \ 744 } else { \ 745 result = (a->element[i] != b->element[i]) ? 
ones : 0x0; \ 746 } \ 747 r->element[i] = result; \ 748 all &= result; \ 749 none |= result; \ 750 } \ 751 if (record) { \ 752 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 753 } \ 754 } 755 756 /* VCMPNEZ - Vector compare not equal to zero 757 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word) 758 * element - element type to access from vector 759 */ 760 #define VCMPNE(suffix, element, etype, cmpzero) \ 761 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \ 762 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1) 763 VCMPNE(zb, u8, uint8_t, 1) 764 VCMPNE(zh, u16, uint16_t, 1) 765 VCMPNE(zw, u32, uint32_t, 1) 766 VCMPNE(b, u8, uint8_t, 0) 767 VCMPNE(h, u16, uint16_t, 0) 768 VCMPNE(w, u32, uint32_t, 0) 769 #undef VCMPNE_DO 770 #undef VCMPNE 771 772 #define VCMPFP_DO(suffix, compare, order, record) \ 773 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 774 ppc_avr_t *a, ppc_avr_t *b) \ 775 { \ 776 uint32_t ones = (uint32_t)-1; \ 777 uint32_t all = ones; \ 778 uint32_t none = 0; \ 779 int i; \ 780 \ 781 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 782 uint32_t result; \ 783 int rel = float32_compare_quiet(a->f32[i], b->f32[i], \ 784 &env->vec_status); \ 785 if (rel == float_relation_unordered) { \ 786 result = 0; \ 787 } else if (rel compare order) { \ 788 result = ones; \ 789 } else { \ 790 result = 0; \ 791 } \ 792 r->u32[i] = result; \ 793 all &= result; \ 794 none |= result; \ 795 } \ 796 if (record) { \ 797 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 798 } \ 799 } 800 #define VCMPFP(suffix, compare, order) \ 801 VCMPFP_DO(suffix, compare, order, 0) \ 802 VCMPFP_DO(suffix##_dot, compare, order, 1) 803 VCMPFP(eqfp, ==, float_relation_equal) 804 VCMPFP(gefp, !=, float_relation_less) 805 VCMPFP(gtfp, ==, float_relation_greater) 806 #undef VCMPFP_DO 807 #undef VCMPFP 808 809 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 810 ppc_avr_t *a, ppc_avr_t *b, int record) 811 { 812 int i; 813 int all_in = 0; 814 815 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 816 int le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 817 &env->vec_status); 818 if (le_rel == float_relation_unordered) { 819 r->u32[i] = 0xc0000000; 820 all_in = 1; 821 } else { 822 float32 bneg = float32_chs(b->f32[i]); 823 int ge_rel = float32_compare_quiet(a->f32[i], bneg, 824 &env->vec_status); 825 int le = le_rel != float_relation_greater; 826 int ge = ge_rel != float_relation_less; 827 828 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 829 all_in |= (!le | !ge); 830 } 831 } 832 if (record) { 833 env->crf[6] = (all_in == 0) << 1; 834 } 835 } 836 837 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 838 { 839 vcmpbfp_internal(env, r, a, b, 0); 840 } 841 842 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 843 ppc_avr_t *b) 844 { 845 vcmpbfp_internal(env, r, a, b, 1); 846 } 847 848 #define VCT(suffix, satcvt, element) \ 849 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 850 ppc_avr_t *b, uint32_t uim) \ 851 { \ 852 int i; \ 853 int sat = 0; \ 854 float_status s = env->vec_status; \ 855 \ 856 set_float_rounding_mode(float_round_to_zero, &s); \ 857 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 858 if (float32_is_any_nan(b->f32[i])) { \ 859 r->element[i] = 0; \ 860 } else { \ 861 float64 t = float32_to_float64(b->f32[i], &s); \ 862 int64_t j; \ 863 \ 864 t = float64_scalbn(t, uim, &s); \ 865 j = float64_to_int64(t, &s); \ 866 r->element[i] = satcvt(j, &sat); \ 867 } \ 868 } \ 869 if (sat) { \ 870 env->vscr |= (1 
<< VSCR_SAT); \ 871 } \ 872 } 873 VCT(uxs, cvtsduw, u32) 874 VCT(sxs, cvtsdsw, s32) 875 #undef VCT 876 877 target_ulong helper_vclzlsbb(ppc_avr_t *r) 878 { 879 target_ulong count = 0; 880 int i; 881 VECTOR_FOR_INORDER_I(i, u8) { 882 if (r->u8[i] & 0x01) { 883 break; 884 } 885 count++; 886 } 887 return count; 888 } 889 890 target_ulong helper_vctzlsbb(ppc_avr_t *r) 891 { 892 target_ulong count = 0; 893 int i; 894 #if defined(HOST_WORDS_BIGENDIAN) 895 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 896 #else 897 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 898 #endif 899 if (r->u8[i] & 0x01) { 900 break; 901 } 902 count++; 903 } 904 return count; 905 } 906 907 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 908 ppc_avr_t *b, ppc_avr_t *c) 909 { 910 int sat = 0; 911 int i; 912 913 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 914 int32_t prod = a->s16[i] * b->s16[i]; 915 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 916 917 r->s16[i] = cvtswsh(t, &sat); 918 } 919 920 if (sat) { 921 env->vscr |= (1 << VSCR_SAT); 922 } 923 } 924 925 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 926 ppc_avr_t *b, ppc_avr_t *c) 927 { 928 int sat = 0; 929 int i; 930 931 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 932 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 933 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 934 r->s16[i] = cvtswsh(t, &sat); 935 } 936 937 if (sat) { 938 env->vscr |= (1 << VSCR_SAT); 939 } 940 } 941 942 #define VMINMAX_DO(name, compare, element) \ 943 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 944 { \ 945 int i; \ 946 \ 947 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 948 if (a->element[i] compare b->element[i]) { \ 949 r->element[i] = b->element[i]; \ 950 } else { \ 951 r->element[i] = a->element[i]; \ 952 } \ 953 } \ 954 } 955 #define VMINMAX(suffix, element) \ 956 VMINMAX_DO(min##suffix, >, element) \ 957 VMINMAX_DO(max##suffix, <, element) 958 VMINMAX(sb, s8) 959 VMINMAX(sh, s16) 960 VMINMAX(sw, s32) 961 VMINMAX(sd, s64) 962 VMINMAX(ub, u8) 963 VMINMAX(uh, u16) 964 VMINMAX(uw, u32) 965 VMINMAX(ud, u64) 966 #undef VMINMAX_DO 967 #undef VMINMAX 968 969 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 970 { 971 int i; 972 973 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 974 int32_t prod = a->s16[i] * b->s16[i]; 975 r->s16[i] = (int16_t) (prod + c->s16[i]); 976 } 977 } 978 979 #define VMRG_DO(name, element, highp) \ 980 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 981 { \ 982 ppc_avr_t result; \ 983 int i; \ 984 size_t n_elems = ARRAY_SIZE(r->element); \ 985 \ 986 for (i = 0; i < n_elems / 2; i++) { \ 987 if (highp) { \ 988 result.element[i*2+HI_IDX] = a->element[i]; \ 989 result.element[i*2+LO_IDX] = b->element[i]; \ 990 } else { \ 991 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \ 992 b->element[n_elems - i - 1]; \ 993 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \ 994 a->element[n_elems - i - 1]; \ 995 } \ 996 } \ 997 *r = result; \ 998 } 999 #if defined(HOST_WORDS_BIGENDIAN) 1000 #define MRGHI 0 1001 #define MRGLO 1 1002 #else 1003 #define MRGHI 1 1004 #define MRGLO 0 1005 #endif 1006 #define VMRG(suffix, element) \ 1007 VMRG_DO(mrgl##suffix, element, MRGHI) \ 1008 VMRG_DO(mrgh##suffix, element, MRGLO) 1009 VMRG(b, u8) 1010 VMRG(h, u16) 1011 VMRG(w, u32) 1012 #undef VMRG_DO 1013 #undef VMRG 1014 #undef MRGHI 1015 #undef MRGLO 1016 1017 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1018 ppc_avr_t *b, ppc_avr_t *c) 1019 { 1020 int32_t prod[16]; 1021 int 
i; 1022 1023 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 1024 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 1025 } 1026 1027 VECTOR_FOR_INORDER_I(i, s32) { 1028 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 1029 prod[4 * i + 2] + prod[4 * i + 3]; 1030 } 1031 } 1032 1033 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1034 ppc_avr_t *b, ppc_avr_t *c) 1035 { 1036 int32_t prod[8]; 1037 int i; 1038 1039 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1040 prod[i] = a->s16[i] * b->s16[i]; 1041 } 1042 1043 VECTOR_FOR_INORDER_I(i, s32) { 1044 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1045 } 1046 } 1047 1048 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1049 ppc_avr_t *b, ppc_avr_t *c) 1050 { 1051 int32_t prod[8]; 1052 int i; 1053 int sat = 0; 1054 1055 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1056 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1057 } 1058 1059 VECTOR_FOR_INORDER_I(i, s32) { 1060 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1061 1062 r->u32[i] = cvtsdsw(t, &sat); 1063 } 1064 1065 if (sat) { 1066 env->vscr |= (1 << VSCR_SAT); 1067 } 1068 } 1069 1070 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1071 ppc_avr_t *b, ppc_avr_t *c) 1072 { 1073 uint16_t prod[16]; 1074 int i; 1075 1076 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1077 prod[i] = a->u8[i] * b->u8[i]; 1078 } 1079 1080 VECTOR_FOR_INORDER_I(i, u32) { 1081 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1082 prod[4 * i + 2] + prod[4 * i + 3]; 1083 } 1084 } 1085 1086 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1087 ppc_avr_t *b, ppc_avr_t *c) 1088 { 1089 uint32_t prod[8]; 1090 int i; 1091 1092 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1093 prod[i] = a->u16[i] * b->u16[i]; 1094 } 1095 1096 VECTOR_FOR_INORDER_I(i, u32) { 1097 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1098 } 1099 } 1100 1101 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1102 ppc_avr_t *b, ppc_avr_t *c) 1103 { 1104 uint32_t prod[8]; 1105 int i; 1106 int sat = 0; 1107 1108 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1109 prod[i] = a->u16[i] * b->u16[i]; 1110 } 1111 1112 VECTOR_FOR_INORDER_I(i, s32) { 1113 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1114 1115 r->u32[i] = cvtuduw(t, &sat); 1116 } 1117 1118 if (sat) { 1119 env->vscr |= (1 << VSCR_SAT); 1120 } 1121 } 1122 1123 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \ 1124 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1125 { \ 1126 int i; \ 1127 \ 1128 VECTOR_FOR_INORDER_I(i, prod_element) { \ 1129 if (evenp) { \ 1130 r->prod_element[i] = \ 1131 (cast)a->mul_element[i * 2 + HI_IDX] * \ 1132 (cast)b->mul_element[i * 2 + HI_IDX]; \ 1133 } else { \ 1134 r->prod_element[i] = \ 1135 (cast)a->mul_element[i * 2 + LO_IDX] * \ 1136 (cast)b->mul_element[i * 2 + LO_IDX]; \ 1137 } \ 1138 } \ 1139 } 1140 #define VMUL(suffix, mul_element, prod_element, cast) \ 1141 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \ 1142 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0) 1143 VMUL(sb, s8, s16, int16_t) 1144 VMUL(sh, s16, s32, int32_t) 1145 VMUL(sw, s32, s64, int64_t) 1146 VMUL(ub, u8, u16, uint16_t) 1147 VMUL(uh, u16, u32, uint32_t) 1148 VMUL(uw, u32, u64, uint64_t) 1149 #undef VMUL_DO 1150 #undef VMUL 1151 1152 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1153 ppc_avr_t *c) 1154 { 1155 ppc_avr_t result; 1156 int i; 1157 1158 VECTOR_FOR_INORDER_I(i, u8) { 1159 int s = c->u8[i] & 
0x1f; 1160 #if defined(HOST_WORDS_BIGENDIAN) 1161 int index = s & 0xf; 1162 #else 1163 int index = 15 - (s & 0xf); 1164 #endif 1165 1166 if (s & 0x10) { 1167 result.u8[i] = b->u8[index]; 1168 } else { 1169 result.u8[i] = a->u8[index]; 1170 } 1171 } 1172 *r = result; 1173 } 1174 1175 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1176 ppc_avr_t *c) 1177 { 1178 ppc_avr_t result; 1179 int i; 1180 1181 VECTOR_FOR_INORDER_I(i, u8) { 1182 int s = c->u8[i] & 0x1f; 1183 #if defined(HOST_WORDS_BIGENDIAN) 1184 int index = 15 - (s & 0xf); 1185 #else 1186 int index = s & 0xf; 1187 #endif 1188 1189 if (s & 0x10) { 1190 result.u8[i] = a->u8[index]; 1191 } else { 1192 result.u8[i] = b->u8[index]; 1193 } 1194 } 1195 *r = result; 1196 } 1197 1198 #if defined(HOST_WORDS_BIGENDIAN) 1199 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1200 #define VBPERMD_INDEX(i) (i) 1201 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1202 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) 1203 #else 1204 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)]) 1205 #define VBPERMD_INDEX(i) (1 - i) 1206 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1207 #define EXTRACT_BIT(avr, i, index) \ 1208 (extract64((avr)->u64[1 - i], 63 - index, 1)) 1209 #endif 1210 1211 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1212 { 1213 int i, j; 1214 ppc_avr_t result = { .u64 = { 0, 0 } }; 1215 VECTOR_FOR_INORDER_I(i, u64) { 1216 for (j = 0; j < 8; j++) { 1217 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1218 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1219 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1220 } 1221 } 1222 } 1223 *r = result; 1224 } 1225 1226 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1227 { 1228 int i; 1229 uint64_t perm = 0; 1230 1231 VECTOR_FOR_INORDER_I(i, u8) { 1232 int index = VBPERMQ_INDEX(b, i); 1233 1234 if (index < 128) { 1235 uint64_t mask = (1ull << (63-(index & 0x3F))); 1236 if (a->u64[VBPERMQ_DW(index)] & mask) { 1237 perm |= (0x8000 >> i); 1238 } 1239 } 1240 } 1241 1242 r->u64[HI_IDX] = perm; 1243 r->u64[LO_IDX] = 0; 1244 } 1245 1246 #undef VBPERMQ_INDEX 1247 #undef VBPERMQ_DW 1248 1249 static const uint64_t VGBBD_MASKS[256] = { 1250 0x0000000000000000ull, /* 00 */ 1251 0x0000000000000080ull, /* 01 */ 1252 0x0000000000008000ull, /* 02 */ 1253 0x0000000000008080ull, /* 03 */ 1254 0x0000000000800000ull, /* 04 */ 1255 0x0000000000800080ull, /* 05 */ 1256 0x0000000000808000ull, /* 06 */ 1257 0x0000000000808080ull, /* 07 */ 1258 0x0000000080000000ull, /* 08 */ 1259 0x0000000080000080ull, /* 09 */ 1260 0x0000000080008000ull, /* 0A */ 1261 0x0000000080008080ull, /* 0B */ 1262 0x0000000080800000ull, /* 0C */ 1263 0x0000000080800080ull, /* 0D */ 1264 0x0000000080808000ull, /* 0E */ 1265 0x0000000080808080ull, /* 0F */ 1266 0x0000008000000000ull, /* 10 */ 1267 0x0000008000000080ull, /* 11 */ 1268 0x0000008000008000ull, /* 12 */ 1269 0x0000008000008080ull, /* 13 */ 1270 0x0000008000800000ull, /* 14 */ 1271 0x0000008000800080ull, /* 15 */ 1272 0x0000008000808000ull, /* 16 */ 1273 0x0000008000808080ull, /* 17 */ 1274 0x0000008080000000ull, /* 18 */ 1275 0x0000008080000080ull, /* 19 */ 1276 0x0000008080008000ull, /* 1A */ 1277 0x0000008080008080ull, /* 1B */ 1278 0x0000008080800000ull, /* 1C */ 1279 0x0000008080800080ull, /* 1D */ 1280 0x0000008080808000ull, /* 1E */ 1281 0x0000008080808080ull, /* 1F */ 1282 0x0000800000000000ull, /* 20 */ 1283 0x0000800000000080ull, /* 21 */ 1284 0x0000800000008000ull, /* 22 */ 1285 0x0000800000008080ull, /* 
23 */ 1286 0x0000800000800000ull, /* 24 */ 1287 0x0000800000800080ull, /* 25 */ 1288 0x0000800000808000ull, /* 26 */ 1289 0x0000800000808080ull, /* 27 */ 1290 0x0000800080000000ull, /* 28 */ 1291 0x0000800080000080ull, /* 29 */ 1292 0x0000800080008000ull, /* 2A */ 1293 0x0000800080008080ull, /* 2B */ 1294 0x0000800080800000ull, /* 2C */ 1295 0x0000800080800080ull, /* 2D */ 1296 0x0000800080808000ull, /* 2E */ 1297 0x0000800080808080ull, /* 2F */ 1298 0x0000808000000000ull, /* 30 */ 1299 0x0000808000000080ull, /* 31 */ 1300 0x0000808000008000ull, /* 32 */ 1301 0x0000808000008080ull, /* 33 */ 1302 0x0000808000800000ull, /* 34 */ 1303 0x0000808000800080ull, /* 35 */ 1304 0x0000808000808000ull, /* 36 */ 1305 0x0000808000808080ull, /* 37 */ 1306 0x0000808080000000ull, /* 38 */ 1307 0x0000808080000080ull, /* 39 */ 1308 0x0000808080008000ull, /* 3A */ 1309 0x0000808080008080ull, /* 3B */ 1310 0x0000808080800000ull, /* 3C */ 1311 0x0000808080800080ull, /* 3D */ 1312 0x0000808080808000ull, /* 3E */ 1313 0x0000808080808080ull, /* 3F */ 1314 0x0080000000000000ull, /* 40 */ 1315 0x0080000000000080ull, /* 41 */ 1316 0x0080000000008000ull, /* 42 */ 1317 0x0080000000008080ull, /* 43 */ 1318 0x0080000000800000ull, /* 44 */ 1319 0x0080000000800080ull, /* 45 */ 1320 0x0080000000808000ull, /* 46 */ 1321 0x0080000000808080ull, /* 47 */ 1322 0x0080000080000000ull, /* 48 */ 1323 0x0080000080000080ull, /* 49 */ 1324 0x0080000080008000ull, /* 4A */ 1325 0x0080000080008080ull, /* 4B */ 1326 0x0080000080800000ull, /* 4C */ 1327 0x0080000080800080ull, /* 4D */ 1328 0x0080000080808000ull, /* 4E */ 1329 0x0080000080808080ull, /* 4F */ 1330 0x0080008000000000ull, /* 50 */ 1331 0x0080008000000080ull, /* 51 */ 1332 0x0080008000008000ull, /* 52 */ 1333 0x0080008000008080ull, /* 53 */ 1334 0x0080008000800000ull, /* 54 */ 1335 0x0080008000800080ull, /* 55 */ 1336 0x0080008000808000ull, /* 56 */ 1337 0x0080008000808080ull, /* 57 */ 1338 0x0080008080000000ull, /* 58 */ 1339 0x0080008080000080ull, /* 59 */ 1340 0x0080008080008000ull, /* 5A */ 1341 0x0080008080008080ull, /* 5B */ 1342 0x0080008080800000ull, /* 5C */ 1343 0x0080008080800080ull, /* 5D */ 1344 0x0080008080808000ull, /* 5E */ 1345 0x0080008080808080ull, /* 5F */ 1346 0x0080800000000000ull, /* 60 */ 1347 0x0080800000000080ull, /* 61 */ 1348 0x0080800000008000ull, /* 62 */ 1349 0x0080800000008080ull, /* 63 */ 1350 0x0080800000800000ull, /* 64 */ 1351 0x0080800000800080ull, /* 65 */ 1352 0x0080800000808000ull, /* 66 */ 1353 0x0080800000808080ull, /* 67 */ 1354 0x0080800080000000ull, /* 68 */ 1355 0x0080800080000080ull, /* 69 */ 1356 0x0080800080008000ull, /* 6A */ 1357 0x0080800080008080ull, /* 6B */ 1358 0x0080800080800000ull, /* 6C */ 1359 0x0080800080800080ull, /* 6D */ 1360 0x0080800080808000ull, /* 6E */ 1361 0x0080800080808080ull, /* 6F */ 1362 0x0080808000000000ull, /* 70 */ 1363 0x0080808000000080ull, /* 71 */ 1364 0x0080808000008000ull, /* 72 */ 1365 0x0080808000008080ull, /* 73 */ 1366 0x0080808000800000ull, /* 74 */ 1367 0x0080808000800080ull, /* 75 */ 1368 0x0080808000808000ull, /* 76 */ 1369 0x0080808000808080ull, /* 77 */ 1370 0x0080808080000000ull, /* 78 */ 1371 0x0080808080000080ull, /* 79 */ 1372 0x0080808080008000ull, /* 7A */ 1373 0x0080808080008080ull, /* 7B */ 1374 0x0080808080800000ull, /* 7C */ 1375 0x0080808080800080ull, /* 7D */ 1376 0x0080808080808000ull, /* 7E */ 1377 0x0080808080808080ull, /* 7F */ 1378 0x8000000000000000ull, /* 80 */ 1379 0x8000000000000080ull, /* 81 */ 1380 0x8000000000008000ull, /* 82 */ 1381 0x8000000000008080ull, /* 83 
*/ 1382 0x8000000000800000ull, /* 84 */ 1383 0x8000000000800080ull, /* 85 */ 1384 0x8000000000808000ull, /* 86 */ 1385 0x8000000000808080ull, /* 87 */ 1386 0x8000000080000000ull, /* 88 */ 1387 0x8000000080000080ull, /* 89 */ 1388 0x8000000080008000ull, /* 8A */ 1389 0x8000000080008080ull, /* 8B */ 1390 0x8000000080800000ull, /* 8C */ 1391 0x8000000080800080ull, /* 8D */ 1392 0x8000000080808000ull, /* 8E */ 1393 0x8000000080808080ull, /* 8F */ 1394 0x8000008000000000ull, /* 90 */ 1395 0x8000008000000080ull, /* 91 */ 1396 0x8000008000008000ull, /* 92 */ 1397 0x8000008000008080ull, /* 93 */ 1398 0x8000008000800000ull, /* 94 */ 1399 0x8000008000800080ull, /* 95 */ 1400 0x8000008000808000ull, /* 96 */ 1401 0x8000008000808080ull, /* 97 */ 1402 0x8000008080000000ull, /* 98 */ 1403 0x8000008080000080ull, /* 99 */ 1404 0x8000008080008000ull, /* 9A */ 1405 0x8000008080008080ull, /* 9B */ 1406 0x8000008080800000ull, /* 9C */ 1407 0x8000008080800080ull, /* 9D */ 1408 0x8000008080808000ull, /* 9E */ 1409 0x8000008080808080ull, /* 9F */ 1410 0x8000800000000000ull, /* A0 */ 1411 0x8000800000000080ull, /* A1 */ 1412 0x8000800000008000ull, /* A2 */ 1413 0x8000800000008080ull, /* A3 */ 1414 0x8000800000800000ull, /* A4 */ 1415 0x8000800000800080ull, /* A5 */ 1416 0x8000800000808000ull, /* A6 */ 1417 0x8000800000808080ull, /* A7 */ 1418 0x8000800080000000ull, /* A8 */ 1419 0x8000800080000080ull, /* A9 */ 1420 0x8000800080008000ull, /* AA */ 1421 0x8000800080008080ull, /* AB */ 1422 0x8000800080800000ull, /* AC */ 1423 0x8000800080800080ull, /* AD */ 1424 0x8000800080808000ull, /* AE */ 1425 0x8000800080808080ull, /* AF */ 1426 0x8000808000000000ull, /* B0 */ 1427 0x8000808000000080ull, /* B1 */ 1428 0x8000808000008000ull, /* B2 */ 1429 0x8000808000008080ull, /* B3 */ 1430 0x8000808000800000ull, /* B4 */ 1431 0x8000808000800080ull, /* B5 */ 1432 0x8000808000808000ull, /* B6 */ 1433 0x8000808000808080ull, /* B7 */ 1434 0x8000808080000000ull, /* B8 */ 1435 0x8000808080000080ull, /* B9 */ 1436 0x8000808080008000ull, /* BA */ 1437 0x8000808080008080ull, /* BB */ 1438 0x8000808080800000ull, /* BC */ 1439 0x8000808080800080ull, /* BD */ 1440 0x8000808080808000ull, /* BE */ 1441 0x8000808080808080ull, /* BF */ 1442 0x8080000000000000ull, /* C0 */ 1443 0x8080000000000080ull, /* C1 */ 1444 0x8080000000008000ull, /* C2 */ 1445 0x8080000000008080ull, /* C3 */ 1446 0x8080000000800000ull, /* C4 */ 1447 0x8080000000800080ull, /* C5 */ 1448 0x8080000000808000ull, /* C6 */ 1449 0x8080000000808080ull, /* C7 */ 1450 0x8080000080000000ull, /* C8 */ 1451 0x8080000080000080ull, /* C9 */ 1452 0x8080000080008000ull, /* CA */ 1453 0x8080000080008080ull, /* CB */ 1454 0x8080000080800000ull, /* CC */ 1455 0x8080000080800080ull, /* CD */ 1456 0x8080000080808000ull, /* CE */ 1457 0x8080000080808080ull, /* CF */ 1458 0x8080008000000000ull, /* D0 */ 1459 0x8080008000000080ull, /* D1 */ 1460 0x8080008000008000ull, /* D2 */ 1461 0x8080008000008080ull, /* D3 */ 1462 0x8080008000800000ull, /* D4 */ 1463 0x8080008000800080ull, /* D5 */ 1464 0x8080008000808000ull, /* D6 */ 1465 0x8080008000808080ull, /* D7 */ 1466 0x8080008080000000ull, /* D8 */ 1467 0x8080008080000080ull, /* D9 */ 1468 0x8080008080008000ull, /* DA */ 1469 0x8080008080008080ull, /* DB */ 1470 0x8080008080800000ull, /* DC */ 1471 0x8080008080800080ull, /* DD */ 1472 0x8080008080808000ull, /* DE */ 1473 0x8080008080808080ull, /* DF */ 1474 0x8080800000000000ull, /* E0 */ 1475 0x8080800000000080ull, /* E1 */ 1476 0x8080800000008000ull, /* E2 */ 1477 0x8080800000008080ull, /* E3 */ 
1478 0x8080800000800000ull, /* E4 */ 1479 0x8080800000800080ull, /* E5 */ 1480 0x8080800000808000ull, /* E6 */ 1481 0x8080800000808080ull, /* E7 */ 1482 0x8080800080000000ull, /* E8 */ 1483 0x8080800080000080ull, /* E9 */ 1484 0x8080800080008000ull, /* EA */ 1485 0x8080800080008080ull, /* EB */ 1486 0x8080800080800000ull, /* EC */ 1487 0x8080800080800080ull, /* ED */ 1488 0x8080800080808000ull, /* EE */ 1489 0x8080800080808080ull, /* EF */ 1490 0x8080808000000000ull, /* F0 */ 1491 0x8080808000000080ull, /* F1 */ 1492 0x8080808000008000ull, /* F2 */ 1493 0x8080808000008080ull, /* F3 */ 1494 0x8080808000800000ull, /* F4 */ 1495 0x8080808000800080ull, /* F5 */ 1496 0x8080808000808000ull, /* F6 */ 1497 0x8080808000808080ull, /* F7 */ 1498 0x8080808080000000ull, /* F8 */ 1499 0x8080808080000080ull, /* F9 */ 1500 0x8080808080008000ull, /* FA */ 1501 0x8080808080008080ull, /* FB */ 1502 0x8080808080800000ull, /* FC */ 1503 0x8080808080800080ull, /* FD */ 1504 0x8080808080808000ull, /* FE */ 1505 0x8080808080808080ull, /* FF */ 1506 }; 1507 1508 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b) 1509 { 1510 int i; 1511 uint64_t t[2] = { 0, 0 }; 1512 1513 VECTOR_FOR_INORDER_I(i, u8) { 1514 #if defined(HOST_WORDS_BIGENDIAN) 1515 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7); 1516 #else 1517 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7)); 1518 #endif 1519 } 1520 1521 r->u64[0] = t[0]; 1522 r->u64[1] = t[1]; 1523 } 1524 1525 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1526 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1527 { \ 1528 int i, j; \ 1529 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \ 1530 \ 1531 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1532 prod[i] = 0; \ 1533 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1534 if (a->srcfld[i] & (1ull<<j)) { \ 1535 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1536 } \ 1537 } \ 1538 } \ 1539 \ 1540 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1541 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \ 1542 } \ 1543 } 1544 1545 PMSUM(vpmsumb, u8, u16, uint16_t) 1546 PMSUM(vpmsumh, u16, u32, uint32_t) 1547 PMSUM(vpmsumw, u32, u64, uint64_t) 1548 1549 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1550 { 1551 1552 #ifdef CONFIG_INT128 1553 int i, j; 1554 __uint128_t prod[2]; 1555 1556 VECTOR_FOR_INORDER_I(i, u64) { 1557 prod[i] = 0; 1558 for (j = 0; j < 64; j++) { 1559 if (a->u64[i] & (1ull<<j)) { 1560 prod[i] ^= (((__uint128_t)b->u64[i]) << j); 1561 } 1562 } 1563 } 1564 1565 r->u128 = prod[0] ^ prod[1]; 1566 1567 #else 1568 int i, j; 1569 ppc_avr_t prod[2]; 1570 1571 VECTOR_FOR_INORDER_I(i, u64) { 1572 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0; 1573 for (j = 0; j < 64; j++) { 1574 if (a->u64[i] & (1ull<<j)) { 1575 ppc_avr_t bshift; 1576 if (j == 0) { 1577 bshift.u64[HI_IDX] = 0; 1578 bshift.u64[LO_IDX] = b->u64[i]; 1579 } else { 1580 bshift.u64[HI_IDX] = b->u64[i] >> (64-j); 1581 bshift.u64[LO_IDX] = b->u64[i] << j; 1582 } 1583 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX]; 1584 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX]; 1585 } 1586 } 1587 } 1588 1589 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX]; 1590 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX]; 1591 #endif 1592 } 1593 1594 1595 #if defined(HOST_WORDS_BIGENDIAN) 1596 #define PKBIG 1 1597 #else 1598 #define PKBIG 0 1599 #endif 1600 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1601 { 1602 int i, j; 1603 ppc_avr_t result; 1604 #if defined(HOST_WORDS_BIGENDIAN) 1605 const ppc_avr_t *x[2] = { a, b }; 1606 #else 1607 const ppc_avr_t *x[2] = { b, a }; 1608 #endif 
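
    /*
     * The loops below pack each 32-bit source pixel down to 16 bits: the
     * result keeps the least-significant bit of the most-significant byte
     * and the five most-significant bits of each of the three remaining
     * bytes (a 1:5:5:5 pixel).  For example, a source word of 0x01FFFFFF
     * packs to 0xFFFF, and 0x00FFFFFF packs to 0x7FFF.
     */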
1609 1610 VECTOR_FOR_INORDER_I(i, u64) { 1611 VECTOR_FOR_INORDER_I(j, u32) { 1612 uint32_t e = x[i]->u32[j]; 1613 1614 result.u16[4*i+j] = (((e >> 9) & 0xfc00) | 1615 ((e >> 6) & 0x3e0) | 1616 ((e >> 3) & 0x1f)); 1617 } 1618 } 1619 *r = result; 1620 } 1621 1622 #define VPK(suffix, from, to, cvt, dosat) \ 1623 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1624 ppc_avr_t *a, ppc_avr_t *b) \ 1625 { \ 1626 int i; \ 1627 int sat = 0; \ 1628 ppc_avr_t result; \ 1629 ppc_avr_t *a0 = PKBIG ? a : b; \ 1630 ppc_avr_t *a1 = PKBIG ? b : a; \ 1631 \ 1632 VECTOR_FOR_INORDER_I(i, from) { \ 1633 result.to[i] = cvt(a0->from[i], &sat); \ 1634 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \ 1635 } \ 1636 *r = result; \ 1637 if (dosat && sat) { \ 1638 env->vscr |= (1 << VSCR_SAT); \ 1639 } \ 1640 } 1641 #define I(x, y) (x) 1642 VPK(shss, s16, s8, cvtshsb, 1) 1643 VPK(shus, s16, u8, cvtshub, 1) 1644 VPK(swss, s32, s16, cvtswsh, 1) 1645 VPK(swus, s32, u16, cvtswuh, 1) 1646 VPK(sdss, s64, s32, cvtsdsw, 1) 1647 VPK(sdus, s64, u32, cvtsduw, 1) 1648 VPK(uhus, u16, u8, cvtuhub, 1) 1649 VPK(uwus, u32, u16, cvtuwuh, 1) 1650 VPK(udus, u64, u32, cvtuduw, 1) 1651 VPK(uhum, u16, u8, I, 0) 1652 VPK(uwum, u32, u16, I, 0) 1653 VPK(udum, u64, u32, I, 0) 1654 #undef I 1655 #undef VPK 1656 #undef PKBIG 1657 1658 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1659 { 1660 int i; 1661 1662 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1663 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1664 } 1665 } 1666 1667 #define VRFI(suffix, rounding) \ 1668 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1669 ppc_avr_t *b) \ 1670 { \ 1671 int i; \ 1672 float_status s = env->vec_status; \ 1673 \ 1674 set_float_rounding_mode(rounding, &s); \ 1675 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1676 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1677 } \ 1678 } 1679 VRFI(n, float_round_nearest_even) 1680 VRFI(m, float_round_down) 1681 VRFI(p, float_round_up) 1682 VRFI(z, float_round_to_zero) 1683 #undef VRFI 1684 1685 #define VROTATE(suffix, element, mask) \ 1686 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1687 { \ 1688 int i; \ 1689 \ 1690 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1691 unsigned int shift = b->element[i] & mask; \ 1692 r->element[i] = (a->element[i] << shift) | \ 1693 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \ 1694 } \ 1695 } 1696 VROTATE(b, u8, 0x7) 1697 VROTATE(h, u16, 0xF) 1698 VROTATE(w, u32, 0x1F) 1699 VROTATE(d, u64, 0x3F) 1700 #undef VROTATE 1701 1702 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1703 { 1704 int i; 1705 1706 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1707 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1708 1709 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1710 } 1711 } 1712 1713 #define VRLMI(name, size, element, insert) \ 1714 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1715 { \ 1716 int i; \ 1717 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1718 uint##size##_t src1 = a->element[i]; \ 1719 uint##size##_t src2 = b->element[i]; \ 1720 uint##size##_t src3 = r->element[i]; \ 1721 uint##size##_t begin, end, shift, mask, rot_val; \ 1722 \ 1723 shift = extract##size(src2, 0, 6); \ 1724 end = extract##size(src2, 8, 6); \ 1725 begin = extract##size(src2, 16, 6); \ 1726 rot_val = rol##size(src1, shift); \ 1727 mask = mask_u##size(begin, end); \ 1728 if (insert) { \ 1729 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1730 } 
else { \ 1731 r->element[i] = (rot_val & mask); \ 1732 } \ 1733 } \ 1734 } 1735 1736 VRLMI(vrldmi, 64, u64, 1); 1737 VRLMI(vrlwmi, 32, u32, 1); 1738 VRLMI(vrldnm, 64, u64, 0); 1739 VRLMI(vrlwnm, 32, u32, 0); 1740 1741 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1742 ppc_avr_t *c) 1743 { 1744 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]); 1745 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]); 1746 } 1747 1748 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1749 { 1750 int i; 1751 1752 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1753 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status); 1754 } 1755 } 1756 1757 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1758 { 1759 int i; 1760 1761 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1762 r->f32[i] = float32_log2(b->f32[i], &env->vec_status); 1763 } 1764 } 1765 1766 #if defined(HOST_WORDS_BIGENDIAN) 1767 #define VEXTU_X_DO(name, size, left) \ 1768 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1769 { \ 1770 int index; \ 1771 if (left) { \ 1772 index = (a & 0xf) * 8; \ 1773 } else { \ 1774 index = ((15 - (a & 0xf) + 1) * 8) - size; \ 1775 } \ 1776 return int128_getlo(int128_rshift(b->s128, index)) & \ 1777 MAKE_64BIT_MASK(0, size); \ 1778 } 1779 #else 1780 #define VEXTU_X_DO(name, size, left) \ 1781 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1782 { \ 1783 int index; \ 1784 if (left) { \ 1785 index = ((15 - (a & 0xf) + 1) * 8) - size; \ 1786 } else { \ 1787 index = (a & 0xf) * 8; \ 1788 } \ 1789 return int128_getlo(int128_rshift(b->s128, index)) & \ 1790 MAKE_64BIT_MASK(0, size); \ 1791 } 1792 #endif 1793 1794 VEXTU_X_DO(vextublx, 8, 1) 1795 VEXTU_X_DO(vextuhlx, 16, 1) 1796 VEXTU_X_DO(vextuwlx, 32, 1) 1797 VEXTU_X_DO(vextubrx, 8, 0) 1798 VEXTU_X_DO(vextuhrx, 16, 0) 1799 VEXTU_X_DO(vextuwrx, 32, 0) 1800 #undef VEXTU_X_DO 1801 1802 /* The specification says that the results are undefined if all of the 1803 * shift counts are not identical. We check to make sure that they are 1804 * to conform to what real hardware appears to do. 
*/ 1805 #define VSHIFT(suffix, leftp) \ 1806 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1807 { \ 1808 int shift = b->u8[LO_IDX*15] & 0x7; \ 1809 int doit = 1; \ 1810 int i; \ 1811 \ 1812 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \ 1813 doit = doit && ((b->u8[i] & 0x7) == shift); \ 1814 } \ 1815 if (doit) { \ 1816 if (shift == 0) { \ 1817 *r = *a; \ 1818 } else if (leftp) { \ 1819 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \ 1820 \ 1821 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \ 1822 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \ 1823 } else { \ 1824 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \ 1825 \ 1826 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \ 1827 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \ 1828 } \ 1829 } \ 1830 } 1831 VSHIFT(l, 1) 1832 VSHIFT(r, 0) 1833 #undef VSHIFT 1834 1835 #define VSL(suffix, element, mask) \ 1836 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1837 { \ 1838 int i; \ 1839 \ 1840 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1841 unsigned int shift = b->element[i] & mask; \ 1842 \ 1843 r->element[i] = a->element[i] << shift; \ 1844 } \ 1845 } 1846 VSL(b, u8, 0x7) 1847 VSL(h, u16, 0x0F) 1848 VSL(w, u32, 0x1F) 1849 VSL(d, u64, 0x3F) 1850 #undef VSL 1851 1852 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1853 { 1854 int i; 1855 unsigned int shift, bytes, size; 1856 1857 size = ARRAY_SIZE(r->u8); 1858 for (i = 0; i < size; i++) { 1859 shift = b->u8[i] & 0x7; /* extract shift value */ 1860 bytes = (a->u8[i] << 8) + /* extract adjacent bytes */ 1861 (((i + 1) < size) ? a->u8[i + 1] : 0); 1862 r->u8[i] = (bytes << shift) >> 8; /* shift and store result */ 1863 } 1864 } 1865 1866 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1867 { 1868 int i; 1869 unsigned int shift, bytes; 1870 1871 /* Use reverse order, as destination and source register can be same. Its 1872 * being modified in place saving temporary, reverse order will guarantee 1873 * that computed result is not fed back. 1874 */ 1875 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1876 shift = b->u8[i] & 0x7; /* extract shift value */ 1877 bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i]; 1878 /* extract adjacent bytes */ 1879 r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */ 1880 } 1881 } 1882 1883 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1884 { 1885 int sh = shift & 0xf; 1886 int i; 1887 ppc_avr_t result; 1888 1889 #if defined(HOST_WORDS_BIGENDIAN) 1890 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1891 int index = sh + i; 1892 if (index > 0xf) { 1893 result.u8[i] = b->u8[index - 0x10]; 1894 } else { 1895 result.u8[i] = a->u8[index]; 1896 } 1897 } 1898 #else 1899 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1900 int index = (16 - sh) + i; 1901 if (index > 0xf) { 1902 result.u8[i] = a->u8[index - 0x10]; 1903 } else { 1904 result.u8[i] = b->u8[index]; 1905 } 1906 } 1907 #endif 1908 *r = result; 1909 } 1910 1911 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1912 { 1913 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf; 1914 1915 #if defined(HOST_WORDS_BIGENDIAN) 1916 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1917 memset(&r->u8[16-sh], 0, sh); 1918 #else 1919 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1920 memset(&r->u8[0], 0, sh); 1921 #endif 1922 } 1923 1924 /* Experimental testing shows that hardware masks the immediate. 
*/ 1925 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1)) 1926 #if defined(HOST_WORDS_BIGENDIAN) 1927 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element) 1928 #else 1929 #define SPLAT_ELEMENT(element) \ 1930 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element)) 1931 #endif 1932 #define VSPLT(suffix, element) \ 1933 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \ 1934 { \ 1935 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \ 1936 int i; \ 1937 \ 1938 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1939 r->element[i] = s; \ 1940 } \ 1941 } 1942 VSPLT(b, u8) 1943 VSPLT(h, u16) 1944 VSPLT(w, u32) 1945 #undef VSPLT 1946 #undef SPLAT_ELEMENT 1947 #undef _SPLAT_MASKED 1948 #if defined(HOST_WORDS_BIGENDIAN) 1949 #define VINSERT(suffix, element) \ 1950 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1951 { \ 1952 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \ 1953 sizeof(r->element[0])); \ 1954 } 1955 #else 1956 #define VINSERT(suffix, element) \ 1957 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1958 { \ 1959 uint32_t d = (16 - index) - sizeof(r->element[0]); \ 1960 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \ 1961 } 1962 #endif 1963 VINSERT(b, u8) 1964 VINSERT(h, u16) 1965 VINSERT(w, u32) 1966 VINSERT(d, u64) 1967 #undef VINSERT 1968 #if defined(HOST_WORDS_BIGENDIAN) 1969 #define VEXTRACT(suffix, element) \ 1970 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1971 { \ 1972 uint32_t es = sizeof(r->element[0]); \ 1973 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1974 memset(&r->u8[8], 0, 8); \ 1975 memset(&r->u8[0], 0, 8 - es); \ 1976 } 1977 #else 1978 #define VEXTRACT(suffix, element) \ 1979 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1980 { \ 1981 uint32_t es = sizeof(r->element[0]); \ 1982 uint32_t s = (16 - index) - es; \ 1983 memmove(&r->u8[8], &b->u8[s], es); \ 1984 memset(&r->u8[0], 0, 8); \ 1985 memset(&r->u8[8 + es], 0, 8 - es); \ 1986 } 1987 #endif 1988 VEXTRACT(ub, u8) 1989 VEXTRACT(uh, u16) 1990 VEXTRACT(uw, u32) 1991 VEXTRACT(d, u64) 1992 #undef VEXTRACT 1993 1994 void helper_xxextractuw(CPUPPCState *env, target_ulong xtn, 1995 target_ulong xbn, uint32_t index) 1996 { 1997 ppc_vsr_t xt, xb; 1998 size_t es = sizeof(uint32_t); 1999 uint32_t ext_index; 2000 int i; 2001 2002 getVSR(xbn, &xb, env); 2003 memset(&xt, 0, sizeof(xt)); 2004 2005 #if defined(HOST_WORDS_BIGENDIAN) 2006 ext_index = index; 2007 for (i = 0; i < es; i++, ext_index++) { 2008 xt.u8[8 - es + i] = xb.u8[ext_index % 16]; 2009 } 2010 #else 2011 ext_index = 15 - index; 2012 for (i = es - 1; i >= 0; i--, ext_index--) { 2013 xt.u8[8 + i] = xb.u8[ext_index % 16]; 2014 } 2015 #endif 2016 2017 putVSR(xtn, &xt, env); 2018 } 2019 2020 void helper_xxinsertw(CPUPPCState *env, target_ulong xtn, 2021 target_ulong xbn, uint32_t index) 2022 { 2023 ppc_vsr_t xt, xb; 2024 size_t es = sizeof(uint32_t); 2025 int ins_index, i = 0; 2026 2027 getVSR(xbn, &xb, env); 2028 getVSR(xtn, &xt, env); 2029 2030 #if defined(HOST_WORDS_BIGENDIAN) 2031 ins_index = index; 2032 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 2033 xt.u8[ins_index] = xb.u8[8 - es + i]; 2034 } 2035 #else 2036 ins_index = 15 - index; 2037 for (i = es - 1; i >= 0 && ins_index >= 0; i--, ins_index--) { 2038 xt.u8[ins_index] = xb.u8[8 + i]; 2039 } 2040 #endif 2041 2042 putVSR(xtn, &xt, env); 2043 } 2044 2045 #define VEXT_SIGNED(name, element, mask, cast, recast) \ 2046 void 
helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 2047 { \ 2048 int i; \ 2049 VECTOR_FOR_INORDER_I(i, element) { \ 2050 r->element[i] = (recast)((cast)(b->element[i] & mask)); \ 2051 } \ 2052 } 2053 VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t) 2054 VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t) 2055 VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t) 2056 VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t) 2057 VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t) 2058 #undef VEXT_SIGNED 2059 2060 #define VNEG(name, element) \ 2061 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 2062 { \ 2063 int i; \ 2064 VECTOR_FOR_INORDER_I(i, element) { \ 2065 r->element[i] = -b->element[i]; \ 2066 } \ 2067 } 2068 VNEG(vnegw, s32) 2069 VNEG(vnegd, s64) 2070 #undef VNEG 2071 2072 #define VSPLTI(suffix, element, splat_type) \ 2073 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \ 2074 { \ 2075 splat_type x = (int8_t)(splat << 3) >> 3; \ 2076 int i; \ 2077 \ 2078 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 2079 r->element[i] = x; \ 2080 } \ 2081 } 2082 VSPLTI(b, s8, int8_t) 2083 VSPLTI(h, s16, int16_t) 2084 VSPLTI(w, s32, int32_t) 2085 #undef VSPLTI 2086 2087 #define VSR(suffix, element, mask) \ 2088 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 2089 { \ 2090 int i; \ 2091 \ 2092 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 2093 unsigned int shift = b->element[i] & mask; \ 2094 r->element[i] = a->element[i] >> shift; \ 2095 } \ 2096 } 2097 VSR(ab, s8, 0x7) 2098 VSR(ah, s16, 0xF) 2099 VSR(aw, s32, 0x1F) 2100 VSR(ad, s64, 0x3F) 2101 VSR(b, u8, 0x7) 2102 VSR(h, u16, 0xF) 2103 VSR(w, u32, 0x1F) 2104 VSR(d, u64, 0x3F) 2105 #undef VSR 2106 2107 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2108 { 2109 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf; 2110 2111 #if defined(HOST_WORDS_BIGENDIAN) 2112 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 2113 memset(&r->u8[0], 0, sh); 2114 #else 2115 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 2116 memset(&r->u8[16 - sh], 0, sh); 2117 #endif 2118 } 2119 2120 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2121 { 2122 int i; 2123 2124 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2125 r->u32[i] = a->u32[i] >= b->u32[i]; 2126 } 2127 } 2128 2129 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2130 { 2131 int64_t t; 2132 int i, upper; 2133 ppc_avr_t result; 2134 int sat = 0; 2135 2136 #if defined(HOST_WORDS_BIGENDIAN) 2137 upper = ARRAY_SIZE(r->s32)-1; 2138 #else 2139 upper = 0; 2140 #endif 2141 t = (int64_t)b->s32[upper]; 2142 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2143 t += a->s32[i]; 2144 result.s32[i] = 0; 2145 } 2146 result.s32[upper] = cvtsdsw(t, &sat); 2147 *r = result; 2148 2149 if (sat) { 2150 env->vscr |= (1 << VSCR_SAT); 2151 } 2152 } 2153 2154 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2155 { 2156 int i, j, upper; 2157 ppc_avr_t result; 2158 int sat = 0; 2159 2160 #if defined(HOST_WORDS_BIGENDIAN) 2161 upper = 1; 2162 #else 2163 upper = 0; 2164 #endif 2165 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 2166 int64_t t = (int64_t)b->s32[upper + i * 2]; 2167 2168 result.u64[i] = 0; 2169 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 2170 t += a->s32[2 * i + j]; 2171 } 2172 result.s32[upper + i * 2] = cvtsdsw(t, &sat); 2173 } 2174 2175 *r = result; 2176 if (sat) { 2177 env->vscr |= (1 << VSCR_SAT); 2178 } 2179 } 2180 2181 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2182 { 2183 int i, j; 2184 
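/* Note: the accumulator t below is 64-bit, so the four sign-extended bytes cannot wrap before cvtsdsw() saturates the sum to int32_t and records any overflow in sat. */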
int sat = 0; 2185 2186 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2187 int64_t t = (int64_t)b->s32[i]; 2188 2189 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 2190 t += a->s8[4 * i + j]; 2191 } 2192 r->s32[i] = cvtsdsw(t, &sat); 2193 } 2194 2195 if (sat) { 2196 env->vscr |= (1 << VSCR_SAT); 2197 } 2198 } 2199 2200 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2201 { 2202 int sat = 0; 2203 int i; 2204 2205 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2206 int64_t t = (int64_t)b->s32[i]; 2207 2208 t += a->s16[2 * i] + a->s16[2 * i + 1]; 2209 r->s32[i] = cvtsdsw(t, &sat); 2210 } 2211 2212 if (sat) { 2213 env->vscr |= (1 << VSCR_SAT); 2214 } 2215 } 2216 2217 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2218 { 2219 int i, j; 2220 int sat = 0; 2221 2222 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2223 uint64_t t = (uint64_t)b->u32[i]; 2224 2225 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 2226 t += a->u8[4 * i + j]; 2227 } 2228 r->u32[i] = cvtuduw(t, &sat); 2229 } 2230 2231 if (sat) { 2232 env->vscr |= (1 << VSCR_SAT); 2233 } 2234 } 2235 2236 #if defined(HOST_WORDS_BIGENDIAN) 2237 #define UPKHI 1 2238 #define UPKLO 0 2239 #else 2240 #define UPKHI 0 2241 #define UPKLO 1 2242 #endif 2243 #define VUPKPX(suffix, hi) \ 2244 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2245 { \ 2246 int i; \ 2247 ppc_avr_t result; \ 2248 \ 2249 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 2250 uint16_t e = b->u16[hi ? i : i+4]; \ 2251 uint8_t a = (e >> 15) ? 0xff : 0; \ 2252 uint8_t r = (e >> 10) & 0x1f; \ 2253 uint8_t g = (e >> 5) & 0x1f; \ 2254 uint8_t b = e & 0x1f; \ 2255 \ 2256 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 2257 } \ 2258 *r = result; \ 2259 } 2260 VUPKPX(lpx, UPKLO) 2261 VUPKPX(hpx, UPKHI) 2262 #undef VUPKPX 2263 2264 #define VUPK(suffix, unpacked, packee, hi) \ 2265 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2266 { \ 2267 int i; \ 2268 ppc_avr_t result; \ 2269 \ 2270 if (hi) { \ 2271 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 2272 result.unpacked[i] = b->packee[i]; \ 2273 } \ 2274 } else { \ 2275 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 2276 i++) { \ 2277 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 2278 } \ 2279 } \ 2280 *r = result; \ 2281 } 2282 VUPK(hsb, s16, s8, UPKHI) 2283 VUPK(hsh, s32, s16, UPKHI) 2284 VUPK(hsw, s64, s32, UPKHI) 2285 VUPK(lsb, s16, s8, UPKLO) 2286 VUPK(lsh, s32, s16, UPKLO) 2287 VUPK(lsw, s64, s32, UPKLO) 2288 #undef VUPK 2289 #undef UPKHI 2290 #undef UPKLO 2291 2292 #define VGENERIC_DO(name, element) \ 2293 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 2294 { \ 2295 int i; \ 2296 \ 2297 VECTOR_FOR_INORDER_I(i, element) { \ 2298 r->element[i] = name(b->element[i]); \ 2299 } \ 2300 } 2301 2302 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 2303 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 2304 #define clzw(v) clz32((v)) 2305 #define clzd(v) clz64((v)) 2306 2307 VGENERIC_DO(clzb, u8) 2308 VGENERIC_DO(clzh, u16) 2309 VGENERIC_DO(clzw, u32) 2310 VGENERIC_DO(clzd, u64) 2311 2312 #undef clzb 2313 #undef clzh 2314 #undef clzw 2315 #undef clzd 2316 2317 #define ctzb(v) ((v) ? ctz32(v) : 8) 2318 #define ctzh(v) ((v) ? 
ctz32(v) : 16) 2319 #define ctzw(v) ctz32((v)) 2320 #define ctzd(v) ctz64((v)) 2321 2322 VGENERIC_DO(ctzb, u8) 2323 VGENERIC_DO(ctzh, u16) 2324 VGENERIC_DO(ctzw, u32) 2325 VGENERIC_DO(ctzd, u64) 2326 2327 #undef ctzb 2328 #undef ctzh 2329 #undef ctzw 2330 #undef ctzd 2331 2332 #define popcntb(v) ctpop8(v) 2333 #define popcnth(v) ctpop16(v) 2334 #define popcntw(v) ctpop32(v) 2335 #define popcntd(v) ctpop64(v) 2336 2337 VGENERIC_DO(popcntb, u8) 2338 VGENERIC_DO(popcnth, u16) 2339 VGENERIC_DO(popcntw, u32) 2340 VGENERIC_DO(popcntd, u64) 2341 2342 #undef popcntb 2343 #undef popcnth 2344 #undef popcntw 2345 #undef popcntd 2346 2347 #undef VGENERIC_DO 2348 2349 #if defined(HOST_WORDS_BIGENDIAN) 2350 #define QW_ONE { .u64 = { 0, 1 } } 2351 #else 2352 #define QW_ONE { .u64 = { 1, 0 } } 2353 #endif 2354 2355 #ifndef CONFIG_INT128 2356 2357 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) 2358 { 2359 t->u64[0] = ~a.u64[0]; 2360 t->u64[1] = ~a.u64[1]; 2361 } 2362 2363 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) 2364 { 2365 if (a.u64[HI_IDX] < b.u64[HI_IDX]) { 2366 return -1; 2367 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) { 2368 return 1; 2369 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) { 2370 return -1; 2371 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) { 2372 return 1; 2373 } else { 2374 return 0; 2375 } 2376 } 2377 2378 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2379 { 2380 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX]; 2381 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] + 2382 (~a.u64[LO_IDX] < b.u64[LO_IDX]); 2383 } 2384 2385 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2386 { 2387 ppc_avr_t not_a; 2388 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX]; 2389 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] + 2390 (~a.u64[LO_IDX] < b.u64[LO_IDX]); 2391 avr_qw_not(&not_a, a); 2392 return avr_qw_cmpu(not_a, b) < 0; 2393 } 2394 2395 #endif 2396 2397 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2398 { 2399 #ifdef CONFIG_INT128 2400 r->u128 = a->u128 + b->u128; 2401 #else 2402 avr_qw_add(r, *a, *b); 2403 #endif 2404 } 2405 2406 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2407 { 2408 #ifdef CONFIG_INT128 2409 r->u128 = a->u128 + b->u128 + (c->u128 & 1); 2410 #else 2411 2412 if (c->u64[LO_IDX] & 1) { 2413 ppc_avr_t tmp; 2414 2415 tmp.u64[HI_IDX] = 0; 2416 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1; 2417 avr_qw_add(&tmp, *a, tmp); 2418 avr_qw_add(r, tmp, *b); 2419 } else { 2420 avr_qw_add(r, *a, *b); 2421 } 2422 #endif 2423 } 2424 2425 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2426 { 2427 #ifdef CONFIG_INT128 2428 r->u128 = (~a->u128 < b->u128); 2429 #else 2430 ppc_avr_t not_a; 2431 2432 avr_qw_not(&not_a, *a); 2433 2434 r->u64[HI_IDX] = 0; 2435 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0); 2436 #endif 2437 } 2438 2439 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2440 { 2441 #ifdef CONFIG_INT128 2442 int carry_out = (~a->u128 < b->u128); 2443 if (!carry_out && (c->u128 & 1)) { 2444 carry_out = ((a->u128 + b->u128 + 1) == 0) && 2445 ((a->u128 != 0) || (b->u128 != 0)); 2446 } 2447 r->u128 = carry_out; 2448 #else 2449 2450 int carry_in = c->u64[LO_IDX] & 1; 2451 int carry_out = 0; 2452 ppc_avr_t tmp; 2453 2454 carry_out = avr_qw_addc(&tmp, *a, *b); 2455 2456 if (!carry_out && carry_in) { 2457 ppc_avr_t one = QW_ONE; 2458 carry_out = avr_qw_addc(&tmp, tmp, one); 2459 } 2460 r->u64[HI_IDX] = 0; 2461 r->u64[LO_IDX] = carry_out; 2462 #endif 2463 } 2464 2465 void 
helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2466 { 2467 #ifdef CONFIG_INT128 2468 r->u128 = a->u128 - b->u128; 2469 #else 2470 ppc_avr_t tmp; 2471 ppc_avr_t one = QW_ONE; 2472 2473 avr_qw_not(&tmp, *b); 2474 avr_qw_add(&tmp, *a, tmp); 2475 avr_qw_add(r, tmp, one); 2476 #endif 2477 } 2478 2479 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2480 { 2481 #ifdef CONFIG_INT128 2482 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); 2483 #else 2484 ppc_avr_t tmp, sum; 2485 2486 avr_qw_not(&tmp, *b); 2487 avr_qw_add(&sum, *a, tmp); 2488 2489 tmp.u64[HI_IDX] = 0; 2490 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1; 2491 avr_qw_add(r, sum, tmp); 2492 #endif 2493 } 2494 2495 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2496 { 2497 #ifdef CONFIG_INT128 2498 r->u128 = (~a->u128 < ~b->u128) || 2499 (a->u128 + ~b->u128 == (__uint128_t)-1); 2500 #else 2501 int carry = (avr_qw_cmpu(*a, *b) > 0); 2502 if (!carry) { 2503 ppc_avr_t tmp; 2504 avr_qw_not(&tmp, *b); 2505 avr_qw_add(&tmp, *a, tmp); 2506 carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull)); 2507 } 2508 r->u64[HI_IDX] = 0; 2509 r->u64[LO_IDX] = carry; 2510 #endif 2511 } 2512 2513 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2514 { 2515 #ifdef CONFIG_INT128 2516 r->u128 = 2517 (~a->u128 < ~b->u128) || 2518 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); 2519 #else 2520 int carry_in = c->u64[LO_IDX] & 1; 2521 int carry_out = (avr_qw_cmpu(*a, *b) > 0); 2522 if (!carry_out && carry_in) { 2523 ppc_avr_t tmp; 2524 avr_qw_not(&tmp, *b); 2525 avr_qw_add(&tmp, *a, tmp); 2526 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull)); 2527 } 2528 2529 r->u64[HI_IDX] = 0; 2530 r->u64[LO_IDX] = carry_out; 2531 #endif 2532 } 2533 2534 #define BCD_PLUS_PREF_1 0xC 2535 #define BCD_PLUS_PREF_2 0xF 2536 #define BCD_PLUS_ALT_1 0xA 2537 #define BCD_NEG_PREF 0xD 2538 #define BCD_NEG_ALT 0xB 2539 #define BCD_PLUS_ALT_2 0xE 2540 #define NATIONAL_PLUS 0x2B 2541 #define NATIONAL_NEG 0x2D 2542 2543 #if defined(HOST_WORDS_BIGENDIAN) 2544 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2545 #else 2546 #define BCD_DIG_BYTE(n) ((n) / 2) 2547 #endif 2548 2549 static int bcd_get_sgn(ppc_avr_t *bcd) 2550 { 2551 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) { 2552 case BCD_PLUS_PREF_1: 2553 case BCD_PLUS_PREF_2: 2554 case BCD_PLUS_ALT_1: 2555 case BCD_PLUS_ALT_2: 2556 { 2557 return 1; 2558 } 2559 2560 case BCD_NEG_PREF: 2561 case BCD_NEG_ALT: 2562 { 2563 return -1; 2564 } 2565 2566 default: 2567 { 2568 return 0; 2569 } 2570 } 2571 } 2572 2573 static int bcd_preferred_sgn(int sgn, int ps) 2574 { 2575 if (sgn >= 0) { 2576 return (ps == 0) ? 
BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2577 } else { 2578 return BCD_NEG_PREF; 2579 } 2580 } 2581 2582 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2583 { 2584 uint8_t result; 2585 if (n & 1) { 2586 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4; 2587 } else { 2588 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF; 2589 } 2590 2591 if (unlikely(result > 9)) { 2592 *invalid = true; 2593 } 2594 return result; 2595 } 2596 2597 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2598 { 2599 if (n & 1) { 2600 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F; 2601 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4); 2602 } else { 2603 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0; 2604 bcd->u8[BCD_DIG_BYTE(n)] |= digit; 2605 } 2606 } 2607 2608 static bool bcd_is_valid(ppc_avr_t *bcd) 2609 { 2610 int i; 2611 int invalid = 0; 2612 2613 if (bcd_get_sgn(bcd) == 0) { 2614 return false; 2615 } 2616 2617 for (i = 1; i < 32; i++) { 2618 bcd_get_digit(bcd, i, &invalid); 2619 if (unlikely(invalid)) { 2620 return false; 2621 } 2622 } 2623 return true; 2624 } 2625 2626 static int bcd_cmp_zero(ppc_avr_t *bcd) 2627 { 2628 if (bcd->u64[HI_IDX] == 0 && (bcd->u64[LO_IDX] >> 4) == 0) { 2629 return CRF_EQ; 2630 } else { 2631 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT; 2632 } 2633 } 2634 2635 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2636 { 2637 #if defined(HOST_WORDS_BIGENDIAN) 2638 return reg->u16[7 - n]; 2639 #else 2640 return reg->u16[n]; 2641 #endif 2642 } 2643 2644 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2645 { 2646 #if defined(HOST_WORDS_BIGENDIAN) 2647 reg->u16[7 - n] = val; 2648 #else 2649 reg->u16[n] = val; 2650 #endif 2651 } 2652 2653 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2654 { 2655 int i; 2656 int invalid = 0; 2657 for (i = 31; i > 0; i--) { 2658 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2659 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2660 if (unlikely(invalid)) { 2661 return 0; /* doesn't matter */ 2662 } else if (dig_a > dig_b) { 2663 return 1; 2664 } else if (dig_a < dig_b) { 2665 return -1; 2666 } 2667 } 2668 2669 return 0; 2670 } 2671 2672 static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2673 int *overflow) 2674 { 2675 int carry = 0; 2676 int i; 2677 for (i = 1; i <= 31; i++) { 2678 uint8_t digit = bcd_get_digit(a, i, invalid) + 2679 bcd_get_digit(b, i, invalid) + carry; 2680 if (digit > 9) { 2681 carry = 1; 2682 digit -= 10; 2683 } else { 2684 carry = 0; 2685 } 2686 2687 bcd_put_digit(t, digit, i); 2688 } 2689 2690 *overflow = carry; 2691 } 2692 2693 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2694 int *overflow) 2695 { 2696 int carry = 0; 2697 int i; 2698 2699 for (i = 1; i <= 31; i++) { 2700 uint8_t digit = bcd_get_digit(a, i, invalid) - 2701 bcd_get_digit(b, i, invalid) + carry; 2702 if (digit & 0x80) { 2703 carry = -1; 2704 digit += 10; 2705 } else { 2706 carry = 0; 2707 } 2708 2709 bcd_put_digit(t, digit, i); 2710 } 2711 2712 *overflow = carry; 2713 } 2714 2715 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2716 { 2717 2718 int sgna = bcd_get_sgn(a); 2719 int sgnb = bcd_get_sgn(b); 2720 int invalid = (sgna == 0) || (sgnb == 0); 2721 int overflow = 0; 2722 uint32_t cr = 0; 2723 ppc_avr_t result = { .u64 = { 0, 0 } }; 2724 2725 if (!invalid) { 2726 if (sgna == sgnb) { 2727 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps); 2728 bcd_add_mag(&result, a, b, &invalid, &overflow); 2729 cr = bcd_cmp_zero(&result); 2730 } else { 2731 int magnitude = 
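/* 1 if |a| > |b|, -1 if |a| < |b|, 0 if equal */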
bcd_cmp_mag(a, b); 2732 if (magnitude > 0) { 2733 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps); 2734 bcd_sub_mag(&result, a, b, &invalid, &overflow); 2735 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2736 } else if (magnitude < 0) { 2737 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps); 2738 bcd_sub_mag(&result, b, a, &invalid, &overflow); 2739 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2740 } else { 2741 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(0, ps); 2742 cr = CRF_EQ; 2743 } 2744 } 2745 } 2746 2747 if (unlikely(invalid)) { 2748 result.u64[HI_IDX] = result.u64[LO_IDX] = -1; 2749 cr = CRF_SO; 2750 } else if (overflow) { 2751 cr |= CRF_SO; 2752 } 2753 2754 *r = result; 2755 2756 return cr; 2757 } 2758 2759 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2760 { 2761 ppc_avr_t bcopy = *b; 2762 int sgnb = bcd_get_sgn(b); 2763 if (sgnb < 0) { 2764 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2765 } else if (sgnb > 0) { 2766 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2767 } 2768 /* else invalid ... defer to bcdadd code for proper handling */ 2769 2770 return helper_bcdadd(r, a, &bcopy, ps); 2771 } 2772 2773 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2774 { 2775 int i; 2776 int cr = 0; 2777 uint16_t national = 0; 2778 uint16_t sgnb = get_national_digit(b, 0); 2779 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2780 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2781 2782 for (i = 1; i < 8; i++) { 2783 national = get_national_digit(b, i); 2784 if (unlikely(national < 0x30 || national > 0x39)) { 2785 invalid = 1; 2786 break; 2787 } 2788 2789 bcd_put_digit(&ret, national & 0xf, i); 2790 } 2791 2792 if (sgnb == NATIONAL_PLUS) { 2793 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2794 } else { 2795 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2796 } 2797 2798 cr = bcd_cmp_zero(&ret); 2799 2800 if (unlikely(invalid)) { 2801 cr = CRF_SO; 2802 } 2803 2804 *r = ret; 2805 2806 return cr; 2807 } 2808 2809 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2810 { 2811 int i; 2812 int cr = 0; 2813 int sgnb = bcd_get_sgn(b); 2814 int invalid = (sgnb == 0); 2815 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2816 2817 int ox_flag = (b->u64[HI_IDX] != 0) || ((b->u64[LO_IDX] >> 32) != 0); 2818 2819 for (i = 1; i < 8; i++) { 2820 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2821 2822 if (unlikely(invalid)) { 2823 break; 2824 } 2825 } 2826 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2827 2828 cr = bcd_cmp_zero(b); 2829 2830 if (ox_flag) { 2831 cr |= CRF_SO; 2832 } 2833 2834 if (unlikely(invalid)) { 2835 cr = CRF_SO; 2836 } 2837 2838 *r = ret; 2839 2840 return cr; 2841 } 2842 2843 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2844 { 2845 int i; 2846 int cr = 0; 2847 int invalid = 0; 2848 int zone_digit = 0; 2849 int zone_lead = ps ? 0xF : 0x3; 2850 int digit = 0; 2851 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2852 int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4; 2853 2854 if (unlikely((sgnb < 0xA) && ps)) { 2855 invalid = 1; 2856 } 2857 2858 for (i = 0; i < 16; i++) { 2859 zone_digit = i ? 
b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead; 2860 digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF; 2861 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2862 invalid = 1; 2863 break; 2864 } 2865 2866 bcd_put_digit(&ret, digit, i + 1); 2867 } 2868 2869 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2870 (!ps && (sgnb & 0x4))) { 2871 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2872 } else { 2873 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2874 } 2875 2876 cr = bcd_cmp_zero(&ret); 2877 2878 if (unlikely(invalid)) { 2879 cr = CRF_SO; 2880 } 2881 2882 *r = ret; 2883 2884 return cr; 2885 } 2886 2887 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2888 { 2889 int i; 2890 int cr = 0; 2891 uint8_t digit = 0; 2892 int sgnb = bcd_get_sgn(b); 2893 int zone_lead = (ps) ? 0xF0 : 0x30; 2894 int invalid = (sgnb == 0); 2895 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2896 2897 int ox_flag = ((b->u64[HI_IDX] >> 4) != 0); 2898 2899 for (i = 0; i < 16; i++) { 2900 digit = bcd_get_digit(b, i + 1, &invalid); 2901 2902 if (unlikely(invalid)) { 2903 break; 2904 } 2905 2906 ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit; 2907 } 2908 2909 if (ps) { 2910 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2911 } else { 2912 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1); 2913 } 2914 2915 cr = bcd_cmp_zero(b); 2916 2917 if (ox_flag) { 2918 cr |= CRF_SO; 2919 } 2920 2921 if (unlikely(invalid)) { 2922 cr = CRF_SO; 2923 } 2924 2925 *r = ret; 2926 2927 return cr; 2928 } 2929 2930 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2931 { 2932 int i; 2933 int cr = 0; 2934 uint64_t lo_value; 2935 uint64_t hi_value; 2936 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2937 2938 if (b->s64[HI_IDX] < 0) { 2939 lo_value = -b->s64[LO_IDX]; 2940 hi_value = ~b->u64[HI_IDX] + !lo_value; 2941 bcd_put_digit(&ret, 0xD, 0); 2942 } else { 2943 lo_value = b->u64[LO_IDX]; 2944 hi_value = b->u64[HI_IDX]; 2945 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); 2946 } 2947 2948 if (divu128(&lo_value, &hi_value, 1000000000000000ULL) || 2949 lo_value > 9999999999999999ULL) { 2950 cr = CRF_SO; 2951 } 2952 2953 for (i = 1; i < 16; hi_value /= 10, i++) { 2954 bcd_put_digit(&ret, hi_value % 10, i); 2955 } 2956 2957 for (; i < 32; lo_value /= 10, i++) { 2958 bcd_put_digit(&ret, lo_value % 10, i); 2959 } 2960 2961 cr |= bcd_cmp_zero(&ret); 2962 2963 *r = ret; 2964 2965 return cr; 2966 } 2967 2968 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2969 { 2970 uint8_t i; 2971 int cr; 2972 uint64_t carry; 2973 uint64_t unused; 2974 uint64_t lo_value; 2975 uint64_t hi_value = 0; 2976 int sgnb = bcd_get_sgn(b); 2977 int invalid = (sgnb == 0); 2978 2979 lo_value = bcd_get_digit(b, 31, &invalid); 2980 for (i = 30; i > 0; i--) { 2981 mulu64(&lo_value, &carry, lo_value, 10ULL); 2982 mulu64(&hi_value, &unused, hi_value, 10ULL); 2983 lo_value += bcd_get_digit(b, i, &invalid); 2984 hi_value += carry; 2985 2986 if (unlikely(invalid)) { 2987 break; 2988 } 2989 } 2990 2991 if (sgnb == -1) { 2992 r->s64[LO_IDX] = -lo_value; 2993 r->s64[HI_IDX] = ~hi_value + !r->s64[LO_IDX]; 2994 } else { 2995 r->s64[LO_IDX] = lo_value; 2996 r->s64[HI_IDX] = hi_value; 2997 } 2998 2999 cr = bcd_cmp_zero(b); 3000 3001 if (unlikely(invalid)) { 3002 cr = CRF_SO; 3003 } 3004 3005 return cr; 3006 } 3007 3008 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3009 { 3010 int i; 3011 int invalid = 0; 3012 3013 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 3014 return CRF_SO; 3015 } 3016 3017 *r = *a; 3018 bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 
0xF, 0); 3019 3020 for (i = 1; i < 32; i++) { 3021 bcd_get_digit(a, i, &invalid); 3022 bcd_get_digit(b, i, &invalid); 3023 if (unlikely(invalid)) { 3024 return CRF_SO; 3025 } 3026 } 3027 3028 return bcd_cmp_zero(r); 3029 } 3030 3031 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 3032 { 3033 int sgnb = bcd_get_sgn(b); 3034 3035 *r = *b; 3036 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 3037 3038 if (bcd_is_valid(b) == false) { 3039 return CRF_SO; 3040 } 3041 3042 return bcd_cmp_zero(r); 3043 } 3044 3045 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3046 { 3047 int cr; 3048 #if defined(HOST_WORDS_BIGENDIAN) 3049 int i = a->s8[7]; 3050 #else 3051 int i = a->s8[8]; 3052 #endif 3053 bool ox_flag = false; 3054 int sgnb = bcd_get_sgn(b); 3055 ppc_avr_t ret = *b; 3056 ret.u64[LO_IDX] &= ~0xf; 3057 3058 if (bcd_is_valid(b) == false) { 3059 return CRF_SO; 3060 } 3061 3062 if (unlikely(i > 31)) { 3063 i = 31; 3064 } else if (unlikely(i < -31)) { 3065 i = -31; 3066 } 3067 3068 if (i > 0) { 3069 ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag); 3070 } else { 3071 urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4); 3072 } 3073 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 3074 3075 *r = ret; 3076 3077 cr = bcd_cmp_zero(r); 3078 if (ox_flag) { 3079 cr |= CRF_SO; 3080 } 3081 3082 return cr; 3083 } 3084 3085 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3086 { 3087 int cr; 3088 int i; 3089 int invalid = 0; 3090 bool ox_flag = false; 3091 ppc_avr_t ret = *b; 3092 3093 for (i = 0; i < 32; i++) { 3094 bcd_get_digit(b, i, &invalid); 3095 3096 if (unlikely(invalid)) { 3097 return CRF_SO; 3098 } 3099 } 3100 3101 #if defined(HOST_WORDS_BIGENDIAN) 3102 i = a->s8[7]; 3103 #else 3104 i = a->s8[8]; 3105 #endif 3106 if (i >= 32) { 3107 ox_flag = true; 3108 ret.u64[LO_IDX] = ret.u64[HI_IDX] = 0; 3109 } else if (i <= -32) { 3110 ret.u64[LO_IDX] = ret.u64[HI_IDX] = 0; 3111 } else if (i > 0) { 3112 ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag); 3113 } else { 3114 urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4); 3115 } 3116 *r = ret; 3117 3118 cr = bcd_cmp_zero(r); 3119 if (ox_flag) { 3120 cr |= CRF_SO; 3121 } 3122 3123 return cr; 3124 } 3125 3126 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3127 { 3128 int cr; 3129 int unused = 0; 3130 int invalid = 0; 3131 bool ox_flag = false; 3132 int sgnb = bcd_get_sgn(b); 3133 ppc_avr_t ret = *b; 3134 ret.u64[LO_IDX] &= ~0xf; 3135 3136 #if defined(HOST_WORDS_BIGENDIAN) 3137 int i = a->s8[7]; 3138 ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } }; 3139 #else 3140 int i = a->s8[8]; 3141 ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } }; 3142 #endif 3143 3144 if (bcd_is_valid(b) == false) { 3145 return CRF_SO; 3146 } 3147 3148 if (unlikely(i > 31)) { 3149 i = 31; 3150 } else if (unlikely(i < -31)) { 3151 i = -31; 3152 } 3153 3154 if (i > 0) { 3155 ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag); 3156 } else { 3157 urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4); 3158 3159 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 3160 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 3161 } 3162 } 3163 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 3164 3165 cr = bcd_cmp_zero(&ret); 3166 if (ox_flag) { 3167 cr |= CRF_SO; 3168 } 3169 *r = ret; 3170 3171 return cr; 3172 } 3173 3174 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3175 { 3176 uint64_t mask; 3177 uint32_t ox_flag = 0; 3178 #if 
defined(HOST_WORDS_BIGENDIAN) 3179 int i = a->s16[3] + 1; 3180 #else 3181 int i = a->s16[4] + 1; 3182 #endif 3183 ppc_avr_t ret = *b; 3184 3185 if (bcd_is_valid(b) == false) { 3186 return CRF_SO; 3187 } 3188 3189 if (i > 16 && i < 32) { 3190 mask = (uint64_t)-1 >> (128 - i * 4); 3191 if (ret.u64[HI_IDX] & ~mask) { 3192 ox_flag = CRF_SO; 3193 } 3194 3195 ret.u64[HI_IDX] &= mask; 3196 } else if (i >= 0 && i <= 16) { 3197 mask = (uint64_t)-1 >> (64 - i * 4); 3198 if (ret.u64[HI_IDX] || (ret.u64[LO_IDX] & ~mask)) { 3199 ox_flag = CRF_SO; 3200 } 3201 3202 ret.u64[LO_IDX] &= mask; 3203 ret.u64[HI_IDX] = 0; 3204 } 3205 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 3206 *r = ret; 3207 3208 return bcd_cmp_zero(&ret) | ox_flag; 3209 } 3210 3211 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3212 { 3213 int i; 3214 uint64_t mask; 3215 uint32_t ox_flag = 0; 3216 int invalid = 0; 3217 ppc_avr_t ret = *b; 3218 3219 for (i = 0; i < 32; i++) { 3220 bcd_get_digit(b, i, &invalid); 3221 3222 if (unlikely(invalid)) { 3223 return CRF_SO; 3224 } 3225 } 3226 3227 #if defined(HOST_WORDS_BIGENDIAN) 3228 i = a->s16[3]; 3229 #else 3230 i = a->s16[4]; 3231 #endif 3232 if (i > 16 && i < 33) { 3233 mask = (uint64_t)-1 >> (128 - i * 4); 3234 if (ret.u64[HI_IDX] & ~mask) { 3235 ox_flag = CRF_SO; 3236 } 3237 3238 ret.u64[HI_IDX] &= mask; 3239 } else if (i > 0 && i <= 16) { 3240 mask = (uint64_t)-1 >> (64 - i * 4); 3241 if (ret.u64[HI_IDX] || (ret.u64[LO_IDX] & ~mask)) { 3242 ox_flag = CRF_SO; 3243 } 3244 3245 ret.u64[LO_IDX] &= mask; 3246 ret.u64[HI_IDX] = 0; 3247 } else if (i == 0) { 3248 if (ret.u64[HI_IDX] || ret.u64[LO_IDX]) { 3249 ox_flag = CRF_SO; 3250 } 3251 ret.u64[HI_IDX] = ret.u64[LO_IDX] = 0; 3252 } 3253 3254 *r = ret; 3255 if (r->u64[HI_IDX] == 0 && r->u64[LO_IDX] == 0) { 3256 return ox_flag | CRF_EQ; 3257 } 3258 3259 return ox_flag | CRF_GT; 3260 } 3261 3262 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 3263 { 3264 int i; 3265 VECTOR_FOR_INORDER_I(i, u8) { 3266 r->u8[i] = AES_sbox[a->u8[i]]; 3267 } 3268 } 3269 3270 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3271 { 3272 ppc_avr_t result; 3273 int i; 3274 3275 VECTOR_FOR_INORDER_I(i, u32) { 3276 result.VsrW(i) = b->VsrW(i) ^ 3277 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^ 3278 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^ 3279 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^ 3280 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]); 3281 } 3282 *r = result; 3283 } 3284 3285 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3286 { 3287 ppc_avr_t result; 3288 int i; 3289 3290 VECTOR_FOR_INORDER_I(i, u8) { 3291 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]); 3292 } 3293 *r = result; 3294 } 3295 3296 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3297 { 3298 /* This differs from what is written in ISA V2.07. The RTL is */ 3299 /* incorrect and will be fixed in V2.07B. 
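As implemented below, a is passed through AES_ishifts/AES_isbox (InvShiftRows and InvSubBytes), XORed with b, and the AES_imc table then applies InvMixColumns to that sum.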
*/ 3300 int i; 3301 ppc_avr_t tmp; 3302 3303 VECTOR_FOR_INORDER_I(i, u8) { 3304 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])]; 3305 } 3306 3307 VECTOR_FOR_INORDER_I(i, u32) { 3308 r->VsrW(i) = 3309 AES_imc[tmp.VsrB(4 * i + 0)][0] ^ 3310 AES_imc[tmp.VsrB(4 * i + 1)][1] ^ 3311 AES_imc[tmp.VsrB(4 * i + 2)][2] ^ 3312 AES_imc[tmp.VsrB(4 * i + 3)][3]; 3313 } 3314 } 3315 3316 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3317 { 3318 ppc_avr_t result; 3319 int i; 3320 3321 VECTOR_FOR_INORDER_I(i, u8) { 3322 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]); 3323 } 3324 *r = result; 3325 } 3326 3327 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n))) 3328 #if defined(HOST_WORDS_BIGENDIAN) 3329 #define EL_IDX(i) (i) 3330 #else 3331 #define EL_IDX(i) (3 - (i)) 3332 #endif 3333 3334 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3335 { 3336 int st = (st_six & 0x10) != 0; 3337 int six = st_six & 0xF; 3338 int i; 3339 3340 VECTOR_FOR_INORDER_I(i, u32) { 3341 if (st == 0) { 3342 if ((six & (0x8 >> i)) == 0) { 3343 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^ 3344 ROTRu32(a->u32[EL_IDX(i)], 18) ^ 3345 (a->u32[EL_IDX(i)] >> 3); 3346 } else { /* six.bit[i] == 1 */ 3347 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^ 3348 ROTRu32(a->u32[EL_IDX(i)], 19) ^ 3349 (a->u32[EL_IDX(i)] >> 10); 3350 } 3351 } else { /* st == 1 */ 3352 if ((six & (0x8 >> i)) == 0) { 3353 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^ 3354 ROTRu32(a->u32[EL_IDX(i)], 13) ^ 3355 ROTRu32(a->u32[EL_IDX(i)], 22); 3356 } else { /* six.bit[i] == 1 */ 3357 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^ 3358 ROTRu32(a->u32[EL_IDX(i)], 11) ^ 3359 ROTRu32(a->u32[EL_IDX(i)], 25); 3360 } 3361 } 3362 } 3363 } 3364 3365 #undef ROTRu32 3366 #undef EL_IDX 3367 3368 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n))) 3369 #if defined(HOST_WORDS_BIGENDIAN) 3370 #define EL_IDX(i) (i) 3371 #else 3372 #define EL_IDX(i) (1 - (i)) 3373 #endif 3374 3375 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3376 { 3377 int st = (st_six & 0x10) != 0; 3378 int six = st_six & 0xF; 3379 int i; 3380 3381 VECTOR_FOR_INORDER_I(i, u64) { 3382 if (st == 0) { 3383 if ((six & (0x8 >> (2*i))) == 0) { 3384 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^ 3385 ROTRu64(a->u64[EL_IDX(i)], 8) ^ 3386 (a->u64[EL_IDX(i)] >> 7); 3387 } else { /* six.bit[2*i] == 1 */ 3388 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^ 3389 ROTRu64(a->u64[EL_IDX(i)], 61) ^ 3390 (a->u64[EL_IDX(i)] >> 6); 3391 } 3392 } else { /* st == 1 */ 3393 if ((six & (0x8 >> (2*i))) == 0) { 3394 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^ 3395 ROTRu64(a->u64[EL_IDX(i)], 34) ^ 3396 ROTRu64(a->u64[EL_IDX(i)], 39); 3397 } else { /* six.bit[2*i] == 1 */ 3398 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^ 3399 ROTRu64(a->u64[EL_IDX(i)], 18) ^ 3400 ROTRu64(a->u64[EL_IDX(i)], 41); 3401 } 3402 } 3403 } 3404 } 3405 3406 #undef ROTRu64 3407 #undef EL_IDX 3408 3409 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 3410 { 3411 ppc_avr_t result; 3412 int i; 3413 3414 VECTOR_FOR_INORDER_I(i, u8) { 3415 int indexA = c->u8[i] >> 4; 3416 int indexB = c->u8[i] & 0xF; 3417 #if defined(HOST_WORDS_BIGENDIAN) 3418 result.u8[i] = a->u8[indexA] ^ b->u8[indexB]; 3419 #else 3420 result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB]; 3421 #endif 3422 } 3423 *r = result; 3424 } 3425 3426 #undef VECTOR_FOR_INORDER_I 3427 #undef HI_IDX 3428 #undef LO_IDX 3429 3430 
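/* Worked example (illustrative sketch, not from the original source): for
 * helper_vpermxor above, a control byte c->u8[i] == 0x5A names architected
 * byte 5 of a and architected byte 10 of b, so the architected result byte i
 * is a->byte[5] ^ b->byte[10]; the 15 - index arithmetic in the little-endian
 * branch maps those architected element numbers onto the host-order layout
 * of the ppc_avr_t union.
 */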
/*****************************************************************************/ 3431 /* SPE extension helpers */ 3432 /* Use a table to make this quicker */ 3433 static const uint8_t hbrev[16] = { 3434 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE, 3435 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF, 3436 }; 3437 3438 static inline uint8_t byte_reverse(uint8_t val) 3439 { 3440 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4); 3441 } 3442 3443 static inline uint32_t word_reverse(uint32_t val) 3444 { 3445 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) | 3446 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24); 3447 } 3448 3449 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */ 3450 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2) 3451 { 3452 uint32_t a, b, d, mask; 3453 3454 mask = UINT32_MAX >> (32 - MASKBITS); 3455 a = arg1 & mask; 3456 b = arg2 & mask; 3457 d = word_reverse(1 + word_reverse(a | ~b)); 3458 return (arg1 & ~mask) | (d & b); 3459 } 3460 3461 uint32_t helper_cntlsw32(uint32_t val) 3462 { 3463 if (val & 0x80000000) { 3464 return clz32(~val); 3465 } else { 3466 return clz32(val); 3467 } 3468 } 3469 3470 uint32_t helper_cntlzw32(uint32_t val) 3471 { 3472 return clz32(val); 3473 } 3474 3475 /* 440 specific */ 3476 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 3477 target_ulong low, uint32_t update_Rc) 3478 { 3479 target_ulong mask; 3480 int i; 3481 3482 i = 1; 3483 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3484 if ((high & mask) == 0) { 3485 if (update_Rc) { 3486 env->crf[0] = 0x4; 3487 } 3488 goto done; 3489 } 3490 i++; 3491 } 3492 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3493 if ((low & mask) == 0) { 3494 if (update_Rc) { 3495 env->crf[0] = 0x8; 3496 } 3497 goto done; 3498 } 3499 i++; 3500 } 3501 i = 8; 3502 if (update_Rc) { 3503 env->crf[0] = 0x2; 3504 } 3505 done: 3506 env->xer = (env->xer & ~0x7F) | i; 3507 if (update_Rc) { 3508 env->crf[0] |= xer_so; 3509 } 3510 return i; 3511 } 3512
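/* Worked example (illustrative sketch, not from the original source): with
 * high = 0x61626300 and low = 0x64656667, the first zero byte is the fourth
 * byte of high, so helper_dlmzb leaves the scan with i == 4, writes 4 into
 * the low seven bits of XER, sets CR0 to 0x4 (OR-ed with the SO bit) when
 * update_Rc is non-zero, and returns 4; with no zero byte in either word the
 * count is forced to 8 and CR0 is set to 0x2.
 */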