/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

/* Update the legacy overflow bits: OV tracks 'ov', while SO (summary
 * overflow) is sticky — it is only ever set here, never cleared. */
static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

/* divweu: divide (ra << 32) by the low 32 bits of rb, unsigned.
 * Overflow when the divisor is zero or the quotient does not fit in
 * 32 bits; the architected result is then undefined (0 used here).
 * When 'oe' is set, OV/SO are updated via helper_update_ov_legacy(). */
target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

/* divwe: signed counterpart of divweu.  Overflow when the divisor is
 * zero, on the INT64_MIN / -1 case (which would trap in host C), or
 * when the quotient does not fit in a signed 32-bit word. */
target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

/* divdeu: 128-by-64-bit unsigned divide of (ra:0) by rb, done by
 * divu128() from host-utils; overflow reported by the helper. */
uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

/* divde: signed 128-by-64-bit divide via divs128(). */
uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/* subtract 1 from each byte, and with inverse, check if MSB is set at each
 * byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n)  (haszero((x) ^ pattern(n)))

/* cmpeqb: set CR "GT" bit iff any byte of rb equals the low byte of ra. */
uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

/* Return invalid random number.
 *
 * FIXME: Add rng backend or other mechanism to get cryptographically suitable
 * random number
 */
target_ulong helper_darn32(void)
{
    return -1;
}

target_ulong helper_darn64(void)
{
    return -1;
}

#endif

#if defined(TARGET_PPC64)

/* bpermd: bit permute doubleword.  Each of the 8 bytes of rs selects a
 * bit index into rb; the selected bits are gathered into the low byte
 * of the result.  Indices >= 64 contribute 0. */
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i*8)) & 0xFF;
        if (index < 64) {
            if (rb & PPC_BIT(index)) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

/* cmpb: byte-wise compare — each byte of the result is 0xff where the
 * corresponding bytes of rs and rb are equal, 0x00 otherwise. */
target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
/* sraw: CA/CA32 are set only when the (negative) result had 1-bits
 * shifted out; a shift amount with bit 0x20 set fills with sign bits. */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            /* Shift by zero: plain sign-extended copy, carry cleared. */
            ret = (int32_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        /* Shift >= 32: result is all sign bits; carry set iff negative. */
        ret = (int32_t)value >> 31;
        env->ca32 = env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
/* srad: 64-bit analogue of helper_sraw (shift right algebraic dword). */
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca32 = env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words. */
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >>  8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
    val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
    return val;
}
#endif

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
/* div: (arg1:MQ) / arg2; quotient returned, remainder left in MQ.
 * The INT32_MIN / -1 and divide-by-zero cases yield INT32_MIN, MQ = 0. */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

/* divo: as helper_div, but also updates OV/SO (overflow when the
 * quotient does not fit in 32 bits). */
target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

/* divs: 32-bit signed divide, remainder in MQ. */
target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/* divso: as helper_divs, but also updates OV/SO on the error cases. */
target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/* Extremely decomposed:
 *                      -arg / 256
 * return 256 * log10(10           + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
        /* The included file defines the 602-entry mfrom_ROM_table[]. */
#include "mfrom_table.inc.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
/* Iterate over vector elements in PPC (big-endian) element order,
 * regardless of host byte order. */
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers.  */
/* SATCVT defines cvt<from><to>(): clamp x to [min, max] and set *sat
 * when clamping occurred. */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
/* SATCVTU: unsigned source, so only the upper bound needs checking
 * (the 'min' argument is unused in the expansion). */
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

/* lvsl: fill the vector with ascending byte indices starting at sh & 0xf. */
void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

/* lvsr: as lvsl, but starting at 0x10 - (sh & 0xf). */
void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

/* mtvscr: load VSCR from the last PPC word of the source vector and
 * propagate the NJ (non-Java) bit into the softfloat flush-to-zero mode. */
void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
#if defined(HOST_WORDS_BIGENDIAN)
    env->vscr = r->u32[3];
#else
    env->vscr = r->u32[0];
#endif
    set_flush_to_zero(vscr_nj, &env->vec_status);
}

/* vaddcuw: per-word carry-out of unsigned add — ~a < b iff a + b
 * overflows 32 bits. */
void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

/* vprtybw */
/* Parity of each word: XOR-fold the word down to its low bit. */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
/* Parity of each doubleword. */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
/* Parity of the full quadword; result in the low doubleword. */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->VsrD(1) = res & 1;
    r->VsrD(0) = 0;
}

/* Element-wise modulo (non-saturating) arithmetic over a vector. */
#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
#define VARITH(suffix, element)                 \
    VARITH_DO(add##suffix, +, element)          \
    VARITH_DO(sub##suffix, -, element)
VARITH(ubm, u8)
VARITH(uhm, u16)
VARITH(uwm, u32)
VARITH(udm, u64)
VARITH_DO(muluwm, *, u32)
#undef VARITH_DO
#undef VARITH

/* Element-wise single-precision FP operation using env->vec_status. */
#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status);   \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

/* Fused multiply-add: r = a * c + b per element, with 'type' selecting
 * the float_muladd_* negation flags (0 for vmaddfp). */
#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
                                       type, &env->vec_status);         \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

/* One saturating element operation: widen to 'type', apply 'op', then
 * clamp back down with the cvt helper (which sets 'sat'). */
#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

/* Saturating element-wise arithmetic; VSCR[SAT] is set sticky when any
 * element saturated.  The switch on element size expands to the same
 * VARITHSAT_CASE in each arm. */
#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,   \
                        ppc_avr_t *b)                                   \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            switch (sizeof(r->element[0])) {                            \
            case 1:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 2:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 4:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

/* Element-wise average, rounded up: (a + b + 1) >> 1 in a wider type. */
#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG

/* |a - b| per element, computed branch-wise to stay in unsigned math. */
#define VABSDU_DO(name, element)                                        \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = (a->element[i] > b->element[i]) ?           \
                            (a->element[i] - b->element[i]) :           \
                            (b->element[i] - a->element[i]);            \
        }                                                               \
    }

/* VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element)                   \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU

/* Integer-to-float convert, then scale by 2^-uim. */
#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f32[i] = float32_scalbn(t, -uim, &env->vec_status);      \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

/* Element-wise compare producing all-ones/all-zeros masks.  With
 * 'record', CR6 gets bit 3 (all matched) and bit 1 (none matched). */
#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP

/* Compare-not-equal; with 'cmpzero' a zero element on either side also
 * counts as a match (used by the vcmpnez* variants). */
#define VCMPNE_DO(suffix, element, etype, cmpzero, record)              \
    void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r,          \
                               ppc_avr_t *a, ppc_avr_t *b)              \
    {                                                                   \
        etype ones = (etype)-1;                                         \
        etype all = ones;                                               \
        etype result, none = 0;                                         \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (cmpzero) {                                              \
                result = ((a->element[i] == 0)                          \
                          || (b->element[i] == 0)                       \
                          || (a->element[i] != b->element[i]) ?         \
                          ones : 0x0);                                  \
            } else {                                                    \
                result = (a->element[i] != b->element[i]) ? ones : 0x0; \
            }                                                           \
            r->element[i] = result;                                     \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }

/* VCMPNEZ - Vector compare not equal to zero
 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VCMPNE(suffix, element, etype, cmpzero)         \
    VCMPNE_DO(suffix, element, etype, cmpzero, 0)       \
    VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
VCMPNE(zb, u8, uint8_t, 1)
VCMPNE(zh, u16, uint16_t, 1)
VCMPNE(zw, u32, uint32_t, 1)
VCMPNE(b, u8, uint8_t, 0)
VCMPNE(h, u16, uint16_t, 0)
VCMPNE(w, u32, uint32_t, 0)
#undef VCMPNE_DO
#undef VCMPNE

/* FP compares via float32_compare_quiet(); unordered (NaN) elements
 * always yield 0.  'compare order' expresses eq / ge / gt against the
 * float_relation_* result (e.g. gefp uses rel != less). */
#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f32[i], b->f32[i],       \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

/* vcmpbfp: bounds compare.  Per element, bit 31 is set when a > b and
 * bit 30 when a < -b; NaN comparisons set both.  With 'record', CR6
 * bit 1 is set when every element was within bounds. */
static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        int le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
                                           &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f32[i]);
            int ge_rel = float32_compare_quiet(a->f32[i], bneg,
                                               &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

/* Float-to-int convert with saturation: scale by 2^uim, round toward
 * zero (local rounding mode), NaN -> 0, and set VSCR[SAT] on clamp. */
#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            if (float32_is_any_nan(b->f32[i])) {                        \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f32[i], &s);          \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

/* vclzlsbb: count, from PPC element 0, the bytes whose LSB is clear
 * before the first byte with LSB set. */
target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        if (r->u8[i] & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

/* vctzlsbb: same count but starting from the other end of the vector. */
target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
#if defined(HOST_WORDS_BIGENDIAN)
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
#endif
        if (r->u8[i] & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

/* vmhaddshs: (a * b >> 15) + c per halfword, saturated to 16 bits;
 * VSCR[SAT] set if any element saturated. */
void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

/* vmhraddshs: as vmhaddshs but with rounding (+0x4000 before shift). */
void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

/* Element-wise min/max ('compare' picks which operand survives). */
#define VMINMAX_DO(name, compare, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (a->element[i] compare b->element[i]) {                  \
                r->element[i] = b->element[i];                          \
            } else {                                                    \
                r->element[i] = a->element[i];                          \
            }                                                           \
        }                                                               \
    }
#define VMINMAX(suffix, element)                \
    VMINMAX_DO(min##suffix, >, element)         \
    VMINMAX_DO(max##suffix, <, element)
VMINMAX(sb, s8)
VMINMAX(sh, s16)
VMINMAX(sw, s32)
VMINMAX(sd, s64)
VMINMAX(ub, u8)
VMINMAX(uh, u16)
VMINMAX(uw, u32)
VMINMAX(ud, u64)
#undef VMINMAX_DO
#undef VMINMAX

/* vmladduhm: low 16 bits of a * b + c per halfword (modulo). */
void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

/* Merge: interleave elements from a and b; 'ofs' selects the low half
 * (mrgl*) or high half (mrgh*) of the sources.  A temporary is used
 * because r may alias a or b. */
#define VMRG_DO(name, element, access, ofs)                             \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i, half = ARRAY_SIZE(r->element) / 2;                       \
                                                                        \
        for (i = 0; i < half; i++) {                                    \
            result.access(i * 2 + 0) = a->access(i + ofs);              \
            result.access(i * 2 + 1) = b->access(i + ofs);              \
        }                                                               \
        *r = result;                                                    \
    }

#define VMRG(suffix, element, access)           \
    VMRG_DO(mrgl##suffix, element, access, half)\
    VMRG_DO(mrgh##suffix, element, access, 0)
VMRG(b, u8, VsrB)
VMRG(h, u16, VsrH)
VMRG(w, u32, VsrW)
#undef VMRG_DO
#undef VMRG

/* vmsummbm: signed-by-unsigned byte multiply, sum 4 products per word,
 * accumulate into c. */
void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

/* vmsumshm: signed halfword multiply-sum, modulo accumulate into c. */
void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

/* vmsumshs: as vmsumshm, but the per-word accumulation is done in
 * 64 bits and saturated to a signed word (VSCR[SAT] on clamp). */
void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

/* vmsumubm: unsigned byte multiply-sum, modulo accumulate into c. */
void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

/* vmsumuhm: unsigned halfword multiply-sum, modulo accumulate into c. */
void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

/* vmsumuhs: saturating variant of vmsumuhm (64-bit accumulate, clamp
 * to unsigned word, VSCR[SAT] on clamp). */
void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

/* Widening multiply of the even-numbered elements into double-width
 * result elements ('cast' is the widened product type). */
#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
                                     (cast)b->mul_access(i);            \
        }                                                               \
    }

/* Same as VMUL_DO_EVN but for the odd-numbered elements. */
#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) *       \
                                     (cast)b->mul_access(i + 1);        \
        }                                                               \
    }

#define VMUL(suffix, mul_element, mul_access, prod_access, cast)        \
    VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \
    VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
VMUL(sb, s8, VsrSB, VsrSH, int16_t)
VMUL(sh, s16, VsrSH, VsrSW, int32_t)
VMUL(sw, s32, VsrSW, VsrSD, int64_t)
VMUL(ub, u8, VsrB, VsrH, uint16_t)
VMUL(uh, u16, VsrH, VsrW, uint32_t)
VMUL(uw, u32, VsrW, VsrD, uint64_t)
#undef VMUL_DO_EVN
#undef VMUL_DO_ODD
#undef VMUL

/* vperm: each byte of c selects one of the 32 bytes of a:b (bit 0x10
 * chooses b).  A temporary is used because r may alias a source. */
void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = s & 0xf;
#else
        int index = 15 - (s & 0xf);
#endif

        if (s & 0x10) {
            result.u8[i] = b->u8[index];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
    *r = result;
}

/* vpermr: as vperm, but indexing from the opposite end and with the
 * a/b selection inverted. */
void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                   ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = 15 - (s & 0xf);
#else
        int index = s & 0xf;
#endif

        if (s & 0x10) {
            result.u8[i] = a->u8[index];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
    *r = result;
}

/* Host-endian-neutral accessors for the vbperm helpers below. */
#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->u64[1 - i], 63 - index, 1))
#endif

/* vbpermd: per doubleword, gather 8 bits of a selected by the byte
 * indices in b (indices >= 64 contribute 0). */
void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}

/* vbpermq: gather 16 bits of the full quadword a, selected by the byte
 * indices in b; result in the high doubleword. */
void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63 - (index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->VsrD(0) = perm;
    r->VsrD(1) = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

/* Lookup table used by vgbbd: each set bit j of the 8-bit index places
 * 0x80 into byte j of the doubleword entry. */
static const uint64_t VGBBD_MASKS[256] = {
    0x0000000000000000ull, /* 00 */
    0x0000000000000080ull, /* 01 */
    0x0000000000008000ull, /* 02 */
    0x0000000000008080ull, /* 03 */
    0x0000000000800000ull, /* 04 */
    0x0000000000800080ull, /* 05 */
    0x0000000000808000ull, /* 06 */
    0x0000000000808080ull, /* 07 */
    0x0000000080000000ull, /* 08 */
    0x0000000080000080ull, /* 09 */
    0x0000000080008000ull, /* 0A */
    0x0000000080008080ull, /* 0B */
    0x0000000080800000ull, /* 0C */
    0x0000000080800080ull, /* 0D */
    0x0000000080808000ull, /* 0E */
    0x0000000080808080ull, /* 0F */
    0x0000008000000000ull, /* 10 */
    0x0000008000000080ull, /* 11 */
    0x0000008000008000ull, /* 12 */
    0x0000008000008080ull, /* 13 */
    0x0000008000800000ull, /* 14 */
    0x0000008000800080ull, /* 15 */
    0x0000008000808000ull, /* 16 */
    0x0000008000808080ull, /* 17 */
    0x0000008080000000ull, /* 18 */
    0x0000008080000080ull, /* 19 */
    0x0000008080008000ull, /* 1A */
    0x0000008080008080ull, /* 1B */
    0x0000008080800000ull, /* 1C */
    0x0000008080800080ull, /* 1D */
    0x0000008080808000ull, /* 1E */
    /* 1F */
    0x0000008080808080ull,
    /* 20-23 */
    0x0000800000000000ull, 0x0000800000000080ull,
    0x0000800000008000ull, 0x0000800000008080ull,
    /* 24-27 */
    0x0000800000800000ull, 0x0000800000800080ull,
    0x0000800000808000ull, 0x0000800000808080ull,
    /* 28-2B */
    0x0000800080000000ull, 0x0000800080000080ull,
    0x0000800080008000ull, 0x0000800080008080ull,
    /* 2C-2F */
    0x0000800080800000ull, 0x0000800080800080ull,
    0x0000800080808000ull, 0x0000800080808080ull,
    /* 30-33 */
    0x0000808000000000ull, 0x0000808000000080ull,
    0x0000808000008000ull, 0x0000808000008080ull,
    /* 34-37 */
    0x0000808000800000ull, 0x0000808000800080ull,
    0x0000808000808000ull, 0x0000808000808080ull,
    /* 38-3B */
    0x0000808080000000ull, 0x0000808080000080ull,
    0x0000808080008000ull, 0x0000808080008080ull,
    /* 3C-3F */
    0x0000808080800000ull, 0x0000808080800080ull,
    0x0000808080808000ull, 0x0000808080808080ull,
    /* 40-43 */
    0x0080000000000000ull, 0x0080000000000080ull,
    0x0080000000008000ull, 0x0080000000008080ull,
    /* 44-47 */
    0x0080000000800000ull, 0x0080000000800080ull,
    0x0080000000808000ull, 0x0080000000808080ull,
    /* 48-4B */
    0x0080000080000000ull, 0x0080000080000080ull,
    0x0080000080008000ull, 0x0080000080008080ull,
    /* 4C-4F */
    0x0080000080800000ull, 0x0080000080800080ull,
    0x0080000080808000ull, 0x0080000080808080ull,
    /* 50-53 */
    0x0080008000000000ull, 0x0080008000000080ull,
    0x0080008000008000ull, 0x0080008000008080ull,
    /* 54-57 */
    0x0080008000800000ull, 0x0080008000800080ull,
    0x0080008000808000ull, 0x0080008000808080ull,
    /* 58-5B */
    0x0080008080000000ull, 0x0080008080000080ull,
    0x0080008080008000ull, 0x0080008080008080ull,
    /* 5C-5F */
    0x0080008080800000ull, 0x0080008080800080ull,
    0x0080008080808000ull, 0x0080008080808080ull,
    /* 60-63 */
    0x0080800000000000ull, 0x0080800000000080ull,
    0x0080800000008000ull, 0x0080800000008080ull,
    /* 64-67 */
    0x0080800000800000ull, 0x0080800000800080ull,
    0x0080800000808000ull, 0x0080800000808080ull,
    /* 68-6B */
    0x0080800080000000ull, 0x0080800080000080ull,
    0x0080800080008000ull, 0x0080800080008080ull,
    /* 6C-6F */
    0x0080800080800000ull, 0x0080800080800080ull,
    0x0080800080808000ull, 0x0080800080808080ull,
    /* 70-73 */
    0x0080808000000000ull, 0x0080808000000080ull,
    0x0080808000008000ull, 0x0080808000008080ull,
    /* 74-77 */
    0x0080808000800000ull, 0x0080808000800080ull,
    0x0080808000808000ull, 0x0080808000808080ull,
    /* 78-7B */
    0x0080808080000000ull, 0x0080808080000080ull,
    0x0080808080008000ull, 0x0080808080008080ull,
    /* 7C-7F */
    0x0080808080800000ull, 0x0080808080800080ull,
    0x0080808080808000ull, 0x0080808080808080ull,
    /* 80-83 */
    0x8000000000000000ull, 0x8000000000000080ull,
    0x8000000000008000ull, 0x8000000000008080ull,
    /* 84-87 */
    0x8000000000800000ull, 0x8000000000800080ull,
    0x8000000000808000ull, 0x8000000000808080ull,
    /* 88-8B */
    0x8000000080000000ull, 0x8000000080000080ull,
    0x8000000080008000ull, 0x8000000080008080ull,
    /* 8C-8F */
    0x8000000080800000ull, 0x8000000080800080ull,
    0x8000000080808000ull, 0x8000000080808080ull,
    /* 90-93 */
    0x8000008000000000ull, 0x8000008000000080ull,
    0x8000008000008000ull, 0x8000008000008080ull,
    /* 94-97 */
    0x8000008000800000ull, 0x8000008000800080ull,
    0x8000008000808000ull, 0x8000008000808080ull,
    /* 98-9B */
    0x8000008080000000ull, 0x8000008080000080ull,
    0x8000008080008000ull, 0x8000008080008080ull,
    /* 9C-9F */
    0x8000008080800000ull, 0x8000008080800080ull,
    0x8000008080808000ull, 0x8000008080808080ull,
    /* A0-A3 */
    0x8000800000000000ull, 0x8000800000000080ull,
    0x8000800000008000ull, 0x8000800000008080ull,
    /* A4-A7 */
    0x8000800000800000ull, 0x8000800000800080ull,
    0x8000800000808000ull, 0x8000800000808080ull,
    /* A8-AB */
    0x8000800080000000ull, 0x8000800080000080ull,
    0x8000800080008000ull, 0x8000800080008080ull,
    /* AC-AF */
    0x8000800080800000ull, 0x8000800080800080ull,
    0x8000800080808000ull, 0x8000800080808080ull,
    /* B0-B3 */
    0x8000808000000000ull, 0x8000808000000080ull,
    0x8000808000008000ull, 0x8000808000008080ull,
    /* B4-B7 */
    0x8000808000800000ull, 0x8000808000800080ull,
    0x8000808000808000ull, 0x8000808000808080ull,
    /* B8-BB */
    0x8000808080000000ull, 0x8000808080000080ull,
    0x8000808080008000ull, 0x8000808080008080ull,
    /* BC-BF */
    0x8000808080800000ull, 0x8000808080800080ull,
    0x8000808080808000ull, 0x8000808080808080ull,
    /* C0-C3 */
    0x8080000000000000ull, 0x8080000000000080ull,
    0x8080000000008000ull, 0x8080000000008080ull,
    /* C4-C7 */
    0x8080000000800000ull, 0x8080000000800080ull,
    0x8080000000808000ull, 0x8080000000808080ull,
    /* C8-CB */
    0x8080000080000000ull, 0x8080000080000080ull,
    0x8080000080008000ull, 0x8080000080008080ull,
    /* CC-CF */
    0x8080000080800000ull, 0x8080000080800080ull,
    0x8080000080808000ull, 0x8080000080808080ull,
    /* D0-D3 */
    0x8080008000000000ull, 0x8080008000000080ull,
    0x8080008000008000ull, 0x8080008000008080ull,
    /* D4-D7 */
    0x8080008000800000ull, 0x8080008000800080ull,
    0x8080008000808000ull, 0x8080008000808080ull,
    /* D8-DB */
    0x8080008080000000ull, 0x8080008080000080ull,
    0x8080008080008000ull, 0x8080008080008080ull,
    /* DC-DF */
    0x8080008080800000ull, 0x8080008080800080ull,
    0x8080008080808000ull, 0x8080008080808080ull,
    /* E0-E3 */
    0x8080800000000000ull, 0x8080800000000080ull,
    0x8080800000008000ull, 0x8080800000008080ull,
    /* E4-E7 */
    0x8080800000800000ull, 0x8080800000800080ull,
    0x8080800000808000ull, 0x8080800000808080ull,
    /* E8-EB */
    0x8080800080000000ull, 0x8080800080000080ull,
    0x8080800080008000ull, 0x8080800080008080ull,
    /* EC-EF */
    0x8080800080800000ull, 0x8080800080800080ull,
    0x8080800080808000ull, 0x8080800080808080ull,
    /* F0-F3 */
    0x8080808000000000ull, 0x8080808000000080ull,
    0x8080808000008000ull, 0x8080808000008080ull,
    /* F4-F7 */
    0x8080808000800000ull, 0x8080808000800080ull,
    0x8080808000808000ull, 0x8080808000808080ull,
    /* F8-FB */
    0x8080808080000000ull, 0x8080808080000080ull,
    0x8080808080008000ull, 0x8080808080008080ull,
    /* FC-FF */
    0x8080808080800000ull, 0x8080808080800080ull,
    0x8080808080808000ull, 0x8080808080808080ull,
};

/* vgbbd: table-driven 8x8 bit transpose within each doubleword — each
 * source byte's bits are spread across the 8 result bytes, with the
 * byte's position selecting the destination bit column. */
void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    uint64_t t[2] = { 0, 0 };

    VECTOR_FOR_INORDER_I(i, u8) {
#if defined(HOST_WORDS_BIGENDIAN)
        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
#else
        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
#endif
    }

    r->u64[0] = t[0];
    r->u64[1] = t[1];
}

/* Carry-less (polynomial/XOR) multiply of adjacent element pairs:
 * prod[i] = srcfld a[i] clmul b[i], then each target element is the XOR
 * of one even/odd product pair. */
#define PMSUM(name, srcfld, trgfld, trgtyp)                             \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i, j;                                                           \
    trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])];                \
                                                                        \
    VECTOR_FOR_INORDER_I(i, srcfld) {                                   \
        prod[i] = 0;                                                    \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {                \
            if (a->srcfld[i] & (1ull<<j)) {                             \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);                 \
            }                                                           \
        }                                                               \
    }                                                                   \
                                                                        \
    VECTOR_FOR_INORDER_I(i, trgfld) {                                   \
        r->trgfld[i] = prod[2*i] ^ prod[2*i+1];                         \
    }                                                                   \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

/* vpmsumd: carry-less multiply of the two doubleword pairs into 128-bit
 * products, XOR-summed.  Uses native __uint128_t when available,
 * otherwise a manual two-doubleword shift/XOR accumulation. */
void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{

#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull<<j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull<<j)) {
ppc_avr_t bshift; 1558 if (j == 0) { 1559 bshift.VsrD(0) = 0; 1560 bshift.VsrD(1) = b->u64[i]; 1561 } else { 1562 bshift.VsrD(0) = b->u64[i] >> (64 - j); 1563 bshift.VsrD(1) = b->u64[i] << j; 1564 } 1565 prod[i].VsrD(1) ^= bshift.VsrD(1); 1566 prod[i].VsrD(0) ^= bshift.VsrD(0); 1567 } 1568 } 1569 } 1570 1571 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1); 1572 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0); 1573 #endif 1574 } 1575 1576 1577 #if defined(HOST_WORDS_BIGENDIAN) 1578 #define PKBIG 1 1579 #else 1580 #define PKBIG 0 1581 #endif 1582 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1583 { 1584 int i, j; 1585 ppc_avr_t result; 1586 #if defined(HOST_WORDS_BIGENDIAN) 1587 const ppc_avr_t *x[2] = { a, b }; 1588 #else 1589 const ppc_avr_t *x[2] = { b, a }; 1590 #endif 1591 1592 VECTOR_FOR_INORDER_I(i, u64) { 1593 VECTOR_FOR_INORDER_I(j, u32) { 1594 uint32_t e = x[i]->u32[j]; 1595 1596 result.u16[4*i+j] = (((e >> 9) & 0xfc00) | 1597 ((e >> 6) & 0x3e0) | 1598 ((e >> 3) & 0x1f)); 1599 } 1600 } 1601 *r = result; 1602 } 1603 1604 #define VPK(suffix, from, to, cvt, dosat) \ 1605 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1606 ppc_avr_t *a, ppc_avr_t *b) \ 1607 { \ 1608 int i; \ 1609 int sat = 0; \ 1610 ppc_avr_t result; \ 1611 ppc_avr_t *a0 = PKBIG ? a : b; \ 1612 ppc_avr_t *a1 = PKBIG ? 
b : a; \ 1613 \ 1614 VECTOR_FOR_INORDER_I(i, from) { \ 1615 result.to[i] = cvt(a0->from[i], &sat); \ 1616 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \ 1617 } \ 1618 *r = result; \ 1619 if (dosat && sat) { \ 1620 env->vscr |= (1 << VSCR_SAT); \ 1621 } \ 1622 } 1623 #define I(x, y) (x) 1624 VPK(shss, s16, s8, cvtshsb, 1) 1625 VPK(shus, s16, u8, cvtshub, 1) 1626 VPK(swss, s32, s16, cvtswsh, 1) 1627 VPK(swus, s32, u16, cvtswuh, 1) 1628 VPK(sdss, s64, s32, cvtsdsw, 1) 1629 VPK(sdus, s64, u32, cvtsduw, 1) 1630 VPK(uhus, u16, u8, cvtuhub, 1) 1631 VPK(uwus, u32, u16, cvtuwuh, 1) 1632 VPK(udus, u64, u32, cvtuduw, 1) 1633 VPK(uhum, u16, u8, I, 0) 1634 VPK(uwum, u32, u16, I, 0) 1635 VPK(udum, u64, u32, I, 0) 1636 #undef I 1637 #undef VPK 1638 #undef PKBIG 1639 1640 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1641 { 1642 int i; 1643 1644 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1645 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1646 } 1647 } 1648 1649 #define VRFI(suffix, rounding) \ 1650 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1651 ppc_avr_t *b) \ 1652 { \ 1653 int i; \ 1654 float_status s = env->vec_status; \ 1655 \ 1656 set_float_rounding_mode(rounding, &s); \ 1657 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1658 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1659 } \ 1660 } 1661 VRFI(n, float_round_nearest_even) 1662 VRFI(m, float_round_down) 1663 VRFI(p, float_round_up) 1664 VRFI(z, float_round_to_zero) 1665 #undef VRFI 1666 1667 #define VROTATE(suffix, element, mask) \ 1668 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1669 { \ 1670 int i; \ 1671 \ 1672 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1673 unsigned int shift = b->element[i] & mask; \ 1674 r->element[i] = (a->element[i] << shift) | \ 1675 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \ 1676 } \ 1677 } 1678 VROTATE(b, u8, 0x7) 1679 VROTATE(h, u16, 0xF) 1680 VROTATE(w, u32, 0x1F) 1681 
VROTATE(d, u64, 0x3F)
#undef VROTATE

/* vrsqrtefp: per-element reciprocal-square-root estimate, computed
 * exactly as 1/sqrt(x) in softfloat. */
void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        float32 t = float32_sqrt(b->f32[i], &env->vec_status);

        r->f32[i] = float32_div(float32_one, t, &env->vec_status);
    }
}

/* Rotate-left-then-mask helpers (vrl[wd]mi / vrl[wd]nm): b encodes the
 * rotate count and the begin/end of a contiguous bit mask.  With insert
 * set, bits outside the mask keep the previous contents of r (read via
 * src3 before it is overwritten). */
#define VRLMI(name, size, element, insert)                            \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)          \
{                                                                     \
    int i;                                                            \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                    \
        uint##size##_t src1 = a->element[i];                          \
        uint##size##_t src2 = b->element[i];                          \
        uint##size##_t src3 = r->element[i];                          \
        uint##size##_t begin, end, shift, mask, rot_val;              \
                                                                      \
        shift = extract##size(src2, 0, 6);                            \
        end   = extract##size(src2, 8, 6);                            \
        begin = extract##size(src2, 16, 6);                           \
        rot_val = rol##size(src1, shift);                             \
        mask = mask_u##size(begin, end);                              \
        if (insert) {                                                 \
            r->element[i] = (rot_val & mask) | (src3 & ~mask);        \
        } else {                                                      \
            r->element[i] = (rot_val & mask);                         \
        }                                                             \
    }                                                                 \
}

VRLMI(vrldmi, 64, u64, 1);
VRLMI(vrlwmi, 32, u32, 1);
VRLMI(vrldnm, 64, u64, 0);
VRLMI(vrlwnm, 32, u32, 0);

/* vsel: bitwise select — result bit comes from b where the mask bit in
 * c is 1, from a where it is 0. */
void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                 ppc_avr_t *c)
{
    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
}

/* vexptefp: per-element 2**x, computed in softfloat. */
void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
    }
}

/* vlogefp: per-element log2(x), computed in softfloat. */
void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
    }
}

/* vextu[bhw][lr]x: extract a size-bit field from the 128-bit vector b
 * at the byte offset in a, counted from the left or right end; the two
 * host-endian variants compute the same architected bit position. */
#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTU_X_DO(name, size, left)                                \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)      \
{                                                                   \
    int index;                                                      \
    if (left) {                                                     \
        index = (a & 0xf) * 8;                                      \
    } else {                                                        \
        index = ((15 - (a & 0xf) + 1) * 8) - size;                  \
    }                                                               \
    return int128_getlo(int128_rshift(b->s128, index)) &            \
        MAKE_64BIT_MASK(0, size);                                   \
}
#else
#define VEXTU_X_DO(name, size, left)                                \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)      \
{                                                                   \
    int index;                                                      \
    if (left) {                                                     \
        index = ((15 - (a & 0xf) + 1) * 8) - size;                  \
    } else {                                                        \
        index = (a & 0xf) * 8;                                      \
    }                                                               \
    return int128_getlo(int128_rshift(b->s128, index)) &            \
        MAKE_64BIT_MASK(0, size);                                   \
}
#endif

VEXTU_X_DO(vextublx,  8, 1)
VEXTU_X_DO(vextuhlx, 16, 1)
VEXTU_X_DO(vextuwlx, 32, 1)
VEXTU_X_DO(vextubrx,  8, 0)
VEXTU_X_DO(vextuhrx, 16, 0)
VEXTU_X_DO(vextuwrx, 32, 0)
#undef VEXTU_X_DO

/* The specification says that the results are undefined if all of the
 * shift counts are not identical.  We check to make sure that they are
 * to conform to what real hardware appears to do.
 */
/* 128-bit shift left (vsl) / right (vsr) by the 3-bit count taken from
 * the last byte of b; only performed when every byte of b agrees on the
 * count (see note above).  shift == 0 is special-cased so the 64-shift
 * carry expressions below never shift by the full word width. */
#define VSHIFT(suffix, leftp)                                           \
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
    {                                                                   \
        int shift = b->VsrB(15) & 0x7;                                  \
        int doit = 1;                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
        }                                                               \
        if (doit) {                                                     \
            if (shift == 0) {                                           \
                *r = *a;                                                \
            } else if (leftp) {                                         \
                uint64_t carry = a->VsrD(1) >> (64 - shift);            \
                                                                        \
                r->VsrD(0) = (a->VsrD(0) << shift) | carry;             \
                r->VsrD(1) = a->VsrD(1) << shift;                       \
            } else {                                                    \
                uint64_t carry = a->VsrD(0) << (64 - shift);            \
                                                                        \
                r->VsrD(1) = (a->VsrD(1) >> shift) | carry;             \
                r->VsrD(0) = a->VsrD(0) >> shift;                       \
            }                                                           \
        }                                                               \
    }
VSHIFT(l, 1)
VSHIFT(r, 0)
#undef VSHIFT

/* Per-element logical shift left by the masked count in b. */
#define VSL(suffix, element, mask)                                      \
    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] << shift;                     \
        }                                                               \
    }
VSL(b, u8, 0x7)
VSL(h, u16, 0x0F)
VSL(w, u32, 0x1F)
VSL(d, u64, 0x3F)
#undef VSL

/* vslv: shift each byte left by its own count, pulling in the low bits
 * of the following byte (zero past the end of the vector). */
void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->u8[i] & 0x7;             /* extract shift value */
        bytes = (a->u8[i] << 8) +           /* extract adjacent bytes */
                (((i + 1) < size) ? a->u8[i + 1] : 0);
        r->u8[i] = (bytes << shift) >> 8;   /* shift and store result */
    }
}

/* vsrv: shift each byte right by its own count, pulling in the high
 * bits of the preceding byte (zero before the start of the vector). */
void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /* Use reverse order, as destination and source register can be same. Its
     * being modified in place saving temporary, reverse order will guarantee
     * that computed result is not fed back.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->u8[i] & 0x7;                 /* extract shift value */
        bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
                                                /* extract adjacent bytes */
        r->u8[i] = (bytes >> shift) & 0xFF;     /* shift and store result */
    }
}

/* vsldoi: concatenate a:b and take 16 bytes starting sh bytes in
 * (architected left-to-right order); LE hosts index from the other
 * end to get the same architected result. */
void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;       /* local copy: r may alias a or b */

#if defined(HOST_WORDS_BIGENDIAN)
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.u8[i] = b->u8[index - 0x10];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = (16 - sh) + i;
        if (index > 0xf) {
            result.u8[i] = a->u8[index - 0x10];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
#endif
    *r = result;
}

/* vslo: shift the whole vector left by the byte count encoded in bits
 * 6:4 of the last byte of b, zero-filling; memmove because r may
 * alias a. */
void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16-sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

/* Experimental testing shows that hardware masks the immediate.
 */
#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
#if defined(HOST_WORDS_BIGENDIAN)
#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
#else
#define SPLAT_ELEMENT(element)                                  \
    (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
#endif
/* vsplt[bhw]: replicate the element selected by the (masked) immediate
 * into every element of the result. */
#define VSPLT(suffix, element)                                          \
    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
    {                                                                   \
        uint32_t s = b->element[SPLAT_ELEMENT(element)];                \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = s;                                          \
        }                                                               \
    }
VSPLT(b, u8)
VSPLT(h, u16)
VSPLT(w, u32)
#undef VSPLT
#undef SPLAT_ELEMENT
#undef _SPLAT_MASKED
/* vinsert[bhwd]: copy one element-sized field from a fixed position in
 * b into r at the architected byte offset 'index' (LE recomputes the
 * host byte offset from the other end). */
#if defined(HOST_WORDS_BIGENDIAN)
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])],           \
               sizeof(r->element[0]));                                      \
    }
#else
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        uint32_t d = (16 - index) - sizeof(r->element[0]);                  \
        memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0]));               \
    }
#endif
VINSERT(b, u8)
VINSERT(h, u16)
VINSERT(w, u32)
VINSERT(d, u64)
#undef VINSERT
/* vextract[ubhw]/d: pull one element-sized field out of b at byte
 * offset 'index' into a fixed position in r, zeroing the rest. */
#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
        memset(&r->u8[8], 0, 8);                                             \
        memset(&r->u8[0], 0, 8 - es);                                        \
    }
#else
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        uint32_t s = (16 - index) - es;                                      \
        memmove(&r->u8[8], &b->u8[s], es);                                   \
        memset(&r->u8[0],
 0, 8);                                            \
        memset(&r->u8[8 + es], 0, 8 - es);                                   \
    }
#endif
VEXTRACT(ub, u8)
VEXTRACT(uh, u16)
VEXTRACT(uw, u32)
VEXTRACT(d, u64)
#undef VEXTRACT

/* xxextractuw: copy the word at byte offset 'index' of VSR xbn into a
 * fixed position of VSR xtn, zeroing the rest (offsets wrap mod 16). */
void helper_xxextractuw(CPUPPCState *env, target_ulong xtn,
                        target_ulong xbn, uint32_t index)
{
    ppc_vsr_t xt, xb;
    size_t es = sizeof(uint32_t);
    uint32_t ext_index;
    int i;

    getVSR(xbn, &xb, env);
    memset(&xt, 0, sizeof(xt));

#if defined(HOST_WORDS_BIGENDIAN)
    ext_index = index;
    for (i = 0; i < es; i++, ext_index++) {
        xt.u8[8 - es + i] = xb.u8[ext_index % 16];
    }
#else
    ext_index = 15 - index;
    for (i = es - 1; i >= 0; i--, ext_index--) {
        xt.u8[8 + i] = xb.u8[ext_index % 16];
    }
#endif

    putVSR(xtn, &xt, env);
}

/* xxinsertw: insert a word from a fixed position of VSR xbn into VSR
 * xtn at byte offset 'index', leaving the other bytes of xtn intact
 * (copies that would fall outside the register are clipped). */
void helper_xxinsertw(CPUPPCState *env, target_ulong xtn,
                      target_ulong xbn, uint32_t index)
{
    ppc_vsr_t xt, xb;
    size_t es = sizeof(uint32_t);
    int ins_index, i = 0;

    getVSR(xbn, &xb, env);
    getVSR(xtn, &xt, env);

#if defined(HOST_WORDS_BIGENDIAN)
    ins_index = index;
    for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
        xt.u8[ins_index] = xb.u8[8 - es + i];
    }
#else
    ins_index = 15 - index;
    for (i = es - 1; i >= 0 && ins_index >= 0; i--, ins_index--) {
        xt.u8[ins_index] = xb.u8[8 + i];
    }
#endif

    putVSR(xtn, &xt, env);
}

/* Sign-extend the narrow value in the low bits of each wide element
 * in place: mask to the narrow width, cast to the narrow signed type,
 * then widen again. */
#define VEXT_SIGNED(name, element, mask, cast, recast)              \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
{                                                                   \
    int i;                                                          \
    VECTOR_FOR_INORDER_I(i, element) {                              \
        r->element[i] = (recast)((cast)(b->element[i] & mask));     \
    }                                                               \
}
VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t)
VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t)
VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t)
VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t)
VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t)
#undef VEXT_SIGNED

/* Per-element two's-complement negation. */
#define VNEG(name, element)                                         \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
{                                                                   \
    int i;                                                          \
    VECTOR_FOR_INORDER_I(i, element) {                              \
        r->element[i] = -b->element[i];                             \
    }                                                               \
}
VNEG(vnegw, s32)
VNEG(vnegd, s64)
#undef VNEG

/* vspltis[bhw]: splat the sign-extended 5-bit immediate (extended via
 * the <<3 / >>3 trick on an int8_t) into every element. */
#define VSPLTI(suffix, element, splat_type)                     \
    void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
    {                                                           \
        splat_type x = (int8_t)(splat << 3) >> 3;               \
        int i;                                                  \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
            r->element[i] = x;                                  \
        }                                                       \
    }
VSPLTI(b, s8, int8_t)
VSPLTI(h, s16, int16_t)
VSPLTI(w, s32, int32_t)
#undef VSPLTI

/* Per-element right shift by the masked count in b; signed element
 * types give the arithmetic (vsra*) variants, unsigned the logical. */
#define VSR(suffix, element, mask)                                      \
    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = a->element[i] >> shift;                     \
        }                                                               \
    }
VSR(ab, s8, 0x7)
VSR(ah, s16, 0xF)
VSR(aw, s32, 0x1F)
VSR(ad, s64, 0x3F)
VSR(b, u8, 0x7)
VSR(h, u16, 0xF)
VSR(w, u32, 0x1F)
VSR(d, u64, 0x3F)
#undef VSR

/* vsro: shift the whole vector right by the byte count encoded in bits
 * 6:4 of the last byte of b, zero-filling; memmove because r may
 * alias a. */
void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

/* vsubcuw: per-word borrow indicator — 1 when a - b does not borrow. */
void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = a->u32[i] >= b->u32[i];
    }
}

/* vsumsws: saturating sum of all four words of a plus the last word of
 * b, placed in the last word of the result (others zeroed); latches
 * VSCR[SAT] on saturation. */
void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;       /* local copy: r may alias a or b */
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = ARRAY_SIZE(r->s32)-1;
#else
    upper = 0;
#endif
    t = (int64_t)b->s32[upper];
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->s32[i];
        result.s32[i] = 0;
    }
    result.s32[upper] = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

/* vsum2sws: per doubleword, saturating sum of its two words of a plus
 * one word of b.  NOTE(review): the inner loop bound reuses
 * ARRAY_SIZE(r->u64) (== 2) as the words-per-doubleword count — correct
 * only because both happen to be 2. */
void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;       /* local copy: r may alias a or b */
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = 1;
#else
    upper = 0;
#endif
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->s32[upper + i * 2];

        result.u64[i] = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->s32[2 * i + j];
        }
        result.s32[upper + i * 2] = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

/* vsum4sbs: per word, saturating sum of its four signed bytes of a
 * plus the word of b. */
void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

/* vsum4shs: per word, saturating sum of its two signed halfwords of a
 * plus the word of b. */
void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

/* vsum4ubs: per word, saturating (unsigned) sum of its four bytes of a
 * plus the word of b. */
void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t =
 (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

/* UPKHI/UPKLO select which half of the packed source corresponds to
 * the "high"/"low" unpack on this host's byte order. */
#if defined(HOST_WORDS_BIGENDIAN)
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
/* vupk{h,l}px: unpack 16-bit 1:5:5:5 pixels to 32-bit pixels, with the
 * 1-bit alpha replicated to a full 0xff/0x00 byte. */
#define VUPKPX(suffix, hi)                                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
            uint16_t e = b->u16[hi ? i : i+4];                          \
            uint8_t a = (e >> 15) ? 0xff : 0;                           \
            uint8_t r = (e >> 10) & 0x1f;                               \
            uint8_t g = (e >> 5) & 0x1f;                                \
            uint8_t b = e & 0x1f;                                       \
                                                                        \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
        }                                                               \
        *r = result;                                                    \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

/* vupk{h,l}s[bhw]: sign-extend one half of the packed source elements
 * into full-width elements (implicit conversion does the extension). */
#define VUPK(suffix, unpacked, packee, hi)                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        if (hi) {                                                       \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
                result.unpacked[i] = b->packee[i];                      \
            }                                                           \
        } else {                                                        \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                                 \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(hsw, s64, s32, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
VUPK(lsw, s64, s32, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

/* Apply the scalar operation 'name' (defined as a macro below) to each
 * element of b. */
#define VGENERIC_DO(name, element)                                      \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, element) {                              \
            r->element[i] = name(b->element[i]);                        \
        }                                                               \
    }

/* Count-leading-zeros per element; the narrow types shift into the top
 * of a 32-bit value so clz32 counts the right bits, and 0 is special-
 * cased to the element width. */
#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
#define clzw(v) clz32((v))
#define clzd(v) clz64((v))

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)
VGENERIC_DO(clzw, u32)
VGENERIC_DO(clzd, u64)

#undef clzb
#undef clzh
#undef clzw
#undef clzd

/* Count-trailing-zeros per element; 0 maps to the element width. */
#define ctzb(v) ((v) ? ctz32(v) : 8)
#define ctzh(v) ((v) ? ctz32(v) : 16)
#define ctzw(v) ctz32((v))
#define ctzd(v) ctz64((v))

VGENERIC_DO(ctzb, u8)
VGENERIC_DO(ctzh, u16)
VGENERIC_DO(ctzw, u32)
VGENERIC_DO(ctzd, u64)

#undef ctzb
#undef ctzh
#undef ctzw
#undef ctzd

/* Population count per element. */
#define popcntb(v) ctpop8(v)
#define popcnth(v) ctpop16(v)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)

VGENERIC_DO(popcntb, u8)
VGENERIC_DO(popcnth, u16)
VGENERIC_DO(popcntw, u32)
VGENERIC_DO(popcntd, u64)

#undef popcntb
#undef popcnth
#undef popcntw
#undef popcntd

#undef VGENERIC_DO

/* The 128-bit constant 1, laid out per host byte order. */
#if defined(HOST_WORDS_BIGENDIAN)
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif

/* 128-bit arithmetic helpers built from two 64-bit halves, used when
 * the compiler has no native __int128 support. */
#ifndef CONFIG_INT128

/* Bitwise NOT of a quadword. */
static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
{
    t->u64[0] = ~a.u64[0];
    t->u64[1] = ~a.u64[1];
}

/* Unsigned quadword compare: -1, 0 or 1 as a <, ==, > b. */
static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
{
    if (a.VsrD(0) < b.VsrD(0)) {
        return -1;
    } else if (a.VsrD(0) > b.VsrD(0)) {
        return 1;
    } else if (a.VsrD(1) < b.VsrD(1)) {
        return -1;
    } else if (a.VsrD(1) > b.VsrD(1)) {
        return 1;
    } else {
        return 0;
    }
}

/* Quadword add; (~lo(a) < lo(b)) detects the carry out of the low
 * doubleword. */
static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
                 (~a.VsrD(1) < b.VsrD(1));
}

/* Quadword add returning the carry out of the full 128-bit sum
 * (a + b overflows iff b > ~a). */
static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    ppc_avr_t not_a;
    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
                 (~a.VsrD(1) < b.VsrD(1));
    avr_qw_not(&not_a, a);
    return avr_qw_cmpu(not_a, b) < 0;
}

#endif

/* vadduqm: 128-bit modulo add. */
void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128;
#else
    avr_qw_add(r, *a, *b);
#endif
}

/* vaddeuqm: 128-bit add with carry-in taken from the low bit of c. */
void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else

    if (c->VsrD(1) & 1) {
        ppc_avr_t tmp;

        tmp.VsrD(0) = 0;
        tmp.VsrD(1) = c->VsrD(1) & 1;
        avr_qw_add(&tmp, *a, tmp);
        avr_qw_add(r, tmp, *b);
    } else {
        avr_qw_add(r, *a, *b);
    }
#endif
}

/* vaddcuq: carry out of the 128-bit add a + b (1 iff b > ~a). */
void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < b->u128);
#else
    ppc_avr_t not_a;

    avr_qw_not(&not_a, *a);

    r->VsrD(0) = 0;
    r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
#endif
}

/* vaddecuq: carry out of the 128-bit add a + b + carry-in (low bit of
 * c); the extra carry-in can only matter when a + b itself did not
 * carry but sits exactly at the wrap boundary. */
void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    int carry_out = (~a->u128 < b->u128);
    if (!carry_out && (c->u128 & 1)) {
        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
                    ((a->u128 != 0) || (b->u128 != 0));
    }
    r->u128 = carry_out;
#else

    int carry_in = c->VsrD(1) & 1;
    int carry_out = 0;
    ppc_avr_t tmp;

    carry_out = avr_qw_addc(&tmp, *a, *b);

    if (!carry_out && carry_in) {
        ppc_avr_t one = QW_ONE;
        carry_out = avr_qw_addc(&tmp, tmp, one);
    }
    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
#endif
}

/* vsubuqm: 128-bit modulo subtract, computed as a + ~b + 1. */
void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 - b->u128;
#else
    ppc_avr_t tmp;
    ppc_avr_t one = QW_ONE;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&tmp, *a, tmp);
    avr_qw_add(r, tmp, one);
#endif
}

/* vsubeuqm: extended subtract a + ~b + carry-in (low bit of c). */
void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
#else
    ppc_avr_t tmp, sum;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&sum, *a, tmp);

    tmp.VsrD(0) = 0;
    tmp.VsrD(1) = c->VsrD(1) & 1;
    avr_qw_add(r, sum, tmp);
#endif
}

/* vsubcuq: carry out of a + ~b + 1, i.e. 1 when a >= b, or when
 * a + ~b is all-ones (so the +1 propagates a carry). */
void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < ~b->u128) ||
              (a->u128 + ~b->u128 == (__uint128_t)-1);
#else
    int carry = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
    }
    r->VsrD(0) = 0;
    r->VsrD(1) = carry;
#endif
}

/* vsubecuq: carry out of a + ~b + carry-in (low bit of c). */
void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 =
        (~a->u128 < ~b->u128) ||
        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
#else
    int carry_in = c->VsrD(1) & 1;
    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry_out && carry_in) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
    }

    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
#endif
}

/* BCD sign-nibble encodings and national-decimal sign characters used
 * by the bcd* helpers below. */
#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

/* Map BCD digit number n (0 = sign nibble position) to its byte in the
 * vector on this host. */
#if defined(HOST_WORDS_BIGENDIAN)
#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
#else
#define BCD_DIG_BYTE(n) ((n) / 2)
#endif
/*
 * Return the sign of a packed-decimal value from its sign nibble
 * (digit 0): +1 for any of the four plus codes, -1 for either minus
 * code, 0 if the nibble is not a valid sign code.
 */
static int bcd_get_sgn(ppc_avr_t *bcd)
{
    switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
    case BCD_PLUS_PREF_1:
    case BCD_PLUS_PREF_2:
    case BCD_PLUS_ALT_1:
    case BCD_PLUS_ALT_2:
    {
        return 1;
    }

    case BCD_NEG_PREF:
    case BCD_NEG_ALT:
    {
        return -1;
    }

    default:
    {
        return 0;
    }
    }
}

/*
 * Preferred sign code for a result: PS selects 0xC or 0xF for
 * non-negative values; negative always encodes as 0xD.
 */
static int bcd_preferred_sgn(int sgn, int ps)
{
    if (sgn >= 0) {
        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
    } else {
        return BCD_NEG_PREF;
    }
}

/*
 * Extract digit n (odd n = high nibble, even n = low nibble of its byte).
 * Sets *invalid if the nibble is not a decimal digit; *invalid is never
 * cleared here, so callers can accumulate validity over many digits.
 */
static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
{
    uint8_t result;
    if (n & 1) {
        result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
    } else {
        result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
    }

    if (unlikely(result > 9)) {
        *invalid = true;
    }
    return result;
}

/* Store digit into position n, leaving the byte's other nibble intact. */
static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
{
    if (n & 1) {
        bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
        bcd->u8[BCD_DIG_BYTE(n)] |= (digit << 4);
    } else {
        bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
        bcd->u8[BCD_DIG_BYTE(n)] |= digit;
    }
}

/* A value is valid when its sign nibble is legal and digits 1..31 are 0-9. */
static bool bcd_is_valid(ppc_avr_t *bcd)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(bcd) == 0) {
        return false;
    }

    for (i = 1; i < 32; i++) {
        bcd_get_digit(bcd, i, &invalid);
        if (unlikely(invalid)) {
            return false;
        }
    }
    return true;
}

/*
 * CR field for comparing a BCD value against zero.  The >> 4 discards
 * the sign nibble so only the 31 magnitude digits are tested.
 */
static int bcd_cmp_zero(ppc_avr_t *bcd)
{
    if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
        return CRF_EQ;
    } else {
        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
    }
}

/* National-decimal halfword n (0 = rightmost, the sign position). */
static uint16_t get_national_digit(ppc_avr_t *reg, int n)
{
#if defined(HOST_WORDS_BIGENDIAN)
    return reg->u16[7 - n];
#else
    return reg->u16[n];
#endif
}

static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
{
#if defined(HOST_WORDS_BIGENDIAN)
    reg->u16[7 - n] = val;
#else
    reg->u16[n] = val;
#endif
}

/*
 * Compare magnitudes (signs ignored), most significant digit first.
 * Returns >0, <0 or 0; the return value is meaningless if any digit is
 * invalid, which callers detect separately.
 */
static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;
    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}

/*
 * t = |a| + |b|, digit-serial schoolbook addition over digits 1..31.
 * *overflow is the carry out of the top digit; *invalid accumulates
 * any bad input digit.
 */
static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                        int *overflow)
{
    int carry = 0;
    int i;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
                        bcd_get_digit(b, i, invalid) + carry;
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
}

/*
 * t = |a| - |b| with |a| >= |b| assumed (callers order operands via
 * bcd_cmp_mag).  A borrow shows up as bit 7 of the uint8_t difference.
 */
static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                        int *overflow)
{
    int carry = 0;
    int i;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
                        bcd_get_digit(b, i, invalid) + carry;
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
}

/*
 * bcdadd. - signed BCD add of a and b with preferred-sign select PS.
 * Returns the CR field: LT/GT/EQ for the result's relation to zero,
 * SO alone for invalid operands, SO or'd in on overflow.
 */
uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{

    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            /* Same sign: add magnitudes, keep the common sign. */
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = bcd_cmp_zero(&result);
        } else {
            /* Opposite signs: subtract the smaller magnitude from the
             * larger; the result takes the larger operand's sign. */
            int magnitude = bcd_cmp_mag(a, b);
            if (magnitude > 0) {
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
                bcd_sub_mag(&result, a, b, &invalid, &overflow);
                cr = (sgna > 0) ? CRF_GT : CRF_LT;
            } else if (magnitude < 0) {
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
                bcd_sub_mag(&result, b, a, &invalid, &overflow);
                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
            } else {
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(0, ps);
                cr = CRF_EQ;
            }
        }
    }

    if (unlikely(invalid)) {
        result.VsrD(0) = result.VsrD(1) = -1;
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    }

    *r = result;

    return cr;
}

/* bcdsub. - implemented as bcdadd with b's sign flipped. */
uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

/*
 * bcdcfn. - convert national decimal (7 UTF-16-style digit halfwords
 * plus a sign halfword) to signed BCD.
 */
uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        /* Valid national digits are '0'..'9' (0x30..0x39). */
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

/*
 * bcdctn. - convert signed BCD to national decimal.  Only 7 digits fit;
 * ox_flag records that higher-order source digits were nonzero.
 * Note ps is unused here (national format has a single sign encoding).
 */
uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    /* Overflow if anything beyond the low 7 digits + sign is set. */
    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

/*
 * bcdcfz. - convert zoned decimal (16 bytes of zone-nibble + digit)
 * to signed BCD.  PS selects the expected zone (0xF) vs ASCII (0x3)
 * encoding and how the sign is carried in the rightmost zone nibble.
 */
uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        /* Byte 0's zone nibble holds the sign, so skip its zone check. */
        zone_digit = i ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
        digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
            (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

/*
 * bcdctz. - convert signed BCD to zoned decimal.  Only 16 digits fit;
 * ox_flag records truncation of higher-order digits.
 */
uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    /* Anything above the low 16 digits (plus sign nibble) overflows. */
    int ox_flag = ((b->VsrD(0) >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
    }

    /* Encode the sign into the rightmost zone nibble. */
    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

/*
 * bcdcfsq. - convert a signed 128-bit binary integer to signed BCD.
 * SO is raised when the magnitude exceeds 31 decimal digits.
 */
uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint64_t lo_value;
    uint64_t hi_value;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->VsrSD(0) < 0) {
        /* 128-bit negate: two's complement across both halves. */
        lo_value = -b->VsrSD(1);
        hi_value = ~b->VsrD(0) + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);
    } else {
        lo_value = b->VsrD(1);
        hi_value = b->VsrD(0);
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
    }

    /* Split into two 16-digit decimal halves via division by 10^15. */
    if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
            lo_value > 9999999999999999ULL) {
        cr = CRF_SO;
    }

    for (i = 1; i < 16; hi_value /= 10, i++) {
        bcd_put_digit(&ret, hi_value % 10, i);
    }

    for (; i < 32; lo_value /= 10, i++) {
        bcd_put_digit(&ret, lo_value % 10, i);
    }

    cr |= bcd_cmp_zero(&ret);

    *r = ret;

    return cr;
}

/*
 * bcdctsq. - convert signed BCD to a signed 128-bit binary integer,
 * Horner-style: value = ((d31 * 10 + d30) * 10 + ...) across 128 bits.
 */
uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        /* 128-bit negate of hi:lo. */
        r->VsrSD(1) = -lo_value;
        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
    } else {
        r->VsrSD(1) = lo_value;
        r->VsrSD(0) = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}
2990 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2991 { 2992 int i; 2993 int invalid = 0; 2994 2995 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 2996 return CRF_SO; 2997 } 2998 2999 *r = *a; 3000 bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0); 3001 3002 for (i = 1; i < 32; i++) { 3003 bcd_get_digit(a, i, &invalid); 3004 bcd_get_digit(b, i, &invalid); 3005 if (unlikely(invalid)) { 3006 return CRF_SO; 3007 } 3008 } 3009 3010 return bcd_cmp_zero(r); 3011 } 3012 3013 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 3014 { 3015 int sgnb = bcd_get_sgn(b); 3016 3017 *r = *b; 3018 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 3019 3020 if (bcd_is_valid(b) == false) { 3021 return CRF_SO; 3022 } 3023 3024 return bcd_cmp_zero(r); 3025 } 3026 3027 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3028 { 3029 int cr; 3030 #if defined(HOST_WORDS_BIGENDIAN) 3031 int i = a->s8[7]; 3032 #else 3033 int i = a->s8[8]; 3034 #endif 3035 bool ox_flag = false; 3036 int sgnb = bcd_get_sgn(b); 3037 ppc_avr_t ret = *b; 3038 ret.VsrD(1) &= ~0xf; 3039 3040 if (bcd_is_valid(b) == false) { 3041 return CRF_SO; 3042 } 3043 3044 if (unlikely(i > 31)) { 3045 i = 31; 3046 } else if (unlikely(i < -31)) { 3047 i = -31; 3048 } 3049 3050 if (i > 0) { 3051 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 3052 } else { 3053 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 3054 } 3055 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 3056 3057 *r = ret; 3058 3059 cr = bcd_cmp_zero(r); 3060 if (ox_flag) { 3061 cr |= CRF_SO; 3062 } 3063 3064 return cr; 3065 } 3066 3067 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3068 { 3069 int cr; 3070 int i; 3071 int invalid = 0; 3072 bool ox_flag = false; 3073 ppc_avr_t ret = *b; 3074 3075 for (i = 0; i < 32; i++) { 3076 bcd_get_digit(b, i, &invalid); 3077 3078 if (unlikely(invalid)) { 3079 return CRF_SO; 3080 } 3081 } 3082 3083 #if 
defined(HOST_WORDS_BIGENDIAN) 3084 i = a->s8[7]; 3085 #else 3086 i = a->s8[8]; 3087 #endif 3088 if (i >= 32) { 3089 ox_flag = true; 3090 ret.VsrD(1) = ret.VsrD(0) = 0; 3091 } else if (i <= -32) { 3092 ret.VsrD(1) = ret.VsrD(0) = 0; 3093 } else if (i > 0) { 3094 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 3095 } else { 3096 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 3097 } 3098 *r = ret; 3099 3100 cr = bcd_cmp_zero(r); 3101 if (ox_flag) { 3102 cr |= CRF_SO; 3103 } 3104 3105 return cr; 3106 } 3107 3108 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3109 { 3110 int cr; 3111 int unused = 0; 3112 int invalid = 0; 3113 bool ox_flag = false; 3114 int sgnb = bcd_get_sgn(b); 3115 ppc_avr_t ret = *b; 3116 ret.VsrD(1) &= ~0xf; 3117 3118 #if defined(HOST_WORDS_BIGENDIAN) 3119 int i = a->s8[7]; 3120 ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } }; 3121 #else 3122 int i = a->s8[8]; 3123 ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } }; 3124 #endif 3125 3126 if (bcd_is_valid(b) == false) { 3127 return CRF_SO; 3128 } 3129 3130 if (unlikely(i > 31)) { 3131 i = 31; 3132 } else if (unlikely(i < -31)) { 3133 i = -31; 3134 } 3135 3136 if (i > 0) { 3137 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 3138 } else { 3139 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 3140 3141 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 3142 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 3143 } 3144 } 3145 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 3146 3147 cr = bcd_cmp_zero(&ret); 3148 if (ox_flag) { 3149 cr |= CRF_SO; 3150 } 3151 *r = ret; 3152 3153 return cr; 3154 } 3155 3156 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3157 { 3158 uint64_t mask; 3159 uint32_t ox_flag = 0; 3160 #if defined(HOST_WORDS_BIGENDIAN) 3161 int i = a->s16[3] + 1; 3162 #else 3163 int i = a->s16[4] + 1; 3164 #endif 3165 ppc_avr_t ret = *b; 3166 3167 if (bcd_is_valid(b) == false) { 3168 return CRF_SO; 3169 } 3170 3171 if (i > 16 && i < 32) { 
3172 mask = (uint64_t)-1 >> (128 - i * 4); 3173 if (ret.VsrD(0) & ~mask) { 3174 ox_flag = CRF_SO; 3175 } 3176 3177 ret.VsrD(0) &= mask; 3178 } else if (i >= 0 && i <= 16) { 3179 mask = (uint64_t)-1 >> (64 - i * 4); 3180 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 3181 ox_flag = CRF_SO; 3182 } 3183 3184 ret.VsrD(1) &= mask; 3185 ret.VsrD(0) = 0; 3186 } 3187 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 3188 *r = ret; 3189 3190 return bcd_cmp_zero(&ret) | ox_flag; 3191 } 3192 3193 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3194 { 3195 int i; 3196 uint64_t mask; 3197 uint32_t ox_flag = 0; 3198 int invalid = 0; 3199 ppc_avr_t ret = *b; 3200 3201 for (i = 0; i < 32; i++) { 3202 bcd_get_digit(b, i, &invalid); 3203 3204 if (unlikely(invalid)) { 3205 return CRF_SO; 3206 } 3207 } 3208 3209 #if defined(HOST_WORDS_BIGENDIAN) 3210 i = a->s16[3]; 3211 #else 3212 i = a->s16[4]; 3213 #endif 3214 if (i > 16 && i < 33) { 3215 mask = (uint64_t)-1 >> (128 - i * 4); 3216 if (ret.VsrD(0) & ~mask) { 3217 ox_flag = CRF_SO; 3218 } 3219 3220 ret.VsrD(0) &= mask; 3221 } else if (i > 0 && i <= 16) { 3222 mask = (uint64_t)-1 >> (64 - i * 4); 3223 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 3224 ox_flag = CRF_SO; 3225 } 3226 3227 ret.VsrD(1) &= mask; 3228 ret.VsrD(0) = 0; 3229 } else if (i == 0) { 3230 if (ret.VsrD(0) || ret.VsrD(1)) { 3231 ox_flag = CRF_SO; 3232 } 3233 ret.VsrD(0) = ret.VsrD(1) = 0; 3234 } 3235 3236 *r = ret; 3237 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) { 3238 return ox_flag | CRF_EQ; 3239 } 3240 3241 return ox_flag | CRF_GT; 3242 } 3243 3244 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 3245 { 3246 int i; 3247 VECTOR_FOR_INORDER_I(i, u8) { 3248 r->u8[i] = AES_sbox[a->u8[i]]; 3249 } 3250 } 3251 3252 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3253 { 3254 ppc_avr_t result; 3255 int i; 3256 3257 VECTOR_FOR_INORDER_I(i, u32) { 3258 result.VsrW(i) = b->VsrW(i) ^ 3259 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] 
^ 3260 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^ 3261 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^ 3262 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]); 3263 } 3264 *r = result; 3265 } 3266 3267 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3268 { 3269 ppc_avr_t result; 3270 int i; 3271 3272 VECTOR_FOR_INORDER_I(i, u8) { 3273 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]); 3274 } 3275 *r = result; 3276 } 3277 3278 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3279 { 3280 /* This differs from what is written in ISA V2.07. The RTL is */ 3281 /* incorrect and will be fixed in V2.07B. */ 3282 int i; 3283 ppc_avr_t tmp; 3284 3285 VECTOR_FOR_INORDER_I(i, u8) { 3286 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])]; 3287 } 3288 3289 VECTOR_FOR_INORDER_I(i, u32) { 3290 r->VsrW(i) = 3291 AES_imc[tmp.VsrB(4 * i + 0)][0] ^ 3292 AES_imc[tmp.VsrB(4 * i + 1)][1] ^ 3293 AES_imc[tmp.VsrB(4 * i + 2)][2] ^ 3294 AES_imc[tmp.VsrB(4 * i + 3)][3]; 3295 } 3296 } 3297 3298 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3299 { 3300 ppc_avr_t result; 3301 int i; 3302 3303 VECTOR_FOR_INORDER_I(i, u8) { 3304 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]); 3305 } 3306 *r = result; 3307 } 3308 3309 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n))) 3310 #if defined(HOST_WORDS_BIGENDIAN) 3311 #define EL_IDX(i) (i) 3312 #else 3313 #define EL_IDX(i) (3 - (i)) 3314 #endif 3315 3316 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3317 { 3318 int st = (st_six & 0x10) != 0; 3319 int six = st_six & 0xF; 3320 int i; 3321 3322 VECTOR_FOR_INORDER_I(i, u32) { 3323 if (st == 0) { 3324 if ((six & (0x8 >> i)) == 0) { 3325 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^ 3326 ROTRu32(a->u32[EL_IDX(i)], 18) ^ 3327 (a->u32[EL_IDX(i)] >> 3); 3328 } else { /* six.bit[i] == 1 */ 3329 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^ 3330 ROTRu32(a->u32[EL_IDX(i)], 19) ^ 3331 
(a->u32[EL_IDX(i)] >> 10); 3332 } 3333 } else { /* st == 1 */ 3334 if ((six & (0x8 >> i)) == 0) { 3335 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^ 3336 ROTRu32(a->u32[EL_IDX(i)], 13) ^ 3337 ROTRu32(a->u32[EL_IDX(i)], 22); 3338 } else { /* six.bit[i] == 1 */ 3339 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^ 3340 ROTRu32(a->u32[EL_IDX(i)], 11) ^ 3341 ROTRu32(a->u32[EL_IDX(i)], 25); 3342 } 3343 } 3344 } 3345 } 3346 3347 #undef ROTRu32 3348 #undef EL_IDX 3349 3350 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n))) 3351 #if defined(HOST_WORDS_BIGENDIAN) 3352 #define EL_IDX(i) (i) 3353 #else 3354 #define EL_IDX(i) (1 - (i)) 3355 #endif 3356 3357 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3358 { 3359 int st = (st_six & 0x10) != 0; 3360 int six = st_six & 0xF; 3361 int i; 3362 3363 VECTOR_FOR_INORDER_I(i, u64) { 3364 if (st == 0) { 3365 if ((six & (0x8 >> (2*i))) == 0) { 3366 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^ 3367 ROTRu64(a->u64[EL_IDX(i)], 8) ^ 3368 (a->u64[EL_IDX(i)] >> 7); 3369 } else { /* six.bit[2*i] == 1 */ 3370 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^ 3371 ROTRu64(a->u64[EL_IDX(i)], 61) ^ 3372 (a->u64[EL_IDX(i)] >> 6); 3373 } 3374 } else { /* st == 1 */ 3375 if ((six & (0x8 >> (2*i))) == 0) { 3376 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^ 3377 ROTRu64(a->u64[EL_IDX(i)], 34) ^ 3378 ROTRu64(a->u64[EL_IDX(i)], 39); 3379 } else { /* six.bit[2*i] == 1 */ 3380 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^ 3381 ROTRu64(a->u64[EL_IDX(i)], 18) ^ 3382 ROTRu64(a->u64[EL_IDX(i)], 41); 3383 } 3384 } 3385 } 3386 } 3387 3388 #undef ROTRu64 3389 #undef EL_IDX 3390 3391 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 3392 { 3393 ppc_avr_t result; 3394 int i; 3395 3396 VECTOR_FOR_INORDER_I(i, u8) { 3397 int indexA = c->u8[i] >> 4; 3398 int indexB = c->u8[i] & 0xF; 3399 #if defined(HOST_WORDS_BIGENDIAN) 3400 result.u8[i] = a->u8[indexA] ^ b->u8[indexB]; 3401 #else 
3402 result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB]; 3403 #endif 3404 } 3405 *r = result; 3406 } 3407 3408 #undef VECTOR_FOR_INORDER_I 3409 3410 /*****************************************************************************/ 3411 /* SPE extension helpers */ 3412 /* Use a table to make this quicker */ 3413 static const uint8_t hbrev[16] = { 3414 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE, 3415 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF, 3416 }; 3417 3418 static inline uint8_t byte_reverse(uint8_t val) 3419 { 3420 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4); 3421 } 3422 3423 static inline uint32_t word_reverse(uint32_t val) 3424 { 3425 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) | 3426 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24); 3427 } 3428 3429 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */ 3430 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2) 3431 { 3432 uint32_t a, b, d, mask; 3433 3434 mask = UINT32_MAX >> (32 - MASKBITS); 3435 a = arg1 & mask; 3436 b = arg2 & mask; 3437 d = word_reverse(1 + word_reverse(a | ~b)); 3438 return (arg1 & ~mask) | (d & b); 3439 } 3440 3441 uint32_t helper_cntlsw32(uint32_t val) 3442 { 3443 if (val & 0x80000000) { 3444 return clz32(~val); 3445 } else { 3446 return clz32(val); 3447 } 3448 } 3449 3450 uint32_t helper_cntlzw32(uint32_t val) 3451 { 3452 return clz32(val); 3453 } 3454 3455 /* 440 specific */ 3456 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 3457 target_ulong low, uint32_t update_Rc) 3458 { 3459 target_ulong mask; 3460 int i; 3461 3462 i = 1; 3463 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3464 if ((high & mask) == 0) { 3465 if (update_Rc) { 3466 env->crf[0] = 0x4; 3467 } 3468 goto done; 3469 } 3470 i++; 3471 } 3472 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3473 if ((low & mask) == 0) { 3474 if (update_Rc) { 3475 env->crf[0] = 0x8; 3476 } 3477 goto done; 3478 } 3479 i++; 3480 } 3481 i = 8; 3482 if 
(update_Rc) { 3483 env->crf[0] = 0x2; 3484 } 3485 done: 3486 env->xer = (env->xer & ~0x7F) | i; 3487 if (update_Rc) { 3488 env->crf[0] |= xer_so; 3489 } 3490 return i; 3491 } 3492