/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"

#include "helper_regs.h"

/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif
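
/*
 * Illustration of the divide-extended semantics above (values picked for
 * this comment, not taken from hardware traces): divweu divides the
 * extended dividend (RA || 0x00000000) by RB, so the quotient only fits
 * in 32 bits when RA < RB.
 *
 *   ra = 1, rb = 2:  dividend = 0x1'0000'0000, rt = 0x8000'0000 (fits)
 *   ra = 1, rb = 1:  dividend = 0x1'0000'0000, rt = 0x1'0000'0000
 *                    > UINT32_MAX, so overflow is flagged and RT is
 *                    undefined (0 here).
 */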

#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/*
 * Subtract 1 from each byte, AND with the inverse, and check if the MSB
 * is set at each byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 *      whereas for a non-zero byte such as 0x01:
 *      ((0x01 - 0x01) & ~(0x01)) & 0x80 = 0x00 (no zero)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n) (haszero((x) ^ pattern(n)))

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

/*
 * Return an invalid random number.
 *
 * FIXME: Add an rng backend or other mechanism to get cryptographically
 * suitable random numbers.
 */
target_ulong helper_darn32(void)
{
    return -1;
}

target_ulong helper_darn64(void)
{
    return -1;
}

#endif

#if defined(TARGET_PPC64)

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & PPC_BIT(index)) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca32 = env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca32 = env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}
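
/*
 * Per-byte view of helper_popcntb above (example values chosen for this
 * comment): the masked adds stop at byte granularity, so each result byte
 * holds the population count of the corresponding input byte, e.g. an
 * input byte of 0xFF yields a result byte of 0x08.
 */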

target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words. */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}
#endif

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}
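
/*
 * MQ pipeline of the POWER div helpers above (values chosen for this
 * comment): div computes (RA || MQ) / RB, so with arg1 = 0, MQ = 100 and
 * arg2 = 7 the quotient 14 is returned and MQ is left holding the
 * remainder 2.
 */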

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho! */
/*
 * Real implementation uses a ROM table. Do the same.
 * Extremely decomposed:
 *     return 256 * log10(10 ^ (-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.inc.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)            \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)            \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        r->VsrB(i) = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        r->VsrB(i) = j++;
    }
}

void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
    env->vscr = r->VsrW(3);
    set_flush_to_zero(vscr_nj, &env->vec_status);
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->VsrD(1) = res & 1;
    r->VsrD(0) = 0;
}
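
/*
 * Illustration of the XOR fold used by the vprtyb* helpers above (values
 * chosen for this comment): folding the operand in halves leaves, in bit 0
 * of the result, the XOR of bit 0 of every byte, i.e. the parity of the
 * bytes' least-significant bits.
 *
 *   b = ...0x0101  ->  two bytes with bit 0 set  ->  parity 0
 *   b = ...0x0100  ->  one byte with bit 0 set   ->  parity 1
 */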

#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
VARITH_DO(muluwm, *, u32)
#undef VARITH_DO
#undef VARITH

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status);   \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
                                       type, &env->vec_status);         \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,   \
                        ppc_avr_t *b)                                   \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            switch (sizeof(r->element[0])) {                            \
            case 1:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 2:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 4:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG
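
/*
 * The widened "+ 1" above implements round-half-up averaging (values in
 * this comment are illustrative): vavgub on 1 and 2 computes
 * (1 + 2 + 1) >> 1 = 2, where plain truncation would give 1. The
 * arithmetic is done in the wider etype so the intermediate sum cannot
 * wrap.
 */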

#define VABSDU_DO(name, element)                                        \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = (a->element[i] > b->element[i]) ?           \
                (a->element[i] - b->element[i]) :                       \
                (b->element[i] - a->element[i]);                        \
        }                                                               \
    }

/*
 * VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element)                   \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU

#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f32[i] = float32_scalbn(t, -uim, &env->vec_status);      \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP
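
/*
 * CR field 6 layout produced by the record-form (".") comparisons above,
 * for reference: bit 3 (0b1000) is set when the predicate held for all
 * elements, bit 1 (0b0010) when it held for none. A mixed outcome leaves
 * both clear, e.g. vcmpequb. on {1, 2, ...} vs {1, 3, ...} yields
 * crf[6] = 0.
 */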

#define VCMPNE_DO(suffix, element, etype, cmpzero, record)              \
    void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r,          \
                               ppc_avr_t *a, ppc_avr_t *b)              \
    {                                                                   \
        etype ones = (etype)-1;                                         \
        etype all = ones;                                               \
        etype result, none = 0;                                         \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (cmpzero) {                                              \
                result = ((a->element[i] == 0)                          \
                          || (b->element[i] == 0)                       \
                          || (a->element[i] != b->element[i]) ?         \
                          ones : 0x0);                                  \
            } else {                                                    \
                result = (a->element[i] != b->element[i]) ?             \
                    ones : 0x0;                                         \
            }                                                           \
            r->element[i] = result;                                     \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }

/*
 * VCMPNEZ - Vector compare not equal to zero
 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VCMPNE(suffix, element, etype, cmpzero)         \
    VCMPNE_DO(suffix, element, etype, cmpzero, 0)       \
    VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
VCMPNE(zb, u8, uint8_t, 1)
VCMPNE(zh, u16, uint16_t, 1)
VCMPNE(zw, u32, uint32_t, 1)
VCMPNE(b, u8, uint8_t, 0)
VCMPNE(h, u16, uint16_t, 0)
VCMPNE(w, u32, uint32_t, 0)
#undef VCMPNE_DO
#undef VCMPNE

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f32[i], b->f32[i],       \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        int le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
                                           &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f32[i]);
            int ge_rel = float32_compare_quiet(a->f32[i], bneg,
                                               &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}
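
/*
 * vcmpbfp reads as a bounds check, -b <= a <= b (example values chosen for
 * this comment): bit 31 of each result element means "a > b" and bit 30
 * means "a < -b", so a = 3.0 against bound b = 2.0 produces 0x80000000,
 * while any NaN operand produces 0xc0000000, out of bounds on both sides.
 */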

#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            if (float32_is_any_nan(b->f32[i])) {                        \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f32[i], &s);          \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        /* 0x4000 is +0.5 in the product scale, giving round-to-nearest */
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMINMAX_DO(name, compare, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (a->element[i] compare b->element[i]) {                  \
                r->element[i] = b->element[i];                          \
            } else {                                                    \
                r->element[i] = a->element[i];                          \
            }                                                           \
        }                                                               \
    }
#define VMINMAX(suffix, element)                \
    VMINMAX_DO(min##suffix, >, element)         \
    VMINMAX_DO(max##suffix, <, element)
VMINMAX(sb, s8)
VMINMAX(sh, s16)
VMINMAX(sw, s32)
VMINMAX(sd, s64)
VMINMAX(ub, u8)
VMINMAX(uh, u16)
VMINMAX(uw, u32)
VMINMAX(ud, u64)
#undef VMINMAX_DO
#undef VMINMAX

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, access, ofs)                             \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i, half = ARRAY_SIZE(r->element) / 2;                       \
                                                                        \
        for (i = 0; i < half; i++) {                                    \
            result.access(i * 2 + 0) = a->access(i + ofs);              \
            result.access(i * 2 + 1) = b->access(i + ofs);              \
        }                                                               \
        *r = result;                                                    \
    }

#define VMRG(suffix, element, access)                \
    VMRG_DO(mrgl##suffix, element, access, half)     \
    VMRG_DO(mrgh##suffix, element, access, 0)
VMRG(b, u8, VsrB)
VMRG(h, u16, VsrH)
VMRG(w, u32, VsrW)
#undef VMRG_DO
#undef VMRG

void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}
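
/*
 * Multiply-sum shape shared by the vmsum* helpers below (example values
 * for this comment): in vmsummbm each result word is the matching word of
 * c plus four byte products, so a bytes {1, 1, 1, 1}, b bytes
 * {2, 2, 2, 2} and a c word of 10 give 10 + 4 * 2 = 18.
 */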

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
                                     (cast)b->mul_access(i);            \
        }                                                               \
    }

#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) *       \
                                     (cast)b->mul_access(i + 1);        \
        }                                                               \
    }

#define VMUL(suffix, mul_element, mul_access, prod_access, cast)          \
    VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \
    VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
VMUL(sb, s8, VsrSB, VsrSH, int16_t)
VMUL(sh, s16, VsrSH, VsrSW, int32_t)
VMUL(sw, s32, VsrSW, VsrSD, int64_t)
VMUL(ub, u8, VsrB, VsrH, uint16_t)
VMUL(uh, u16, VsrH, VsrW, uint32_t)
VMUL(uw, u32, VsrW, VsrD, uint64_t)
#undef VMUL_DO_EVN
#undef VMUL_DO_ODD
#undef VMUL
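
/*
 * Layout sketch for the mule/mulo helpers above (element numbering per
 * the big-endian VsrB/VsrH accessors): for vmuleub, pairs (a[0], b[0]),
 * (a[2], b[2]), ... produce halfword results 0, 1, ..., so N narrow
 * elements yield N/2 double-width products and the even and odd variants
 * together cover every input element exactly once.
 */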

void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = s & 0xf;

        if (s & 0x10) {
            result.VsrB(i) = b->VsrB(index);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                   ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = 15 - (s & 0xf);

        if (s & 0x10) {
            result.VsrB(i) = a->VsrB(index);
        } else {
            result.VsrB(i) = b->VsrB(index);
        }
    }
    *r = result;
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->u64[1 - i], 63 - index, 1))
#endif

void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}

void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63 - (index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->VsrD(0) = perm;
    r->VsrD(1) = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

static const uint64_t VGBBD_MASKS[256] = {
    0x0000000000000000ull, /* 00 */
    0x0000000000000080ull, /* 01 */
    0x0000000000008000ull, /* 02 */
    0x0000000000008080ull, /* 03 */
    0x0000000000800000ull, /* 04 */
    0x0000000000800080ull, /* 05 */
    0x0000000000808000ull, /* 06 */
    0x0000000000808080ull, /* 07 */
    0x0000000080000000ull, /* 08 */
    0x0000000080000080ull, /* 09 */
    0x0000000080008000ull, /* 0A */
    0x0000000080008080ull, /* 0B */
    0x0000000080800000ull, /* 0C */
    0x0000000080800080ull, /* 0D */
    0x0000000080808000ull, /* 0E */
    0x0000000080808080ull, /* 0F */
    0x0000008000000000ull, /* 10 */
    0x0000008000000080ull, /* 11 */
    0x0000008000008000ull, /* 12 */
    0x0000008000008080ull, /* 13 */
    0x0000008000800000ull, /* 14 */
    0x0000008000800080ull, /* 15 */
    0x0000008000808000ull, /* 16 */
    0x0000008000808080ull, /* 17 */
    0x0000008080000000ull, /* 18 */
    0x0000008080000080ull, /* 19 */
    0x0000008080008000ull, /* 1A */
    0x0000008080008080ull, /* 1B */
    0x0000008080800000ull, /* 1C */
    0x0000008080800080ull, /* 1D */
    0x0000008080808000ull, /* 1E */
    0x0000008080808080ull, /* 1F */
    0x0000800000000000ull, /* 20 */
    0x0000800000000080ull, /* 21 */
    0x0000800000008000ull, /* 22 */
    0x0000800000008080ull, /* 23 */
    0x0000800000800000ull, /* 24 */
    0x0000800000800080ull, /* 25 */
    0x0000800000808000ull, /* 26 */
    0x0000800000808080ull, /* 27 */
    0x0000800080000000ull, /* 28 */
    0x0000800080000080ull, /* 29 */
    0x0000800080008000ull, /* 2A */
    0x0000800080008080ull, /* 2B */
    0x0000800080800000ull, /* 2C */
    0x0000800080800080ull, /* 2D */
    0x0000800080808000ull, /* 2E */
    0x0000800080808080ull, /* 2F */
    0x0000808000000000ull, /* 30 */
    0x0000808000000080ull, /* 31 */
    0x0000808000008000ull, /* 32 */
    0x0000808000008080ull, /* 33 */
    0x0000808000800000ull, /* 34 */
    0x0000808000800080ull, /* 35 */
    0x0000808000808000ull, /* 36 */
    0x0000808000808080ull, /* 37 */
    0x0000808080000000ull, /* 38 */
    0x0000808080000080ull, /* 39 */
    0x0000808080008000ull, /* 3A */
    0x0000808080008080ull, /* 3B */
    0x0000808080800000ull, /* 3C */
    0x0000808080800080ull, /* 3D */
    0x0000808080808000ull, /* 3E */
    0x0000808080808080ull, /* 3F */
    0x0080000000000000ull, /* 40 */
    0x0080000000000080ull, /* 41 */
    0x0080000000008000ull, /* 42 */
    0x0080000000008080ull, /* 43 */
    0x0080000000800000ull, /* 44 */
    0x0080000000800080ull, /* 45 */
    0x0080000000808000ull, /* 46 */
    0x0080000000808080ull, /* 47 */
    0x0080000080000000ull, /* 48 */
    0x0080000080000080ull, /* 49 */
    0x0080000080008000ull, /* 4A */
    0x0080000080008080ull, /* 4B */
    0x0080000080800000ull, /* 4C */
    0x0080000080800080ull, /* 4D */
    0x0080000080808000ull, /* 4E */
    0x0080000080808080ull, /* 4F */
    0x0080008000000000ull, /* 50 */
    0x0080008000000080ull, /* 51 */
    0x0080008000008000ull, /* 52 */
    0x0080008000008080ull, /* 53 */
    0x0080008000800000ull, /* 54 */
    0x0080008000800080ull, /* 55 */
    0x0080008000808000ull, /* 56 */
    0x0080008000808080ull, /* 57 */
    0x0080008080000000ull, /* 58 */
    0x0080008080000080ull, /* 59 */
    0x0080008080008000ull, /* 5A */
    0x0080008080008080ull, /* 5B */
    0x0080008080800000ull, /* 5C */
    0x0080008080800080ull, /* 5D */
    0x0080008080808000ull, /* 5E */
    0x0080008080808080ull, /* 5F */
    0x0080800000000000ull, /* 60 */
    0x0080800000000080ull, /* 61 */
    0x0080800000008000ull, /* 62 */
    0x0080800000008080ull, /* 63 */
    0x0080800000800000ull, /* 64 */
    0x0080800000800080ull, /* 65 */
    0x0080800000808000ull, /* 66 */
    0x0080800000808080ull, /* 67 */
    0x0080800080000000ull, /* 68 */
    0x0080800080000080ull, /* 69 */
    0x0080800080008000ull, /* 6A */
    0x0080800080008080ull, /* 6B */
    0x0080800080800000ull, /* 6C */
    0x0080800080800080ull, /* 6D */
    0x0080800080808000ull, /* 6E */
    0x0080800080808080ull, /* 6F */
    0x0080808000000000ull, /* 70 */
    0x0080808000000080ull, /* 71 */
    0x0080808000008000ull, /* 72 */
    0x0080808000008080ull, /* 73 */
    0x0080808000800000ull, /* 74 */
    0x0080808000800080ull, /* 75 */
    0x0080808000808000ull, /* 76 */
    0x0080808000808080ull, /* 77 */
    0x0080808080000000ull, /* 78 */
    0x0080808080000080ull, /* 79 */
    0x0080808080008000ull, /* 7A */
    0x0080808080008080ull, /* 7B */
    0x0080808080800000ull, /* 7C */
    0x0080808080800080ull, /* 7D */
    0x0080808080808000ull, /* 7E */
    0x0080808080808080ull, /* 7F */
    0x8000000000000000ull, /* 80 */
    0x8000000000000080ull, /* 81 */
    0x8000000000008000ull, /* 82 */
    0x8000000000008080ull, /* 83 */
    0x8000000000800000ull, /* 84 */
    0x8000000000800080ull, /* 85 */
    0x8000000000808000ull, /* 86 */
    0x8000000000808080ull, /* 87 */
    0x8000000080000000ull, /* 88 */
    0x8000000080000080ull, /* 89 */
    0x8000000080008000ull, /* 8A */
    0x8000000080008080ull, /* 8B */
    0x8000000080800000ull, /* 8C */
    0x8000000080800080ull, /* 8D */
    0x8000000080808000ull, /* 8E */
    0x8000000080808080ull, /* 8F */
    0x8000008000000000ull, /* 90 */
    0x8000008000000080ull, /* 91 */
    0x8000008000008000ull, /* 92 */
    0x8000008000008080ull, /* 93 */
    0x8000008000800000ull, /* 94 */
    0x8000008000800080ull, /* 95 */
    0x8000008000808000ull, /* 96 */
    0x8000008000808080ull, /* 97 */
    0x8000008080000000ull, /* 98 */
    0x8000008080000080ull, /* 99 */
    0x8000008080008000ull, /* 9A */
    0x8000008080008080ull, /* 9B */
    0x8000008080800000ull, /* 9C */
    0x8000008080800080ull, /* 9D */
    0x8000008080808000ull, /* 9E */
    0x8000008080808080ull, /* 9F */
    0x8000800000000000ull, /* A0 */
    0x8000800000000080ull, /* A1 */
    0x8000800000008000ull, /* A2 */
    0x8000800000008080ull, /* A3 */
    0x8000800000800000ull, /* A4 */
    0x8000800000800080ull, /* A5 */
    0x8000800000808000ull, /* A6 */
    0x8000800000808080ull, /* A7 */
    0x8000800080000000ull, /* A8 */
    0x8000800080000080ull, /* A9 */
    0x8000800080008000ull, /* AA */
    0x8000800080008080ull, /* AB */
    0x8000800080800000ull, /* AC */
    0x8000800080800080ull, /* AD */
    0x8000800080808000ull, /* AE */
    0x8000800080808080ull, /* AF */
    0x8000808000000000ull, /* B0 */
    0x8000808000000080ull, /* B1 */
    0x8000808000008000ull, /* B2 */
    0x8000808000008080ull, /* B3 */
    0x8000808000800000ull, /* B4 */
    0x8000808000800080ull, /* B5 */
    0x8000808000808000ull, /* B6 */
    0x8000808000808080ull, /* B7 */
    0x8000808080000000ull, /* B8 */
    0x8000808080000080ull, /* B9 */
    0x8000808080008000ull, /* BA */
    0x8000808080008080ull, /* BB */
    0x8000808080800000ull, /* BC */
    0x8000808080800080ull, /* BD */
    0x8000808080808000ull, /* BE */
    0x8000808080808080ull, /* BF */
    0x8080000000000000ull, /* C0 */
    0x8080000000000080ull, /* C1 */
    0x8080000000008000ull, /* C2 */
    0x8080000000008080ull, /* C3 */
    0x8080000000800000ull, /* C4 */
    0x8080000000800080ull, /* C5 */
    0x8080000000808000ull, /* C6 */
    0x8080000000808080ull, /* C7 */
    0x8080000080000000ull, /* C8 */
    0x8080000080000080ull, /* C9 */
    0x8080000080008000ull, /* CA */
    0x8080000080008080ull, /* CB */
    0x8080000080800000ull, /* CC */
    0x8080000080800080ull, /* CD */
    0x8080000080808000ull, /* CE */
    0x8080000080808080ull, /* CF */
    0x8080008000000000ull, /* D0 */
    0x8080008000000080ull, /* D1 */
    0x8080008000008000ull, /* D2 */
    0x8080008000008080ull, /* D3 */
    0x8080008000800000ull, /* D4 */
    0x8080008000800080ull, /* D5 */
    0x8080008000808000ull, /* D6 */
    0x8080008000808080ull, /* D7 */
    0x8080008080000000ull, /* D8 */
    0x8080008080000080ull, /* D9 */
    0x8080008080008000ull, /* DA */
    0x8080008080008080ull, /* DB */
    0x8080008080800000ull, /* DC */
    0x8080008080800080ull, /* DD */
    0x8080008080808000ull, /* DE */
    0x8080008080808080ull, /* DF */
    0x8080800000000000ull, /* E0 */
    0x8080800000000080ull, /* E1 */
    0x8080800000008000ull, /* E2 */
    0x8080800000008080ull, /* E3 */
    0x8080800000800000ull, /* E4 */
    0x8080800000800080ull, /* E5 */
    0x8080800000808000ull, /* E6 */
    0x8080800000808080ull, /* E7 */
    0x8080800080000000ull, /* E8 */
    0x8080800080000080ull, /* E9 */
    0x8080800080008000ull, /* EA */
    0x8080800080008080ull, /* EB */
    0x8080800080800000ull, /* EC */
    0x8080800080800080ull, /* ED */
    0x8080800080808000ull, /* EE */
    0x8080800080808080ull, /* EF */
    0x8080808000000000ull, /* F0 */
    0x8080808000000080ull, /* F1 */
    0x8080808000008000ull, /* F2 */
    0x8080808000008080ull, /* F3 */
    0x8080808000800000ull, /* F4 */
    0x8080808000800080ull, /* F5 */
    0x8080808000808000ull, /* F6 */
    0x8080808000808080ull, /* F7 */
    0x8080808080000000ull, /* F8 */
    0x8080808080000080ull, /* F9 */
    0x8080808080008000ull, /* FA */
    0x8080808080008080ull, /* FB */
    0x8080808080800000ull, /* FC */
    0x8080808080800080ull, /* FD */
    0x8080808080808000ull, /* FE */
    0x8080808080808080ull, /* FF */
};

void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    uint64_t t[2] = { 0, 0 };

    VECTOR_FOR_INORDER_I(i, u8) {
#if defined(HOST_WORDS_BIGENDIAN)
        t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
#else
        t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (7 - (i & 7));
#endif
    }

    r->u64[0] = t[0];
    r->u64[1] = t[1];
}

#define PMSUM(name, srcfld, trgfld, trgtyp)                             \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i, j;                                                           \
    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])];              \
                                                                        \
    VECTOR_FOR_INORDER_I(i, srcfld) {                                   \
        prod[i] = 0;                                                    \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {                \
            if (a->srcfld[i] & (1ull << j)) {                           \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);                 \
            }                                                           \
        }                                                               \
    }                                                                   \
                                                                        \
    VECTOR_FOR_INORDER_I(i, trgfld) {                                   \
        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1];                   \
    }                                                                   \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{

#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                ppc_avr_t bshift;
                if (j == 0) {
                    bshift.VsrD(0) = 0;
                    bshift.VsrD(1) = b->u64[i];
                } else {
                    bshift.VsrD(0) = b->u64[i] >> (64 - j);
                    bshift.VsrD(1) = b->u64[i] << j;
                }
                prod[i].VsrD(1) ^= bshift.VsrD(1);
                prod[i].VsrD(0) ^= bshift.VsrD(0);
            }
        }
    }

    r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
    r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
#endif
}
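
/*
 * The vpmsum* helpers above implement carry-less (XOR) multiplication
 * over GF(2), shown here with small polynomials (values chosen for this
 * comment): 0b0101 * 0b0011 = 0b1111, i.e. (x^2 + 1)(x + 1) =
 * x^3 + x^2 + x + 1, and each pair of adjacent element products is
 * combined with XOR rather than addition.
 */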

#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
                                     ((e >> 6) & 0x3e0) |
                                     ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(sdss, s64, s32, cvtsdsw, 1)
VPK(sdus, s64, u32, cvtsduw, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(udus, u64, u32, cvtuduw, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
VPK(udum, u64, u32, I, 0)
#undef I
#undef VPK
#undef PKBIG

void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
    }
}

#define VRFI(suffix, rounding)                                  \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
                             ppc_avr_t *b)                      \
    {                                                           \
        int i;                                                  \
        float_status s = env->vec_status;                       \
                                                                \
        set_float_rounding_mode(rounding, &s);                  \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {              \
            r->f32[i] = float32_round_to_int(b->f32[i], &s);    \
        }                                                       \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI

#define VROTATE(suffix, element, mask)                                  \
    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = (a->element[i] << shift) |                  \
                (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
        }                                                               \
    }
VROTATE(b, u8, 0x7)
VROTATE(h, u16, 0xF)
VROTATE(w, u32, 0x1F)
VROTATE(d, u64, 0x3F)
#undef VROTATE

void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        float32 t = float32_sqrt(b->f32[i], &env->vec_status);

        r->f32[i] = float32_div(float32_one, t, &env->vec_status);
    }
}

#define VRLMI(name, size, element, insert)                              \
    void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)        \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint##size##_t src1 = a->element[i];                        \
            uint##size##_t src2 = b->element[i];                        \
            uint##size##_t src3 = r->element[i];                        \
            uint##size##_t begin, end, shift, mask, rot_val;            \
                                                                        \
            shift = extract##size(src2, 0, 6);                          \
            end   = extract##size(src2, 8, 6);                          \
            begin = extract##size(src2, 16, 6);                         \
            rot_val = rol##size(src1, shift);                           \
            mask = mask_u##size(begin, end);                            \
            if (insert) {                                               \
                r->element[i] = (rot_val & mask) | (src3 & ~mask);      \
            } else {                                                    \
                r->element[i] = (rot_val & mask);                       \
            }                                                           \
        }                                                               \
    }

VRLMI(vrldmi, 64, u64, 1);
VRLMI(vrlwmi, 32, u32, 1);
VRLMI(vrldnm, 64, u64, 0);
VRLMI(vrlwnm, 32, u32, 0);
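
/*
 * Decoding sketch for the rotate-mask helpers above (field values chosen
 * for this comment): for vrlwmi, src2 = 0x00081004 yields begin = 8,
 * end = 16 and shift = 4, so src1 is rotated left by 4 and only the bit
 * field selected by mask_u32(begin, end) is inserted into the target;
 * the *nm forms apply the mask without the insert.
 */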

void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                 ppc_avr_t *c)
{
    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
}

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTU_X_DO(name, size, left)                            \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
{                                                               \
    int index;                                                  \
    if (left) {                                                 \
        index = (a & 0xf) * 8;                                  \
    } else {                                                    \
        index = ((15 - (a & 0xf) + 1) * 8) - size;              \
    }                                                           \
    return int128_getlo(int128_rshift(b->s128, index)) &        \
        MAKE_64BIT_MASK(0, size);                               \
}
#else
#define VEXTU_X_DO(name, size, left)                            \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
{                                                               \
    int index;                                                  \
    if (left) {                                                 \
        index = ((15 - (a & 0xf) + 1) * 8) - size;              \
    } else {                                                    \
        index = (a & 0xf) * 8;                                  \
    }                                                           \
    return int128_getlo(int128_rshift(b->s128, index)) &        \
        MAKE_64BIT_MASK(0, size);                               \
}
#endif

VEXTU_X_DO(vextublx,  8, 1)
VEXTU_X_DO(vextuhlx, 16, 1)
VEXTU_X_DO(vextuwlx, 32, 1)
VEXTU_X_DO(vextubrx,  8, 0)
VEXTU_X_DO(vextuhrx, 16, 0)
VEXTU_X_DO(vextuwrx, 32, 0)
#undef VEXTU_X_DO

/*
 * The specification says that the results are undefined if all of the
 * shift counts are not identical.  We check to make sure that they are,
 * to conform to what real hardware appears to do.
 */
#define VSHIFT(suffix, leftp)                                           \
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
    {                                                                   \
        int shift = b->VsrB(15) & 0x7;                                  \
        int doit = 1;                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
        }                                                               \
        if (doit) {                                                     \
            if (shift == 0) {                                           \
                *r = *a;                                                \
            } else if (leftp) {                                         \
                uint64_t carry = a->VsrD(1) >> (64 - shift);            \
                                                                        \
                r->VsrD(0) = (a->VsrD(0) << shift) | carry;             \
                r->VsrD(1) = a->VsrD(1) << shift;                       \
            } else {                                                    \
                uint64_t carry = a->VsrD(0) << (64 - shift);            \
                                                                        \
                r->VsrD(1) = (a->VsrD(1) >> shift) | carry;             \
                r->VsrD(0) = a->VsrD(0) >> shift;                       \
            }                                                           \
        }                                                               \
    }
VSHIFT(l, 1)
VSHIFT(r, 0)
#undef VSHIFT
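
/*
 * Sketch of the 128-bit shift above (numbers picked for this comment):
 * shifting left by 3, the top 3 bits of the low doubleword must move into
 * the bottom of the high doubleword, hence carry = VsrD(1) >> (64 - 3).
 * The shift == 0 case is split out because a shift by 64 would be
 * undefined behaviour in C.
 */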

#define VSL(suffix, element, mask)                                      \
    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] << shift;                     \
        }                                                               \
    }
VSL(b, u8, 0x7)
VSL(h, u16, 0x0F)
VSL(w, u32, 0x1F)
VSL(d, u64, 0x3F)
#undef VSL

void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->u8[i] & 0x7;               /* extract shift value */
        bytes = (a->u8[i] << 8) +             /* extract adjacent bytes */
            (((i + 1) < size) ? a->u8[i + 1] : 0);
        r->u8[i] = (bytes << shift) >> 8;     /* shift and store result */
    }
}

void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /*
     * Use reverse order, as the destination and source registers can be
     * the same.  Since the register is modified in place, saving a
     * temporary, the reverse order guarantees that a computed result is
     * not fed back into the computation.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->u8[i] & 0x7;               /* extract shift value */
        bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
                                              /* extract adjacent bytes */
        r->u8[i] = (bytes >> shift) & 0xFF;   /* shift and store result */
    }
}

void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.VsrB(i) = b->VsrB(index - 0x10);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}
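
/*
 * vsldoi example (values chosen for this comment): with shift = 3 the
 * result is bytes 3..15 of a followed by bytes 0..2 of b, i.e. the
 * 32-byte concatenation a:b shifted left by sh bytes, keeping the high
 * 16 bytes.
 */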

void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

/* Experimental testing shows that hardware masks the immediate.  */
#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
#define VSPLT(suffix, element, access)                                    \
    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
    {                                                                     \
        uint32_t s = b->access(SPLAT_ELEMENT(element));                   \
        int i;                                                            \
                                                                          \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                    \
            r->access(i) = s;                                             \
        }                                                                 \
    }
VSPLT(b, u8, VsrB)
VSPLT(h, u16, VsrH)
VSPLT(w, u32, VsrW)
#undef VSPLT
#undef SPLAT_ELEMENT
#undef _SPLAT_MASKED

#if defined(HOST_WORDS_BIGENDIAN)
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])],           \
                sizeof(r->element[0]));                                     \
    }
#else
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        uint32_t d = (16 - index) - sizeof(r->element[0]);                  \
        memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0]));               \
    }
#endif
VINSERT(b, u8)
VINSERT(h, u16)
VINSERT(w, u32)
VINSERT(d, u64)
#undef VINSERT

#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
        memset(&r->u8[8], 0, 8);                                             \
        memset(&r->u8[0], 0, 8 - es);                                        \
    }
#else
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        uint32_t s = (16 - index) - es;                                      \
        memmove(&r->u8[8], &b->u8[s], es);                                   \
        memset(&r->u8[0], 0, 8);                                             \
        memset(&r->u8[8 + es], 0, 8 - es);                                   \
    }
#endif
VEXTRACT(ub, u8)
VEXTRACT(uh, u16)
VEXTRACT(uw, u32)
VEXTRACT(d, u64)
#undef VEXTRACT

void helper_xxextractuw(CPUPPCState *env, target_ulong xtn,
                        target_ulong xbn, uint32_t index)
{
    ppc_vsr_t xt, xb;
    size_t es = sizeof(uint32_t);
    uint32_t ext_index;
    int i;

    getVSR(xbn, &xb, env);
    memset(&xt, 0, sizeof(xt));

    ext_index = index;
    for (i = 0; i < es; i++, ext_index++) {
        xt.VsrB(8 - es + i) = xb.VsrB(ext_index % 16);
    }

    putVSR(xtn, &xt, env);
}

void helper_xxinsertw(CPUPPCState *env, target_ulong xtn,
                      target_ulong xbn, uint32_t index)
{
    ppc_vsr_t xt, xb;
    size_t es = sizeof(uint32_t);
    int ins_index, i = 0;

    getVSR(xbn, &xb, env);
    getVSR(xtn, &xt, env);

    ins_index = index;
    for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
        xt.VsrB(ins_index) = xb.VsrB(8 - es + i);
    }

    putVSR(xtn, &xt, env);
}

#define VEXT_SIGNED(name, element, cast)                            \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
{                                                                   \
    int i;                                                          \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
        r->element[i] = (cast)b->element[i];                        \
    }                                                               \
}
VEXT_SIGNED(vextsb2w, s32, int8_t)
VEXT_SIGNED(vextsb2d, s64, int8_t)
VEXT_SIGNED(vextsh2w, s32, int16_t)
VEXT_SIGNED(vextsh2d, s64, int16_t)
VEXT_SIGNED(vextsw2d, s64, int32_t)
#undef VEXT_SIGNED

#define VNEG(name, element)                                         \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
{                                                                   \
    int i;                                                          \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
        r->element[i] = -b->element[i];                             \
    }                                                               \
}
VNEG(vnegw, s32)
VNEG(vnegd, s64)
#undef VNEG

#define VSPLTI(suffix, element, splat_type)                     \
    void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
    {                                                           \
        /* sign-extend the 5-bit SIMM field */                  \
        splat_type x = (int8_t)(splat << 3) >> 3;               \
        int i;                                                  \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
            r->element[i] = x;                                  \
        }                                                       \
    }
VSPLTI(b, s8, int8_t)
VSPLTI(h, s16, int16_t)
VSPLTI(w, s32, int32_t)
#undef VSPLTI

#define VSR(suffix, element, mask)                                      \
    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = a->element[i] >> shift;                     \
        }                                                               \
    }
VSR(ab, s8, 0x7)
VSR(ah, s16, 0xF)
VSR(aw, s32, 0x1F)
VSR(ad, s64, 0x3F)
VSR(b, u8, 0x7)
VSR(h, u16, 0xF)
VSR(w, u32, 0x1F)
VSR(d, u64, 0x3F)
#undef VSR

void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = a->u32[i] >= b->u32[i];
    }
}
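
/*
 * vsubcuw above stores the carry-out of the 32-bit subtraction a - b
 * computed as a + ~b + 1 (illustrative values): a = 5, b = 3 gives 1
 * (no borrow), a = 3, b = 5 gives 0 (borrow), which matches a >= b.
 */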
result; 2073 2074 if (sat) { 2075 env->vscr |= (1 << VSCR_SAT); 2076 } 2077 } 2078 2079 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2080 { 2081 int i, j, upper; 2082 ppc_avr_t result; 2083 int sat = 0; 2084 2085 upper = 1; 2086 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 2087 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 2088 2089 result.VsrD(i) = 0; 2090 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 2091 t += a->VsrSW(2 * i + j); 2092 } 2093 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 2094 } 2095 2096 *r = result; 2097 if (sat) { 2098 env->vscr |= (1 << VSCR_SAT); 2099 } 2100 } 2101 2102 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2103 { 2104 int i, j; 2105 int sat = 0; 2106 2107 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2108 int64_t t = (int64_t)b->s32[i]; 2109 2110 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 2111 t += a->s8[4 * i + j]; 2112 } 2113 r->s32[i] = cvtsdsw(t, &sat); 2114 } 2115 2116 if (sat) { 2117 env->vscr |= (1 << VSCR_SAT); 2118 } 2119 } 2120 2121 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2122 { 2123 int sat = 0; 2124 int i; 2125 2126 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2127 int64_t t = (int64_t)b->s32[i]; 2128 2129 t += a->s16[2 * i] + a->s16[2 * i + 1]; 2130 r->s32[i] = cvtsdsw(t, &sat); 2131 } 2132 2133 if (sat) { 2134 env->vscr |= (1 << VSCR_SAT); 2135 } 2136 } 2137 2138 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2139 { 2140 int i, j; 2141 int sat = 0; 2142 2143 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2144 uint64_t t = (uint64_t)b->u32[i]; 2145 2146 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 2147 t += a->u8[4 * i + j]; 2148 } 2149 r->u32[i] = cvtuduw(t, &sat); 2150 } 2151 2152 if (sat) { 2153 env->vscr |= (1 << VSCR_SAT); 2154 } 2155 } 2156 2157 #if defined(HOST_WORDS_BIGENDIAN) 2158 #define UPKHI 1 2159 #define UPKLO 0 2160 #else 2161 #define UPKHI 0 2162 #define UPKLO 1 2163 #endif
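/*
 * Illustrative sketch (not part of the original helpers): the VUPKPX
 * macro below expands one 1-5-5-5 packed pixel (a sign/alpha bit and
 * three 5-bit colour fields) into an 8-8-8-8 word, replicating the top
 * bit across the whole alpha byte.  Per element the transformation is
 * the following plain-C function; e.g. 0xFFFF unpacks to 0xFF1F1F1F.
 * INT_HELPER_DEMO is a hypothetical guard so the sketch is never built.
 */
#ifdef INT_HELPER_DEMO
static uint32_t unpack_pixel_1555(uint16_t e)
{
    uint8_t a = (e >> 15) ? 0xff : 0;   /* replicate the top bit */
    uint8_t r = (e >> 10) & 0x1f;
    uint8_t g = (e >> 5) & 0x1f;
    uint8_t b = e & 0x1f;

    return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
}
#endif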
2164 #define VUPKPX(suffix, hi) \ 2165 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2166 { \ 2167 int i; \ 2168 ppc_avr_t result; \ 2169 \ 2170 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 2171 uint16_t e = b->u16[hi ? i : i + 4]; \ 2172 uint8_t a = (e >> 15) ? 0xff : 0; \ 2173 uint8_t r = (e >> 10) & 0x1f; \ 2174 uint8_t g = (e >> 5) & 0x1f; \ 2175 uint8_t b = e & 0x1f; \ 2176 \ 2177 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 2178 } \ 2179 *r = result; \ 2180 } 2181 VUPKPX(lpx, UPKLO) 2182 VUPKPX(hpx, UPKHI) 2183 #undef VUPKPX 2184 2185 #define VUPK(suffix, unpacked, packee, hi) \ 2186 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2187 { \ 2188 int i; \ 2189 ppc_avr_t result; \ 2190 \ 2191 if (hi) { \ 2192 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 2193 result.unpacked[i] = b->packee[i]; \ 2194 } \ 2195 } else { \ 2196 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 2197 i++) { \ 2198 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 2199 } \ 2200 } \ 2201 *r = result; \ 2202 } 2203 VUPK(hsb, s16, s8, UPKHI) 2204 VUPK(hsh, s32, s16, UPKHI) 2205 VUPK(hsw, s64, s32, UPKHI) 2206 VUPK(lsb, s16, s8, UPKLO) 2207 VUPK(lsh, s32, s16, UPKLO) 2208 VUPK(lsw, s64, s32, UPKLO) 2209 #undef VUPK 2210 #undef UPKHI 2211 #undef UPKLO 2212 2213 #define VGENERIC_DO(name, element) \ 2214 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 2215 { \ 2216 int i; \ 2217 \ 2218 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 2219 r->element[i] = name(b->element[i]); \ 2220 } \ 2221 } 2222 2223 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 2224 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 2225 #define clzw(v) clz32((v)) 2226 #define clzd(v) clz64((v)) 2227 2228 VGENERIC_DO(clzb, u8) 2229 VGENERIC_DO(clzh, u16) 2230 VGENERIC_DO(clzw, u32) 2231 VGENERIC_DO(clzd, u64) 2232 2233 #undef clzb 2234 #undef clzh 2235 #undef clzw 2236 #undef clzd 2237 2238 #define ctzb(v) ((v) ? ctz32(v) : 8) 2239 #define ctzh(v) ((v) ? ctz32(v) : 16) 2240 #define ctzw(v) ctz32((v)) 2241 #define ctzd(v) ctz64((v)) 2242 2243 VGENERIC_DO(ctzb, u8) 2244 VGENERIC_DO(ctzh, u16) 2245 VGENERIC_DO(ctzw, u32) 2246 VGENERIC_DO(ctzd, u64) 2247 2248 #undef ctzb 2249 #undef ctzh 2250 #undef ctzw 2251 #undef ctzd 2252 2253 #define popcntb(v) ctpop8(v) 2254 #define popcnth(v) ctpop16(v) 2255 #define popcntw(v) ctpop32(v) 2256 #define popcntd(v) ctpop64(v) 2257 2258 VGENERIC_DO(popcntb, u8) 2259 VGENERIC_DO(popcnth, u16) 2260 VGENERIC_DO(popcntw, u32) 2261 VGENERIC_DO(popcntd, u64) 2262 2263 #undef popcntb 2264 #undef popcnth 2265 #undef popcntw 2266 #undef popcntd 2267 2268 #undef VGENERIC_DO 2269 2270 #if defined(HOST_WORDS_BIGENDIAN) 2271 #define QW_ONE { .u64 = { 0, 1 } } 2272 #else 2273 #define QW_ONE { .u64 = { 1, 0 } } 2274 #endif 2275 2276 #ifndef CONFIG_INT128 2277 2278 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) 2279 { 2280 t->u64[0] = ~a.u64[0]; 2281 t->u64[1] = ~a.u64[1]; 2282 } 2283 2284 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) 2285 { 2286 if (a.VsrD(0) < b.VsrD(0)) { 2287 return -1; 2288 } else if (a.VsrD(0) > b.VsrD(0)) { 2289 return 1; 2290 } else if (a.VsrD(1) < b.VsrD(1)) { 2291 return -1; 2292 } else if (a.VsrD(1) > b.VsrD(1)) { 2293 return 1; 2294 } else { 2295 return 0; 2296 } 2297 } 2298 2299 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2300 { 2301 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 2302 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 2303 (~a.VsrD(1) < b.VsrD(1)); 2304 } 2305 2306 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 2307 { 2308 ppc_avr_t not_a; 2309 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 2310 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 2311 (~a.VsrD(1) < b.VsrD(1)); 2312 avr_qw_not(&not_a, a); 2313 return avr_qw_cmpu(not_a, b) < 0; 2314 } 2315 2316 #endif 2317
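/*
 * Illustrative sketch (not part of the original helpers): the quadword
 * arithmetic below needs a 64-bit carry-out without 128-bit support.
 * For unsigned limbs x and y, x + y overflows 64 bits exactly when
 * y > ~x (equivalently, when the truncated sum is smaller than either
 * operand), which is the (~a.VsrD(1) < b.VsrD(1)) term used by
 * avr_qw_add() and avr_qw_addc() above.  INT_HELPER_DEMO is a
 * hypothetical guard so the sketch is never built.
 */
#ifdef INT_HELPER_DEMO
static void limb_carry_demo(void)
{
    uint64_t x = UINT64_MAX - 1;
    uint64_t y = 5;
    uint64_t sum = x + y;           /* wraps around to 3 */
    int carry = ~x < y;             /* 1: the addition carried out of bit 63 */

    g_assert(carry == (sum < x));   /* both formulations agree */
}
#endif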
2318 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2319 { 2320 #ifdef CONFIG_INT128 2321 r->u128 = a->u128 + b->u128; 2322 #else 2323 avr_qw_add(r, *a, *b); 2324 #endif 2325 } 2326 2327 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2328 { 2329 #ifdef CONFIG_INT128 2330 r->u128 = a->u128 + b->u128 + (c->u128 & 1); 2331 #else 2332 2333 if (c->VsrD(1) & 1) { 2334 ppc_avr_t tmp; 2335 2336 tmp.VsrD(0) = 0; 2337 tmp.VsrD(1) = c->VsrD(1) & 1; 2338 avr_qw_add(&tmp, *a, tmp); 2339 avr_qw_add(r, tmp, *b); 2340 } else { 2341 avr_qw_add(r, *a, *b); 2342 } 2343 #endif 2344 } 2345 2346 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2347 { 2348 #ifdef CONFIG_INT128 2349 r->u128 = (~a->u128 < b->u128); 2350 #else 2351 ppc_avr_t not_a; 2352 2353 avr_qw_not(&not_a, *a); 2354 2355 r->VsrD(0) = 0; 2356 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0); 2357 #endif 2358 } 2359 2360 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2361 { 2362 #ifdef CONFIG_INT128 2363 int carry_out = (~a->u128 < b->u128); 2364 if (!carry_out && (c->u128 & 1)) { 2365 carry_out = ((a->u128 + b->u128 + 1) == 0) && 2366 ((a->u128 != 0) || (b->u128 != 0)); 2367 } 2368 r->u128 = carry_out; 2369 #else 2370 2371 int carry_in = c->VsrD(1) & 1; 2372 int carry_out = 0; 2373 ppc_avr_t tmp; 2374 2375 carry_out = avr_qw_addc(&tmp, *a, *b); 2376 2377 if (!carry_out && carry_in) { 2378 ppc_avr_t one = QW_ONE; 2379 carry_out = avr_qw_addc(&tmp, tmp, one); 2380 } 2381 r->VsrD(0) = 0; 2382 r->VsrD(1) = carry_out; 2383 #endif 2384 } 2385 2386 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2387 { 2388 #ifdef CONFIG_INT128 2389 r->u128 = a->u128 - b->u128; 2390 #else 2391 ppc_avr_t tmp; 2392 ppc_avr_t one = QW_ONE; 2393 2394 avr_qw_not(&tmp, *b); 2395 avr_qw_add(&tmp, *a, tmp); 2396 avr_qw_add(r, tmp, one); 2397 #endif 2398 } 2399 2400 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2401 { 2402 #ifdef CONFIG_INT128 2403 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); 2404 #else 2405 ppc_avr_t tmp, sum; 2406 2407 avr_qw_not(&tmp, *b); 2408 avr_qw_add(&sum, *a, tmp); 2409 2410 tmp.VsrD(0) = 0; 2411 tmp.VsrD(1) = c->VsrD(1) & 1; 2412 avr_qw_add(r, sum, tmp); 2413 #endif 2414 } 2415 2416 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2417 { 2418 #ifdef CONFIG_INT128 2419 r->u128 = (~a->u128 < ~b->u128) || 2420 (a->u128 + ~b->u128 == (__uint128_t)-1); 2421 #else 2422 int carry = (avr_qw_cmpu(*a, *b) > 0); 2423 if (!carry) { 2424 ppc_avr_t tmp; 2425 avr_qw_not(&tmp, *b); 2426 avr_qw_add(&tmp, *a, tmp); 2427 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull)); 2428 } 2429 r->VsrD(0) = 0; 2430 r->VsrD(1) = carry; 2431 #endif 2432 } 2433 2434 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2435 { 2436 #ifdef CONFIG_INT128 2437 r->u128 = 2438 (~a->u128 < ~b->u128) || 2439 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); 2440 #else 2441 int carry_in = c->VsrD(1) & 1; 2442 int carry_out = (avr_qw_cmpu(*a, *b) > 0); 2443 if (!carry_out && carry_in) { 2444 ppc_avr_t tmp; 2445 avr_qw_not(&tmp, *b); 2446 avr_qw_add(&tmp, *a, tmp); 2447 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull)); 2448 } 2449 2450 r->VsrD(0) = 0; 2451 r->VsrD(1) = carry_out; 2452 #endif 2453 } 2454 2455 #define BCD_PLUS_PREF_1 0xC 2456 #define BCD_PLUS_PREF_2 0xF 2457 #define BCD_PLUS_ALT_1 0xA 2458 #define BCD_NEG_PREF 0xD 2459 #define BCD_NEG_ALT 0xB 2460 #define BCD_PLUS_ALT_2 0xE
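/*
 * Illustrative note (not part of the original helpers): the signed
 * packed-decimal quadword used by the bcd* helpers holds 31 BCD digits
 * with the sign code in the least significant nibble, so decimal -12 is
 * encoded as ...012D (digits 1 and 2 followed by the 0xD negative sign
 * code defined above).  A minimal encoder sketch for small magnitudes,
 * under the hypothetical INT_HELPER_DEMO guard so it is never built:
 */
#ifdef INT_HELPER_DEMO
static uint64_t bcd_encode_demo(int32_t val)
{
    uint64_t low = (val < 0) ? BCD_NEG_PREF : BCD_PLUS_PREF_1;
    uint32_t mag = (val < 0) ? -(uint32_t)val : (uint32_t)val;
    int shift = 4;

    while (mag) {                       /* one decimal digit per nibble */
        low |= (uint64_t)(mag % 10) << shift;
        mag /= 10;
        shift += 4;
    }
    return low;     /* the low doubleword: up to 15 digits plus the sign */
}
#endif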
2461 #define NATIONAL_PLUS 0x2B 2462 #define NATIONAL_NEG 0x2D 2463 2464 #if defined(HOST_WORDS_BIGENDIAN) 2465 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2466 #else 2467 #define BCD_DIG_BYTE(n) ((n) / 2) 2468 #endif 2469 2470 static int bcd_get_sgn(ppc_avr_t *bcd) 2471 { 2472 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) { 2473 case BCD_PLUS_PREF_1: 2474 case BCD_PLUS_PREF_2: 2475 case BCD_PLUS_ALT_1: 2476 case BCD_PLUS_ALT_2: 2477 { 2478 return 1; 2479 } 2480 2481 case BCD_NEG_PREF: 2482 case BCD_NEG_ALT: 2483 { 2484 return -1; 2485 } 2486 2487 default: 2488 { 2489 return 0; 2490 } 2491 } 2492 } 2493 2494 static int bcd_preferred_sgn(int sgn, int ps) 2495 { 2496 if (sgn >= 0) { 2497 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2498 } else { 2499 return BCD_NEG_PREF; 2500 } 2501 } 2502 2503 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2504 { 2505 uint8_t result; 2506 if (n & 1) { 2507 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4; 2508 } else { 2509 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF; 2510 } 2511 2512 if (unlikely(result > 9)) { 2513 *invalid = true; 2514 } 2515 return result; 2516 } 2517 2518 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2519 { 2520 if (n & 1) { 2521 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F; 2522 bcd->u8[BCD_DIG_BYTE(n)] |= (digit << 4); 2523 } else { 2524 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0; 2525 bcd->u8[BCD_DIG_BYTE(n)] |= digit; 2526 } 2527 } 2528 2529 static bool bcd_is_valid(ppc_avr_t *bcd) 2530 { 2531 int i; 2532 int invalid = 0; 2533 2534 if (bcd_get_sgn(bcd) == 0) { 2535 return false; 2536 } 2537 2538 for (i = 1; i < 32; i++) { 2539 bcd_get_digit(bcd, i, &invalid); 2540 if (unlikely(invalid)) { 2541 return false; 2542 } 2543 } 2544 return true; 2545 } 2546 2547 static int bcd_cmp_zero(ppc_avr_t *bcd) 2548 { 2549 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) { 2550 return CRF_EQ; 2551 } else { 2552 return (bcd_get_sgn(bcd) == 1) ?
CRF_GT : CRF_LT; 2553 } 2554 } 2555 2556 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2557 { 2558 return reg->VsrH(7 - n); 2559 } 2560 2561 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2562 { 2563 reg->VsrH(7 - n) = val; 2564 } 2565 2566 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2567 { 2568 int i; 2569 int invalid = 0; 2570 for (i = 31; i > 0; i--) { 2571 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2572 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2573 if (unlikely(invalid)) { 2574 return 0; /* doesn't matter */ 2575 } else if (dig_a > dig_b) { 2576 return 1; 2577 } else if (dig_a < dig_b) { 2578 return -1; 2579 } 2580 } 2581 2582 return 0; 2583 } 2584 2585 static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2586 int *overflow) 2587 { 2588 int carry = 0; 2589 int i; 2590 for (i = 1; i <= 31; i++) { 2591 uint8_t digit = bcd_get_digit(a, i, invalid) + 2592 bcd_get_digit(b, i, invalid) + carry; 2593 if (digit > 9) { 2594 carry = 1; 2595 digit -= 10; 2596 } else { 2597 carry = 0; 2598 } 2599 2600 bcd_put_digit(t, digit, i); 2601 } 2602 2603 *overflow = carry; 2604 } 2605 2606 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2607 int *overflow) 2608 { 2609 int carry = 0; 2610 int i; 2611 2612 for (i = 1; i <= 31; i++) { 2613 uint8_t digit = bcd_get_digit(a, i, invalid) - 2614 bcd_get_digit(b, i, invalid) + carry; 2615 if (digit & 0x80) { 2616 carry = -1; 2617 digit += 10; 2618 } else { 2619 carry = 0; 2620 } 2621 2622 bcd_put_digit(t, digit, i); 2623 } 2624 2625 *overflow = carry; 2626 } 2627 2628 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2629 { 2630 2631 int sgna = bcd_get_sgn(a); 2632 int sgnb = bcd_get_sgn(b); 2633 int invalid = (sgna == 0) || (sgnb == 0); 2634 int overflow = 0; 2635 uint32_t cr = 0; 2636 ppc_avr_t result = { .u64 = { 0, 0 } }; 2637 2638 if (!invalid) { 2639 if (sgna == sgnb) { 2640 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps); 2641 bcd_add_mag(&result, a, b, &invalid, &overflow); 2642 cr = bcd_cmp_zero(&result); 2643 } else { 2644 int magnitude = bcd_cmp_mag(a, b); 2645 if (magnitude > 0) { 2646 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps); 2647 bcd_sub_mag(&result, a, b, &invalid, &overflow); 2648 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2649 } else if (magnitude < 0) { 2650 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps); 2651 bcd_sub_mag(&result, b, a, &invalid, &overflow); 2652 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2653 } else { 2654 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(0, ps); 2655 cr = CRF_EQ; 2656 } 2657 } 2658 } 2659 2660 if (unlikely(invalid)) { 2661 result.VsrD(0) = result.VsrD(1) = -1; 2662 cr = CRF_SO; 2663 } else if (overflow) { 2664 cr |= CRF_SO; 2665 } 2666 2667 *r = result; 2668 2669 return cr; 2670 } 2671 2672 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2673 { 2674 ppc_avr_t bcopy = *b; 2675 int sgnb = bcd_get_sgn(b); 2676 if (sgnb < 0) { 2677 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2678 } else if (sgnb > 0) { 2679 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2680 } 2681 /* else invalid ... 
defer to bcdadd code for proper handling */ 2682 2683 return helper_bcdadd(r, a, &bcopy, ps); 2684 } 2685 2686 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2687 { 2688 int i; 2689 int cr = 0; 2690 uint16_t national = 0; 2691 uint16_t sgnb = get_national_digit(b, 0); 2692 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2693 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2694 2695 for (i = 1; i < 8; i++) { 2696 national = get_national_digit(b, i); 2697 if (unlikely(national < 0x30 || national > 0x39)) { 2698 invalid = 1; 2699 break; 2700 } 2701 2702 bcd_put_digit(&ret, national & 0xf, i); 2703 } 2704 2705 if (sgnb == NATIONAL_PLUS) { 2706 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2707 } else { 2708 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2709 } 2710 2711 cr = bcd_cmp_zero(&ret); 2712 2713 if (unlikely(invalid)) { 2714 cr = CRF_SO; 2715 } 2716 2717 *r = ret; 2718 2719 return cr; 2720 } 2721 2722 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2723 { 2724 int i; 2725 int cr = 0; 2726 int sgnb = bcd_get_sgn(b); 2727 int invalid = (sgnb == 0); 2728 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2729 2730 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0); 2731 2732 for (i = 1; i < 8; i++) { 2733 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2734 2735 if (unlikely(invalid)) { 2736 break; 2737 } 2738 } 2739 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2740 2741 cr = bcd_cmp_zero(b); 2742 2743 if (ox_flag) { 2744 cr |= CRF_SO; 2745 } 2746 2747 if (unlikely(invalid)) { 2748 cr = CRF_SO; 2749 } 2750 2751 *r = ret; 2752 2753 return cr; 2754 } 2755 2756 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2757 { 2758 int i; 2759 int cr = 0; 2760 int invalid = 0; 2761 int zone_digit = 0; 2762 int zone_lead = ps ? 0xF : 0x3; 2763 int digit = 0; 2764 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2765 int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4; 2766 2767 if (unlikely((sgnb < 0xA) && ps)) { 2768 invalid = 1; 2769 } 2770 2771 for (i = 0; i < 16; i++) { 2772 zone_digit = i ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead; 2773 digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF; 2774 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2775 invalid = 1; 2776 break; 2777 } 2778 2779 bcd_put_digit(&ret, digit, i + 1); 2780 } 2781 2782 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2783 (!ps && (sgnb & 0x4))) { 2784 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2785 } else { 2786 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2787 } 2788 2789 cr = bcd_cmp_zero(&ret); 2790 2791 if (unlikely(invalid)) { 2792 cr = CRF_SO; 2793 } 2794 2795 *r = ret; 2796 2797 return cr; 2798 } 2799 2800 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2801 { 2802 int i; 2803 int cr = 0; 2804 uint8_t digit = 0; 2805 int sgnb = bcd_get_sgn(b); 2806 int zone_lead = (ps) ? 0xF0 : 0x30; 2807 int invalid = (sgnb == 0); 2808 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2809 2810 int ox_flag = ((b->VsrD(0) >> 4) != 0); 2811 2812 for (i = 0; i < 16; i++) { 2813 digit = bcd_get_digit(b, i + 1, &invalid); 2814 2815 if (unlikely(invalid)) { 2816 break; 2817 } 2818 2819 ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit; 2820 } 2821 2822 if (ps) { 2823 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2824 } else { 2825 bcd_put_digit(&ret, (sgnb == 1) ? 
0x3 : 0x7, 1); 2826 } 2827 2828 cr = bcd_cmp_zero(b); 2829 2830 if (ox_flag) { 2831 cr |= CRF_SO; 2832 } 2833 2834 if (unlikely(invalid)) { 2835 cr = CRF_SO; 2836 } 2837 2838 *r = ret; 2839 2840 return cr; 2841 } 2842 2843 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2844 { 2845 int i; 2846 int cr = 0; 2847 uint64_t lo_value; 2848 uint64_t hi_value; 2849 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2850 2851 if (b->VsrSD(0) < 0) { 2852 lo_value = -b->VsrSD(1); 2853 hi_value = ~b->VsrD(0) + !lo_value; 2854 bcd_put_digit(&ret, 0xD, 0); 2855 } else { 2856 lo_value = b->VsrD(1); 2857 hi_value = b->VsrD(0); 2858 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); 2859 } 2860 2861 if (divu128(&lo_value, &hi_value, 1000000000000000ULL) || 2862 lo_value > 9999999999999999ULL) { 2863 cr = CRF_SO; 2864 } 2865 2866 for (i = 1; i < 16; hi_value /= 10, i++) { 2867 bcd_put_digit(&ret, hi_value % 10, i); 2868 } 2869 2870 for (; i < 32; lo_value /= 10, i++) { 2871 bcd_put_digit(&ret, lo_value % 10, i); 2872 } 2873 2874 cr |= bcd_cmp_zero(&ret); 2875 2876 *r = ret; 2877 2878 return cr; 2879 } 2880 2881 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2882 { 2883 uint8_t i; 2884 int cr; 2885 uint64_t carry; 2886 uint64_t unused; 2887 uint64_t lo_value; 2888 uint64_t hi_value = 0; 2889 int sgnb = bcd_get_sgn(b); 2890 int invalid = (sgnb == 0); 2891 2892 lo_value = bcd_get_digit(b, 31, &invalid); 2893 for (i = 30; i > 0; i--) { 2894 mulu64(&lo_value, &carry, lo_value, 10ULL); 2895 mulu64(&hi_value, &unused, hi_value, 10ULL); 2896 lo_value += bcd_get_digit(b, i, &invalid); 2897 hi_value += carry; 2898 2899 if (unlikely(invalid)) { 2900 break; 2901 } 2902 } 2903 2904 if (sgnb == -1) { 2905 r->VsrSD(1) = -lo_value; 2906 r->VsrSD(0) = ~hi_value + !r->VsrSD(1); 2907 } else { 2908 r->VsrSD(1) = lo_value; 2909 r->VsrSD(0) = hi_value; 2910 } 2911 2912 cr = bcd_cmp_zero(b); 2913 2914 if (unlikely(invalid)) { 2915 cr = CRF_SO; 2916 } 2917 2918 return cr; 2919 } 2920 2921 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2922 { 2923 int i; 2924 int invalid = 0; 2925 2926 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 2927 return CRF_SO; 2928 } 2929 2930 *r = *a; 2931 bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0); 2932 2933 for (i = 1; i < 32; i++) { 2934 bcd_get_digit(a, i, &invalid); 2935 bcd_get_digit(b, i, &invalid); 2936 if (unlikely(invalid)) { 2937 return CRF_SO; 2938 } 2939 } 2940 2941 return bcd_cmp_zero(r); 2942 } 2943 2944 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2945 { 2946 int sgnb = bcd_get_sgn(b); 2947 2948 *r = *b; 2949 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 2950 2951 if (bcd_is_valid(b) == false) { 2952 return CRF_SO; 2953 } 2954 2955 return bcd_cmp_zero(r); 2956 } 2957 2958 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2959 { 2960 int cr; 2961 #if defined(HOST_WORDS_BIGENDIAN) 2962 int i = a->s8[7]; 2963 #else 2964 int i = a->s8[8]; 2965 #endif 2966 bool ox_flag = false; 2967 int sgnb = bcd_get_sgn(b); 2968 ppc_avr_t ret = *b; 2969 ret.VsrD(1) &= ~0xf; 2970 2971 if (bcd_is_valid(b) == false) { 2972 return CRF_SO; 2973 } 2974 2975 if (unlikely(i > 31)) { 2976 i = 31; 2977 } else if (unlikely(i < -31)) { 2978 i = -31; 2979 } 2980 2981 if (i > 0) { 2982 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2983 } else { 2984 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2985 } 2986 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2987 2988 *r = ret; 2989 2990 cr = 
bcd_cmp_zero(r); 2991 if (ox_flag) { 2992 cr |= CRF_SO; 2993 } 2994 2995 return cr; 2996 } 2997 2998 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2999 { 3000 int cr; 3001 int i; 3002 int invalid = 0; 3003 bool ox_flag = false; 3004 ppc_avr_t ret = *b; 3005 3006 for (i = 0; i < 32; i++) { 3007 bcd_get_digit(b, i, &invalid); 3008 3009 if (unlikely(invalid)) { 3010 return CRF_SO; 3011 } 3012 } 3013 3014 #if defined(HOST_WORDS_BIGENDIAN) 3015 i = a->s8[7]; 3016 #else 3017 i = a->s8[8]; 3018 #endif 3019 if (i >= 32) { 3020 ox_flag = true; 3021 ret.VsrD(1) = ret.VsrD(0) = 0; 3022 } else if (i <= -32) { 3023 ret.VsrD(1) = ret.VsrD(0) = 0; 3024 } else if (i > 0) { 3025 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 3026 } else { 3027 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 3028 } 3029 *r = ret; 3030 3031 cr = bcd_cmp_zero(r); 3032 if (ox_flag) { 3033 cr |= CRF_SO; 3034 } 3035 3036 return cr; 3037 } 3038 3039 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3040 { 3041 int cr; 3042 int unused = 0; 3043 int invalid = 0; 3044 bool ox_flag = false; 3045 int sgnb = bcd_get_sgn(b); 3046 ppc_avr_t ret = *b; 3047 ret.VsrD(1) &= ~0xf; 3048 3049 #if defined(HOST_WORDS_BIGENDIAN) 3050 int i = a->s8[7]; 3051 ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } }; 3052 #else 3053 int i = a->s8[8]; 3054 ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } }; 3055 #endif 3056 3057 if (bcd_is_valid(b) == false) { 3058 return CRF_SO; 3059 } 3060 3061 if (unlikely(i > 31)) { 3062 i = 31; 3063 } else if (unlikely(i < -31)) { 3064 i = -31; 3065 } 3066 3067 if (i > 0) { 3068 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 3069 } else { 3070 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 3071 3072 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 3073 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 3074 } 3075 } 3076 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 3077 3078 cr = bcd_cmp_zero(&ret); 3079 if (ox_flag) { 3080 cr |= CRF_SO; 3081 } 3082 *r = ret; 3083 3084 return cr; 3085 } 3086 3087 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3088 { 3089 uint64_t mask; 3090 uint32_t ox_flag = 0; 3091 #if defined(HOST_WORDS_BIGENDIAN) 3092 int i = a->s16[3] + 1; 3093 #else 3094 int i = a->s16[4] + 1; 3095 #endif 3096 ppc_avr_t ret = *b; 3097 3098 if (bcd_is_valid(b) == false) { 3099 return CRF_SO; 3100 } 3101 3102 if (i > 16 && i < 32) { 3103 mask = (uint64_t)-1 >> (128 - i * 4); 3104 if (ret.VsrD(0) & ~mask) { 3105 ox_flag = CRF_SO; 3106 } 3107 3108 ret.VsrD(0) &= mask; 3109 } else if (i >= 0 && i <= 16) { 3110 mask = (uint64_t)-1 >> (64 - i * 4); 3111 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 3112 ox_flag = CRF_SO; 3113 } 3114 3115 ret.VsrD(1) &= mask; 3116 ret.VsrD(0) = 0; 3117 } 3118 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 3119 *r = ret; 3120 3121 return bcd_cmp_zero(&ret) | ox_flag; 3122 } 3123 3124 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 3125 { 3126 int i; 3127 uint64_t mask; 3128 uint32_t ox_flag = 0; 3129 int invalid = 0; 3130 ppc_avr_t ret = *b; 3131 3132 for (i = 0; i < 32; i++) { 3133 bcd_get_digit(b, i, &invalid); 3134 3135 if (unlikely(invalid)) { 3136 return CRF_SO; 3137 } 3138 } 3139 3140 #if defined(HOST_WORDS_BIGENDIAN) 3141 i = a->s16[3]; 3142 #else 3143 i = a->s16[4]; 3144 #endif 3145 if (i > 16 && i < 33) { 3146 mask = (uint64_t)-1 >> (128 - i * 4); 3147 if (ret.VsrD(0) & ~mask) { 3148 ox_flag = CRF_SO; 3149 } 3150 3151 ret.VsrD(0) &= mask; 3152 } 
else if (i > 0 && i <= 16) { 3153 mask = (uint64_t)-1 >> (64 - i * 4); 3154 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 3155 ox_flag = CRF_SO; 3156 } 3157 3158 ret.VsrD(1) &= mask; 3159 ret.VsrD(0) = 0; 3160 } else if (i == 0) { 3161 if (ret.VsrD(0) || ret.VsrD(1)) { 3162 ox_flag = CRF_SO; 3163 } 3164 ret.VsrD(0) = ret.VsrD(1) = 0; 3165 } 3166 3167 *r = ret; 3168 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) { 3169 return ox_flag | CRF_EQ; 3170 } 3171 3172 return ox_flag | CRF_GT; 3173 } 3174 3175 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 3176 { 3177 int i; 3178 VECTOR_FOR_INORDER_I(i, u8) { 3179 r->u8[i] = AES_sbox[a->u8[i]]; 3180 } 3181 } 3182 3183 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3184 { 3185 ppc_avr_t result; 3186 int i; 3187 3188 VECTOR_FOR_INORDER_I(i, u32) { 3189 result.VsrW(i) = b->VsrW(i) ^ 3190 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^ 3191 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^ 3192 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^ 3193 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]); 3194 } 3195 *r = result; 3196 } 3197 3198 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3199 { 3200 ppc_avr_t result; 3201 int i; 3202 3203 VECTOR_FOR_INORDER_I(i, u8) { 3204 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]); 3205 } 3206 *r = result; 3207 } 3208 3209 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3210 { 3211 /* This differs from what is written in ISA V2.07. The RTL is */ 3212 /* incorrect and will be fixed in V2.07B. */ 3213 int i; 3214 ppc_avr_t tmp; 3215 3216 VECTOR_FOR_INORDER_I(i, u8) { 3217 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])]; 3218 } 3219 3220 VECTOR_FOR_INORDER_I(i, u32) { 3221 r->VsrW(i) = 3222 AES_imc[tmp.VsrB(4 * i + 0)][0] ^ 3223 AES_imc[tmp.VsrB(4 * i + 1)][1] ^ 3224 AES_imc[tmp.VsrB(4 * i + 2)][2] ^ 3225 AES_imc[tmp.VsrB(4 * i + 3)][3]; 3226 } 3227 } 3228 3229 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 3230 { 3231 ppc_avr_t result; 3232 int i; 3233 3234 VECTOR_FOR_INORDER_I(i, u8) { 3235 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]); 3236 } 3237 *r = result; 3238 } 3239 3240 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3241 { 3242 int st = (st_six & 0x10) != 0; 3243 int six = st_six & 0xF; 3244 int i; 3245 3246 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 3247 if (st == 0) { 3248 if ((six & (0x8 >> i)) == 0) { 3249 r->VsrW(i) = ror32(a->VsrW(i), 7) ^ 3250 ror32(a->VsrW(i), 18) ^ 3251 (a->VsrW(i) >> 3); 3252 } else { /* six.bit[i] == 1 */ 3253 r->VsrW(i) = ror32(a->VsrW(i), 17) ^ 3254 ror32(a->VsrW(i), 19) ^ 3255 (a->VsrW(i) >> 10); 3256 } 3257 } else { /* st == 1 */ 3258 if ((six & (0x8 >> i)) == 0) { 3259 r->VsrW(i) = ror32(a->VsrW(i), 2) ^ 3260 ror32(a->VsrW(i), 13) ^ 3261 ror32(a->VsrW(i), 22); 3262 } else { /* six.bit[i] == 1 */ 3263 r->VsrW(i) = ror32(a->VsrW(i), 6) ^ 3264 ror32(a->VsrW(i), 11) ^ 3265 ror32(a->VsrW(i), 25); 3266 } 3267 } 3268 } 3269 } 3270 3271 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 3272 { 3273 int st = (st_six & 0x10) != 0; 3274 int six = st_six & 0xF; 3275 int i; 3276 3277 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 3278 if (st == 0) { 3279 if ((six & (0x8 >> (2*i))) == 0) { 3280 r->VsrD(i) = ror64(a->VsrD(i), 1) ^ 3281 ror64(a->VsrD(i), 8) ^ 3282 (a->VsrD(i) >> 7); 3283 } else { /* six.bit[2*i] == 1 */ 3284 r->VsrD(i) = ror64(a->VsrD(i), 19) ^ 3285 ror64(a->VsrD(i), 61) ^ 3286 (a->VsrD(i) >> 6); 3287 } 3288 } else { /* st == 1 */ 3289 if ((six & (0x8 >> (2*i))) 
== 0) { 3290 r->VsrD(i) = ror64(a->VsrD(i), 28) ^ 3291 ror64(a->VsrD(i), 34) ^ 3292 ror64(a->VsrD(i), 39); 3293 } else { /* six.bit[2*i] == 1 */ 3294 r->VsrD(i) = ror64(a->VsrD(i), 14) ^ 3295 ror64(a->VsrD(i), 18) ^ 3296 ror64(a->VsrD(i), 41); 3297 } 3298 } 3299 } 3300 } 3301 3302 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 3303 { 3304 ppc_avr_t result; 3305 int i; 3306 3307 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 3308 int indexA = c->VsrB(i) >> 4; 3309 int indexB = c->VsrB(i) & 0xF; 3310 3311 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB); 3312 } 3313 *r = result; 3314 } 3315 3316 #undef VECTOR_FOR_INORDER_I 3317 3318 /*****************************************************************************/ 3319 /* SPE extension helpers */ 3320 /* Use a table to make this quicker */ 3321 static const uint8_t hbrev[16] = { 3322 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE, 3323 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF, 3324 }; 3325 3326 static inline uint8_t byte_reverse(uint8_t val) 3327 { 3328 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4); 3329 } 3330 3331 static inline uint32_t word_reverse(uint32_t val) 3332 { 3333 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) | 3334 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24); 3335 } 3336 3337 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */ 3338 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2) 3339 { 3340 uint32_t a, b, d, mask; 3341 3342 mask = UINT32_MAX >> (32 - MASKBITS); 3343 a = arg1 & mask; 3344 b = arg2 & mask; 3345 d = word_reverse(1 + word_reverse(a | ~b)); 3346 return (arg1 & ~mask) | (d & b); 3347 } 3348 3349 uint32_t helper_cntlsw32(uint32_t val) 3350 { 3351 if (val & 0x80000000) { 3352 return clz32(~val); 3353 } else { 3354 return clz32(val); 3355 } 3356 } 3357 3358 uint32_t helper_cntlzw32(uint32_t val) 3359 { 3360 return clz32(val); 3361 } 3362 3363 /* 440 specific */ 3364 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 3365 target_ulong low, uint32_t update_Rc) 3366 { 3367 target_ulong mask; 3368 int i; 3369 3370 i = 1; 3371 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3372 if ((high & mask) == 0) { 3373 if (update_Rc) { 3374 env->crf[0] = 0x4; 3375 } 3376 goto done; 3377 } 3378 i++; 3379 } 3380 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 3381 if ((low & mask) == 0) { 3382 if (update_Rc) { 3383 env->crf[0] = 0x8; 3384 } 3385 goto done; 3386 } 3387 i++; 3388 } 3389 i = 8; 3390 if (update_Rc) { 3391 env->crf[0] = 0x2; 3392 } 3393 done: 3394 env->xer = (env->xer & ~0x7F) | i; 3395 if (update_Rc) { 3396 env->crf[0] |= xer_so; 3397 } 3398 return i; 3399 } 3400
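/*
 * Illustrative sketch (not part of the original helpers): helper_brinc()
 * implements the bit-reversed increment used for SPE FFT addressing.
 * Within the span selected by the rB mask, the index advances as if its
 * bits were reversed, so with a 3-bit mask (rB = 0x7) successive calls
 * starting from 0 visit 0, 4, 2, 6, 1, 5, 3, 7 and then wrap back to 0.
 * INT_HELPER_DEMO is a hypothetical guard so the sketch is never built.
 */
#ifdef INT_HELPER_DEMO
static void brinc_demo(void)
{
    target_ulong idx = 0;
    int n;

    for (n = 0; n < 8; n++) {
        printf("%d\n", (int)(idx & 0x7));   /* 0, 4, 2, 6, 1, 5, 3, 7 */
        idx = helper_brinc(idx, 0x7);
    }
}
#endif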