/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
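
/*
 * e.g. pattern(0x01) == 0x0101010101010101 and pattern(0x80) ==
 * 0x8080808080808080 on a 64-bit target: the byte is replicated into
 * every byte lane of a target_ulong.
 */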

/*
 * Subtract 1 from each byte, AND with the inverse, and check whether the
 * MSB is set in each byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80  (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n)  (haszero((x) ^ pattern(n)))

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

/*
 * Return a random number.
 */
uint64_t helper_darn32(void)
{
    Error *err = NULL;
    uint32_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_darn64(void)
{
    Error *err = NULL;
    uint64_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & PPC_BIT(index)) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca32 = env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca32 = env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}
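
/*
 * Example: on a 64-bit target, helper_popcntb(0xff01) == 0x0801 -- each
 * byte of the result holds the population count of the corresponding
 * input byte, and helper_popcntw() below likewise stops folding at word
 * granularity.
 */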

target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words.  */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}
#endif

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}
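
/*
 * The four helpers above implement the POWER-bridge divide forms: div/divo
 * divide the 64-bit quantity (RA || MQ) by RB, while divs/divso divide RA
 * by RB; in both cases the remainder is left in MQ. The overflow ("o")
 * forms also update SO/OV, and INT32_MIN is returned (with MQ cleared) on
 * divide-by-zero or INT32_MIN / -1.
 */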

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/*
 * Extremely decomposed:
 *                      -arg / 256
 * return 256 * log10(10           + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.inc.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
{
    env->vscr = vscr & ~(1u << VSCR_SAT);
    /* Which bit we set is completely arbitrary, but clear the rest.  */
    env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT);
    env->vscr_sat.u64[1] = 0;
    set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
}

uint32_t helper_mfvscr(CPUPPCState *env)
{
    uint32_t sat = (env->vscr_sat.u64[0] | env->vscr_sat.u64[1]) != 0;
    return env->vscr | (sat << VSCR_SAT);
}
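
/*
 * VSCR[SAT] is kept outside env->vscr, as a 128-bit "sticky" vector in
 * env->vscr_sat: any non-zero bit anywhere in vscr_sat means SAT is set,
 * which is what helper_mfvscr() above and set_vscr_sat() below rely on.
 */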

static inline void set_vscr_sat(CPUPPCState *env)
{
    /* The choice of non-zero value is arbitrary.  */
    env->vscr_sat.u32[0] = 1;
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->VsrD(1) = res & 1;
    r->VsrD(0) = 0;
}

#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
VARITH_DO(muluwm, *, u32)
#undef VARITH_DO
#undef VARITH

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status);   \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
                                       type, &env->vec_status);         \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat,              \
                        ppc_avr_t *a, ppc_avr_t *b, uint32_t desc)      \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            VARITHSAT_CASE(optype, op, cvt, element);                   \
        }                                                               \
        if (sat) {                                                      \
            vscr_sat->u32[0] = 1;                                       \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)                  \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)              \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)                \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)              \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED
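
/*
 * The VAVG_DO variants below compute (a + b + 1) >> 1 per element in a
 * type twice as wide as the element, so the rounding "+ 1" cannot
 * overflow before the shift.
 */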

#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG

#define VABSDU_DO(name, element)                                        \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)           \
{                                                                       \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        r->element[i] = (a->element[i] > b->element[i]) ?               \
            (a->element[i] - b->element[i]) :                           \
            (b->element[i] - a->element[i]);                            \
    }                                                                   \
}

/*
 * VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element)                   \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU

#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f32[i] = float32_scalbn(t, -uim, &env->vec_status);      \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP
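
/*
 * For the record (".") forms above and below, CR6 is set to
 * ((all != 0) << 3) | ((none == 0) << 1): bit 3 when every element
 * compared true, bit 1 when no element did ("none" actually accumulates
 * the OR of the per-element results, so none == 0 means no match).
 */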

#define VCMPNE_DO(suffix, element, etype, cmpzero, record)              \
    void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r,          \
                               ppc_avr_t *a, ppc_avr_t *b)              \
    {                                                                   \
        etype ones = (etype)-1;                                         \
        etype all = ones;                                               \
        etype result, none = 0;                                         \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (cmpzero) {                                              \
                result = ((a->element[i] == 0)                          \
                          || (b->element[i] == 0)                       \
                          || (a->element[i] != b->element[i]) ?         \
                          ones : 0x0);                                  \
            } else {                                                    \
                result = (a->element[i] != b->element[i]) ? ones : 0x0; \
            }                                                           \
            r->element[i] = result;                                     \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }

/*
 * VCMPNEZ - Vector compare not equal to zero
 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VCMPNE(suffix, element, etype, cmpzero)         \
    VCMPNE_DO(suffix, element, etype, cmpzero, 0)       \
    VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
VCMPNE(zb, u8, uint8_t, 1)
VCMPNE(zh, u16, uint16_t, 1)
VCMPNE(zw, u32, uint32_t, 1)
VCMPNE(b, u8, uint8_t, 0)
VCMPNE(h, u16, uint16_t, 0)
VCMPNE(w, u32, uint32_t, 0)
#undef VCMPNE_DO
#undef VCMPNE

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            uint32_t result;                                            \
            FloatRelation rel =                                         \
                float32_compare_quiet(a->f32[i], b->f32[i],             \
                                      &env->vec_status);                \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
                                                     &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f32[i]);
            FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
                                                         &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}
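
/*
 * The VCT variants below convert float to (un)signed word with
 * saturation: each input is scaled by 2^uim via float64_scalbn(), then
 * converted using round-to-zero, with NaNs mapped to zero and VSCR[SAT]
 * set if any element saturated.
 */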

#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            if (float32_is_any_nan(b->f32[i])) {                        \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f32[i], &s);          \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            set_vscr_sat(env);                                          \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, access, ofs)                             \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i, half = ARRAY_SIZE(r->element) / 2;                       \
                                                                        \
        for (i = 0; i < half; i++) {                                    \
            result.access(i * 2 + 0) = a->access(i + ofs);              \
            result.access(i * 2 + 1) = b->access(i + ofs);              \
        }                                                               \
        *r = result;                                                    \
    }

#define VMRG(suffix, element, access)                   \
    VMRG_DO(mrgl##suffix, element, access, half)        \
    VMRG_DO(mrgh##suffix, element, access, 0)
VMRG(b, u8, VsrB)
VMRG(h, u16, VsrH)
VMRG(w, u32, VsrW)
#undef VMRG_DO
#undef VMRG

void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
                                     (cast)b->mul_access(i);            \
        }                                                               \
    }

#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) *       \
                                     (cast)b->mul_access(i + 1);        \
        }                                                               \
    }

#define VMUL(suffix, mul_element, mul_access, prod_access, cast)          \
    VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \
    VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
VMUL(sb, s8, VsrSB, VsrSH, int16_t)
VMUL(sh, s16, VsrSH, VsrSW, int32_t)
VMUL(sw, s32, VsrSW, VsrSD, int64_t)
VMUL(ub, u8, VsrB, VsrH, uint16_t)
VMUL(uh, u16, VsrH, VsrW, uint32_t)
VMUL(uw, u32, VsrW, VsrD, uint64_t)
#undef VMUL_DO_EVN
#undef VMUL_DO_ODD
#undef VMUL

void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = s & 0xf;

        if (s & 0x10) {
            result.VsrB(i) = b->VsrB(index);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                   ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = 15 - (s & 0xf);

        if (s & 0x10) {
            result.VsrB(i) = a->VsrB(index);
        } else {
            result.VsrB(i) = b->VsrB(index);
        }
    }
    *r = result;
}
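
/*
 * In helper_vperm() above, only the low five bits of each selector byte
 * in c are used: bit 0x10 picks between the two source registers and the
 * low four bits index a byte within it. helper_vpermr() inverts both:
 * it indexes from the opposite end (15 - index) and swaps which register
 * the 0x10 bit selects.
 */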

#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index)                      \
    (extract64((avr)->u64[1 - i], 63 - index, 1))
#endif

void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}

void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63 - (index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->VsrD(0) = perm;
    r->VsrD(1) = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

#define PMSUM(name, srcfld, trgfld, trgtyp)                             \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i, j;                                                           \
    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])];              \
                                                                        \
    VECTOR_FOR_INORDER_I(i, srcfld) {                                   \
        prod[i] = 0;                                                    \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {                \
            if (a->srcfld[i] & (1ull << j)) {                           \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);                 \
            }                                                           \
        }                                                               \
    }                                                                   \
                                                                        \
    VECTOR_FOR_INORDER_I(i, trgfld) {                                   \
        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1];                   \
    }                                                                   \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{

#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                ppc_avr_t bshift;
                if (j == 0) {
                    bshift.VsrD(0) = 0;
                    bshift.VsrD(1) = b->u64[i];
                } else {
                    bshift.VsrD(0) = b->u64[i] >> (64 - j);
                    bshift.VsrD(1) = b->u64[i] << j;
                }
                prod[i].VsrD(1) ^= bshift.VsrD(1);
                prod[i].VsrD(0) ^= bshift.VsrD(0);
            }
        }
    }

    r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
    r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
#endif
}


#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
                                     ((e >> 6) & 0x3e0) |
                                     ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}
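
/*
 * helper_vpkpx() above packs each 32-bit 8:8:8:8 pixel into a 16-bit
 * 1:5:5:5 pixel: the result keeps the least significant bit of the top
 * byte plus the top five bits of each of the remaining three bytes.
 */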

#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            set_vscr_sat(env);                                          \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(sdss, s64, s32, cvtsdsw, 1)
VPK(sdus, s64, u32, cvtsduw, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(udus, u64, u32, cvtuduw, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
VPK(udum, u64, u32, I, 0)
#undef I
#undef VPK
#undef PKBIG

void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
    }
}

#define VRFI(suffix, rounding)                                  \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
                             ppc_avr_t *b)                      \
    {                                                           \
        int i;                                                  \
        float_status s = env->vec_status;                       \
                                                                \
        set_float_rounding_mode(rounding, &s);                  \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {              \
            r->f32[i] = float32_round_to_int(b->f32[i], &s);    \
        }                                                       \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI

#define VROTATE(suffix, element, mask)                                  \
    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = (a->element[i] << shift) |                  \
                (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
        }                                                               \
    }
VROTATE(b, u8, 0x7)
VROTATE(h, u16, 0xF)
VROTATE(w, u32, 0x1F)
VROTATE(d, u64, 0x3F)
#undef VROTATE

void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        float32 t = float32_sqrt(b->f32[i], &env->vec_status);

        r->f32[i] = float32_div(float32_one, t, &env->vec_status);
    }
}

#define VRLMI(name, size, element, insert)                              \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i;                                                              \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        uint##size##_t src1 = a->element[i];                            \
        uint##size##_t src2 = b->element[i];                            \
        uint##size##_t src3 = r->element[i];                            \
        uint##size##_t begin, end, shift, mask, rot_val;                \
                                                                        \
        shift = extract##size(src2, 0, 6);                              \
        end   = extract##size(src2, 8, 6);                              \
        begin = extract##size(src2, 16, 6);                             \
        rot_val = rol##size(src1, shift);                               \
        mask = mask_u##size(begin, end);                                \
        if (insert) {                                                   \
            r->element[i] = (rot_val & mask) | (src3 & ~mask);          \
        } else {                                                        \
            r->element[i] = (rot_val & mask);                           \
        }                                                               \
    }                                                                   \
}

VRLMI(vrldmi, 64, u64, 1);
VRLMI(vrlwmi, 32, u32, 1);
VRLMI(vrldnm, 64, u64, 0);
VRLMI(vrlwnm, 32, u32, 0);

void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                 ppc_avr_t *c)
{
    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
}

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTU_X_DO(name, size, left)                            \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
{                                                               \
    int index;                                                  \
    if (left) {                                                 \
        index = (a & 0xf) * 8;                                  \
    } else {                                                    \
        index = ((15 - (a & 0xf) + 1) * 8) - size;              \
    }                                                           \
    return int128_getlo(int128_rshift(b->s128, index)) &        \
        MAKE_64BIT_MASK(0, size);                               \
}
#else
#define VEXTU_X_DO(name, size, left)                            \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
{                                                               \
    int index;                                                  \
    if (left) {                                                 \
        index = ((15 - (a & 0xf) + 1) * 8) - size;              \
    } else {                                                    \
        index = (a & 0xf) * 8;                                  \
    }                                                           \
    return int128_getlo(int128_rshift(b->s128, index)) &        \
        MAKE_64BIT_MASK(0, size);                               \
}
#endif

VEXTU_X_DO(vextublx, 8, 1)
VEXTU_X_DO(vextuhlx, 16, 1)
VEXTU_X_DO(vextuwlx, 32, 1)
VEXTU_X_DO(vextubrx, 8, 0)
VEXTU_X_DO(vextuhrx, 16, 0)
VEXTU_X_DO(vextuwrx, 32, 0)
#undef VEXTU_X_DO

void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->VsrB(i) & 0x7;               /* extract shift value */
        bytes = (a->VsrB(i) << 8) +             /* extract adjacent bytes */
                (((i + 1) < size) ? a->VsrB(i + 1) : 0);
        r->VsrB(i) = (bytes << shift) >> 8;     /* shift and store result */
    }
}
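
/*
 * In helper_vslv() above, each result byte i is the 16-bit value
 * a->VsrB(i):a->VsrB(i + 1) shifted left by the low three bits of
 * b->VsrB(i), keeping the high byte, i.e. bits shifted out of byte i are
 * refilled from the next byte. helper_vsrv() below is the mirror image,
 * shifting right and pulling bits in from the previous byte.
 */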

void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /*
     * Use reverse order, as destination and source register can be the
     * same. It is modified in place, which saves a temporary, and the
     * reverse order guarantees that the computed result is not fed back.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->VsrB(i) & 0x7;               /* extract shift value */
        bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
                                                /* extract adjacent bytes */
        r->VsrB(i) = (bytes >> shift) & 0xFF;   /* shift and store result */
    }
}

void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.VsrB(i) = b->VsrB(index - 0x10);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])],           \
                sizeof(r->element[0]));                                     \
    }
#else
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        uint32_t d = (16 - index) - sizeof(r->element[0]);                  \
        memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0]));               \
    }
#endif
VINSERT(b, u8)
VINSERT(h, u16)
VINSERT(w, u32)
VINSERT(d, u64)
#undef VINSERT
#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
        memset(&r->u8[8], 0, 8);                                             \
        memset(&r->u8[0], 0, 8 - es);                                        \
    }
#else
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        uint32_t s = (16 - index) - es;                                      \
        memmove(&r->u8[8], &b->u8[s], es);                                   \
        memset(&r->u8[0], 0, 8);                                             \
        memset(&r->u8[8 + es], 0, 8 - es);                                   \
    }
#endif
VEXTRACT(ub, u8)
VEXTRACT(uh, u16)
VEXTRACT(uw, u32)
VEXTRACT(d, u64)
#undef VEXTRACT

void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
                        ppc_vsr_t *xb, uint32_t index)
{
    ppc_vsr_t t = { };
    size_t es = sizeof(uint32_t);
    uint32_t ext_index;
    int i;

    ext_index = index;
    for (i = 0; i < es; i++, ext_index++) {
        t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
    }

    *xt = t;
}

void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
                      ppc_vsr_t *xb, uint32_t index)
{
    ppc_vsr_t t = *xt;
    size_t es = sizeof(uint32_t);
    int ins_index, i = 0;

    ins_index = index;
    for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
        t.VsrB(ins_index) = xb->VsrB(8 - es + i);
    }

    *xt = t;
}

#define VEXT_SIGNED(name, element, cast)                            \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
{                                                                   \
    int i;                                                          \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
        r->element[i] = (cast)b->element[i];                        \
    }                                                               \
}
VEXT_SIGNED(vextsb2w, s32, int8_t)
VEXT_SIGNED(vextsb2d, s64, int8_t)
VEXT_SIGNED(vextsh2w, s32, int16_t)
VEXT_SIGNED(vextsh2d, s64, int16_t)
VEXT_SIGNED(vextsw2d, s64, int32_t)
#undef VEXT_SIGNED

#define VNEG(name, element)                                         \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
{                                                                   \
    int i;                                                          \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
        r->element[i] = -b->element[i];                             \
    }                                                               \
}
VNEG(vnegw, s32)
VNEG(vnegd, s64)
#undef VNEG

void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = a->u32[i] >= b->u32[i];
    }
}

void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

    upper = ARRAY_SIZE(r->s32) - 1;
    t = (int64_t)b->VsrSW(upper);
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->VsrSW(i);
        result.VsrSW(i) = 0;
    }
    result.VsrSW(upper) = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

    upper = 1;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->VsrSW(upper + i * 2);

        result.VsrD(i) = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->VsrSW(2 * i + j);
        }
        result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
#define VUPKPX(suffix, hi)                                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
            uint16_t e = b->u16[hi ? i : i + 4];                        \
            uint8_t a = (e >> 15) ? 0xff : 0;                           \
            uint8_t r = (e >> 10) & 0x1f;                               \
            uint8_t g = (e >> 5) & 0x1f;                                \
            uint8_t b = e & 0x1f;                                       \
                                                                        \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
        }                                                               \
        *r = result;                                                    \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

#define VUPK(suffix, unpacked, packee, hi)                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        if (hi) {                                                       \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
                result.unpacked[i] = b->packee[i];                      \
            }                                                           \
        } else {                                                        \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                                 \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(hsw, s64, s32, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
VUPK(lsw, s64, s32, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

#define VGENERIC_DO(name, element)                                      \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = name(b->element[i]);                        \
        }                                                               \
    }

#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)

#undef clzb
#undef clzh

#define ctzb(v) ((v) ? ctz32(v) : 8)
#define ctzh(v) ((v) ? ctz32(v) : 16)
#define ctzw(v) ctz32((v))
#define ctzd(v) ctz64((v))

VGENERIC_DO(ctzb, u8)
VGENERIC_DO(ctzh, u16)
VGENERIC_DO(ctzw, u32)
VGENERIC_DO(ctzd, u64)

#undef ctzb
#undef ctzh
#undef ctzw
#undef ctzd

#define popcntb(v) ctpop8(v)
#define popcnth(v) ctpop16(v)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)

VGENERIC_DO(popcntb, u8)
VGENERIC_DO(popcnth, u16)
VGENERIC_DO(popcntw, u32)
VGENERIC_DO(popcntd, u64)

#undef popcntb
#undef popcnth
#undef popcntw
#undef popcntd

#undef VGENERIC_DO

#if defined(HOST_WORDS_BIGENDIAN)
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif

#ifndef CONFIG_INT128

static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
{
    t->u64[0] = ~a.u64[0];
    t->u64[1] = ~a.u64[1];
}

static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
{
    if (a.VsrD(0) < b.VsrD(0)) {
        return -1;
    } else if (a.VsrD(0) > b.VsrD(0)) {
        return 1;
    } else if (a.VsrD(1) < b.VsrD(1)) {
        return -1;
    } else if (a.VsrD(1) > b.VsrD(1)) {
        return 1;
    } else {
        return 0;
    }
}

static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
                 (~a.VsrD(1) < b.VsrD(1));
}

static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    ppc_avr_t not_a;
    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
                 (~a.VsrD(1) < b.VsrD(1));
    avr_qw_not(&not_a, a);
    return avr_qw_cmpu(not_a, b) < 0;
}

#endif
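
/*
 * In avr_qw_add()/avr_qw_addc() above, the expression
 * (~a.VsrD(1) < b.VsrD(1)) is the carry out of the low 64-bit addition:
 * a + b overflows 64 bits exactly when b is greater than ~a, i.e. than
 * UINT64_MAX - a.
 */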

void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128;
#else
    avr_qw_add(r, *a, *b);
#endif
}

void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else

    if (c->VsrD(1) & 1) {
        ppc_avr_t tmp;

        tmp.VsrD(0) = 0;
        tmp.VsrD(1) = c->VsrD(1) & 1;
        avr_qw_add(&tmp, *a, tmp);
        avr_qw_add(r, tmp, *b);
    } else {
        avr_qw_add(r, *a, *b);
    }
#endif
}

void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < b->u128);
#else
    ppc_avr_t not_a;

    avr_qw_not(&not_a, *a);

    r->VsrD(0) = 0;
    r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
#endif
}

void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    int carry_out = (~a->u128 < b->u128);
    if (!carry_out && (c->u128 & 1)) {
        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
                    ((a->u128 != 0) || (b->u128 != 0));
    }
    r->u128 = carry_out;
#else

    int carry_in = c->VsrD(1) & 1;
    int carry_out = 0;
    ppc_avr_t tmp;

    carry_out = avr_qw_addc(&tmp, *a, *b);

    if (!carry_out && carry_in) {
        ppc_avr_t one = QW_ONE;
        carry_out = avr_qw_addc(&tmp, tmp, one);
    }
    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
#endif
}

void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 - b->u128;
#else
    ppc_avr_t tmp;
    ppc_avr_t one = QW_ONE;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&tmp, *a, tmp);
    avr_qw_add(r, tmp, one);
#endif
}

void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
#else
    ppc_avr_t tmp, sum;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&sum, *a, tmp);

    tmp.VsrD(0) = 0;
    tmp.VsrD(1) = c->VsrD(1) & 1;
    avr_qw_add(r, sum, tmp);
#endif
}

void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < ~b->u128) ||
              (a->u128 + ~b->u128 == (__uint128_t)-1);
#else
    int carry = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
    }
    r->VsrD(0) = 0;
    r->VsrD(1) = carry;
#endif
}

void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 =
        (~a->u128 < ~b->u128) ||
        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
#else
    int carry_in = c->VsrD(1) & 1;
    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry_out && carry_in) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
    }

    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
#endif
}

#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
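
/*
 * BCD operands hold 31 packed decimal digits plus a sign nibble.
 * BCD_DIG_BYTE(n) maps digit n to byte 15 - n/2 (in VsrB() element
 * order): digit 0 is the sign nibble in the low half of byte 15, and for
 * n > 0 even digits sit in the low nibble and odd digits in the high
 * nibble, as bcd_get_digit()/bcd_put_digit() below expect.
 */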

static int bcd_get_sgn(ppc_avr_t *bcd)
{
    switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
    case BCD_PLUS_PREF_1:
    case BCD_PLUS_PREF_2:
    case BCD_PLUS_ALT_1:
    case BCD_PLUS_ALT_2:
    {
        return 1;
    }

    case BCD_NEG_PREF:
    case BCD_NEG_ALT:
    {
        return -1;
    }

    default:
    {
        return 0;
    }
    }
}

static int bcd_preferred_sgn(int sgn, int ps)
{
    if (sgn >= 0) {
        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
    } else {
        return BCD_NEG_PREF;
    }
}

static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
{
    uint8_t result;
    if (n & 1) {
        result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
    } else {
        result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
    }

    if (unlikely(result > 9)) {
        *invalid = true;
    }
    return result;
}

static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
{
    if (n & 1) {
        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
        bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
    } else {
        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
        bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
    }
}

static bool bcd_is_valid(ppc_avr_t *bcd)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(bcd) == 0) {
        return false;
    }

    for (i = 1; i < 32; i++) {
        bcd_get_digit(bcd, i, &invalid);
        if (unlikely(invalid)) {
            return false;
        }
    }
    return true;
}

static int bcd_cmp_zero(ppc_avr_t *bcd)
{
    if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
        return CRF_EQ;
    } else {
        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
    }
}

static uint16_t get_national_digit(ppc_avr_t *reg, int n)
{
    return reg->VsrH(7 - n);
}

static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
{
    reg->VsrH(7 - n) = val;
}

static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;
    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}

static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                        int *overflow)
{
    int carry = 0;
    int i;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
                        bcd_get_digit(b, i, invalid) + carry;
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
}

static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                        int *overflow)
{
    int carry = 0;
    int i;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
                        bcd_get_digit(b, i, invalid) + carry;
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
}

uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{

    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
            bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = bcd_cmp_zero(&result);
        } else {
            int magnitude = bcd_cmp_mag(a, b);
            if (magnitude > 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
                bcd_sub_mag(&result, a, b, &invalid, &overflow);
                cr = (sgna > 0) ? CRF_GT : CRF_LT;
            } else if (magnitude < 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
                bcd_sub_mag(&result, b, a, &invalid, &overflow);
                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
            } else {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
                cr = CRF_EQ;
            }
        }
    }

    if (unlikely(invalid)) {
        result.VsrD(0) = result.VsrD(1) = -1;
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    }

    *r = result;

    return cr;
}

uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
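
/*
 * bcdcfn/bcdctn above convert between packed decimal and the national
 * format, in which each of the 16-bit elements 1..7 holds one decimal
 * digit as a character in the range 0x30..0x39 and element 0 holds the
 * sign (NATIONAL_PLUS or NATIONAL_NEG).  bcdcfz/bcdctz below do the
 * same for the zoned format: one digit per byte, with the digit in the
 * low nibble and a zone of 0x3 (or 0xF when PS is set) in the high
 * nibble, and the sign encoded in the zone of the least significant
 * digit.
 */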

uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
        digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
            (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->VsrD(0) >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint64_t lo_value;
    uint64_t hi_value;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->VsrSD(0) < 0) {
        lo_value = -b->VsrSD(1);
        hi_value = ~b->VsrD(0) + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);
    } else {
        lo_value = b->VsrD(1);
        hi_value = b->VsrD(0);
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
    }

    if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
            lo_value > 9999999999999999ULL) {
        cr = CRF_SO;
    }

    for (i = 1; i < 16; hi_value /= 10, i++) {
        bcd_put_digit(&ret, hi_value % 10, i);
    }

    for (; i < 32; lo_value /= 10, i++) {
        bcd_put_digit(&ret, lo_value % 10, i);
    }

    cr |= bcd_cmp_zero(&ret);

    *r = ret;

    return cr;
}

uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->VsrSD(1) = -lo_value;
        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
    } else {
        r->VsrSD(1) = lo_value;
        r->VsrSD(0) = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}
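
/*
 * bcdcfsq/bcdctsq above convert between a signed 128-bit binary
 * quadword and the 31-digit packed decimal form.  bcdcfsq first takes
 * the magnitude of a negative input, then splits it with divu128() by
 * 10^15 so that digits 1..15 come from the remainder and digits 16..31
 * from the quotient, flagging overflow when the quotient needs more
 * than 16 digits.  bcdctsq goes the other way with a 128-bit
 * multiply-accumulate by 10 per digit, most significant digit first.
 */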

uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);

    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i = a->VsrSB(7);
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSB(7);
    if (i >= 32) {
        ox_flag = true;
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i <= -32) {
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    int i = a->VsrSB(7);
    ppc_avr_t bcd_one;

    bcd_one.VsrD(0) = 0;
    bcd_one.VsrD(1) = 0x10;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);

        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}
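
/*
 * The decimal shift helpers above take the shift count from byte 7 of
 * VRA as a signed value (positive = left, negative = right) and shift
 * by four bits per decimal digit.  bcds/bcdsr re-insert the sign
 * nibble afterwards, and bcdsr rounds a right shift to nearest by
 * adding one to the magnitude when the nibble that lands in the sign
 * position is >= 5.  The truncate helpers below instead take a digit
 * count from halfword 3 of VRA, clear all digits above it, and report
 * any discarded non-zero digits through CR.SO.
 */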

uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
    int i = a->VsrSH(3) + 1;
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i >= 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}

uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSH(3);
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    } else if (i == 0) {
        if (ret.VsrD(0) || ret.VsrD(1)) {
            ox_flag = CRF_SO;
        }
        ret.VsrD(0) = ret.VsrD(1) = 0;
    }

    *r = ret;
    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}

void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.VsrW(i) = b->VsrW(i) ^
                         (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
                          AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
                          AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
                          AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}

void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
    }
    *r = result;
}
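
/*
 * vcipher/vcipherlast above and vncipher/vncipherlast below implement
 * one (inverse) AES round with VRA as the state and VRB as the round
 * key.  The AES_Te0..AES_Te3 and AES_imc tables combine the S-box and
 * MixColumns steps into table lookups, with ShiftRows handled by the
 * AES_shifts/AES_ishifts index tables; the "last" round variants apply
 * only the (inverse) S-box and ShiftRows before XORing in the round
 * key.
 */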

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07.  The RTL is */
    /* incorrect and will be fixed in V2.07B. */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->VsrW(i) =
            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
            AES_imc[tmp.VsrB(4 * i + 3)][3];
    }
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
    }
    *r = result;
}

void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}

void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}
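
/*
 * hbrev reverses the bit order within a nibble, so byte_reverse() and
 * word_reverse() reverse the bit order of a byte and of a 32-bit word;
 * e.g. byte_reverse(0x1E) = hbrev[0x1] | (hbrev[0xE] << 4) = 0x78.
 * helper_brinc below uses them to implement the SPE "bit reversed
 * increment": the low bits of arg1 selected by the arg2 mask are
 * bit-reversed, incremented by one, and reversed back, which is the
 * addressing pattern used for FFT-style bit-reversed indexing.
 */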

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}
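
/*
 * Note on helper_dlmzb above: i is the 1-based position of the first
 * zero byte in the high:low byte string, or 8 when no zero byte is
 * found.  The count is written to the low bits of XER, and when Rc is
 * set CR0 reports GT, LT or EQ for a zero byte in the high word, in
 * the low word, or not found at all, together with XER[SO].
 */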