1 /* 2 * PowerPC integer and vector emulation helpers for QEMU. 3 * 4 * Copyright (c) 2003-2007 Jocelyn Mayer 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "cpu.h" 22 #include "internal.h" 23 #include "qemu/host-utils.h" 24 #include "qemu/main-loop.h" 25 #include "qemu/log.h" 26 #include "exec/helper-proto.h" 27 #include "crypto/aes.h" 28 #include "fpu/softfloat.h" 29 #include "qapi/error.h" 30 #include "qemu/guest-random.h" 31 32 #include "helper_regs.h" 33 /*****************************************************************************/ 34 /* Fixed point operations helpers */ 35 36 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov) 37 { 38 if (unlikely(ov)) { 39 env->so = env->ov = 1; 40 } else { 41 env->ov = 0; 42 } 43 } 44 45 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, 46 uint32_t oe) 47 { 48 uint64_t rt = 0; 49 int overflow = 0; 50 51 uint64_t dividend = (uint64_t)ra << 32; 52 uint64_t divisor = (uint32_t)rb; 53 54 if (unlikely(divisor == 0)) { 55 overflow = 1; 56 } else { 57 rt = dividend / divisor; 58 overflow = rt > UINT32_MAX; 59 } 60 61 if (unlikely(overflow)) { 62 rt = 0; /* Undefined */ 63 } 64 65 if (oe) { 66 helper_update_ov_legacy(env, overflow); 67 } 68 69 return (target_ulong)rt; 70 } 71 72 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, 73 uint32_t oe) 74 { 75 int64_t rt = 0; 76 int overflow = 0; 77 78 int64_t dividend = (int64_t)ra << 32; 79 int64_t divisor = (int64_t)((int32_t)rb); 80 81 if (unlikely((divisor == 0) || 82 ((divisor == -1ull) && (dividend == INT64_MIN)))) { 83 overflow = 1; 84 } else { 85 rt = dividend / divisor; 86 overflow = rt != (int32_t)rt; 87 } 88 89 if (unlikely(overflow)) { 90 rt = 0; /* Undefined */ 91 } 92 93 if (oe) { 94 helper_update_ov_legacy(env, overflow); 95 } 96 97 return (target_ulong)rt; 98 } 99 100 #if defined(TARGET_PPC64) 101 102 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) 103 { 104 uint64_t rt = 0; 105 int overflow = 0; 106 107 overflow = divu128(&rt, &ra, rb); 108 109 if (unlikely(overflow)) { 110 rt = 0; /* Undefined */ 111 } 112 113 if (oe) { 114 helper_update_ov_legacy(env, overflow); 115 } 116 117 return rt; 118 } 119 120 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) 121 { 122 int64_t rt = 0; 123 int64_t ra = (int64_t)rau; 124 int64_t rb = (int64_t)rbu; 125 int overflow = divs128(&rt, &ra, rb); 126 127 if (unlikely(overflow)) { 128 rt = 0; /* Undefined */ 129 } 130 131 if (oe) { 132 helper_update_ov_legacy(env, overflow); 133 } 134 135 return rt; 136 } 137 138 #endif 139 140 141 #if defined(TARGET_PPC64) 142 /* if x = 0xab, returns 0xababababababababa */ 143 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff)) 144 145 /* 146 * subtract 1 from each byte, and with inverse, check if MSB is set 
at each 147 * byte. 148 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80 149 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 150 */ 151 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 152 153 /* When you XOR the pattern and there is a match, that byte will be zero */ 154 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 155 156 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 157 { 158 return hasvalue(rb, ra) ? CRF_GT : 0; 159 } 160 161 #undef pattern 162 #undef haszero 163 #undef hasvalue 164 165 /* 166 * Return a random number. 167 */ 168 uint64_t helper_darn32(void) 169 { 170 Error *err = NULL; 171 uint32_t ret; 172 173 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 174 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 175 error_get_pretty(err)); 176 error_free(err); 177 return -1; 178 } 179 180 return ret; 181 } 182 183 uint64_t helper_darn64(void) 184 { 185 Error *err = NULL; 186 uint64_t ret; 187 188 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 189 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 190 error_get_pretty(err)); 191 error_free(err); 192 return -1; 193 } 194 195 return ret; 196 } 197 198 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 199 { 200 int i; 201 uint64_t ra = 0; 202 203 for (i = 0; i < 8; i++) { 204 int index = (rs >> (i * 8)) & 0xFF; 205 if (index < 64) { 206 if (rb & PPC_BIT(index)) { 207 ra |= 1 << i; 208 } 209 } 210 } 211 return ra; 212 } 213 214 #endif 215 216 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 217 { 218 target_ulong mask = 0xff; 219 target_ulong ra = 0; 220 int i; 221 222 for (i = 0; i < sizeof(target_ulong); i++) { 223 if ((rs & mask) == (rb & mask)) { 224 ra |= mask; 225 } 226 mask <<= 8; 227 } 228 return ra; 229 } 230 231 /* shift right arithmetic helper */ 232 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 233 target_ulong shift) 234 { 235 int32_t ret; 236 237 if (likely(!(shift & 0x20))) { 238 if (likely((uint32_t)shift != 0)) { 239 shift &= 0x1f; 240 ret = (int32_t)value >> shift; 241 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 242 env->ca32 = env->ca = 0; 243 } else { 244 env->ca32 = env->ca = 1; 245 } 246 } else { 247 ret = (int32_t)value; 248 env->ca32 = env->ca = 0; 249 } 250 } else { 251 ret = (int32_t)value >> 31; 252 env->ca32 = env->ca = (ret != 0); 253 } 254 return (target_long)ret; 255 } 256 257 #if defined(TARGET_PPC64) 258 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 259 target_ulong shift) 260 { 261 int64_t ret; 262 263 if (likely(!(shift & 0x40))) { 264 if (likely((uint64_t)shift != 0)) { 265 shift &= 0x3f; 266 ret = (int64_t)value >> shift; 267 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 268 env->ca32 = env->ca = 0; 269 } else { 270 env->ca32 = env->ca = 1; 271 } 272 } else { 273 ret = (int64_t)value; 274 env->ca32 = env->ca = 0; 275 } 276 } else { 277 ret = (int64_t)value >> 63; 278 env->ca32 = env->ca = (ret != 0); 279 } 280 return ret; 281 } 282 #endif 283 284 #if defined(TARGET_PPC64) 285 target_ulong helper_popcntb(target_ulong val) 286 { 287 /* Note that we don't fold past bytes */ 288 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 289 0x5555555555555555ULL); 290 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 291 0x3333333333333333ULL); 292 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 293 0x0f0f0f0f0f0f0f0fULL); 294 return val; 295 } 296 297 target_ulong helper_popcntw(target_ulong val) 298 { 299 /* Note that we don't fold past words. 
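Each 32-bit word of the result ends up holding the population count of the corresponding source word.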
*/ 300 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 301 0x5555555555555555ULL); 302 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 303 0x3333333333333333ULL); 304 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 305 0x0f0f0f0f0f0f0f0fULL); 306 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 307 0x00ff00ff00ff00ffULL); 308 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 309 0x0000ffff0000ffffULL); 310 return val; 311 } 312 #else 313 target_ulong helper_popcntb(target_ulong val) 314 { 315 /* Note that we don't fold past bytes */ 316 val = (val & 0x55555555) + ((val >> 1) & 0x55555555); 317 val = (val & 0x33333333) + ((val >> 2) & 0x33333333); 318 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); 319 return val; 320 } 321 #endif 322 323 /*****************************************************************************/ 324 /* PowerPC 601 specific instructions (POWER bridge) */ 325 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2) 326 { 327 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 328 329 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 330 (int32_t)arg2 == 0) { 331 env->spr[SPR_MQ] = 0; 332 return INT32_MIN; 333 } else { 334 env->spr[SPR_MQ] = tmp % arg2; 335 return tmp / (int32_t)arg2; 336 } 337 } 338 339 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1, 340 target_ulong arg2) 341 { 342 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 343 344 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 345 (int32_t)arg2 == 0) { 346 env->so = env->ov = 1; 347 env->spr[SPR_MQ] = 0; 348 return INT32_MIN; 349 } else { 350 env->spr[SPR_MQ] = tmp % arg2; 351 tmp /= (int32_t)arg2; 352 if ((int32_t)tmp != tmp) { 353 env->so = env->ov = 1; 354 } else { 355 env->ov = 0; 356 } 357 return tmp; 358 } 359 } 360 361 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1, 362 target_ulong arg2) 363 { 364 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 365 (int32_t)arg2 == 0) { 366 env->spr[SPR_MQ] = 0; 367 return INT32_MIN; 368 } else { 369 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 370 return (int32_t)arg1 / (int32_t)arg2; 371 } 372 } 373 374 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1, 375 target_ulong arg2) 376 { 377 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 378 (int32_t)arg2 == 0) { 379 env->so = env->ov = 1; 380 env->spr[SPR_MQ] = 0; 381 return INT32_MIN; 382 } else { 383 env->ov = 0; 384 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 385 return (int32_t)arg1 / (int32_t)arg2; 386 } 387 } 388 389 /*****************************************************************************/ 390 /* 602 specific instructions */ 391 /* mfrom is the most crazy instruction ever seen, imho ! */ 392 /* Real implementation uses a ROM table. 
Do the same */ 393 /* 394 * Extremely decomposed: 395 * -arg / 256 396 * return 256 * log10(10 + 1.0) + 0.5 397 */ 398 #if !defined(CONFIG_USER_ONLY) 399 target_ulong helper_602_mfrom(target_ulong arg) 400 { 401 if (likely(arg < 602)) { 402 #include "mfrom_table.c.inc" 403 return mfrom_ROM_table[arg]; 404 } else { 405 return 0; 406 } 407 } 408 #endif 409 410 /*****************************************************************************/ 411 /* Altivec extension helpers */ 412 #if defined(HOST_WORDS_BIGENDIAN) 413 #define VECTOR_FOR_INORDER_I(index, element) \ 414 for (index = 0; index < ARRAY_SIZE(r->element); index++) 415 #else 416 #define VECTOR_FOR_INORDER_I(index, element) \ 417 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--) 418 #endif 419 420 /* Saturating arithmetic helpers. */ 421 #define SATCVT(from, to, from_type, to_type, min, max) \ 422 static inline to_type cvt##from##to(from_type x, int *sat) \ 423 { \ 424 to_type r; \ 425 \ 426 if (x < (from_type)min) { \ 427 r = min; \ 428 *sat = 1; \ 429 } else if (x > (from_type)max) { \ 430 r = max; \ 431 *sat = 1; \ 432 } else { \ 433 r = x; \ 434 } \ 435 return r; \ 436 } 437 #define SATCVTU(from, to, from_type, to_type, min, max) \ 438 static inline to_type cvt##from##to(from_type x, int *sat) \ 439 { \ 440 to_type r; \ 441 \ 442 if (x > (from_type)max) { \ 443 r = max; \ 444 *sat = 1; \ 445 } else { \ 446 r = x; \ 447 } \ 448 return r; \ 449 } 450 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX) 451 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX) 452 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX) 453 454 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX) 455 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX) 456 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX) 457 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX) 458 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX) 459 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX) 460 #undef SATCVT 461 #undef SATCVTU 462 463 void helper_mtvscr(CPUPPCState *env, uint32_t vscr) 464 { 465 env->vscr = vscr & ~(1u << VSCR_SAT); 466 /* Which bit we set is completely arbitrary, but clear the rest. */ 467 env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT); 468 env->vscr_sat.u64[1] = 0; 469 set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status); 470 } 471 472 uint32_t helper_mfvscr(CPUPPCState *env) 473 { 474 uint32_t sat = (env->vscr_sat.u64[0] | env->vscr_sat.u64[1]) != 0; 475 return env->vscr | (sat << VSCR_SAT); 476 } 477 478 static inline void set_vscr_sat(CPUPPCState *env) 479 { 480 /* The choice of non-zero value is arbitrary. 
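helper_mfvscr only tests whether any bit of vscr_sat is non-zero, so setting a single bit is sufficient.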
*/ 481 env->vscr_sat.u32[0] = 1; 482 } 483 484 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 485 { 486 int i; 487 488 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 489 r->u32[i] = ~a->u32[i] < b->u32[i]; 490 } 491 } 492 493 /* vprtybw */ 494 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b) 495 { 496 int i; 497 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 498 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16); 499 res ^= res >> 8; 500 r->u32[i] = res & 1; 501 } 502 } 503 504 /* vprtybd */ 505 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b) 506 { 507 int i; 508 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 509 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32); 510 res ^= res >> 16; 511 res ^= res >> 8; 512 r->u64[i] = res & 1; 513 } 514 } 515 516 /* vprtybq */ 517 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b) 518 { 519 uint64_t res = b->u64[0] ^ b->u64[1]; 520 res ^= res >> 32; 521 res ^= res >> 16; 522 res ^= res >> 8; 523 r->VsrD(1) = res & 1; 524 r->VsrD(0) = 0; 525 } 526 527 #define VARITHFP(suffix, func) \ 528 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 529 ppc_avr_t *b) \ 530 { \ 531 int i; \ 532 \ 533 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 534 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 535 } \ 536 } 537 VARITHFP(addfp, float32_add) 538 VARITHFP(subfp, float32_sub) 539 VARITHFP(minfp, float32_min) 540 VARITHFP(maxfp, float32_max) 541 #undef VARITHFP 542 543 #define VARITHFPFMA(suffix, type) \ 544 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 545 ppc_avr_t *b, ppc_avr_t *c) \ 546 { \ 547 int i; \ 548 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 549 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 550 type, &env->vec_status); \ 551 } \ 552 } 553 VARITHFPFMA(maddfp, 0); 554 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 555 #undef VARITHFPFMA 556 557 #define VARITHSAT_CASE(type, op, cvt, element) \ 558 { \ 559 type result = (type)a->element[i] op (type)b->element[i]; \ 560 r->element[i] = cvt(result, &sat); \ 561 } 562 563 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 564 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 565 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 566 { \ 567 int sat = 0; \ 568 int i; \ 569 \ 570 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 571 VARITHSAT_CASE(optype, op, cvt, element); \ 572 } \ 573 if (sat) { \ 574 vscr_sat->u32[0] = 1; \ 575 } \ 576 } 577 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 578 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 579 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 580 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 581 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 582 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 583 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 584 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 585 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 586 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 587 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 588 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 589 #undef VARITHSAT_CASE 590 #undef VARITHSAT_DO 591 #undef VARITHSAT_SIGNED 592 #undef VARITHSAT_UNSIGNED 593 594 #define VAVG_DO(name, element, etype) \ 595 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 596 { \ 597 int i; \ 598 \ 599 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 600 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 601 r->element[i] = x >> 1; \ 602 } \ 603 } 604 605 #define VAVG(type, 
signed_element, signed_type, unsigned_element, \ 606 unsigned_type) \ 607 VAVG_DO(avgs##type, signed_element, signed_type) \ 608 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 609 VAVG(b, s8, int16_t, u8, uint16_t) 610 VAVG(h, s16, int32_t, u16, uint32_t) 611 VAVG(w, s32, int64_t, u32, uint64_t) 612 #undef VAVG_DO 613 #undef VAVG 614 615 #define VABSDU_DO(name, element) \ 616 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 617 { \ 618 int i; \ 619 \ 620 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 621 r->element[i] = (a->element[i] > b->element[i]) ? \ 622 (a->element[i] - b->element[i]) : \ 623 (b->element[i] - a->element[i]); \ 624 } \ 625 } 626 627 /* 628 * VABSDU - Vector absolute difference unsigned 629 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 630 * element - element type to access from vector 631 */ 632 #define VABSDU(type, element) \ 633 VABSDU_DO(absdu##type, element) 634 VABSDU(b, u8) 635 VABSDU(h, u16) 636 VABSDU(w, u32) 637 #undef VABSDU_DO 638 #undef VABSDU 639 640 #define VCF(suffix, cvt, element) \ 641 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 642 ppc_avr_t *b, uint32_t uim) \ 643 { \ 644 int i; \ 645 \ 646 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 647 float32 t = cvt(b->element[i], &env->vec_status); \ 648 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 649 } \ 650 } 651 VCF(ux, uint32_to_float32, u32) 652 VCF(sx, int32_to_float32, s32) 653 #undef VCF 654 655 #define VCMP_DO(suffix, compare, element, record) \ 656 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 657 ppc_avr_t *a, ppc_avr_t *b) \ 658 { \ 659 uint64_t ones = (uint64_t)-1; \ 660 uint64_t all = ones; \ 661 uint64_t none = 0; \ 662 int i; \ 663 \ 664 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 665 uint64_t result = (a->element[i] compare b->element[i] ? \ 666 ones : 0x0); \ 667 switch (sizeof(a->element[0])) { \ 668 case 8: \ 669 r->u64[i] = result; \ 670 break; \ 671 case 4: \ 672 r->u32[i] = result; \ 673 break; \ 674 case 2: \ 675 r->u16[i] = result; \ 676 break; \ 677 case 1: \ 678 r->u8[i] = result; \ 679 break; \ 680 } \ 681 all &= result; \ 682 none |= result; \ 683 } \ 684 if (record) { \ 685 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 686 } \ 687 } 688 #define VCMP(suffix, compare, element) \ 689 VCMP_DO(suffix, compare, element, 0) \ 690 VCMP_DO(suffix##_dot, compare, element, 1) 691 VCMP(equb, ==, u8) 692 VCMP(equh, ==, u16) 693 VCMP(equw, ==, u32) 694 VCMP(equd, ==, u64) 695 VCMP(gtub, >, u8) 696 VCMP(gtuh, >, u16) 697 VCMP(gtuw, >, u32) 698 VCMP(gtud, >, u64) 699 VCMP(gtsb, >, s8) 700 VCMP(gtsh, >, s16) 701 VCMP(gtsw, >, s32) 702 VCMP(gtsd, >, s64) 703 #undef VCMP_DO 704 #undef VCMP 705 706 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \ 707 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \ 708 ppc_avr_t *a, ppc_avr_t *b) \ 709 { \ 710 etype ones = (etype)-1; \ 711 etype all = ones; \ 712 etype result, none = 0; \ 713 int i; \ 714 \ 715 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 716 if (cmpzero) { \ 717 result = ((a->element[i] == 0) \ 718 || (b->element[i] == 0) \ 719 || (a->element[i] != b->element[i]) ? \ 720 ones : 0x0); \ 721 } else { \ 722 result = (a->element[i] != b->element[i]) ? 
ones : 0x0; \ 723 } \ 724 r->element[i] = result; \ 725 all &= result; \ 726 none |= result; \ 727 } \ 728 if (record) { \ 729 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 730 } \ 731 } 732 733 /* 734 * VCMPNEZ - Vector compare not equal to zero 735 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word) 736 * element - element type to access from vector 737 */ 738 #define VCMPNE(suffix, element, etype, cmpzero) \ 739 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \ 740 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1) 741 VCMPNE(zb, u8, uint8_t, 1) 742 VCMPNE(zh, u16, uint16_t, 1) 743 VCMPNE(zw, u32, uint32_t, 1) 744 VCMPNE(b, u8, uint8_t, 0) 745 VCMPNE(h, u16, uint16_t, 0) 746 VCMPNE(w, u32, uint32_t, 0) 747 #undef VCMPNE_DO 748 #undef VCMPNE 749 750 #define VCMPFP_DO(suffix, compare, order, record) \ 751 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 752 ppc_avr_t *a, ppc_avr_t *b) \ 753 { \ 754 uint32_t ones = (uint32_t)-1; \ 755 uint32_t all = ones; \ 756 uint32_t none = 0; \ 757 int i; \ 758 \ 759 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 760 uint32_t result; \ 761 FloatRelation rel = \ 762 float32_compare_quiet(a->f32[i], b->f32[i], \ 763 &env->vec_status); \ 764 if (rel == float_relation_unordered) { \ 765 result = 0; \ 766 } else if (rel compare order) { \ 767 result = ones; \ 768 } else { \ 769 result = 0; \ 770 } \ 771 r->u32[i] = result; \ 772 all &= result; \ 773 none |= result; \ 774 } \ 775 if (record) { \ 776 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 777 } \ 778 } 779 #define VCMPFP(suffix, compare, order) \ 780 VCMPFP_DO(suffix, compare, order, 0) \ 781 VCMPFP_DO(suffix##_dot, compare, order, 1) 782 VCMPFP(eqfp, ==, float_relation_equal) 783 VCMPFP(gefp, !=, float_relation_less) 784 VCMPFP(gtfp, ==, float_relation_greater) 785 #undef VCMPFP_DO 786 #undef VCMPFP 787 788 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 789 ppc_avr_t *a, ppc_avr_t *b, int record) 790 { 791 int i; 792 int all_in = 0; 793 794 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 795 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 796 &env->vec_status); 797 if (le_rel == float_relation_unordered) { 798 r->u32[i] = 0xc0000000; 799 all_in = 1; 800 } else { 801 float32 bneg = float32_chs(b->f32[i]); 802 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, 803 &env->vec_status); 804 int le = le_rel != float_relation_greater; 805 int ge = ge_rel != float_relation_less; 806 807 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 808 all_in |= (!le | !ge); 809 } 810 } 811 if (record) { 812 env->crf[6] = (all_in == 0) << 1; 813 } 814 } 815 816 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 817 { 818 vcmpbfp_internal(env, r, a, b, 0); 819 } 820 821 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 822 ppc_avr_t *b) 823 { 824 vcmpbfp_internal(env, r, a, b, 1); 825 } 826 827 #define VCT(suffix, satcvt, element) \ 828 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 829 ppc_avr_t *b, uint32_t uim) \ 830 { \ 831 int i; \ 832 int sat = 0; \ 833 float_status s = env->vec_status; \ 834 \ 835 set_float_rounding_mode(float_round_to_zero, &s); \ 836 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 837 if (float32_is_any_nan(b->f32[i])) { \ 838 r->element[i] = 0; \ 839 } else { \ 840 float64 t = float32_to_float64(b->f32[i], &s); \ 841 int64_t j; \ 842 \ 843 t = float64_scalbn(t, uim, &s); \ 844 j = float64_to_int64(t, &s); \ 845 r->element[i] = satcvt(j, &sat); \ 846 } \ 847 
} \ 848 if (sat) { \ 849 set_vscr_sat(env); \ 850 } \ 851 } 852 VCT(uxs, cvtsduw, u32) 853 VCT(sxs, cvtsdsw, s32) 854 #undef VCT 855 856 target_ulong helper_vclzlsbb(ppc_avr_t *r) 857 { 858 target_ulong count = 0; 859 int i; 860 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 861 if (r->VsrB(i) & 0x01) { 862 break; 863 } 864 count++; 865 } 866 return count; 867 } 868 869 target_ulong helper_vctzlsbb(ppc_avr_t *r) 870 { 871 target_ulong count = 0; 872 int i; 873 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 874 if (r->VsrB(i) & 0x01) { 875 break; 876 } 877 count++; 878 } 879 return count; 880 } 881 882 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 883 ppc_avr_t *b, ppc_avr_t *c) 884 { 885 int sat = 0; 886 int i; 887 888 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 889 int32_t prod = a->s16[i] * b->s16[i]; 890 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 891 892 r->s16[i] = cvtswsh(t, &sat); 893 } 894 895 if (sat) { 896 set_vscr_sat(env); 897 } 898 } 899 900 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 901 ppc_avr_t *b, ppc_avr_t *c) 902 { 903 int sat = 0; 904 int i; 905 906 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 907 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 908 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 909 r->s16[i] = cvtswsh(t, &sat); 910 } 911 912 if (sat) { 913 set_vscr_sat(env); 914 } 915 } 916 917 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 918 { 919 int i; 920 921 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 922 int32_t prod = a->s16[i] * b->s16[i]; 923 r->s16[i] = (int16_t) (prod + c->s16[i]); 924 } 925 } 926 927 #define VMRG_DO(name, element, access, ofs) \ 928 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 929 { \ 930 ppc_avr_t result; \ 931 int i, half = ARRAY_SIZE(r->element) / 2; \ 932 \ 933 for (i = 0; i < half; i++) { \ 934 result.access(i * 2 + 0) = a->access(i + ofs); \ 935 result.access(i * 2 + 1) = b->access(i + ofs); \ 936 } \ 937 *r = result; \ 938 } 939 940 #define VMRG(suffix, element, access) \ 941 VMRG_DO(mrgl##suffix, element, access, half) \ 942 VMRG_DO(mrgh##suffix, element, access, 0) 943 VMRG(b, u8, VsrB) 944 VMRG(h, u16, VsrH) 945 VMRG(w, u32, VsrW) 946 #undef VMRG_DO 947 #undef VMRG 948 949 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 950 ppc_avr_t *b, ppc_avr_t *c) 951 { 952 int32_t prod[16]; 953 int i; 954 955 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 956 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 957 } 958 959 VECTOR_FOR_INORDER_I(i, s32) { 960 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 961 prod[4 * i + 2] + prod[4 * i + 3]; 962 } 963 } 964 965 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 966 ppc_avr_t *b, ppc_avr_t *c) 967 { 968 int32_t prod[8]; 969 int i; 970 971 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 972 prod[i] = a->s16[i] * b->s16[i]; 973 } 974 975 VECTOR_FOR_INORDER_I(i, s32) { 976 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 977 } 978 } 979 980 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 981 ppc_avr_t *b, ppc_avr_t *c) 982 { 983 int32_t prod[8]; 984 int i; 985 int sat = 0; 986 987 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 988 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 989 } 990 991 VECTOR_FOR_INORDER_I(i, s32) { 992 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 993 994 r->u32[i] = cvtsdsw(t, &sat); 995 } 996 997 if (sat) { 998 set_vscr_sat(env); 999 } 1000 } 1001 1002 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 
1003 ppc_avr_t *b, ppc_avr_t *c) 1004 { 1005 uint16_t prod[16]; 1006 int i; 1007 1008 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1009 prod[i] = a->u8[i] * b->u8[i]; 1010 } 1011 1012 VECTOR_FOR_INORDER_I(i, u32) { 1013 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1014 prod[4 * i + 2] + prod[4 * i + 3]; 1015 } 1016 } 1017 1018 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1019 ppc_avr_t *b, ppc_avr_t *c) 1020 { 1021 uint32_t prod[8]; 1022 int i; 1023 1024 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1025 prod[i] = a->u16[i] * b->u16[i]; 1026 } 1027 1028 VECTOR_FOR_INORDER_I(i, u32) { 1029 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1030 } 1031 } 1032 1033 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1034 ppc_avr_t *b, ppc_avr_t *c) 1035 { 1036 uint32_t prod[8]; 1037 int i; 1038 int sat = 0; 1039 1040 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1041 prod[i] = a->u16[i] * b->u16[i]; 1042 } 1043 1044 VECTOR_FOR_INORDER_I(i, s32) { 1045 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1046 1047 r->u32[i] = cvtuduw(t, &sat); 1048 } 1049 1050 if (sat) { 1051 set_vscr_sat(env); 1052 } 1053 } 1054 1055 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 1056 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1057 { \ 1058 int i; \ 1059 \ 1060 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1061 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 1062 (cast)b->mul_access(i); \ 1063 } \ 1064 } 1065 1066 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 1067 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1068 { \ 1069 int i; \ 1070 \ 1071 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1072 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1073 (cast)b->mul_access(i + 1); \ 1074 } \ 1075 } 1076 1077 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1078 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \ 1079 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast) 1080 VMUL(sb, s8, VsrSB, VsrSH, int16_t) 1081 VMUL(sh, s16, VsrSH, VsrSW, int32_t) 1082 VMUL(sw, s32, VsrSW, VsrSD, int64_t) 1083 VMUL(ub, u8, VsrB, VsrH, uint16_t) 1084 VMUL(uh, u16, VsrH, VsrW, uint32_t) 1085 VMUL(uw, u32, VsrW, VsrD, uint64_t) 1086 #undef VMUL_DO_EVN 1087 #undef VMUL_DO_ODD 1088 #undef VMUL 1089 1090 void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1091 { 1092 int i; 1093 1094 for (i = 0; i < 4; i++) { 1095 r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32); 1096 } 1097 } 1098 1099 void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1100 { 1101 int i; 1102 1103 for (i = 0; i < 4; i++) { 1104 r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] * 1105 (uint64_t)b->u32[i]) >> 32); 1106 } 1107 } 1108 1109 void helper_vmulhsd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1110 { 1111 uint64_t discard; 1112 1113 muls64(&discard, &r->u64[0], a->s64[0], b->s64[0]); 1114 muls64(&discard, &r->u64[1], a->s64[1], b->s64[1]); 1115 } 1116 1117 void helper_vmulhud(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1118 { 1119 uint64_t discard; 1120 1121 mulu64(&discard, &r->u64[0], a->u64[0], b->u64[0]); 1122 mulu64(&discard, &r->u64[1], a->u64[1], b->u64[1]); 1123 } 1124 1125 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1126 ppc_avr_t *c) 1127 { 1128 ppc_avr_t result; 1129 int i; 1130 1131 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1132 int s = c->VsrB(i) & 0x1f; 
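/* bit 4 of the selector picks the source vector (b when set, a when clear); the low nibble picks the byte within it */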
1133 int index = s & 0xf; 1134 1135 if (s & 0x10) { 1136 result.VsrB(i) = b->VsrB(index); 1137 } else { 1138 result.VsrB(i) = a->VsrB(index); 1139 } 1140 } 1141 *r = result; 1142 } 1143 1144 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1145 ppc_avr_t *c) 1146 { 1147 ppc_avr_t result; 1148 int i; 1149 1150 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1151 int s = c->VsrB(i) & 0x1f; 1152 int index = 15 - (s & 0xf); 1153 1154 if (s & 0x10) { 1155 result.VsrB(i) = a->VsrB(index); 1156 } else { 1157 result.VsrB(i) = b->VsrB(index); 1158 } 1159 } 1160 *r = result; 1161 } 1162 1163 #if defined(HOST_WORDS_BIGENDIAN) 1164 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1165 #define VBPERMD_INDEX(i) (i) 1166 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1167 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) 1168 #else 1169 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1170 #define VBPERMD_INDEX(i) (1 - i) 1171 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1172 #define EXTRACT_BIT(avr, i, index) \ 1173 (extract64((avr)->u64[1 - i], 63 - index, 1)) 1174 #endif 1175 1176 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1177 { 1178 int i, j; 1179 ppc_avr_t result = { .u64 = { 0, 0 } }; 1180 VECTOR_FOR_INORDER_I(i, u64) { 1181 for (j = 0; j < 8; j++) { 1182 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1183 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1184 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1185 } 1186 } 1187 } 1188 *r = result; 1189 } 1190 1191 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1192 { 1193 int i; 1194 uint64_t perm = 0; 1195 1196 VECTOR_FOR_INORDER_I(i, u8) { 1197 int index = VBPERMQ_INDEX(b, i); 1198 1199 if (index < 128) { 1200 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1201 if (a->u64[VBPERMQ_DW(index)] & mask) { 1202 perm |= (0x8000 >> i); 1203 } 1204 } 1205 } 1206 1207 r->VsrD(0) = perm; 1208 r->VsrD(1) = 0; 1209 } 1210 1211 #undef VBPERMQ_INDEX 1212 #undef VBPERMQ_DW 1213 1214 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1215 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1216 { \ 1217 int i, j; \ 1218 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ 1219 \ 1220 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1221 prod[i] = 0; \ 1222 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1223 if (a->srcfld[i] & (1ull << j)) { \ 1224 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1225 } \ 1226 } \ 1227 } \ 1228 \ 1229 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1230 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ 1231 } \ 1232 } 1233 1234 PMSUM(vpmsumb, u8, u16, uint16_t) 1235 PMSUM(vpmsumh, u16, u32, uint32_t) 1236 PMSUM(vpmsumw, u32, u64, uint64_t) 1237 1238 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1239 { 1240 1241 #ifdef CONFIG_INT128 1242 int i, j; 1243 __uint128_t prod[2]; 1244 1245 VECTOR_FOR_INORDER_I(i, u64) { 1246 prod[i] = 0; 1247 for (j = 0; j < 64; j++) { 1248 if (a->u64[i] & (1ull << j)) { 1249 prod[i] ^= (((__uint128_t)b->u64[i]) << j); 1250 } 1251 } 1252 } 1253 1254 r->u128 = prod[0] ^ prod[1]; 1255 1256 #else 1257 int i, j; 1258 ppc_avr_t prod[2]; 1259 1260 VECTOR_FOR_INORDER_I(i, u64) { 1261 prod[i].VsrD(1) = prod[i].VsrD(0) = 0; 1262 for (j = 0; j < 64; j++) { 1263 if (a->u64[i] & (1ull << j)) { 1264 ppc_avr_t bshift; 1265 if (j == 0) { 1266 bshift.VsrD(0) = 0; 1267 bshift.VsrD(1) = b->u64[i]; 1268 } else { 1269 bshift.VsrD(0) = b->u64[i] >> (64 - j); 1270 bshift.VsrD(1) = b->u64[i] << j; 1271 } 1272 prod[i].VsrD(1) ^= bshift.VsrD(1); 1273 
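/* fold the high doubleword of the 128-bit shifted value into the accumulator as well */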
prod[i].VsrD(0) ^= bshift.VsrD(0); 1274 } 1275 } 1276 } 1277 1278 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1); 1279 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0); 1280 #endif 1281 } 1282 1283 1284 #if defined(HOST_WORDS_BIGENDIAN) 1285 #define PKBIG 1 1286 #else 1287 #define PKBIG 0 1288 #endif 1289 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1290 { 1291 int i, j; 1292 ppc_avr_t result; 1293 #if defined(HOST_WORDS_BIGENDIAN) 1294 const ppc_avr_t *x[2] = { a, b }; 1295 #else 1296 const ppc_avr_t *x[2] = { b, a }; 1297 #endif 1298 1299 VECTOR_FOR_INORDER_I(i, u64) { 1300 VECTOR_FOR_INORDER_I(j, u32) { 1301 uint32_t e = x[i]->u32[j]; 1302 1303 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1304 ((e >> 6) & 0x3e0) | 1305 ((e >> 3) & 0x1f)); 1306 } 1307 } 1308 *r = result; 1309 } 1310 1311 #define VPK(suffix, from, to, cvt, dosat) \ 1312 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1313 ppc_avr_t *a, ppc_avr_t *b) \ 1314 { \ 1315 int i; \ 1316 int sat = 0; \ 1317 ppc_avr_t result; \ 1318 ppc_avr_t *a0 = PKBIG ? a : b; \ 1319 ppc_avr_t *a1 = PKBIG ? b : a; \ 1320 \ 1321 VECTOR_FOR_INORDER_I(i, from) { \ 1322 result.to[i] = cvt(a0->from[i], &sat); \ 1323 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1324 } \ 1325 *r = result; \ 1326 if (dosat && sat) { \ 1327 set_vscr_sat(env); \ 1328 } \ 1329 } 1330 #define I(x, y) (x) 1331 VPK(shss, s16, s8, cvtshsb, 1) 1332 VPK(shus, s16, u8, cvtshub, 1) 1333 VPK(swss, s32, s16, cvtswsh, 1) 1334 VPK(swus, s32, u16, cvtswuh, 1) 1335 VPK(sdss, s64, s32, cvtsdsw, 1) 1336 VPK(sdus, s64, u32, cvtsduw, 1) 1337 VPK(uhus, u16, u8, cvtuhub, 1) 1338 VPK(uwus, u32, u16, cvtuwuh, 1) 1339 VPK(udus, u64, u32, cvtuduw, 1) 1340 VPK(uhum, u16, u8, I, 0) 1341 VPK(uwum, u32, u16, I, 0) 1342 VPK(udum, u64, u32, I, 0) 1343 #undef I 1344 #undef VPK 1345 #undef PKBIG 1346 1347 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1348 { 1349 int i; 1350 1351 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1352 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1353 } 1354 } 1355 1356 #define VRFI(suffix, rounding) \ 1357 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1358 ppc_avr_t *b) \ 1359 { \ 1360 int i; \ 1361 float_status s = env->vec_status; \ 1362 \ 1363 set_float_rounding_mode(rounding, &s); \ 1364 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1365 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1366 } \ 1367 } 1368 VRFI(n, float_round_nearest_even) 1369 VRFI(m, float_round_down) 1370 VRFI(p, float_round_up) 1371 VRFI(z, float_round_to_zero) 1372 #undef VRFI 1373 1374 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1375 { 1376 int i; 1377 1378 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1379 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1380 1381 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1382 } 1383 } 1384 1385 #define VRLMI(name, size, element, insert) \ 1386 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1387 { \ 1388 int i; \ 1389 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1390 uint##size##_t src1 = a->element[i]; \ 1391 uint##size##_t src2 = b->element[i]; \ 1392 uint##size##_t src3 = r->element[i]; \ 1393 uint##size##_t begin, end, shift, mask, rot_val; \ 1394 \ 1395 shift = extract##size(src2, 0, 6); \ 1396 end = extract##size(src2, 8, 6); \ 1397 begin = extract##size(src2, 16, 6); \ 1398 rot_val = rol##size(src1, shift); \ 1399 mask = mask_u##size(begin, end); \ 1400 if (insert) { \ 1401 r->element[i] = (rot_val & 
mask) | (src3 & ~mask); \ 1402 } else { \ 1403 r->element[i] = (rot_val & mask); \ 1404 } \ 1405 } \ 1406 } 1407 1408 VRLMI(vrldmi, 64, u64, 1); 1409 VRLMI(vrlwmi, 32, u32, 1); 1410 VRLMI(vrldnm, 64, u64, 0); 1411 VRLMI(vrlwnm, 32, u32, 0); 1412 1413 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1414 ppc_avr_t *c) 1415 { 1416 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]); 1417 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]); 1418 } 1419 1420 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1421 { 1422 int i; 1423 1424 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1425 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status); 1426 } 1427 } 1428 1429 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1430 { 1431 int i; 1432 1433 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1434 r->f32[i] = float32_log2(b->f32[i], &env->vec_status); 1435 } 1436 } 1437 1438 #if defined(HOST_WORDS_BIGENDIAN) 1439 #define VEXTU_X_DO(name, size, left) \ 1440 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1441 { \ 1442 int index; \ 1443 if (left) { \ 1444 index = (a & 0xf) * 8; \ 1445 } else { \ 1446 index = ((15 - (a & 0xf) + 1) * 8) - size; \ 1447 } \ 1448 return int128_getlo(int128_rshift(b->s128, index)) & \ 1449 MAKE_64BIT_MASK(0, size); \ 1450 } 1451 #else 1452 #define VEXTU_X_DO(name, size, left) \ 1453 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1454 { \ 1455 int index; \ 1456 if (left) { \ 1457 index = ((15 - (a & 0xf) + 1) * 8) - size; \ 1458 } else { \ 1459 index = (a & 0xf) * 8; \ 1460 } \ 1461 return int128_getlo(int128_rshift(b->s128, index)) & \ 1462 MAKE_64BIT_MASK(0, size); \ 1463 } 1464 #endif 1465 1466 VEXTU_X_DO(vextublx, 8, 1) 1467 VEXTU_X_DO(vextuhlx, 16, 1) 1468 VEXTU_X_DO(vextuwlx, 32, 1) 1469 VEXTU_X_DO(vextubrx, 8, 0) 1470 VEXTU_X_DO(vextuhrx, 16, 0) 1471 VEXTU_X_DO(vextuwrx, 32, 0) 1472 #undef VEXTU_X_DO 1473 1474 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1475 { 1476 int i; 1477 unsigned int shift, bytes, size; 1478 1479 size = ARRAY_SIZE(r->u8); 1480 for (i = 0; i < size; i++) { 1481 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1482 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */ 1483 (((i + 1) < size) ? a->VsrB(i + 1) : 0); 1484 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */ 1485 } 1486 } 1487 1488 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1489 { 1490 int i; 1491 unsigned int shift, bytes; 1492 1493 /* 1494 * Use reverse order, as destination and source register can be 1495 * same. Its being modified in place saving temporary, reverse 1496 * order will guarantee that computed result is not fed back. 1497 */ 1498 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1499 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1500 bytes = ((i ? 
a->VsrB(i - 1) : 0) << 8) + a->VsrB(i); 1501 /* extract adjacent bytes */ 1502 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */ 1503 } 1504 } 1505 1506 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1507 { 1508 int sh = shift & 0xf; 1509 int i; 1510 ppc_avr_t result; 1511 1512 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1513 int index = sh + i; 1514 if (index > 0xf) { 1515 result.VsrB(i) = b->VsrB(index - 0x10); 1516 } else { 1517 result.VsrB(i) = a->VsrB(index); 1518 } 1519 } 1520 *r = result; 1521 } 1522 1523 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1524 { 1525 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1526 1527 #if defined(HOST_WORDS_BIGENDIAN) 1528 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1529 memset(&r->u8[16 - sh], 0, sh); 1530 #else 1531 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1532 memset(&r->u8[0], 0, sh); 1533 #endif 1534 } 1535 1536 #if defined(HOST_WORDS_BIGENDIAN) 1537 #define VINSERT(suffix, element) \ 1538 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1539 { \ 1540 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \ 1541 sizeof(r->element[0])); \ 1542 } 1543 #else 1544 #define VINSERT(suffix, element) \ 1545 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1546 { \ 1547 uint32_t d = (16 - index) - sizeof(r->element[0]); \ 1548 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \ 1549 } 1550 #endif 1551 VINSERT(b, u8) 1552 VINSERT(h, u16) 1553 VINSERT(w, u32) 1554 VINSERT(d, u64) 1555 #undef VINSERT 1556 #if defined(HOST_WORDS_BIGENDIAN) 1557 #define VEXTRACT(suffix, element) \ 1558 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1559 { \ 1560 uint32_t es = sizeof(r->element[0]); \ 1561 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1562 memset(&r->u8[8], 0, 8); \ 1563 memset(&r->u8[0], 0, 8 - es); \ 1564 } 1565 #else 1566 #define VEXTRACT(suffix, element) \ 1567 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1568 { \ 1569 uint32_t es = sizeof(r->element[0]); \ 1570 uint32_t s = (16 - index) - es; \ 1571 memmove(&r->u8[8], &b->u8[s], es); \ 1572 memset(&r->u8[0], 0, 8); \ 1573 memset(&r->u8[8 + es], 0, 8 - es); \ 1574 } 1575 #endif 1576 VEXTRACT(ub, u8) 1577 VEXTRACT(uh, u16) 1578 VEXTRACT(uw, u32) 1579 VEXTRACT(d, u64) 1580 #undef VEXTRACT 1581 1582 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt, 1583 ppc_vsr_t *xb, uint32_t index) 1584 { 1585 ppc_vsr_t t = { }; 1586 size_t es = sizeof(uint32_t); 1587 uint32_t ext_index; 1588 int i; 1589 1590 ext_index = index; 1591 for (i = 0; i < es; i++, ext_index++) { 1592 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16); 1593 } 1594 1595 *xt = t; 1596 } 1597 1598 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt, 1599 ppc_vsr_t *xb, uint32_t index) 1600 { 1601 ppc_vsr_t t = *xt; 1602 size_t es = sizeof(uint32_t); 1603 int ins_index, i = 0; 1604 1605 ins_index = index; 1606 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1607 t.VsrB(ins_index) = xb->VsrB(8 - es + i); 1608 } 1609 1610 *xt = t; 1611 } 1612 1613 #define VEXT_SIGNED(name, element, cast) \ 1614 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1615 { \ 1616 int i; \ 1617 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1618 r->element[i] = (cast)b->element[i]; \ 1619 } \ 1620 } 1621 VEXT_SIGNED(vextsb2w, s32, int8_t) 1622 VEXT_SIGNED(vextsb2d, s64, int8_t) 1623 VEXT_SIGNED(vextsh2w, s32, int16_t) 1624 VEXT_SIGNED(vextsh2d, s64, int16_t) 1625 VEXT_SIGNED(vextsw2d, s64, int32_t) 1626 
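/*
 * Illustrative sketch (not part of the original file): a scalar
 * equivalent of one vextsb2w lane, useful for reasoning about the
 * VEXT_SIGNED expansion above.
 *
 *     static inline int32_t extsb2w_lane(int32_t w)
 *     {
 *         return (int32_t)(int8_t)w;   /- keep low byte, sign-extend -/
 *     }
 *
 * e.g. extsb2w_lane(0x12345680) == -128 (0xffffff80).
 */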
#undef VEXT_SIGNED 1627 1628 #define VNEG(name, element) \ 1629 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1630 { \ 1631 int i; \ 1632 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1633 r->element[i] = -b->element[i]; \ 1634 } \ 1635 } 1636 VNEG(vnegw, s32) 1637 VNEG(vnegd, s64) 1638 #undef VNEG 1639 1640 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1641 { 1642 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1643 1644 #if defined(HOST_WORDS_BIGENDIAN) 1645 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1646 memset(&r->u8[0], 0, sh); 1647 #else 1648 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1649 memset(&r->u8[16 - sh], 0, sh); 1650 #endif 1651 } 1652 1653 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1654 { 1655 int i; 1656 1657 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1658 r->u32[i] = a->u32[i] >= b->u32[i]; 1659 } 1660 } 1661 1662 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1663 { 1664 int64_t t; 1665 int i, upper; 1666 ppc_avr_t result; 1667 int sat = 0; 1668 1669 upper = ARRAY_SIZE(r->s32) - 1; 1670 t = (int64_t)b->VsrSW(upper); 1671 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1672 t += a->VsrSW(i); 1673 result.VsrSW(i) = 0; 1674 } 1675 result.VsrSW(upper) = cvtsdsw(t, &sat); 1676 *r = result; 1677 1678 if (sat) { 1679 set_vscr_sat(env); 1680 } 1681 } 1682 1683 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1684 { 1685 int i, j, upper; 1686 ppc_avr_t result; 1687 int sat = 0; 1688 1689 upper = 1; 1690 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1691 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 1692 1693 result.VsrD(i) = 0; 1694 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 1695 t += a->VsrSW(2 * i + j); 1696 } 1697 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 1698 } 1699 1700 *r = result; 1701 if (sat) { 1702 set_vscr_sat(env); 1703 } 1704 } 1705 1706 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1707 { 1708 int i, j; 1709 int sat = 0; 1710 1711 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1712 int64_t t = (int64_t)b->s32[i]; 1713 1714 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 1715 t += a->s8[4 * i + j]; 1716 } 1717 r->s32[i] = cvtsdsw(t, &sat); 1718 } 1719 1720 if (sat) { 1721 set_vscr_sat(env); 1722 } 1723 } 1724 1725 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1726 { 1727 int sat = 0; 1728 int i; 1729 1730 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1731 int64_t t = (int64_t)b->s32[i]; 1732 1733 t += a->s16[2 * i] + a->s16[2 * i + 1]; 1734 r->s32[i] = cvtsdsw(t, &sat); 1735 } 1736 1737 if (sat) { 1738 set_vscr_sat(env); 1739 } 1740 } 1741 1742 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1743 { 1744 int i, j; 1745 int sat = 0; 1746 1747 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1748 uint64_t t = (uint64_t)b->u32[i]; 1749 1750 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 1751 t += a->u8[4 * i + j]; 1752 } 1753 r->u32[i] = cvtuduw(t, &sat); 1754 } 1755 1756 if (sat) { 1757 set_vscr_sat(env); 1758 } 1759 } 1760 1761 #if defined(HOST_WORDS_BIGENDIAN) 1762 #define UPKHI 1 1763 #define UPKLO 0 1764 #else 1765 #define UPKHI 0 1766 #define UPKLO 1 1767 #endif 1768 #define VUPKPX(suffix, hi) \ 1769 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1770 { \ 1771 int i; \ 1772 ppc_avr_t result; \ 1773 \ 1774 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 1775 uint16_t e = b->u16[hi ? i : i + 4]; \ 1776 uint8_t a = (e >> 15) ? 
0xff : 0; \ 1777 uint8_t r = (e >> 10) & 0x1f; \ 1778 uint8_t g = (e >> 5) & 0x1f; \ 1779 uint8_t b = e & 0x1f; \ 1780 \ 1781 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 1782 } \ 1783 *r = result; \ 1784 } 1785 VUPKPX(lpx, UPKLO) 1786 VUPKPX(hpx, UPKHI) 1787 #undef VUPKPX 1788 1789 #define VUPK(suffix, unpacked, packee, hi) \ 1790 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1791 { \ 1792 int i; \ 1793 ppc_avr_t result; \ 1794 \ 1795 if (hi) { \ 1796 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 1797 result.unpacked[i] = b->packee[i]; \ 1798 } \ 1799 } else { \ 1800 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 1801 i++) { \ 1802 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 1803 } \ 1804 } \ 1805 *r = result; \ 1806 } 1807 VUPK(hsb, s16, s8, UPKHI) 1808 VUPK(hsh, s32, s16, UPKHI) 1809 VUPK(hsw, s64, s32, UPKHI) 1810 VUPK(lsb, s16, s8, UPKLO) 1811 VUPK(lsh, s32, s16, UPKLO) 1812 VUPK(lsw, s64, s32, UPKLO) 1813 #undef VUPK 1814 #undef UPKHI 1815 #undef UPKLO 1816 1817 #define VGENERIC_DO(name, element) \ 1818 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 1819 { \ 1820 int i; \ 1821 \ 1822 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1823 r->element[i] = name(b->element[i]); \ 1824 } \ 1825 } 1826 1827 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 1828 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 1829 1830 VGENERIC_DO(clzb, u8) 1831 VGENERIC_DO(clzh, u16) 1832 1833 #undef clzb 1834 #undef clzh 1835 1836 #define ctzb(v) ((v) ? ctz32(v) : 8) 1837 #define ctzh(v) ((v) ? ctz32(v) : 16) 1838 #define ctzw(v) ctz32((v)) 1839 #define ctzd(v) ctz64((v)) 1840 1841 VGENERIC_DO(ctzb, u8) 1842 VGENERIC_DO(ctzh, u16) 1843 VGENERIC_DO(ctzw, u32) 1844 VGENERIC_DO(ctzd, u64) 1845 1846 #undef ctzb 1847 #undef ctzh 1848 #undef ctzw 1849 #undef ctzd 1850 1851 #define popcntb(v) ctpop8(v) 1852 #define popcnth(v) ctpop16(v) 1853 #define popcntw(v) ctpop32(v) 1854 #define popcntd(v) ctpop64(v) 1855 1856 VGENERIC_DO(popcntb, u8) 1857 VGENERIC_DO(popcnth, u16) 1858 VGENERIC_DO(popcntw, u32) 1859 VGENERIC_DO(popcntd, u64) 1860 1861 #undef popcntb 1862 #undef popcnth 1863 #undef popcntw 1864 #undef popcntd 1865 1866 #undef VGENERIC_DO 1867 1868 #if defined(HOST_WORDS_BIGENDIAN) 1869 #define QW_ONE { .u64 = { 0, 1 } } 1870 #else 1871 #define QW_ONE { .u64 = { 1, 0 } } 1872 #endif 1873 1874 #ifndef CONFIG_INT128 1875 1876 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) 1877 { 1878 t->u64[0] = ~a.u64[0]; 1879 t->u64[1] = ~a.u64[1]; 1880 } 1881 1882 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) 1883 { 1884 if (a.VsrD(0) < b.VsrD(0)) { 1885 return -1; 1886 } else if (a.VsrD(0) > b.VsrD(0)) { 1887 return 1; 1888 } else if (a.VsrD(1) < b.VsrD(1)) { 1889 return -1; 1890 } else if (a.VsrD(1) > b.VsrD(1)) { 1891 return 1; 1892 } else { 1893 return 0; 1894 } 1895 } 1896 1897 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 1898 { 1899 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 1900 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 1901 (~a.VsrD(1) < b.VsrD(1)); 1902 } 1903 1904 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 1905 { 1906 ppc_avr_t not_a; 1907 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 1908 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 1909 (~a.VsrD(1) < b.VsrD(1)); 1910 avr_qw_not(¬_a, a); 1911 return avr_qw_cmpu(not_a, b) < 0; 1912 } 1913 1914 #endif 1915 1916 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1917 { 1918 #ifdef CONFIG_INT128 1919 r->u128 = a->u128 + b->u128; 1920 #else 1921 avr_qw_add(r, *a, 
*b); 1922 #endif 1923 } 1924 1925 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1926 { 1927 #ifdef CONFIG_INT128 1928 r->u128 = a->u128 + b->u128 + (c->u128 & 1); 1929 #else 1930 1931 if (c->VsrD(1) & 1) { 1932 ppc_avr_t tmp; 1933 1934 tmp.VsrD(0) = 0; 1935 tmp.VsrD(1) = c->VsrD(1) & 1; 1936 avr_qw_add(&tmp, *a, tmp); 1937 avr_qw_add(r, tmp, *b); 1938 } else { 1939 avr_qw_add(r, *a, *b); 1940 } 1941 #endif 1942 } 1943 1944 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1945 { 1946 #ifdef CONFIG_INT128 1947 r->u128 = (~a->u128 < b->u128); 1948 #else 1949 ppc_avr_t not_a; 1950 1951 avr_qw_not(¬_a, *a); 1952 1953 r->VsrD(0) = 0; 1954 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0); 1955 #endif 1956 } 1957 1958 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1959 { 1960 #ifdef CONFIG_INT128 1961 int carry_out = (~a->u128 < b->u128); 1962 if (!carry_out && (c->u128 & 1)) { 1963 carry_out = ((a->u128 + b->u128 + 1) == 0) && 1964 ((a->u128 != 0) || (b->u128 != 0)); 1965 } 1966 r->u128 = carry_out; 1967 #else 1968 1969 int carry_in = c->VsrD(1) & 1; 1970 int carry_out = 0; 1971 ppc_avr_t tmp; 1972 1973 carry_out = avr_qw_addc(&tmp, *a, *b); 1974 1975 if (!carry_out && carry_in) { 1976 ppc_avr_t one = QW_ONE; 1977 carry_out = avr_qw_addc(&tmp, tmp, one); 1978 } 1979 r->VsrD(0) = 0; 1980 r->VsrD(1) = carry_out; 1981 #endif 1982 } 1983 1984 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1985 { 1986 #ifdef CONFIG_INT128 1987 r->u128 = a->u128 - b->u128; 1988 #else 1989 ppc_avr_t tmp; 1990 ppc_avr_t one = QW_ONE; 1991 1992 avr_qw_not(&tmp, *b); 1993 avr_qw_add(&tmp, *a, tmp); 1994 avr_qw_add(r, tmp, one); 1995 #endif 1996 } 1997 1998 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1999 { 2000 #ifdef CONFIG_INT128 2001 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); 2002 #else 2003 ppc_avr_t tmp, sum; 2004 2005 avr_qw_not(&tmp, *b); 2006 avr_qw_add(&sum, *a, tmp); 2007 2008 tmp.VsrD(0) = 0; 2009 tmp.VsrD(1) = c->VsrD(1) & 1; 2010 avr_qw_add(r, sum, tmp); 2011 #endif 2012 } 2013 2014 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2015 { 2016 #ifdef CONFIG_INT128 2017 r->u128 = (~a->u128 < ~b->u128) || 2018 (a->u128 + ~b->u128 == (__uint128_t)-1); 2019 #else 2020 int carry = (avr_qw_cmpu(*a, *b) > 0); 2021 if (!carry) { 2022 ppc_avr_t tmp; 2023 avr_qw_not(&tmp, *b); 2024 avr_qw_add(&tmp, *a, tmp); 2025 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull)); 2026 } 2027 r->VsrD(0) = 0; 2028 r->VsrD(1) = carry; 2029 #endif 2030 } 2031 2032 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2033 { 2034 #ifdef CONFIG_INT128 2035 r->u128 = 2036 (~a->u128 < ~b->u128) || 2037 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); 2038 #else 2039 int carry_in = c->VsrD(1) & 1; 2040 int carry_out = (avr_qw_cmpu(*a, *b) > 0); 2041 if (!carry_out && carry_in) { 2042 ppc_avr_t tmp; 2043 avr_qw_not(&tmp, *b); 2044 avr_qw_add(&tmp, *a, tmp); 2045 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull)); 2046 } 2047 2048 r->VsrD(0) = 0; 2049 r->VsrD(1) = carry_out; 2050 #endif 2051 } 2052 2053 #define BCD_PLUS_PREF_1 0xC 2054 #define BCD_PLUS_PREF_2 0xF 2055 #define BCD_PLUS_ALT_1 0xA 2056 #define BCD_NEG_PREF 0xD 2057 #define BCD_NEG_ALT 0xB 2058 #define BCD_PLUS_ALT_2 0xE 2059 #define NATIONAL_PLUS 0x2B 2060 #define NATIONAL_NEG 0x2D 2061 2062 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2063 2064 static int bcd_get_sgn(ppc_avr_t *bcd) 2065 { 2066 switch 
(bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) { 2067 case BCD_PLUS_PREF_1: 2068 case BCD_PLUS_PREF_2: 2069 case BCD_PLUS_ALT_1: 2070 case BCD_PLUS_ALT_2: 2071 { 2072 return 1; 2073 } 2074 2075 case BCD_NEG_PREF: 2076 case BCD_NEG_ALT: 2077 { 2078 return -1; 2079 } 2080 2081 default: 2082 { 2083 return 0; 2084 } 2085 } 2086 } 2087 2088 static int bcd_preferred_sgn(int sgn, int ps) 2089 { 2090 if (sgn >= 0) { 2091 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2092 } else { 2093 return BCD_NEG_PREF; 2094 } 2095 } 2096 2097 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2098 { 2099 uint8_t result; 2100 if (n & 1) { 2101 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4; 2102 } else { 2103 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF; 2104 } 2105 2106 if (unlikely(result > 9)) { 2107 *invalid = true; 2108 } 2109 return result; 2110 } 2111 2112 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2113 { 2114 if (n & 1) { 2115 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F; 2116 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4); 2117 } else { 2118 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0; 2119 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit; 2120 } 2121 } 2122 2123 static bool bcd_is_valid(ppc_avr_t *bcd) 2124 { 2125 int i; 2126 int invalid = 0; 2127 2128 if (bcd_get_sgn(bcd) == 0) { 2129 return false; 2130 } 2131 2132 for (i = 1; i < 32; i++) { 2133 bcd_get_digit(bcd, i, &invalid); 2134 if (unlikely(invalid)) { 2135 return false; 2136 } 2137 } 2138 return true; 2139 } 2140 2141 static int bcd_cmp_zero(ppc_avr_t *bcd) 2142 { 2143 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) { 2144 return CRF_EQ; 2145 } else { 2146 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT; 2147 } 2148 } 2149 2150 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2151 { 2152 return reg->VsrH(7 - n); 2153 } 2154 2155 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2156 { 2157 reg->VsrH(7 - n) = val; 2158 } 2159 2160 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2161 { 2162 int i; 2163 int invalid = 0; 2164 for (i = 31; i > 0; i--) { 2165 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2166 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2167 if (unlikely(invalid)) { 2168 return 0; /* doesn't matter */ 2169 } else if (dig_a > dig_b) { 2170 return 1; 2171 } else if (dig_a < dig_b) { 2172 return -1; 2173 } 2174 } 2175 2176 return 0; 2177 } 2178 2179 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2180 int *overflow) 2181 { 2182 int carry = 0; 2183 int i; 2184 int is_zero = 1; 2185 2186 for (i = 1; i <= 31; i++) { 2187 uint8_t digit = bcd_get_digit(a, i, invalid) + 2188 bcd_get_digit(b, i, invalid) + carry; 2189 is_zero &= (digit == 0); 2190 if (digit > 9) { 2191 carry = 1; 2192 digit -= 10; 2193 } else { 2194 carry = 0; 2195 } 2196 2197 bcd_put_digit(t, digit, i); 2198 } 2199 2200 *overflow = carry; 2201 return is_zero; 2202 } 2203 2204 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2205 int *overflow) 2206 { 2207 int carry = 0; 2208 int i; 2209 2210 for (i = 1; i <= 31; i++) { 2211 uint8_t digit = bcd_get_digit(a, i, invalid) - 2212 bcd_get_digit(b, i, invalid) + carry; 2213 if (digit & 0x80) { 2214 carry = -1; 2215 digit += 10; 2216 } else { 2217 carry = 0; 2218 } 2219 2220 bcd_put_digit(t, digit, i); 2221 } 2222 2223 *overflow = carry; 2224 } 2225 2226 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2227 { 2228 2229 int sgna = bcd_get_sgn(a); 2230 int sgnb = bcd_get_sgn(b); 2231 int invalid = (sgna == 0) || (sgnb == 

uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    int zero = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? CRF_GT : CRF_LT;
        } else {
            int magnitude = bcd_cmp_mag(a, b);
            if (magnitude > 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
                bcd_sub_mag(&result, a, b, &invalid, &overflow);
                cr = (sgna > 0) ? CRF_GT : CRF_LT;
            } else if (magnitude < 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
                bcd_sub_mag(&result, b, a, &invalid, &overflow);
                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
            } else {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
                cr = CRF_EQ;
            }
        }
    }

    if (unlikely(invalid)) {
        result.VsrD(0) = result.VsrD(1) = -1;
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    } else if (zero) {
        cr |= CRF_EQ;
    }

    *r = result;

    return cr;
}

uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
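
/*
 * helper_bcdcfz()/helper_bcdctz() below convert between zoned decimal and
 * packed BCD: each of the 16 bytes holds one digit in its low nibble with a
 * zone in its high nibble (0x3 for PS=0, 0xF for PS=1), and the sign is
 * carried in the zone of the rightmost byte.
 */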

uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
        digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
        (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->VsrD(0) >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint64_t lo_value;
    uint64_t hi_value;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->VsrSD(0) < 0) {
        lo_value = -b->VsrSD(1);
        hi_value = ~b->VsrD(0) + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);
    } else {
        lo_value = b->VsrD(1);
        hi_value = b->VsrD(0);
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
    }

    if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
        lo_value > 9999999999999999ULL) {
        cr = CRF_SO;
    }

    for (i = 1; i < 16; hi_value /= 10, i++) {
        bcd_put_digit(&ret, hi_value % 10, i);
    }

    for (; i < 32; lo_value /= 10, i++) {
        bcd_put_digit(&ret, lo_value % 10, i);
    }

    cr |= bcd_cmp_zero(&ret);

    *r = ret;

    return cr;
}

uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->VsrSD(1) = -lo_value;
        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
    } else {
        r->VsrSD(1) = lo_value;
        r->VsrSD(0) = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}
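
/*
 * The two helpers below only manipulate the sign nibble:
 * helper_bcdcpsgn() copies the sign code of b onto a, while
 * helper_bcdsetsgn() rewrites b's sign with the preferred encoding for the
 * given PS.  Both still validate every digit and return CRF_SO on invalid
 * input.
 */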

uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);

    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i = a->VsrSB(7);
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSB(7);
    if (i >= 32) {
        ox_flag = true;
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i <= -32) {
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    int i = a->VsrSB(7);
    ppc_avr_t bcd_one;

    bcd_one.VsrD(0) = 0;
    bcd_one.VsrD(1) = 0x10;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);

        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}
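
/*
 * helper_bcdtrunc()/helper_bcdutrunc() below keep only the low N decimal
 * digits of b, where N is taken from halfword 3 of a (plus the sign nibble
 * in the signed variant), and report CRF_SO when nonzero digits are
 * truncated away.
 */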

uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
    int i = a->VsrSH(3) + 1;
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i >= 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}

uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSH(3);
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    } else if (i == 0) {
        if (ret.VsrD(0) || ret.VsrD(1)) {
            ox_flag = CRF_SO;
        }
        ret.VsrD(0) = ret.VsrD(1) = 0;
    }

    *r = ret;
    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}

void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.VsrW(i) = b->VsrW(i) ^
            (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}

void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
    }
    *r = result;
}
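
/*
 * The AES helpers use the combined lookup tables from crypto/aes.h:
 * AES_Te0..AES_Te3 fold SubBytes, ShiftRows and MixColumns into one table
 * lookup per byte for vcipher above, while the inverse round in vncipher
 * below applies InvShiftRows/InvSubBytes first and then InvMixColumns via
 * AES_imc.
 */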

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /*
     * This differs from what is written in ISA V2.07.  The RTL is
     * incorrect and will be fixed in V2.07B.
     */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->VsrW(i) =
            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
            AES_imc[tmp.VsrB(4 * i + 3)][3];
    }
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
    }
    *r = result;
}

void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}

void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
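
/*
 * helper_brinc() below implements the SPE bit-reversed increment used for
 * FFT-style bit-reversed addressing: within the bit field selected by arg2
 * (capped at MASKBITS bits), arg1 is incremented as if its bits were in
 * reverse order, e.g. with a 3-bit field 000 -> 100 -> 010 -> 110 -> 001.
 */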

target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}