1 /* 2 * PowerPC integer and vector emulation helpers for QEMU. 3 * 4 * Copyright (c) 2003-2007 Jocelyn Mayer 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "cpu.h" 22 #include "internal.h" 23 #include "qemu/host-utils.h" 24 #include "qemu/main-loop.h" 25 #include "exec/helper-proto.h" 26 #include "crypto/aes.h" 27 #include "fpu/softfloat.h" 28 #include "qapi/error.h" 29 #include "qemu/guest-random.h" 30 31 #include "helper_regs.h" 32 /*****************************************************************************/ 33 /* Fixed point operations helpers */ 34 35 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov) 36 { 37 if (unlikely(ov)) { 38 env->so = env->ov = 1; 39 } else { 40 env->ov = 0; 41 } 42 } 43 44 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, 45 uint32_t oe) 46 { 47 uint64_t rt = 0; 48 int overflow = 0; 49 50 uint64_t dividend = (uint64_t)ra << 32; 51 uint64_t divisor = (uint32_t)rb; 52 53 if (unlikely(divisor == 0)) { 54 overflow = 1; 55 } else { 56 rt = dividend / divisor; 57 overflow = rt > UINT32_MAX; 58 } 59 60 if (unlikely(overflow)) { 61 rt = 0; /* Undefined */ 62 } 63 64 if (oe) { 65 helper_update_ov_legacy(env, overflow); 66 } 67 68 return (target_ulong)rt; 69 } 70 71 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, 72 uint32_t oe) 73 { 74 int64_t rt = 0; 75 int overflow = 0; 76 77 int64_t dividend = (int64_t)ra << 32; 78 int64_t divisor = (int64_t)((int32_t)rb); 79 80 if (unlikely((divisor == 0) || 81 ((divisor == -1ull) && (dividend == INT64_MIN)))) { 82 overflow = 1; 83 } else { 84 rt = dividend / divisor; 85 overflow = rt != (int32_t)rt; 86 } 87 88 if (unlikely(overflow)) { 89 rt = 0; /* Undefined */ 90 } 91 92 if (oe) { 93 helper_update_ov_legacy(env, overflow); 94 } 95 96 return (target_ulong)rt; 97 } 98 99 #if defined(TARGET_PPC64) 100 101 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) 102 { 103 uint64_t rt = 0; 104 int overflow = 0; 105 106 overflow = divu128(&rt, &ra, rb); 107 108 if (unlikely(overflow)) { 109 rt = 0; /* Undefined */ 110 } 111 112 if (oe) { 113 helper_update_ov_legacy(env, overflow); 114 } 115 116 return rt; 117 } 118 119 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) 120 { 121 int64_t rt = 0; 122 int64_t ra = (int64_t)rau; 123 int64_t rb = (int64_t)rbu; 124 int overflow = divs128(&rt, &ra, rb); 125 126 if (unlikely(overflow)) { 127 rt = 0; /* Undefined */ 128 } 129 130 if (oe) { 131 helper_update_ov_legacy(env, overflow); 132 } 133 134 return rt; 135 } 136 137 #endif 138 139 140 #if defined(TARGET_PPC64) 141 /* if x = 0xab, returns 0xababababababababa */ 142 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff)) 143 144 /* 145 * subtract 1 from each byte, and with inverse, check if MSB is set at each 146 * byte. 147 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80 148 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 149 */ 150 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 151 152 /* When you XOR the pattern and there is a match, that byte will be zero */ 153 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 154 155 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 156 { 157 return hasvalue(rb, ra) ? CRF_GT : 0; 158 } 159 160 #undef pattern 161 #undef haszero 162 #undef hasvalue 163 164 /* 165 * Return a random number. 166 */ 167 uint64_t helper_darn32(void) 168 { 169 Error *err = NULL; 170 uint32_t ret; 171 172 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 173 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 174 error_get_pretty(err)); 175 error_free(err); 176 return -1; 177 } 178 179 return ret; 180 } 181 182 uint64_t helper_darn64(void) 183 { 184 Error *err = NULL; 185 uint64_t ret; 186 187 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 188 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 189 error_get_pretty(err)); 190 error_free(err); 191 return -1; 192 } 193 194 return ret; 195 } 196 197 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 198 { 199 int i; 200 uint64_t ra = 0; 201 202 for (i = 0; i < 8; i++) { 203 int index = (rs >> (i * 8)) & 0xFF; 204 if (index < 64) { 205 if (rb & PPC_BIT(index)) { 206 ra |= 1 << i; 207 } 208 } 209 } 210 return ra; 211 } 212 213 #endif 214 215 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 216 { 217 target_ulong mask = 0xff; 218 target_ulong ra = 0; 219 int i; 220 221 for (i = 0; i < sizeof(target_ulong); i++) { 222 if ((rs & mask) == (rb & mask)) { 223 ra |= mask; 224 } 225 mask <<= 8; 226 } 227 return ra; 228 } 229 230 /* shift right arithmetic helper */ 231 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 232 target_ulong shift) 233 { 234 int32_t ret; 235 236 if (likely(!(shift & 0x20))) { 237 if (likely((uint32_t)shift != 0)) { 238 shift &= 0x1f; 239 ret = (int32_t)value >> shift; 240 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 241 env->ca32 = env->ca = 0; 242 } else { 243 env->ca32 = env->ca = 1; 244 } 245 } else { 246 ret = (int32_t)value; 247 env->ca32 = env->ca = 0; 248 } 249 } else { 250 ret = (int32_t)value >> 31; 251 env->ca32 = env->ca = (ret != 0); 252 } 253 return (target_long)ret; 254 } 255 256 #if defined(TARGET_PPC64) 257 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 258 target_ulong shift) 259 { 260 int64_t ret; 261 262 if (likely(!(shift & 0x40))) { 263 if (likely((uint64_t)shift != 0)) { 264 shift &= 0x3f; 265 ret = (int64_t)value >> shift; 266 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 267 env->ca32 = env->ca = 0; 268 } else { 269 env->ca32 = env->ca = 1; 270 } 271 } else { 272 ret = (int64_t)value; 273 env->ca32 = env->ca = 0; 274 } 275 } else { 276 ret = (int64_t)value >> 63; 277 env->ca32 = env->ca = (ret != 0); 278 } 279 return ret; 280 } 281 #endif 282 283 #if defined(TARGET_PPC64) 284 target_ulong helper_popcntb(target_ulong val) 285 { 286 /* Note that we don't fold past bytes */ 287 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 288 0x5555555555555555ULL); 289 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 290 0x3333333333333333ULL); 291 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 292 0x0f0f0f0f0f0f0f0fULL); 293 return val; 294 } 295 296 target_ulong helper_popcntw(target_ulong val) 297 { 298 /* Note that we don't fold past words. */ 299 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 300 0x5555555555555555ULL); 301 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 302 0x3333333333333333ULL); 303 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 304 0x0f0f0f0f0f0f0f0fULL); 305 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 306 0x00ff00ff00ff00ffULL); 307 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 308 0x0000ffff0000ffffULL); 309 return val; 310 } 311 #else 312 target_ulong helper_popcntb(target_ulong val) 313 { 314 /* Note that we don't fold past bytes */ 315 val = (val & 0x55555555) + ((val >> 1) & 0x55555555); 316 val = (val & 0x33333333) + ((val >> 2) & 0x33333333); 317 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); 318 return val; 319 } 320 #endif 321 322 /*****************************************************************************/ 323 /* PowerPC 601 specific instructions (POWER bridge) */ 324 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2) 325 { 326 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 327 328 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 329 (int32_t)arg2 == 0) { 330 env->spr[SPR_MQ] = 0; 331 return INT32_MIN; 332 } else { 333 env->spr[SPR_MQ] = tmp % arg2; 334 return tmp / (int32_t)arg2; 335 } 336 } 337 338 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1, 339 target_ulong arg2) 340 { 341 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ]; 342 343 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 344 (int32_t)arg2 == 0) { 345 env->so = env->ov = 1; 346 env->spr[SPR_MQ] = 0; 347 return INT32_MIN; 348 } else { 349 env->spr[SPR_MQ] = tmp % arg2; 350 tmp /= (int32_t)arg2; 351 if ((int32_t)tmp != tmp) { 352 env->so = env->ov = 1; 353 } else { 354 env->ov = 0; 355 } 356 return tmp; 357 } 358 } 359 360 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1, 361 target_ulong arg2) 362 { 363 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 364 (int32_t)arg2 == 0) { 365 env->spr[SPR_MQ] = 0; 366 return INT32_MIN; 367 } else { 368 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 369 return (int32_t)arg1 / (int32_t)arg2; 370 } 371 } 372 373 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1, 374 target_ulong arg2) 375 { 376 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) || 377 (int32_t)arg2 == 0) { 378 env->so = env->ov = 1; 379 env->spr[SPR_MQ] = 0; 380 return INT32_MIN; 381 } else { 382 env->ov = 0; 383 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2; 384 return (int32_t)arg1 / (int32_t)arg2; 385 } 386 } 387 388 /*****************************************************************************/ 389 /* 602 specific instructions */ 390 /* mfrom is the most crazy instruction ever seen, imho ! */ 391 /* Real implementation uses a ROM table. Do the same */ 392 /* 393 * Extremely decomposed: 394 * -arg / 256 395 * return 256 * log10(10 + 1.0) + 0.5 396 */ 397 #if !defined(CONFIG_USER_ONLY) 398 target_ulong helper_602_mfrom(target_ulong arg) 399 { 400 if (likely(arg < 602)) { 401 #include "mfrom_table.c.inc" 402 return mfrom_ROM_table[arg]; 403 } else { 404 return 0; 405 } 406 } 407 #endif 408 409 /*****************************************************************************/ 410 /* Altivec extension helpers */ 411 #if defined(HOST_WORDS_BIGENDIAN) 412 #define VECTOR_FOR_INORDER_I(index, element) \ 413 for (index = 0; index < ARRAY_SIZE(r->element); index++) 414 #else 415 #define VECTOR_FOR_INORDER_I(index, element) \ 416 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--) 417 #endif 418 419 /* Saturating arithmetic helpers. */ 420 #define SATCVT(from, to, from_type, to_type, min, max) \ 421 static inline to_type cvt##from##to(from_type x, int *sat) \ 422 { \ 423 to_type r; \ 424 \ 425 if (x < (from_type)min) { \ 426 r = min; \ 427 *sat = 1; \ 428 } else if (x > (from_type)max) { \ 429 r = max; \ 430 *sat = 1; \ 431 } else { \ 432 r = x; \ 433 } \ 434 return r; \ 435 } 436 #define SATCVTU(from, to, from_type, to_type, min, max) \ 437 static inline to_type cvt##from##to(from_type x, int *sat) \ 438 { \ 439 to_type r; \ 440 \ 441 if (x > (from_type)max) { \ 442 r = max; \ 443 *sat = 1; \ 444 } else { \ 445 r = x; \ 446 } \ 447 return r; \ 448 } 449 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX) 450 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX) 451 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX) 452 453 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX) 454 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX) 455 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX) 456 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX) 457 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX) 458 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX) 459 #undef SATCVT 460 #undef SATCVTU 461 462 void helper_mtvscr(CPUPPCState *env, uint32_t vscr) 463 { 464 env->vscr = vscr & ~(1u << VSCR_SAT); 465 /* Which bit we set is completely arbitrary, but clear the rest. */ 466 env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT); 467 env->vscr_sat.u64[1] = 0; 468 set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status); 469 } 470 471 uint32_t helper_mfvscr(CPUPPCState *env) 472 { 473 uint32_t sat = (env->vscr_sat.u64[0] | env->vscr_sat.u64[1]) != 0; 474 return env->vscr | (sat << VSCR_SAT); 475 } 476 477 static inline void set_vscr_sat(CPUPPCState *env) 478 { 479 /* The choice of non-zero value is arbitrary. */ 480 env->vscr_sat.u32[0] = 1; 481 } 482 483 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 484 { 485 int i; 486 487 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 488 r->u32[i] = ~a->u32[i] < b->u32[i]; 489 } 490 } 491 492 /* vprtybw */ 493 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b) 494 { 495 int i; 496 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 497 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16); 498 res ^= res >> 8; 499 r->u32[i] = res & 1; 500 } 501 } 502 503 /* vprtybd */ 504 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b) 505 { 506 int i; 507 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 508 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32); 509 res ^= res >> 16; 510 res ^= res >> 8; 511 r->u64[i] = res & 1; 512 } 513 } 514 515 /* vprtybq */ 516 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b) 517 { 518 uint64_t res = b->u64[0] ^ b->u64[1]; 519 res ^= res >> 32; 520 res ^= res >> 16; 521 res ^= res >> 8; 522 r->VsrD(1) = res & 1; 523 r->VsrD(0) = 0; 524 } 525 526 #define VARITHFP(suffix, func) \ 527 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 528 ppc_avr_t *b) \ 529 { \ 530 int i; \ 531 \ 532 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 533 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 534 } \ 535 } 536 VARITHFP(addfp, float32_add) 537 VARITHFP(subfp, float32_sub) 538 VARITHFP(minfp, float32_min) 539 VARITHFP(maxfp, float32_max) 540 #undef VARITHFP 541 542 #define VARITHFPFMA(suffix, type) \ 543 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 544 ppc_avr_t *b, ppc_avr_t *c) \ 545 { \ 546 int i; \ 547 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 548 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 549 type, &env->vec_status); \ 550 } \ 551 } 552 VARITHFPFMA(maddfp, 0); 553 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 554 #undef VARITHFPFMA 555 556 #define VARITHSAT_CASE(type, op, cvt, element) \ 557 { \ 558 type result = (type)a->element[i] op (type)b->element[i]; \ 559 r->element[i] = cvt(result, &sat); \ 560 } 561 562 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 563 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 564 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 565 { \ 566 int sat = 0; \ 567 int i; \ 568 \ 569 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 570 VARITHSAT_CASE(optype, op, cvt, element); \ 571 } \ 572 if (sat) { \ 573 vscr_sat->u32[0] = 1; \ 574 } \ 575 } 576 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 577 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 578 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 579 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 580 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 581 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 582 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 583 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 584 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 585 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 586 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 587 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 588 #undef VARITHSAT_CASE 589 #undef VARITHSAT_DO 590 #undef VARITHSAT_SIGNED 591 #undef VARITHSAT_UNSIGNED 592 593 #define VAVG_DO(name, element, etype) \ 594 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 595 { \ 596 int i; \ 597 \ 598 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 599 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 600 r->element[i] = x >> 1; \ 601 } \ 602 } 603 604 #define VAVG(type, signed_element, signed_type, unsigned_element, \ 605 unsigned_type) \ 606 VAVG_DO(avgs##type, signed_element, signed_type) \ 607 VAVG_DO(avgu##type, unsigned_element, unsigned_type) 608 VAVG(b, s8, int16_t, u8, uint16_t) 609 VAVG(h, s16, int32_t, u16, uint32_t) 610 VAVG(w, s32, int64_t, u32, uint64_t) 611 #undef VAVG_DO 612 #undef VAVG 613 614 #define VABSDU_DO(name, element) \ 615 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 616 { \ 617 int i; \ 618 \ 619 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 620 r->element[i] = (a->element[i] > b->element[i]) ? \ 621 (a->element[i] - b->element[i]) : \ 622 (b->element[i] - a->element[i]); \ 623 } \ 624 } 625 626 /* 627 * VABSDU - Vector absolute difference unsigned 628 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 629 * element - element type to access from vector 630 */ 631 #define VABSDU(type, element) \ 632 VABSDU_DO(absdu##type, element) 633 VABSDU(b, u8) 634 VABSDU(h, u16) 635 VABSDU(w, u32) 636 #undef VABSDU_DO 637 #undef VABSDU 638 639 #define VCF(suffix, cvt, element) \ 640 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 641 ppc_avr_t *b, uint32_t uim) \ 642 { \ 643 int i; \ 644 \ 645 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 646 float32 t = cvt(b->element[i], &env->vec_status); \ 647 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 648 } \ 649 } 650 VCF(ux, uint32_to_float32, u32) 651 VCF(sx, int32_to_float32, s32) 652 #undef VCF 653 654 #define VCMP_DO(suffix, compare, element, record) \ 655 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 656 ppc_avr_t *a, ppc_avr_t *b) \ 657 { \ 658 uint64_t ones = (uint64_t)-1; \ 659 uint64_t all = ones; \ 660 uint64_t none = 0; \ 661 int i; \ 662 \ 663 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 664 uint64_t result = (a->element[i] compare b->element[i] ? \ 665 ones : 0x0); \ 666 switch (sizeof(a->element[0])) { \ 667 case 8: \ 668 r->u64[i] = result; \ 669 break; \ 670 case 4: \ 671 r->u32[i] = result; \ 672 break; \ 673 case 2: \ 674 r->u16[i] = result; \ 675 break; \ 676 case 1: \ 677 r->u8[i] = result; \ 678 break; \ 679 } \ 680 all &= result; \ 681 none |= result; \ 682 } \ 683 if (record) { \ 684 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 685 } \ 686 } 687 #define VCMP(suffix, compare, element) \ 688 VCMP_DO(suffix, compare, element, 0) \ 689 VCMP_DO(suffix##_dot, compare, element, 1) 690 VCMP(equb, ==, u8) 691 VCMP(equh, ==, u16) 692 VCMP(equw, ==, u32) 693 VCMP(equd, ==, u64) 694 VCMP(gtub, >, u8) 695 VCMP(gtuh, >, u16) 696 VCMP(gtuw, >, u32) 697 VCMP(gtud, >, u64) 698 VCMP(gtsb, >, s8) 699 VCMP(gtsh, >, s16) 700 VCMP(gtsw, >, s32) 701 VCMP(gtsd, >, s64) 702 #undef VCMP_DO 703 #undef VCMP 704 705 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \ 706 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \ 707 ppc_avr_t *a, ppc_avr_t *b) \ 708 { \ 709 etype ones = (etype)-1; \ 710 etype all = ones; \ 711 etype result, none = 0; \ 712 int i; \ 713 \ 714 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 715 if (cmpzero) { \ 716 result = ((a->element[i] == 0) \ 717 || (b->element[i] == 0) \ 718 || (a->element[i] != b->element[i]) ? \ 719 ones : 0x0); \ 720 } else { \ 721 result = (a->element[i] != b->element[i]) ? ones : 0x0; \ 722 } \ 723 r->element[i] = result; \ 724 all &= result; \ 725 none |= result; \ 726 } \ 727 if (record) { \ 728 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 729 } \ 730 } 731 732 /* 733 * VCMPNEZ - Vector compare not equal to zero 734 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word) 735 * element - element type to access from vector 736 */ 737 #define VCMPNE(suffix, element, etype, cmpzero) \ 738 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \ 739 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1) 740 VCMPNE(zb, u8, uint8_t, 1) 741 VCMPNE(zh, u16, uint16_t, 1) 742 VCMPNE(zw, u32, uint32_t, 1) 743 VCMPNE(b, u8, uint8_t, 0) 744 VCMPNE(h, u16, uint16_t, 0) 745 VCMPNE(w, u32, uint32_t, 0) 746 #undef VCMPNE_DO 747 #undef VCMPNE 748 749 #define VCMPFP_DO(suffix, compare, order, record) \ 750 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 751 ppc_avr_t *a, ppc_avr_t *b) \ 752 { \ 753 uint32_t ones = (uint32_t)-1; \ 754 uint32_t all = ones; \ 755 uint32_t none = 0; \ 756 int i; \ 757 \ 758 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 759 uint32_t result; \ 760 FloatRelation rel = \ 761 float32_compare_quiet(a->f32[i], b->f32[i], \ 762 &env->vec_status); \ 763 if (rel == float_relation_unordered) { \ 764 result = 0; \ 765 } else if (rel compare order) { \ 766 result = ones; \ 767 } else { \ 768 result = 0; \ 769 } \ 770 r->u32[i] = result; \ 771 all &= result; \ 772 none |= result; \ 773 } \ 774 if (record) { \ 775 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 776 } \ 777 } 778 #define VCMPFP(suffix, compare, order) \ 779 VCMPFP_DO(suffix, compare, order, 0) \ 780 VCMPFP_DO(suffix##_dot, compare, order, 1) 781 VCMPFP(eqfp, ==, float_relation_equal) 782 VCMPFP(gefp, !=, float_relation_less) 783 VCMPFP(gtfp, ==, float_relation_greater) 784 #undef VCMPFP_DO 785 #undef VCMPFP 786 787 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 788 ppc_avr_t *a, ppc_avr_t *b, int record) 789 { 790 int i; 791 int all_in = 0; 792 793 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 794 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 795 &env->vec_status); 796 if (le_rel == float_relation_unordered) { 797 r->u32[i] = 0xc0000000; 798 all_in = 1; 799 } else { 800 float32 bneg = float32_chs(b->f32[i]); 801 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, 802 &env->vec_status); 803 int le = le_rel != float_relation_greater; 804 int ge = ge_rel != float_relation_less; 805 806 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 807 all_in |= (!le | !ge); 808 } 809 } 810 if (record) { 811 env->crf[6] = (all_in == 0) << 1; 812 } 813 } 814 815 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 816 { 817 vcmpbfp_internal(env, r, a, b, 0); 818 } 819 820 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 821 ppc_avr_t *b) 822 { 823 vcmpbfp_internal(env, r, a, b, 1); 824 } 825 826 #define VCT(suffix, satcvt, element) \ 827 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 828 ppc_avr_t *b, uint32_t uim) \ 829 { \ 830 int i; \ 831 int sat = 0; \ 832 float_status s = env->vec_status; \ 833 \ 834 set_float_rounding_mode(float_round_to_zero, &s); \ 835 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 836 if (float32_is_any_nan(b->f32[i])) { \ 837 r->element[i] = 0; \ 838 } else { \ 839 float64 t = float32_to_float64(b->f32[i], &s); \ 840 int64_t j; \ 841 \ 842 t = float64_scalbn(t, uim, &s); \ 843 j = float64_to_int64(t, &s); \ 844 r->element[i] = satcvt(j, &sat); \ 845 } \ 846 } \ 847 if (sat) { \ 848 set_vscr_sat(env); \ 849 } \ 850 } 851 VCT(uxs, cvtsduw, u32) 852 VCT(sxs, cvtsdsw, s32) 853 #undef VCT 854 855 target_ulong helper_vclzlsbb(ppc_avr_t *r) 856 { 857 target_ulong count = 0; 858 int i; 859 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 860 if (r->VsrB(i) & 0x01) { 861 break; 862 } 863 count++; 864 } 865 return count; 866 } 867 868 target_ulong helper_vctzlsbb(ppc_avr_t *r) 869 { 870 target_ulong count = 0; 871 int i; 872 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 873 if (r->VsrB(i) & 0x01) { 874 break; 875 } 876 count++; 877 } 878 return count; 879 } 880 881 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 882 ppc_avr_t *b, ppc_avr_t *c) 883 { 884 int sat = 0; 885 int i; 886 887 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 888 int32_t prod = a->s16[i] * b->s16[i]; 889 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 890 891 r->s16[i] = cvtswsh(t, &sat); 892 } 893 894 if (sat) { 895 set_vscr_sat(env); 896 } 897 } 898 899 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 900 ppc_avr_t *b, ppc_avr_t *c) 901 { 902 int sat = 0; 903 int i; 904 905 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 906 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 907 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 908 r->s16[i] = cvtswsh(t, &sat); 909 } 910 911 if (sat) { 912 set_vscr_sat(env); 913 } 914 } 915 916 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 917 { 918 int i; 919 920 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 921 int32_t prod = a->s16[i] * b->s16[i]; 922 r->s16[i] = (int16_t) (prod + c->s16[i]); 923 } 924 } 925 926 #define VMRG_DO(name, element, access, ofs) \ 927 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 928 { \ 929 ppc_avr_t result; \ 930 int i, half = ARRAY_SIZE(r->element) / 2; \ 931 \ 932 for (i = 0; i < half; i++) { \ 933 result.access(i * 2 + 0) = a->access(i + ofs); \ 934 result.access(i * 2 + 1) = b->access(i + ofs); \ 935 } \ 936 *r = result; \ 937 } 938 939 #define VMRG(suffix, element, access) \ 940 VMRG_DO(mrgl##suffix, element, access, half) \ 941 VMRG_DO(mrgh##suffix, element, access, 0) 942 VMRG(b, u8, VsrB) 943 VMRG(h, u16, VsrH) 944 VMRG(w, u32, VsrW) 945 #undef VMRG_DO 946 #undef VMRG 947 948 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 949 ppc_avr_t *b, ppc_avr_t *c) 950 { 951 int32_t prod[16]; 952 int i; 953 954 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 955 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 956 } 957 958 VECTOR_FOR_INORDER_I(i, s32) { 959 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 960 prod[4 * i + 2] + prod[4 * i + 3]; 961 } 962 } 963 964 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 965 ppc_avr_t *b, ppc_avr_t *c) 966 { 967 int32_t prod[8]; 968 int i; 969 970 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 971 prod[i] = a->s16[i] * b->s16[i]; 972 } 973 974 VECTOR_FOR_INORDER_I(i, s32) { 975 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 976 } 977 } 978 979 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 980 ppc_avr_t *b, ppc_avr_t *c) 981 { 982 int32_t prod[8]; 983 int i; 984 int sat = 0; 985 986 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 987 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 988 } 989 990 VECTOR_FOR_INORDER_I(i, s32) { 991 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 992 993 r->u32[i] = cvtsdsw(t, &sat); 994 } 995 996 if (sat) { 997 set_vscr_sat(env); 998 } 999 } 1000 1001 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1002 ppc_avr_t *b, ppc_avr_t *c) 1003 { 1004 uint16_t prod[16]; 1005 int i; 1006 1007 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1008 prod[i] = a->u8[i] * b->u8[i]; 1009 } 1010 1011 VECTOR_FOR_INORDER_I(i, u32) { 1012 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1013 prod[4 * i + 2] + prod[4 * i + 3]; 1014 } 1015 } 1016 1017 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1018 ppc_avr_t *b, ppc_avr_t *c) 1019 { 1020 uint32_t prod[8]; 1021 int i; 1022 1023 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1024 prod[i] = a->u16[i] * b->u16[i]; 1025 } 1026 1027 VECTOR_FOR_INORDER_I(i, u32) { 1028 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1029 } 1030 } 1031 1032 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1033 ppc_avr_t *b, ppc_avr_t *c) 1034 { 1035 uint32_t prod[8]; 1036 int i; 1037 int sat = 0; 1038 1039 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1040 prod[i] = a->u16[i] * b->u16[i]; 1041 } 1042 1043 VECTOR_FOR_INORDER_I(i, s32) { 1044 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1045 1046 r->u32[i] = cvtuduw(t, &sat); 1047 } 1048 1049 if (sat) { 1050 set_vscr_sat(env); 1051 } 1052 } 1053 1054 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 1055 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1056 { \ 1057 int i; \ 1058 \ 1059 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1060 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 1061 (cast)b->mul_access(i); \ 1062 } \ 1063 } 1064 1065 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 1066 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1067 { \ 1068 int i; \ 1069 \ 1070 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1071 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1072 (cast)b->mul_access(i + 1); \ 1073 } \ 1074 } 1075 1076 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1077 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \ 1078 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast) 1079 VMUL(sb, s8, VsrSB, VsrSH, int16_t) 1080 VMUL(sh, s16, VsrSH, VsrSW, int32_t) 1081 VMUL(sw, s32, VsrSW, VsrSD, int64_t) 1082 VMUL(ub, u8, VsrB, VsrH, uint16_t) 1083 VMUL(uh, u16, VsrH, VsrW, uint32_t) 1084 VMUL(uw, u32, VsrW, VsrD, uint64_t) 1085 #undef VMUL_DO_EVN 1086 #undef VMUL_DO_ODD 1087 #undef VMUL 1088 1089 void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1090 { 1091 int i; 1092 1093 for (i = 0; i < 4; i++) { 1094 r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32); 1095 } 1096 } 1097 1098 void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1099 { 1100 int i; 1101 1102 for (i = 0; i < 4; i++) { 1103 r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] * 1104 (uint64_t)b->u32[i]) >> 32); 1105 } 1106 } 1107 1108 void helper_vmulhsd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1109 { 1110 uint64_t discard; 1111 1112 muls64(&discard, &r->u64[0], a->s64[0], b->s64[0]); 1113 muls64(&discard, &r->u64[1], a->s64[1], b->s64[1]); 1114 } 1115 1116 void helper_vmulhud(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1117 { 1118 uint64_t discard; 1119 1120 mulu64(&discard, &r->u64[0], a->u64[0], b->u64[0]); 1121 mulu64(&discard, &r->u64[1], a->u64[1], b->u64[1]); 1122 } 1123 1124 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1125 ppc_avr_t *c) 1126 { 1127 ppc_avr_t result; 1128 int i; 1129 1130 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1131 int s = c->VsrB(i) & 0x1f; 1132 int index = s & 0xf; 1133 1134 if (s & 0x10) { 1135 result.VsrB(i) = b->VsrB(index); 1136 } else { 1137 result.VsrB(i) = a->VsrB(index); 1138 } 1139 } 1140 *r = result; 1141 } 1142 1143 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1144 ppc_avr_t *c) 1145 { 1146 ppc_avr_t result; 1147 int i; 1148 1149 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1150 int s = c->VsrB(i) & 0x1f; 1151 int index = 15 - (s & 0xf); 1152 1153 if (s & 0x10) { 1154 result.VsrB(i) = a->VsrB(index); 1155 } else { 1156 result.VsrB(i) = b->VsrB(index); 1157 } 1158 } 1159 *r = result; 1160 } 1161 1162 #if defined(HOST_WORDS_BIGENDIAN) 1163 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1164 #define VBPERMD_INDEX(i) (i) 1165 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1166 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) 1167 #else 1168 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1169 #define VBPERMD_INDEX(i) (1 - i) 1170 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1171 #define EXTRACT_BIT(avr, i, index) \ 1172 (extract64((avr)->u64[1 - i], 63 - index, 1)) 1173 #endif 1174 1175 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1176 { 1177 int i, j; 1178 ppc_avr_t result = { .u64 = { 0, 0 } }; 1179 VECTOR_FOR_INORDER_I(i, u64) { 1180 for (j = 0; j < 8; j++) { 1181 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1182 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1183 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1184 } 1185 } 1186 } 1187 *r = result; 1188 } 1189 1190 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1191 { 1192 int i; 1193 uint64_t perm = 0; 1194 1195 VECTOR_FOR_INORDER_I(i, u8) { 1196 int index = VBPERMQ_INDEX(b, i); 1197 1198 if (index < 128) { 1199 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1200 if (a->u64[VBPERMQ_DW(index)] & mask) { 1201 perm |= (0x8000 >> i); 1202 } 1203 } 1204 } 1205 1206 r->VsrD(0) = perm; 1207 r->VsrD(1) = 0; 1208 } 1209 1210 #undef VBPERMQ_INDEX 1211 #undef VBPERMQ_DW 1212 1213 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1214 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1215 { \ 1216 int i, j; \ 1217 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ 1218 \ 1219 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1220 prod[i] = 0; \ 1221 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1222 if (a->srcfld[i] & (1ull << j)) { \ 1223 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1224 } \ 1225 } \ 1226 } \ 1227 \ 1228 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1229 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ 1230 } \ 1231 } 1232 1233 PMSUM(vpmsumb, u8, u16, uint16_t) 1234 PMSUM(vpmsumh, u16, u32, uint32_t) 1235 PMSUM(vpmsumw, u32, u64, uint64_t) 1236 1237 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1238 { 1239 1240 #ifdef CONFIG_INT128 1241 int i, j; 1242 __uint128_t prod[2]; 1243 1244 VECTOR_FOR_INORDER_I(i, u64) { 1245 prod[i] = 0; 1246 for (j = 0; j < 64; j++) { 1247 if (a->u64[i] & (1ull << j)) { 1248 prod[i] ^= (((__uint128_t)b->u64[i]) << j); 1249 } 1250 } 1251 } 1252 1253 r->u128 = prod[0] ^ prod[1]; 1254 1255 #else 1256 int i, j; 1257 ppc_avr_t prod[2]; 1258 1259 VECTOR_FOR_INORDER_I(i, u64) { 1260 prod[i].VsrD(1) = prod[i].VsrD(0) = 0; 1261 for (j = 0; j < 64; j++) { 1262 if (a->u64[i] & (1ull << j)) { 1263 ppc_avr_t bshift; 1264 if (j == 0) { 1265 bshift.VsrD(0) = 0; 1266 bshift.VsrD(1) = b->u64[i]; 1267 } else { 1268 bshift.VsrD(0) = b->u64[i] >> (64 - j); 1269 bshift.VsrD(1) = b->u64[i] << j; 1270 } 1271 prod[i].VsrD(1) ^= bshift.VsrD(1); 1272 prod[i].VsrD(0) ^= bshift.VsrD(0); 1273 } 1274 } 1275 } 1276 1277 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1); 1278 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0); 1279 #endif 1280 } 1281 1282 1283 #if defined(HOST_WORDS_BIGENDIAN) 1284 #define PKBIG 1 1285 #else 1286 #define PKBIG 0 1287 #endif 1288 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1289 { 1290 int i, j; 1291 ppc_avr_t result; 1292 #if defined(HOST_WORDS_BIGENDIAN) 1293 const ppc_avr_t *x[2] = { a, b }; 1294 #else 1295 const ppc_avr_t *x[2] = { b, a }; 1296 #endif 1297 1298 VECTOR_FOR_INORDER_I(i, u64) { 1299 VECTOR_FOR_INORDER_I(j, u32) { 1300 uint32_t e = x[i]->u32[j]; 1301 1302 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1303 ((e >> 6) & 0x3e0) | 1304 ((e >> 3) & 0x1f)); 1305 } 1306 } 1307 *r = result; 1308 } 1309 1310 #define VPK(suffix, from, to, cvt, dosat) \ 1311 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1312 ppc_avr_t *a, ppc_avr_t *b) \ 1313 { \ 1314 int i; \ 1315 int sat = 0; \ 1316 ppc_avr_t result; \ 1317 ppc_avr_t *a0 = PKBIG ? a : b; \ 1318 ppc_avr_t *a1 = PKBIG ? b : a; \ 1319 \ 1320 VECTOR_FOR_INORDER_I(i, from) { \ 1321 result.to[i] = cvt(a0->from[i], &sat); \ 1322 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1323 } \ 1324 *r = result; \ 1325 if (dosat && sat) { \ 1326 set_vscr_sat(env); \ 1327 } \ 1328 } 1329 #define I(x, y) (x) 1330 VPK(shss, s16, s8, cvtshsb, 1) 1331 VPK(shus, s16, u8, cvtshub, 1) 1332 VPK(swss, s32, s16, cvtswsh, 1) 1333 VPK(swus, s32, u16, cvtswuh, 1) 1334 VPK(sdss, s64, s32, cvtsdsw, 1) 1335 VPK(sdus, s64, u32, cvtsduw, 1) 1336 VPK(uhus, u16, u8, cvtuhub, 1) 1337 VPK(uwus, u32, u16, cvtuwuh, 1) 1338 VPK(udus, u64, u32, cvtuduw, 1) 1339 VPK(uhum, u16, u8, I, 0) 1340 VPK(uwum, u32, u16, I, 0) 1341 VPK(udum, u64, u32, I, 0) 1342 #undef I 1343 #undef VPK 1344 #undef PKBIG 1345 1346 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1347 { 1348 int i; 1349 1350 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1351 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1352 } 1353 } 1354 1355 #define VRFI(suffix, rounding) \ 1356 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1357 ppc_avr_t *b) \ 1358 { \ 1359 int i; \ 1360 float_status s = env->vec_status; \ 1361 \ 1362 set_float_rounding_mode(rounding, &s); \ 1363 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1364 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1365 } \ 1366 } 1367 VRFI(n, float_round_nearest_even) 1368 VRFI(m, float_round_down) 1369 VRFI(p, float_round_up) 1370 VRFI(z, float_round_to_zero) 1371 #undef VRFI 1372 1373 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1374 { 1375 int i; 1376 1377 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1378 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1379 1380 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1381 } 1382 } 1383 1384 #define VRLMI(name, size, element, insert) \ 1385 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1386 { \ 1387 int i; \ 1388 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1389 uint##size##_t src1 = a->element[i]; \ 1390 uint##size##_t src2 = b->element[i]; \ 1391 uint##size##_t src3 = r->element[i]; \ 1392 uint##size##_t begin, end, shift, mask, rot_val; \ 1393 \ 1394 shift = extract##size(src2, 0, 6); \ 1395 end = extract##size(src2, 8, 6); \ 1396 begin = extract##size(src2, 16, 6); \ 1397 rot_val = rol##size(src1, shift); \ 1398 mask = mask_u##size(begin, end); \ 1399 if (insert) { \ 1400 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1401 } else { \ 1402 r->element[i] = (rot_val & mask); \ 1403 } \ 1404 } \ 1405 } 1406 1407 VRLMI(vrldmi, 64, u64, 1); 1408 VRLMI(vrlwmi, 32, u32, 1); 1409 VRLMI(vrldnm, 64, u64, 0); 1410 VRLMI(vrlwnm, 32, u32, 0); 1411 1412 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, 1413 ppc_avr_t *c) 1414 { 1415 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]); 1416 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]); 1417 } 1418 1419 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1420 { 1421 int i; 1422 1423 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1424 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status); 1425 } 1426 } 1427 1428 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1429 { 1430 int i; 1431 1432 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1433 r->f32[i] = float32_log2(b->f32[i], &env->vec_status); 1434 } 1435 } 1436 1437 #if defined(HOST_WORDS_BIGENDIAN) 1438 #define VEXTU_X_DO(name, size, left) \ 1439 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1440 { \ 1441 int index; \ 1442 if (left) { \ 1443 index = (a & 0xf) * 8; \ 1444 } else { \ 1445 index = ((15 - (a & 0xf) + 1) * 8) - size; \ 1446 } \ 1447 return int128_getlo(int128_rshift(b->s128, index)) & \ 1448 MAKE_64BIT_MASK(0, size); \ 1449 } 1450 #else 1451 #define VEXTU_X_DO(name, size, left) \ 1452 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1453 { \ 1454 int index; \ 1455 if (left) { \ 1456 index = ((15 - (a & 0xf) + 1) * 8) - size; \ 1457 } else { \ 1458 index = (a & 0xf) * 8; \ 1459 } \ 1460 return int128_getlo(int128_rshift(b->s128, index)) & \ 1461 MAKE_64BIT_MASK(0, size); \ 1462 } 1463 #endif 1464 1465 VEXTU_X_DO(vextublx, 8, 1) 1466 VEXTU_X_DO(vextuhlx, 16, 1) 1467 VEXTU_X_DO(vextuwlx, 32, 1) 1468 VEXTU_X_DO(vextubrx, 8, 0) 1469 VEXTU_X_DO(vextuhrx, 16, 0) 1470 VEXTU_X_DO(vextuwrx, 32, 0) 1471 #undef VEXTU_X_DO 1472 1473 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1474 { 1475 int i; 1476 unsigned int shift, bytes, size; 1477 1478 size = ARRAY_SIZE(r->u8); 1479 for (i = 0; i < size; i++) { 1480 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1481 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */ 1482 (((i + 1) < size) ? a->VsrB(i + 1) : 0); 1483 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */ 1484 } 1485 } 1486 1487 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1488 { 1489 int i; 1490 unsigned int shift, bytes; 1491 1492 /* 1493 * Use reverse order, as destination and source register can be 1494 * same. Its being modified in place saving temporary, reverse 1495 * order will guarantee that computed result is not fed back. 1496 */ 1497 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1498 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1499 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i); 1500 /* extract adjacent bytes */ 1501 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */ 1502 } 1503 } 1504 1505 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1506 { 1507 int sh = shift & 0xf; 1508 int i; 1509 ppc_avr_t result; 1510 1511 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1512 int index = sh + i; 1513 if (index > 0xf) { 1514 result.VsrB(i) = b->VsrB(index - 0x10); 1515 } else { 1516 result.VsrB(i) = a->VsrB(index); 1517 } 1518 } 1519 *r = result; 1520 } 1521 1522 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1523 { 1524 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1525 1526 #if defined(HOST_WORDS_BIGENDIAN) 1527 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1528 memset(&r->u8[16 - sh], 0, sh); 1529 #else 1530 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1531 memset(&r->u8[0], 0, sh); 1532 #endif 1533 } 1534 1535 #if defined(HOST_WORDS_BIGENDIAN) 1536 #define VINSERT(suffix, element) \ 1537 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1538 { \ 1539 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \ 1540 sizeof(r->element[0])); \ 1541 } 1542 #else 1543 #define VINSERT(suffix, element) \ 1544 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1545 { \ 1546 uint32_t d = (16 - index) - sizeof(r->element[0]); \ 1547 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \ 1548 } 1549 #endif 1550 VINSERT(b, u8) 1551 VINSERT(h, u16) 1552 VINSERT(w, u32) 1553 VINSERT(d, u64) 1554 #undef VINSERT 1555 #if defined(HOST_WORDS_BIGENDIAN) 1556 #define VEXTRACT(suffix, element) \ 1557 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1558 { \ 1559 uint32_t es = sizeof(r->element[0]); \ 1560 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1561 memset(&r->u8[8], 0, 8); \ 1562 memset(&r->u8[0], 0, 8 - es); \ 1563 } 1564 #else 1565 #define VEXTRACT(suffix, element) \ 1566 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1567 { \ 1568 uint32_t es = sizeof(r->element[0]); \ 1569 uint32_t s = (16 - index) - es; \ 1570 memmove(&r->u8[8], &b->u8[s], es); \ 1571 memset(&r->u8[0], 0, 8); \ 1572 memset(&r->u8[8 + es], 0, 8 - es); \ 1573 } 1574 #endif 1575 VEXTRACT(ub, u8) 1576 VEXTRACT(uh, u16) 1577 VEXTRACT(uw, u32) 1578 VEXTRACT(d, u64) 1579 #undef VEXTRACT 1580 1581 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt, 1582 ppc_vsr_t *xb, uint32_t index) 1583 { 1584 ppc_vsr_t t = { }; 1585 size_t es = sizeof(uint32_t); 1586 uint32_t ext_index; 1587 int i; 1588 1589 ext_index = index; 1590 for (i = 0; i < es; i++, ext_index++) { 1591 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16); 1592 } 1593 1594 *xt = t; 1595 } 1596 1597 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt, 1598 ppc_vsr_t *xb, uint32_t index) 1599 { 1600 ppc_vsr_t t = *xt; 1601 size_t es = sizeof(uint32_t); 1602 int ins_index, i = 0; 1603 1604 ins_index = index; 1605 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1606 t.VsrB(ins_index) = xb->VsrB(8 - es + i); 1607 } 1608 1609 *xt = t; 1610 } 1611 1612 #define VEXT_SIGNED(name, element, cast) \ 1613 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1614 { \ 1615 int i; \ 1616 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1617 r->element[i] = (cast)b->element[i]; \ 1618 } \ 1619 } 1620 VEXT_SIGNED(vextsb2w, s32, int8_t) 1621 VEXT_SIGNED(vextsb2d, s64, int8_t) 1622 VEXT_SIGNED(vextsh2w, s32, int16_t) 1623 VEXT_SIGNED(vextsh2d, s64, int16_t) 1624 VEXT_SIGNED(vextsw2d, s64, int32_t) 1625 #undef VEXT_SIGNED 1626 1627 #define VNEG(name, element) \ 1628 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ 1629 { \ 1630 int i; \ 1631 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1632 r->element[i] = -b->element[i]; \ 1633 } \ 1634 } 1635 VNEG(vnegw, s32) 1636 VNEG(vnegd, s64) 1637 #undef VNEG 1638 1639 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1640 { 1641 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1642 1643 #if defined(HOST_WORDS_BIGENDIAN) 1644 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1645 memset(&r->u8[0], 0, sh); 1646 #else 1647 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1648 memset(&r->u8[16 - sh], 0, sh); 1649 #endif 1650 } 1651 1652 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1653 { 1654 int i; 1655 1656 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1657 r->u32[i] = a->u32[i] >= b->u32[i]; 1658 } 1659 } 1660 1661 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1662 { 1663 int64_t t; 1664 int i, upper; 1665 ppc_avr_t result; 1666 int sat = 0; 1667 1668 upper = ARRAY_SIZE(r->s32) - 1; 1669 t = (int64_t)b->VsrSW(upper); 1670 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1671 t += a->VsrSW(i); 1672 result.VsrSW(i) = 0; 1673 } 1674 result.VsrSW(upper) = cvtsdsw(t, &sat); 1675 *r = result; 1676 1677 if (sat) { 1678 set_vscr_sat(env); 1679 } 1680 } 1681 1682 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1683 { 1684 int i, j, upper; 1685 ppc_avr_t result; 1686 int sat = 0; 1687 1688 upper = 1; 1689 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1690 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 1691 1692 result.VsrD(i) = 0; 1693 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 1694 t += a->VsrSW(2 * i + j); 1695 } 1696 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 1697 } 1698 1699 *r = result; 1700 if (sat) { 1701 set_vscr_sat(env); 1702 } 1703 } 1704 1705 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1706 { 1707 int i, j; 1708 int sat = 0; 1709 1710 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1711 int64_t t = (int64_t)b->s32[i]; 1712 1713 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 1714 t += a->s8[4 * i + j]; 1715 } 1716 r->s32[i] = cvtsdsw(t, &sat); 1717 } 1718 1719 if (sat) { 1720 set_vscr_sat(env); 1721 } 1722 } 1723 1724 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1725 { 1726 int sat = 0; 1727 int i; 1728 1729 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1730 int64_t t = (int64_t)b->s32[i]; 1731 1732 t += a->s16[2 * i] + a->s16[2 * i + 1]; 1733 r->s32[i] = cvtsdsw(t, &sat); 1734 } 1735 1736 if (sat) { 1737 set_vscr_sat(env); 1738 } 1739 } 1740 1741 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1742 { 1743 int i, j; 1744 int sat = 0; 1745 1746 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 1747 uint64_t t = (uint64_t)b->u32[i]; 1748 1749 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 1750 t += a->u8[4 * i + j]; 1751 } 1752 r->u32[i] = cvtuduw(t, &sat); 1753 } 1754 1755 if (sat) { 1756 set_vscr_sat(env); 1757 } 1758 } 1759 1760 #if defined(HOST_WORDS_BIGENDIAN) 1761 #define UPKHI 1 1762 #define UPKLO 0 1763 #else 1764 #define UPKHI 0 1765 #define UPKLO 1 1766 #endif 1767 #define VUPKPX(suffix, hi) \ 1768 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1769 { \ 1770 int i; \ 1771 ppc_avr_t result; \ 1772 \ 1773 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 1774 uint16_t e = b->u16[hi ? i : i + 4]; \ 1775 uint8_t a = (e >> 15) ? 0xff : 0; \ 1776 uint8_t r = (e >> 10) & 0x1f; \ 1777 uint8_t g = (e >> 5) & 0x1f; \ 1778 uint8_t b = e & 0x1f; \ 1779 \ 1780 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 1781 } \ 1782 *r = result; \ 1783 } 1784 VUPKPX(lpx, UPKLO) 1785 VUPKPX(hpx, UPKHI) 1786 #undef VUPKPX 1787 1788 #define VUPK(suffix, unpacked, packee, hi) \ 1789 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 1790 { \ 1791 int i; \ 1792 ppc_avr_t result; \ 1793 \ 1794 if (hi) { \ 1795 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 1796 result.unpacked[i] = b->packee[i]; \ 1797 } \ 1798 } else { \ 1799 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 1800 i++) { \ 1801 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 1802 } \ 1803 } \ 1804 *r = result; \ 1805 } 1806 VUPK(hsb, s16, s8, UPKHI) 1807 VUPK(hsh, s32, s16, UPKHI) 1808 VUPK(hsw, s64, s32, UPKHI) 1809 VUPK(lsb, s16, s8, UPKLO) 1810 VUPK(lsh, s32, s16, UPKLO) 1811 VUPK(lsw, s64, s32, UPKLO) 1812 #undef VUPK 1813 #undef UPKHI 1814 #undef UPKLO 1815 1816 #define VGENERIC_DO(name, element) \ 1817 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 1818 { \ 1819 int i; \ 1820 \ 1821 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1822 r->element[i] = name(b->element[i]); \ 1823 } \ 1824 } 1825 1826 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 1827 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 1828 1829 VGENERIC_DO(clzb, u8) 1830 VGENERIC_DO(clzh, u16) 1831 1832 #undef clzb 1833 #undef clzh 1834 1835 #define ctzb(v) ((v) ? ctz32(v) : 8) 1836 #define ctzh(v) ((v) ? ctz32(v) : 16) 1837 #define ctzw(v) ctz32((v)) 1838 #define ctzd(v) ctz64((v)) 1839 1840 VGENERIC_DO(ctzb, u8) 1841 VGENERIC_DO(ctzh, u16) 1842 VGENERIC_DO(ctzw, u32) 1843 VGENERIC_DO(ctzd, u64) 1844 1845 #undef ctzb 1846 #undef ctzh 1847 #undef ctzw 1848 #undef ctzd 1849 1850 #define popcntb(v) ctpop8(v) 1851 #define popcnth(v) ctpop16(v) 1852 #define popcntw(v) ctpop32(v) 1853 #define popcntd(v) ctpop64(v) 1854 1855 VGENERIC_DO(popcntb, u8) 1856 VGENERIC_DO(popcnth, u16) 1857 VGENERIC_DO(popcntw, u32) 1858 VGENERIC_DO(popcntd, u64) 1859 1860 #undef popcntb 1861 #undef popcnth 1862 #undef popcntw 1863 #undef popcntd 1864 1865 #undef VGENERIC_DO 1866 1867 #if defined(HOST_WORDS_BIGENDIAN) 1868 #define QW_ONE { .u64 = { 0, 1 } } 1869 #else 1870 #define QW_ONE { .u64 = { 1, 0 } } 1871 #endif 1872 1873 #ifndef CONFIG_INT128 1874 1875 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) 1876 { 1877 t->u64[0] = ~a.u64[0]; 1878 t->u64[1] = ~a.u64[1]; 1879 } 1880 1881 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) 1882 { 1883 if (a.VsrD(0) < b.VsrD(0)) { 1884 return -1; 1885 } else if (a.VsrD(0) > b.VsrD(0)) { 1886 return 1; 1887 } else if (a.VsrD(1) < b.VsrD(1)) { 1888 return -1; 1889 } else if (a.VsrD(1) > b.VsrD(1)) { 1890 return 1; 1891 } else { 1892 return 0; 1893 } 1894 } 1895 1896 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 1897 { 1898 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 1899 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 1900 (~a.VsrD(1) < b.VsrD(1)); 1901 } 1902 1903 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) 1904 { 1905 ppc_avr_t not_a; 1906 t->VsrD(1) = a.VsrD(1) + b.VsrD(1); 1907 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) + 1908 (~a.VsrD(1) < b.VsrD(1)); 1909 avr_qw_not(¬_a, a); 1910 return avr_qw_cmpu(not_a, b) < 0; 1911 } 1912 1913 #endif 1914 1915 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1916 { 1917 #ifdef CONFIG_INT128 1918 r->u128 = a->u128 + b->u128; 1919 #else 1920 avr_qw_add(r, *a, *b); 1921 #endif 1922 } 1923 1924 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1925 { 1926 #ifdef CONFIG_INT128 1927 r->u128 = a->u128 + b->u128 + (c->u128 & 1); 1928 #else 1929 1930 if (c->VsrD(1) & 1) { 1931 ppc_avr_t tmp; 1932 1933 tmp.VsrD(0) = 0; 1934 tmp.VsrD(1) = c->VsrD(1) & 1; 1935 avr_qw_add(&tmp, *a, tmp); 1936 avr_qw_add(r, tmp, *b); 1937 } else { 1938 avr_qw_add(r, *a, *b); 1939 } 1940 #endif 1941 } 1942 1943 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1944 { 1945 #ifdef CONFIG_INT128 1946 r->u128 = (~a->u128 < b->u128); 1947 #else 1948 ppc_avr_t not_a; 1949 1950 avr_qw_not(¬_a, *a); 1951 1952 r->VsrD(0) = 0; 1953 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0); 1954 #endif 1955 } 1956 1957 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1958 { 1959 #ifdef CONFIG_INT128 1960 int carry_out = (~a->u128 < b->u128); 1961 if (!carry_out && (c->u128 & 1)) { 1962 carry_out = ((a->u128 + b->u128 + 1) == 0) && 1963 ((a->u128 != 0) || (b->u128 != 0)); 1964 } 1965 r->u128 = carry_out; 1966 #else 1967 1968 int carry_in = c->VsrD(1) & 1; 1969 int carry_out = 0; 1970 ppc_avr_t tmp; 1971 1972 carry_out = avr_qw_addc(&tmp, *a, *b); 1973 1974 if (!carry_out && carry_in) { 1975 ppc_avr_t one = QW_ONE; 1976 carry_out = avr_qw_addc(&tmp, tmp, one); 1977 } 1978 r->VsrD(0) = 0; 1979 r->VsrD(1) = carry_out; 1980 #endif 1981 } 1982 1983 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1984 { 1985 #ifdef CONFIG_INT128 1986 r->u128 = a->u128 - b->u128; 1987 #else 1988 ppc_avr_t tmp; 1989 ppc_avr_t one = QW_ONE; 1990 1991 avr_qw_not(&tmp, *b); 1992 avr_qw_add(&tmp, *a, tmp); 1993 avr_qw_add(r, tmp, one); 1994 #endif 1995 } 1996 1997 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1998 { 1999 #ifdef CONFIG_INT128 2000 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); 2001 #else 2002 ppc_avr_t tmp, sum; 2003 2004 avr_qw_not(&tmp, *b); 2005 avr_qw_add(&sum, *a, tmp); 2006 2007 tmp.VsrD(0) = 0; 2008 tmp.VsrD(1) = c->VsrD(1) & 1; 2009 avr_qw_add(r, sum, tmp); 2010 #endif 2011 } 2012 2013 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2014 { 2015 #ifdef CONFIG_INT128 2016 r->u128 = (~a->u128 < ~b->u128) || 2017 (a->u128 + ~b->u128 == (__uint128_t)-1); 2018 #else 2019 int carry = (avr_qw_cmpu(*a, *b) > 0); 2020 if (!carry) { 2021 ppc_avr_t tmp; 2022 avr_qw_not(&tmp, *b); 2023 avr_qw_add(&tmp, *a, tmp); 2024 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull)); 2025 } 2026 r->VsrD(0) = 0; 2027 r->VsrD(1) = carry; 2028 #endif 2029 } 2030 2031 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2032 { 2033 #ifdef CONFIG_INT128 2034 r->u128 = 2035 (~a->u128 < ~b->u128) || 2036 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); 2037 #else 2038 int carry_in = c->VsrD(1) & 1; 2039 int carry_out = (avr_qw_cmpu(*a, *b) > 0); 2040 if (!carry_out && carry_in) { 2041 ppc_avr_t tmp; 2042 avr_qw_not(&tmp, *b); 2043 avr_qw_add(&tmp, *a, tmp); 2044 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull)); 2045 } 2046 2047 r->VsrD(0) = 0; 2048 r->VsrD(1) = carry_out; 2049 #endif 2050 } 2051 2052 #define BCD_PLUS_PREF_1 0xC 2053 #define BCD_PLUS_PREF_2 0xF 2054 #define BCD_PLUS_ALT_1 0xA 2055 #define BCD_NEG_PREF 0xD 2056 #define BCD_NEG_ALT 0xB 2057 #define BCD_PLUS_ALT_2 0xE 2058 #define NATIONAL_PLUS 0x2B 2059 #define NATIONAL_NEG 0x2D 2060 2061 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2062 2063 static int bcd_get_sgn(ppc_avr_t *bcd) 2064 { 2065 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) { 2066 case BCD_PLUS_PREF_1: 2067 case BCD_PLUS_PREF_2: 2068 case BCD_PLUS_ALT_1: 2069 case BCD_PLUS_ALT_2: 2070 { 2071 return 1; 2072 } 2073 2074 case BCD_NEG_PREF: 2075 case BCD_NEG_ALT: 2076 { 2077 return -1; 2078 } 2079 2080 default: 2081 { 2082 return 0; 2083 } 2084 } 2085 } 2086 2087 static int bcd_preferred_sgn(int sgn, int ps) 2088 { 2089 if (sgn >= 0) { 2090 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; 2091 } else { 2092 return BCD_NEG_PREF; 2093 } 2094 } 2095 2096 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) 2097 { 2098 uint8_t result; 2099 if (n & 1) { 2100 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4; 2101 } else { 2102 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF; 2103 } 2104 2105 if (unlikely(result > 9)) { 2106 *invalid = true; 2107 } 2108 return result; 2109 } 2110 2111 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) 2112 { 2113 if (n & 1) { 2114 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F; 2115 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4); 2116 } else { 2117 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0; 2118 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit; 2119 } 2120 } 2121 2122 static bool bcd_is_valid(ppc_avr_t *bcd) 2123 { 2124 int i; 2125 int invalid = 0; 2126 2127 if (bcd_get_sgn(bcd) == 0) { 2128 return false; 2129 } 2130 2131 for (i = 1; i < 32; i++) { 2132 bcd_get_digit(bcd, i, &invalid); 2133 if (unlikely(invalid)) { 2134 return false; 2135 } 2136 } 2137 return true; 2138 } 2139 2140 static int bcd_cmp_zero(ppc_avr_t *bcd) 2141 { 2142 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) { 2143 return CRF_EQ; 2144 } else { 2145 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT; 2146 } 2147 } 2148 2149 static uint16_t get_national_digit(ppc_avr_t *reg, int n) 2150 { 2151 return reg->VsrH(7 - n); 2152 } 2153 2154 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) 2155 { 2156 reg->VsrH(7 - n) = val; 2157 } 2158 2159 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) 2160 { 2161 int i; 2162 int invalid = 0; 2163 for (i = 31; i > 0; i--) { 2164 uint8_t dig_a = bcd_get_digit(a, i, &invalid); 2165 uint8_t dig_b = bcd_get_digit(b, i, &invalid); 2166 if (unlikely(invalid)) { 2167 return 0; /* doesn't matter */ 2168 } else if (dig_a > dig_b) { 2169 return 1; 2170 } else if (dig_a < dig_b) { 2171 return -1; 2172 } 2173 } 2174 2175 return 0; 2176 } 2177 2178 static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2179 int *overflow) 2180 { 2181 int carry = 0; 2182 int i; 2183 for (i = 1; i <= 31; i++) { 2184 uint8_t digit = bcd_get_digit(a, i, invalid) + 2185 bcd_get_digit(b, i, invalid) + carry; 2186 if (digit > 9) { 2187 carry = 1; 2188 digit -= 10; 2189 } else { 2190 carry = 0; 2191 } 2192 2193 bcd_put_digit(t, digit, i); 2194 } 2195 2196 *overflow = carry; 2197 } 2198 2199 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, 2200 int *overflow) 2201 { 2202 int carry = 0; 2203 int i; 2204 2205 for (i = 1; i <= 31; i++) { 2206 uint8_t digit = bcd_get_digit(a, i, invalid) - 2207 bcd_get_digit(b, i, invalid) + carry; 2208 if (digit & 0x80) { 2209 carry = -1; 2210 digit += 10; 2211 } else { 2212 carry = 0; 2213 } 2214 2215 bcd_put_digit(t, digit, i); 2216 } 2217 2218 *overflow = carry; 2219 } 2220 2221 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2222 { 2223 2224 int sgna = bcd_get_sgn(a); 2225 int sgnb = bcd_get_sgn(b); 2226 int invalid = (sgna == 0) || (sgnb == 0); 2227 int overflow = 0; 2228 uint32_t cr = 0; 2229 ppc_avr_t result = { .u64 = { 0, 0 } }; 2230 2231 if (!invalid) { 2232 if (sgna == sgnb) { 2233 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2234 bcd_add_mag(&result, a, b, &invalid, &overflow); 2235 cr = bcd_cmp_zero(&result); 2236 } else { 2237 int magnitude = bcd_cmp_mag(a, b); 2238 if (magnitude > 0) { 2239 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps); 2240 bcd_sub_mag(&result, a, b, &invalid, &overflow); 2241 cr = (sgna > 0) ? CRF_GT : CRF_LT; 2242 } else if (magnitude < 0) { 2243 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps); 2244 bcd_sub_mag(&result, b, a, &invalid, &overflow); 2245 cr = (sgnb > 0) ? CRF_GT : CRF_LT; 2246 } else { 2247 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps); 2248 cr = CRF_EQ; 2249 } 2250 } 2251 } 2252 2253 if (unlikely(invalid)) { 2254 result.VsrD(0) = result.VsrD(1) = -1; 2255 cr = CRF_SO; 2256 } else if (overflow) { 2257 cr |= CRF_SO; 2258 } 2259 2260 *r = result; 2261 2262 return cr; 2263 } 2264 2265 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2266 { 2267 ppc_avr_t bcopy = *b; 2268 int sgnb = bcd_get_sgn(b); 2269 if (sgnb < 0) { 2270 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); 2271 } else if (sgnb > 0) { 2272 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); 2273 } 2274 /* else invalid ... defer to bcdadd code for proper handling */ 2275 2276 return helper_bcdadd(r, a, &bcopy, ps); 2277 } 2278 2279 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2280 { 2281 int i; 2282 int cr = 0; 2283 uint16_t national = 0; 2284 uint16_t sgnb = get_national_digit(b, 0); 2285 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2286 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); 2287 2288 for (i = 1; i < 8; i++) { 2289 national = get_national_digit(b, i); 2290 if (unlikely(national < 0x30 || national > 0x39)) { 2291 invalid = 1; 2292 break; 2293 } 2294 2295 bcd_put_digit(&ret, national & 0xf, i); 2296 } 2297 2298 if (sgnb == NATIONAL_PLUS) { 2299 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); 2300 } else { 2301 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2302 } 2303 2304 cr = bcd_cmp_zero(&ret); 2305 2306 if (unlikely(invalid)) { 2307 cr = CRF_SO; 2308 } 2309 2310 *r = ret; 2311 2312 return cr; 2313 } 2314 2315 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2316 { 2317 int i; 2318 int cr = 0; 2319 int sgnb = bcd_get_sgn(b); 2320 int invalid = (sgnb == 0); 2321 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2322 2323 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0); 2324 2325 for (i = 1; i < 8; i++) { 2326 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); 2327 2328 if (unlikely(invalid)) { 2329 break; 2330 } 2331 } 2332 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); 2333 2334 cr = bcd_cmp_zero(b); 2335 2336 if (ox_flag) { 2337 cr |= CRF_SO; 2338 } 2339 2340 if (unlikely(invalid)) { 2341 cr = CRF_SO; 2342 } 2343 2344 *r = ret; 2345 2346 return cr; 2347 } 2348 2349 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2350 { 2351 int i; 2352 int cr = 0; 2353 int invalid = 0; 2354 int zone_digit = 0; 2355 int zone_lead = ps ? 0xF : 0x3; 2356 int digit = 0; 2357 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2358 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4; 2359 2360 if (unlikely((sgnb < 0xA) && ps)) { 2361 invalid = 1; 2362 } 2363 2364 for (i = 0; i < 16; i++) { 2365 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead; 2366 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF; 2367 if (unlikely(zone_digit != zone_lead || digit > 0x9)) { 2368 invalid = 1; 2369 break; 2370 } 2371 2372 bcd_put_digit(&ret, digit, i + 1); 2373 } 2374 2375 if ((ps && (sgnb == 0xB || sgnb == 0xD)) || 2376 (!ps && (sgnb & 0x4))) { 2377 bcd_put_digit(&ret, BCD_NEG_PREF, 0); 2378 } else { 2379 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0); 2380 } 2381 2382 cr = bcd_cmp_zero(&ret); 2383 2384 if (unlikely(invalid)) { 2385 cr = CRF_SO; 2386 } 2387 2388 *r = ret; 2389 2390 return cr; 2391 } 2392 2393 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2394 { 2395 int i; 2396 int cr = 0; 2397 uint8_t digit = 0; 2398 int sgnb = bcd_get_sgn(b); 2399 int zone_lead = (ps) ? 0xF0 : 0x30; 2400 int invalid = (sgnb == 0); 2401 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2402 2403 int ox_flag = ((b->VsrD(0) >> 4) != 0); 2404 2405 for (i = 0; i < 16; i++) { 2406 digit = bcd_get_digit(b, i + 1, &invalid); 2407 2408 if (unlikely(invalid)) { 2409 break; 2410 } 2411 2412 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit; 2413 } 2414 2415 if (ps) { 2416 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1); 2417 } else { 2418 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1); 2419 } 2420 2421 cr = bcd_cmp_zero(b); 2422 2423 if (ox_flag) { 2424 cr |= CRF_SO; 2425 } 2426 2427 if (unlikely(invalid)) { 2428 cr = CRF_SO; 2429 } 2430 2431 *r = ret; 2432 2433 return cr; 2434 } 2435 2436 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2437 { 2438 int i; 2439 int cr = 0; 2440 uint64_t lo_value; 2441 uint64_t hi_value; 2442 ppc_avr_t ret = { .u64 = { 0, 0 } }; 2443 2444 if (b->VsrSD(0) < 0) { 2445 lo_value = -b->VsrSD(1); 2446 hi_value = ~b->VsrD(0) + !lo_value; 2447 bcd_put_digit(&ret, 0xD, 0); 2448 } else { 2449 lo_value = b->VsrD(1); 2450 hi_value = b->VsrD(0); 2451 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); 2452 } 2453 2454 if (divu128(&lo_value, &hi_value, 1000000000000000ULL) || 2455 lo_value > 9999999999999999ULL) { 2456 cr = CRF_SO; 2457 } 2458 2459 for (i = 1; i < 16; hi_value /= 10, i++) { 2460 bcd_put_digit(&ret, hi_value % 10, i); 2461 } 2462 2463 for (; i < 32; lo_value /= 10, i++) { 2464 bcd_put_digit(&ret, lo_value % 10, i); 2465 } 2466 2467 cr |= bcd_cmp_zero(&ret); 2468 2469 *r = ret; 2470 2471 return cr; 2472 } 2473 2474 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2475 { 2476 uint8_t i; 2477 int cr; 2478 uint64_t carry; 2479 uint64_t unused; 2480 uint64_t lo_value; 2481 uint64_t hi_value = 0; 2482 int sgnb = bcd_get_sgn(b); 2483 int invalid = (sgnb == 0); 2484 2485 lo_value = bcd_get_digit(b, 31, &invalid); 2486 for (i = 30; i > 0; i--) { 2487 mulu64(&lo_value, &carry, lo_value, 10ULL); 2488 mulu64(&hi_value, &unused, hi_value, 10ULL); 2489 lo_value += bcd_get_digit(b, i, &invalid); 2490 hi_value += carry; 2491 2492 if (unlikely(invalid)) { 2493 break; 2494 } 2495 } 2496 2497 if (sgnb == -1) { 2498 r->VsrSD(1) = -lo_value; 2499 r->VsrSD(0) = ~hi_value + !r->VsrSD(1); 2500 } else { 2501 r->VsrSD(1) = lo_value; 2502 r->VsrSD(0) = hi_value; 2503 } 2504 2505 cr = bcd_cmp_zero(b); 2506 2507 if (unlikely(invalid)) { 2508 cr = CRF_SO; 2509 } 2510 2511 return cr; 2512 } 2513 2514 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2515 { 2516 int i; 2517 int invalid = 0; 2518 2519 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) { 2520 return CRF_SO; 2521 } 2522 2523 *r = *a; 2524 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0); 2525 2526 for (i = 1; i < 32; i++) { 2527 bcd_get_digit(a, i, &invalid); 2528 bcd_get_digit(b, i, &invalid); 2529 if (unlikely(invalid)) { 2530 return CRF_SO; 2531 } 2532 } 2533 2534 return bcd_cmp_zero(r); 2535 } 2536 2537 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) 2538 { 2539 int sgnb = bcd_get_sgn(b); 2540 2541 *r = *b; 2542 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0); 2543 2544 if (bcd_is_valid(b) == false) { 2545 return CRF_SO; 2546 } 2547 2548 return bcd_cmp_zero(r); 2549 } 2550 2551 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2552 { 2553 int cr; 2554 int i = a->VsrSB(7); 2555 bool ox_flag = false; 2556 int sgnb = bcd_get_sgn(b); 2557 ppc_avr_t ret = *b; 2558 ret.VsrD(1) &= ~0xf; 2559 2560 if (bcd_is_valid(b) == false) { 2561 return CRF_SO; 2562 } 2563 2564 if (unlikely(i > 31)) { 2565 i = 31; 2566 } else if (unlikely(i < -31)) { 2567 i = -31; 2568 } 2569 2570 if (i > 0) { 2571 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2572 } else { 2573 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2574 } 2575 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2576 2577 *r = ret; 2578 2579 cr = bcd_cmp_zero(r); 2580 if (ox_flag) { 2581 cr |= CRF_SO; 2582 } 2583 2584 return cr; 2585 } 2586 2587 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2588 { 2589 int cr; 2590 int i; 2591 int invalid = 0; 2592 bool ox_flag = false; 2593 ppc_avr_t ret = *b; 2594 2595 for (i = 0; i < 32; i++) { 2596 bcd_get_digit(b, i, &invalid); 2597 2598 if (unlikely(invalid)) { 2599 return CRF_SO; 2600 } 2601 } 2602 2603 i = a->VsrSB(7); 2604 if (i >= 32) { 2605 ox_flag = true; 2606 ret.VsrD(1) = ret.VsrD(0) = 0; 2607 } else if (i <= -32) { 2608 ret.VsrD(1) = ret.VsrD(0) = 0; 2609 } else if (i > 0) { 2610 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2611 } else { 2612 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2613 } 2614 *r = ret; 2615 2616 cr = bcd_cmp_zero(r); 2617 if (ox_flag) { 2618 cr |= CRF_SO; 2619 } 2620 2621 return cr; 2622 } 2623 2624 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2625 { 2626 int cr; 2627 int unused = 0; 2628 int invalid = 0; 2629 bool ox_flag = false; 2630 int sgnb = bcd_get_sgn(b); 2631 ppc_avr_t ret = *b; 2632 ret.VsrD(1) &= ~0xf; 2633 2634 int i = a->VsrSB(7); 2635 ppc_avr_t bcd_one; 2636 2637 bcd_one.VsrD(0) = 0; 2638 bcd_one.VsrD(1) = 0x10; 2639 2640 if (bcd_is_valid(b) == false) { 2641 return CRF_SO; 2642 } 2643 2644 if (unlikely(i > 31)) { 2645 i = 31; 2646 } else if (unlikely(i < -31)) { 2647 i = -31; 2648 } 2649 2650 if (i > 0) { 2651 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag); 2652 } else { 2653 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4); 2654 2655 if (bcd_get_digit(&ret, 0, &invalid) >= 5) { 2656 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused); 2657 } 2658 } 2659 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0); 2660 2661 cr = bcd_cmp_zero(&ret); 2662 if (ox_flag) { 2663 cr |= CRF_SO; 2664 } 2665 *r = ret; 2666 2667 return cr; 2668 } 2669 2670 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2671 { 2672 uint64_t mask; 2673 uint32_t ox_flag = 0; 2674 int i = a->VsrSH(3) + 1; 2675 ppc_avr_t ret = *b; 2676 2677 if (bcd_is_valid(b) == false) { 2678 return CRF_SO; 2679 } 2680 2681 if (i > 16 && i < 32) { 2682 mask = (uint64_t)-1 >> (128 - i * 4); 2683 if (ret.VsrD(0) & ~mask) { 2684 ox_flag = CRF_SO; 2685 } 2686 2687 ret.VsrD(0) &= mask; 2688 } else if (i >= 0 && i <= 16) { 2689 mask = (uint64_t)-1 >> (64 - i * 4); 2690 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 2691 ox_flag = CRF_SO; 2692 } 2693 2694 ret.VsrD(1) &= mask; 2695 ret.VsrD(0) = 0; 2696 } 2697 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0); 2698 *r = ret; 2699 2700 return bcd_cmp_zero(&ret) | ox_flag; 2701 } 2702 2703 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) 2704 { 2705 int i; 2706 uint64_t mask; 2707 uint32_t ox_flag = 0; 2708 int invalid = 0; 2709 ppc_avr_t ret = *b; 2710 2711 for (i = 0; i < 32; i++) { 2712 bcd_get_digit(b, i, &invalid); 2713 2714 if (unlikely(invalid)) { 2715 return CRF_SO; 2716 } 2717 } 2718 2719 i = a->VsrSH(3); 2720 if (i > 16 && i < 33) { 2721 mask = (uint64_t)-1 >> (128 - i * 4); 2722 if (ret.VsrD(0) & ~mask) { 2723 ox_flag = CRF_SO; 2724 } 2725 2726 ret.VsrD(0) &= mask; 2727 } else if (i > 0 && i <= 16) { 2728 mask = (uint64_t)-1 >> (64 - i * 4); 2729 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) { 2730 ox_flag = CRF_SO; 2731 } 2732 2733 ret.VsrD(1) &= mask; 2734 ret.VsrD(0) = 0; 2735 } else if (i == 0) { 2736 if (ret.VsrD(0) || ret.VsrD(1)) { 2737 ox_flag = CRF_SO; 2738 } 2739 ret.VsrD(0) = ret.VsrD(1) = 0; 2740 } 2741 2742 *r = ret; 2743 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) { 2744 return ox_flag | CRF_EQ; 2745 } 2746 2747 return ox_flag | CRF_GT; 2748 } 2749 2750 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) 2751 { 2752 int i; 2753 VECTOR_FOR_INORDER_I(i, u8) { 2754 r->u8[i] = AES_sbox[a->u8[i]]; 2755 } 2756 } 2757 2758 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2759 { 2760 ppc_avr_t result; 2761 int i; 2762 2763 VECTOR_FOR_INORDER_I(i, u32) { 2764 result.VsrW(i) = b->VsrW(i) ^ 2765 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^ 2766 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^ 2767 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^ 2768 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]); 2769 } 2770 *r = result; 2771 } 2772 2773 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2774 { 2775 ppc_avr_t result; 2776 int i; 2777 2778 VECTOR_FOR_INORDER_I(i, u8) { 2779 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]); 2780 } 2781 *r = result; 2782 } 2783 2784 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2785 { 2786 /* This differs from what is written in ISA V2.07. The RTL is */ 2787 /* incorrect and will be fixed in V2.07B. */ 2788 int i; 2789 ppc_avr_t tmp; 2790 2791 VECTOR_FOR_INORDER_I(i, u8) { 2792 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])]; 2793 } 2794 2795 VECTOR_FOR_INORDER_I(i, u32) { 2796 r->VsrW(i) = 2797 AES_imc[tmp.VsrB(4 * i + 0)][0] ^ 2798 AES_imc[tmp.VsrB(4 * i + 1)][1] ^ 2799 AES_imc[tmp.VsrB(4 * i + 2)][2] ^ 2800 AES_imc[tmp.VsrB(4 * i + 3)][3]; 2801 } 2802 } 2803 2804 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2805 { 2806 ppc_avr_t result; 2807 int i; 2808 2809 VECTOR_FOR_INORDER_I(i, u8) { 2810 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]); 2811 } 2812 *r = result; 2813 } 2814 2815 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 2816 { 2817 int st = (st_six & 0x10) != 0; 2818 int six = st_six & 0xF; 2819 int i; 2820 2821 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2822 if (st == 0) { 2823 if ((six & (0x8 >> i)) == 0) { 2824 r->VsrW(i) = ror32(a->VsrW(i), 7) ^ 2825 ror32(a->VsrW(i), 18) ^ 2826 (a->VsrW(i) >> 3); 2827 } else { /* six.bit[i] == 1 */ 2828 r->VsrW(i) = ror32(a->VsrW(i), 17) ^ 2829 ror32(a->VsrW(i), 19) ^ 2830 (a->VsrW(i) >> 10); 2831 } 2832 } else { /* st == 1 */ 2833 if ((six & (0x8 >> i)) == 0) { 2834 r->VsrW(i) = ror32(a->VsrW(i), 2) ^ 2835 ror32(a->VsrW(i), 13) ^ 2836 ror32(a->VsrW(i), 22); 2837 } else { /* six.bit[i] == 1 */ 2838 r->VsrW(i) = ror32(a->VsrW(i), 6) ^ 2839 ror32(a->VsrW(i), 11) ^ 2840 ror32(a->VsrW(i), 25); 2841 } 2842 } 2843 } 2844 } 2845 2846 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) 2847 { 2848 int st = (st_six & 0x10) != 0; 2849 int six = st_six & 0xF; 2850 int i; 2851 2852 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 2853 if (st == 0) { 2854 if ((six & (0x8 >> (2 * i))) == 0) { 2855 r->VsrD(i) = ror64(a->VsrD(i), 1) ^ 2856 ror64(a->VsrD(i), 8) ^ 2857 (a->VsrD(i) >> 7); 2858 } else { /* six.bit[2*i] == 1 */ 2859 r->VsrD(i) = ror64(a->VsrD(i), 19) ^ 2860 ror64(a->VsrD(i), 61) ^ 2861 (a->VsrD(i) >> 6); 2862 } 2863 } else { /* st == 1 */ 2864 if ((six & (0x8 >> (2 * i))) == 0) { 2865 r->VsrD(i) = ror64(a->VsrD(i), 28) ^ 2866 ror64(a->VsrD(i), 34) ^ 2867 ror64(a->VsrD(i), 39); 2868 } else { /* six.bit[2*i] == 1 */ 2869 r->VsrD(i) = ror64(a->VsrD(i), 14) ^ 2870 ror64(a->VsrD(i), 18) ^ 2871 ror64(a->VsrD(i), 41); 2872 } 2873 } 2874 } 2875 } 2876 2877 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2878 { 2879 ppc_avr_t result; 2880 int i; 2881 2882 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 2883 int indexA = c->VsrB(i) >> 4; 2884 int indexB = c->VsrB(i) & 0xF; 2885 2886 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB); 2887 } 2888 *r = result; 2889 } 2890 2891 #undef VECTOR_FOR_INORDER_I 2892 2893 /*****************************************************************************/ 2894 /* SPE extension helpers */ 2895 /* Use a table to make this quicker */ 2896 static const uint8_t hbrev[16] = { 2897 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE, 2898 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF, 2899 }; 2900 2901 static inline uint8_t byte_reverse(uint8_t val) 2902 { 2903 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4); 2904 } 2905 2906 static inline uint32_t word_reverse(uint32_t val) 2907 { 2908 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) | 2909 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24); 2910 } 2911 2912 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */ 2913 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2) 2914 { 2915 uint32_t a, b, d, mask; 2916 2917 mask = UINT32_MAX >> (32 - MASKBITS); 2918 a = arg1 & mask; 2919 b = arg2 & mask; 2920 d = word_reverse(1 + word_reverse(a | ~b)); 2921 return (arg1 & ~mask) | (d & b); 2922 } 2923 2924 uint32_t helper_cntlsw32(uint32_t val) 2925 { 2926 if (val & 0x80000000) { 2927 return clz32(~val); 2928 } else { 2929 return clz32(val); 2930 } 2931 } 2932 2933 uint32_t helper_cntlzw32(uint32_t val) 2934 { 2935 return clz32(val); 2936 } 2937 2938 /* 440 specific */ 2939 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, 2940 target_ulong low, uint32_t update_Rc) 2941 { 2942 target_ulong mask; 2943 int i; 2944 2945 i = 1; 2946 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 2947 if ((high & mask) == 0) { 2948 if (update_Rc) { 2949 env->crf[0] = 0x4; 2950 } 2951 goto done; 2952 } 2953 i++; 2954 } 2955 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) { 2956 if ((low & mask) == 0) { 2957 if (update_Rc) { 2958 env->crf[0] = 0x8; 2959 } 2960 goto done; 2961 } 2962 i++; 2963 } 2964 i = 8; 2965 if (update_Rc) { 2966 env->crf[0] = 0x2; 2967 } 2968 done: 2969 env->xer = (env->xer & ~0x7F) | i; 2970 if (update_Rc) { 2971 env->crf[0] |= xer_so; 2972 } 2973 return i; 2974 } 2975