/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}
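
/*
 * Note: divweu/divwe implement the "divide word extended" forms: the 32-bit
 * dividend in RA is shifted left 32 bits before dividing, e.g. divweu with
 * ra = 1, rb = 4 yields (1 << 32) / 4 = 0x40000000. Quotients that do not
 * fit in 32 bits are reported as overflow, and the architecture leaves the
 * returned value undefined (0 here).
 */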

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/*
 * Subtract 1 from each byte, AND with the inverse, and check whether the
 * MSB is set in each byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n) (haszero((x) ^ pattern(n)))

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

/*
 * Return a random number.
 */
uint64_t helper_darn32(void)
{
    Error *err = NULL;
    uint32_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_darn64(void)
{
    Error *err = NULL;
    uint64_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & PPC_BIT(index)) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}
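
/*
 * Example: cmpb with rs = 0x1122334455667788 and rb = 0x1122000055660000
 * returns 0xffff0000ffff0000 -- each result byte is 0xff where the
 * corresponding bytes of rs and rb are equal, and 0x00 otherwise.
 */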

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca32 = env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca32 = env->ca = (ret != 0);
    }
    return ret;
}
#endif
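
/*
 * For the arithmetic right shifts above, CA (and CA32) is set only when the
 * source is negative and at least one 1 bit is shifted out; e.g.
 * sraw(-5, 1) yields -3 with CA = 1, since bit 0 of -5 is lost.
 */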

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words.  */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}
#endif

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}
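
/*
 * In the POWER bridge div/divo forms above, the 64-bit dividend is formed by
 * concatenating RA with the MQ special register (RA || MQ) and the remainder
 * is written back to MQ; divs/divso use RA alone as a 32-bit dividend.
 */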

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/*
 * Extremely decomposed:
 *     return 256 * log10(10 ** (-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c.inc"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif
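
/*
 * VECTOR_FOR_INORDER_I walks the host-order element array in whichever
 * direction yields the guest's element order, so loop bodies see the
 * elements in the same order regardless of host endianness.
 */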

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
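
/*
 * Example: cvtsdsw(0x123456789LL, &sat) clamps to INT32_MAX and sets
 * *sat = 1, while cvtsdsw(-42, &sat) returns -42 and leaves *sat alone.
 */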

void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
{
    ppc_store_vscr(env, vscr);
}

uint32_t helper_mfvscr(CPUPPCState *env)
{
    return ppc_get_vscr(env);
}

static inline void set_vscr_sat(CPUPPCState *env)
{
    /* The choice of non-zero value is arbitrary.  */
    env->vscr_sat.u32[0] = 1;
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->VsrD(1) = res & 1;
    r->VsrD(0) = 0;
}

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status);   \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
                                       type, &env->vec_status);         \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat,              \
                        ppc_avr_t *a, ppc_avr_t *b, uint32_t desc)      \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            VARITHSAT_CASE(optype, op, cvt, element);                   \
        }                                                               \
        if (sat) {                                                      \
            vscr_sat->u32[0] = 1;                                       \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)                  \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)              \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)                \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)              \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG

#define VABSDU_DO(name, element)                                        \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = (a->element[i] > b->element[i]) ?           \
                (a->element[i] - b->element[i]) :                       \
                (b->element[i] - a->element[i]);                        \
        }                                                               \
    }

/*
 * VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element)                   \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU

#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f32[i] = float32_scalbn(t, -uim, &env->vec_status);      \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP
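
/*
 * The record ("dot") forms of the vector compares set CR field 6 to 0b1000
 * when the comparison is true for all elements, 0b0010 when it is false for
 * all elements, and 0b0000 otherwise.
 */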

#define VCMPNE_DO(suffix, element, etype, cmpzero, record)              \
    void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r,          \
                               ppc_avr_t *a, ppc_avr_t *b)              \
    {                                                                   \
        etype ones = (etype)-1;                                         \
        etype all = ones;                                               \
        etype result, none = 0;                                         \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (cmpzero) {                                              \
                result = ((a->element[i] == 0)                          \
                          || (b->element[i] == 0)                       \
                          || (a->element[i] != b->element[i]) ?         \
                          ones : 0x0);                                  \
            } else {                                                    \
                result = (a->element[i] != b->element[i]) ? ones : 0x0; \
            }                                                           \
            r->element[i] = result;                                     \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }

/*
 * VCMPNEZ - Vector compare not equal to zero
 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VCMPNE(suffix, element, etype, cmpzero)         \
    VCMPNE_DO(suffix, element, etype, cmpzero, 0)       \
    VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
VCMPNE(zb, u8, uint8_t, 1)
VCMPNE(zh, u16, uint16_t, 1)
VCMPNE(zw, u32, uint32_t, 1)
VCMPNE(b, u8, uint8_t, 0)
VCMPNE(h, u16, uint16_t, 0)
VCMPNE(w, u32, uint32_t, 0)
#undef VCMPNE_DO
#undef VCMPNE

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            uint32_t result;                                            \
            FloatRelation rel =                                         \
                float32_compare_quiet(a->f32[i], b->f32[i],             \
                                      &env->vec_status);                \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
                                                     &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f32[i]);
            FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
                                                         &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            if (float32_is_any_nan(b->f32[i])) {                        \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f32[i], &s);          \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            set_vscr_sat(env);                                          \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}
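
/*
 * In vmhraddshs above, the 0x00004000 term adds one half ulp to the 32-bit
 * product before the >> 15 so that the high half is rounded to nearest,
 * rather than truncated as in vmhaddshs.
 */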

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, access, ofs)                             \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i, half = ARRAY_SIZE(r->element) / 2;                       \
                                                                        \
        for (i = 0; i < half; i++) {                                    \
            result.access(i * 2 + 0) = a->access(i + ofs);              \
            result.access(i * 2 + 1) = b->access(i + ofs);              \
        }                                                               \
        *r = result;                                                    \
    }

#define VMRG(suffix, element, access)                   \
    VMRG_DO(mrgl##suffix, element, access, half)        \
    VMRG_DO(mrgh##suffix, element, access, 0)
VMRG(b, u8, VsrB)
VMRG(h, u16, VsrH)
VMRG(w, u32, VsrW)
#undef VMRG_DO
#undef VMRG

void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
                                     (cast)b->mul_access(i);            \
        }                                                               \
    }

#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) *       \
                                     (cast)b->mul_access(i + 1);        \
        }                                                               \
    }

#define VMUL(suffix, mul_element, mul_access, prod_access, cast)          \
    VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \
    VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
VMUL(sb, s8, VsrSB, VsrSH, int16_t)
VMUL(sh, s16, VsrSH, VsrSW, int32_t)
VMUL(sw, s32, VsrSW, VsrSD, int64_t)
VMUL(ub, u8, VsrB, VsrH, uint16_t)
VMUL(uh, u16, VsrH, VsrW, uint32_t)
VMUL(uw, u32, VsrW, VsrD, uint64_t)
#undef VMUL_DO_EVN
#undef VMUL_DO_ODD
#undef VMUL

void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < 4; i++) {
        r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32);
    }
}

void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < 4; i++) {
        r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] *
                                (uint64_t)b->u32[i]) >> 32);
    }
}

void helper_vmulhsd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    uint64_t discard;

    muls64(&discard, &r->u64[0], a->s64[0], b->s64[0]);
    muls64(&discard, &r->u64[1], a->s64[1], b->s64[1]);
}

void helper_vmulhud(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    uint64_t discard;

    mulu64(&discard, &r->u64[0], a->u64[0], b->u64[0]);
    mulu64(&discard, &r->u64[1], a->u64[1], b->u64[1]);
}

void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = s & 0xf;

        if (s & 0x10) {
            result.VsrB(i) = b->VsrB(index);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                   ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = 15 - (s & 0xf);

        if (s & 0x10) {
            result.VsrB(i) = a->VsrB(index);
        } else {
            result.VsrB(i) = b->VsrB(index);
        }
    }
    *r = result;
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
    (extract64((avr)->u64[1 - i], 63 - index, 1))
#endif

void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}

void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63 - (index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->VsrD(0) = perm;
    r->VsrD(1) = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW
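
/*
 * The vpmsum* helpers below implement carry-less (polynomial, GF(2))
 * multiplication: partial products are combined with XOR instead of
 * addition, and the even/odd products of adjacent source elements are
 * XORed together into each result element.
 */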

#define PMSUM(name, srcfld, trgfld, trgtyp)                             \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i, j;                                                           \
    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])];              \
                                                                        \
    VECTOR_FOR_INORDER_I(i, srcfld) {                                   \
        prod[i] = 0;                                                    \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {                \
            if (a->srcfld[i] & (1ull << j)) {                           \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);                 \
            }                                                           \
        }                                                               \
    }                                                                   \
                                                                        \
    VECTOR_FOR_INORDER_I(i, trgfld) {                                   \
        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1];                   \
    }                                                                   \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{

#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                ppc_avr_t bshift;
                if (j == 0) {
                    bshift.VsrD(0) = 0;
                    bshift.VsrD(1) = b->u64[i];
                } else {
                    bshift.VsrD(0) = b->u64[i] >> (64 - j);
                    bshift.VsrD(1) = b->u64[i] << j;
                }
                prod[i].VsrD(1) ^= bshift.VsrD(1);
                prod[i].VsrD(0) ^= bshift.VsrD(0);
            }
        }
    }

    r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
    r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
#endif
}


#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
                                     ((e >> 6) & 0x3e0) |
                                     ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            set_vscr_sat(env);                                          \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(sdss, s64, s32, cvtsdsw, 1)
VPK(sdus, s64, u32, cvtsduw, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(udus, u64, u32, cvtuduw, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
VPK(udum, u64, u32, I, 0)
#undef I
#undef VPK
#undef PKBIG

void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
    }
}

#define VRFI(suffix, rounding)                                          \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *b)                              \
    {                                                                   \
        int i;                                                          \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(rounding, &s);                          \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = float32_round_to_int(b->f32[i], &s);            \
        }                                                               \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI

void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        float32 t = float32_sqrt(b->f32[i], &env->vec_status);

        r->f32[i] = float32_div(float32_one, t, &env->vec_status);
    }
}

#define VRLMI(name, size, element, insert)                              \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i;                                                              \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        uint##size##_t src1 = a->element[i];                            \
        uint##size##_t src2 = b->element[i];                            \
        uint##size##_t src3 = r->element[i];                            \
        uint##size##_t begin, end, shift, mask, rot_val;                \
                                                                        \
        shift = extract##size(src2, 0, 6);                              \
        end   = extract##size(src2, 8, 6);                              \
        begin = extract##size(src2, 16, 6);                             \
        rot_val = rol##size(src1, shift);                               \
        mask = mask_u##size(begin, end);                                \
        if (insert) {                                                   \
            r->element[i] = (rot_val & mask) | (src3 & ~mask);          \
        } else {                                                        \
            r->element[i] = (rot_val & mask);                           \
        }                                                               \
    }                                                                   \
}

VRLMI(vrldmi, 64, u64, 1);
VRLMI(vrlwmi, 32, u32, 1);
VRLMI(vrldnm, 64, u64, 0);
VRLMI(vrlwnm, 32, u32, 0);
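
/*
 * In the VRLMI forms above, src2 supplies three 6-bit fields: the rotate
 * count (bits 0..5), the mask end (bits 8..13) and the mask begin
 * (bits 16..21). src1 is rotated left and then either masked
 * (vrlwnm/vrldnm) or merged under the mask into the previous destination
 * value (vrlwmi/vrldmi).
 */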

void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                 ppc_avr_t *c)
{
    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
}

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTU_X_DO(name, size, left)                                \
    target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
    {                                                               \
        int index;                                                  \
        if (left) {                                                 \
            index = (a & 0xf) * 8;                                  \
        } else {                                                    \
            index = ((15 - (a & 0xf) + 1) * 8) - size;              \
        }                                                           \
        return int128_getlo(int128_rshift(b->s128, index)) &        \
            MAKE_64BIT_MASK(0, size);                               \
    }
#else
#define VEXTU_X_DO(name, size, left)                                \
    target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
    {                                                               \
        int index;                                                  \
        if (left) {                                                 \
            index = ((15 - (a & 0xf) + 1) * 8) - size;              \
        } else {                                                    \
            index = (a & 0xf) * 8;                                  \
        }                                                           \
        return int128_getlo(int128_rshift(b->s128, index)) &        \
            MAKE_64BIT_MASK(0, size);                               \
    }
#endif

VEXTU_X_DO(vextublx, 8, 1)
VEXTU_X_DO(vextuhlx, 16, 1)
VEXTU_X_DO(vextuwlx, 32, 1)
VEXTU_X_DO(vextubrx, 8, 0)
VEXTU_X_DO(vextuhrx, 16, 0)
VEXTU_X_DO(vextuwrx, 32, 0)
#undef VEXTU_X_DO

void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->VsrB(i) & 0x7;               /* extract shift value */
        bytes = (a->VsrB(i) << 8) +             /* extract adjacent bytes */
            (((i + 1) < size) ? a->VsrB(i + 1) : 0);
        r->VsrB(i) = (bytes << shift) >> 8;     /* shift and store result */
    }
}

void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /*
     * Use reverse order, as the destination and source registers can be
     * the same. Since the result is written in place (saving a temporary),
     * processing in reverse order guarantees that a computed result is
     * never fed back as an input.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->VsrB(i) & 0x7;               /* extract shift value */
        bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
                                                /* extract adjacent bytes */
        r->VsrB(i) = (bytes >> shift) & 0xFF;   /* shift and store result */
    }
}

void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.VsrB(i) = b->VsrB(index - 0x10);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])],           \
                sizeof(r->element[0]));                                     \
    }
#else
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        uint32_t d = (16 - index) - sizeof(r->element[0]);                  \
        memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0]));               \
    }
#endif
VINSERT(b, u8)
VINSERT(h, u16)
VINSERT(w, u32)
VINSERT(d, u64)
#undef VINSERT
#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
        memset(&r->u8[8], 0, 8);                                             \
        memset(&r->u8[0], 0, 8 - es);                                        \
    }
#else
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        uint32_t s = (16 - index) - es;                                      \
        memmove(&r->u8[8], &b->u8[s], es);                                   \
        memset(&r->u8[0], 0, 8);                                             \
        memset(&r->u8[8 + es], 0, 8 - es);                                   \
    }
#endif
VEXTRACT(ub, u8)
VEXTRACT(uh, u16)
VEXTRACT(uw, u32)
VEXTRACT(d, u64)
#undef VEXTRACT

void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
                        ppc_vsr_t *xb, uint32_t index)
{
    ppc_vsr_t t = { };
    size_t es = sizeof(uint32_t);
    uint32_t ext_index;
    int i;

    ext_index = index;
    for (i = 0; i < es; i++, ext_index++) {
        t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
    }

    *xt = t;
}

void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
                      ppc_vsr_t *xb, uint32_t index)
{
    ppc_vsr_t t = *xt;
    size_t es = sizeof(uint32_t);
    int ins_index, i = 0;

    ins_index = index;
    for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
        t.VsrB(ins_index) = xb->VsrB(8 - es + i);
    }

    *xt = t;
}

#define VEXT_SIGNED(name, element, cast)                            \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
{                                                                   \
    int i;                                                          \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
        r->element[i] = (cast)b->element[i];                        \
    }                                                               \
}
VEXT_SIGNED(vextsb2w, s32, int8_t)
VEXT_SIGNED(vextsb2d, s64, int8_t)
VEXT_SIGNED(vextsh2w, s32, int16_t)
VEXT_SIGNED(vextsh2d, s64, int16_t)
VEXT_SIGNED(vextsw2d, s64, int32_t)
#undef VEXT_SIGNED

#define VNEG(name, element)                                         \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
{                                                                   \
    int i;                                                          \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
        r->element[i] = -b->element[i];                             \
    }                                                               \
}
VNEG(vnegw, s32)
VNEG(vnegd, s64)
#undef VNEG

void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = a->u32[i] >= b->u32[i];
    }
}

void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

    upper = ARRAY_SIZE(r->s32) - 1;
    t = (int64_t)b->VsrSW(upper);
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->VsrSW(i);
        result.VsrSW(i) = 0;
    }
    result.VsrSW(upper) = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

    upper = 1;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->VsrSW(upper + i * 2);

        result.VsrD(i) = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->VsrSW(2 * i + j);
        }
        result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
#define VUPKPX(suffix, hi)                                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
            uint16_t e = b->u16[hi ? i : i + 4];                        \
            uint8_t a = (e >> 15) ? 0xff : 0;                           \
            uint8_t r = (e >> 10) & 0x1f;                               \
            uint8_t g = (e >> 5) & 0x1f;                                \
            uint8_t b = e & 0x1f;                                       \
                                                                        \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
        }                                                               \
        *r = result;                                                    \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

#define VUPK(suffix, unpacked, packee, hi)                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        if (hi) {                                                       \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
                result.unpacked[i] = b->packee[i];                      \
            }                                                           \
        } else {                                                        \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                                 \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(hsw, s64, s32, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
VUPK(lsw, s64, s32, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

#define VGENERIC_DO(name, element)                                      \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = name(b->element[i]);                        \
        }                                                               \
    }

#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)

#undef clzb
#undef clzh

#define ctzb(v) ((v) ? ctz32(v) : 8)
#define ctzh(v) ((v) ? ctz32(v) : 16)
#define ctzw(v) ctz32((v))
#define ctzd(v) ctz64((v))

VGENERIC_DO(ctzb, u8)
VGENERIC_DO(ctzh, u16)
VGENERIC_DO(ctzw, u32)
VGENERIC_DO(ctzd, u64)

#undef ctzb
#undef ctzh
#undef ctzw
#undef ctzd

#define popcntb(v) ctpop8(v)
#define popcnth(v) ctpop16(v)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)

VGENERIC_DO(popcntb, u8)
VGENERIC_DO(popcnth, u16)
VGENERIC_DO(popcntw, u32)
VGENERIC_DO(popcntd, u64)

#undef popcntb
#undef popcnth
#undef popcntw
#undef popcntd

#undef VGENERIC_DO

#if defined(HOST_WORDS_BIGENDIAN)
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif

#ifndef CONFIG_INT128

static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
{
    t->u64[0] = ~a.u64[0];
    t->u64[1] = ~a.u64[1];
}

static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
{
    if (a.VsrD(0) < b.VsrD(0)) {
        return -1;
    } else if (a.VsrD(0) > b.VsrD(0)) {
        return 1;
    } else if (a.VsrD(1) < b.VsrD(1)) {
        return -1;
    } else if (a.VsrD(1) > b.VsrD(1)) {
        return 1;
    } else {
        return 0;
    }
}

static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
        (~a.VsrD(1) < b.VsrD(1));
}

static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    ppc_avr_t not_a;
    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
        (~a.VsrD(1) < b.VsrD(1));
    avr_qw_not(&not_a, a);
    return avr_qw_cmpu(not_a, b) < 0;
}

#endif
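
/*
 * The avr_qw_* fallbacks above emulate 128-bit arithmetic with two 64-bit
 * halves; the carry out of the low doubleword addition is detected with
 * (~a < b), which is true exactly when a + b overflows 64 bits.
 */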

void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128;
#else
    avr_qw_add(r, *a, *b);
#endif
}

void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else

    if (c->VsrD(1) & 1) {
        ppc_avr_t tmp;

        tmp.VsrD(0) = 0;
        tmp.VsrD(1) = c->VsrD(1) & 1;
        avr_qw_add(&tmp, *a, tmp);
        avr_qw_add(r, tmp, *b);
    } else {
        avr_qw_add(r, *a, *b);
    }
#endif
}

void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < b->u128);
#else
    ppc_avr_t not_a;

    avr_qw_not(&not_a, *a);

    r->VsrD(0) = 0;
    r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
#endif
}

void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    int carry_out = (~a->u128 < b->u128);
    if (!carry_out && (c->u128 & 1)) {
        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
            ((a->u128 != 0) || (b->u128 != 0));
    }
    r->u128 = carry_out;
#else

    int carry_in = c->VsrD(1) & 1;
    int carry_out = 0;
    ppc_avr_t tmp;

    carry_out = avr_qw_addc(&tmp, *a, *b);

    if (!carry_out && carry_in) {
        ppc_avr_t one = QW_ONE;
        carry_out = avr_qw_addc(&tmp, tmp, one);
    }
    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
#endif
}

void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 - b->u128;
#else
    ppc_avr_t tmp;
    ppc_avr_t one = QW_ONE;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&tmp, *a, tmp);
    avr_qw_add(r, tmp, one);
#endif
}

void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
#else
    ppc_avr_t tmp, sum;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&sum, *a, tmp);

    tmp.VsrD(0) = 0;
    tmp.VsrD(1) = c->VsrD(1) & 1;
    avr_qw_add(r, sum, tmp);
#endif
}

void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < ~b->u128) ||
        (a->u128 + ~b->u128 == (__uint128_t)-1);
#else
    int carry = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
    }
    r->VsrD(0) = 0;
    r->VsrD(1) = carry;
#endif
}

void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 =
        (~a->u128 < ~b->u128) ||
        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
#else
    int carry_in = c->VsrD(1) & 1;
    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry_out && carry_in) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
    }

    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
#endif
}

#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
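
/*
 * The BCD helpers below operate on packed decimals: 31 4-bit digits with
 * the sign code in the low nibble of the least-significant byte (digit 0).
 * BCD_DIG_BYTE() maps a digit number to the byte that holds it.
 */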

static int bcd_get_sgn(ppc_avr_t *bcd)
{
    switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
    case BCD_PLUS_PREF_1:
    case BCD_PLUS_PREF_2:
    case BCD_PLUS_ALT_1:
    case BCD_PLUS_ALT_2:
    {
        return 1;
    }

    case BCD_NEG_PREF:
    case BCD_NEG_ALT:
    {
        return -1;
    }

    default:
    {
        return 0;
    }
    }
}

static int bcd_preferred_sgn(int sgn, int ps)
{
    if (sgn >= 0) {
        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
    } else {
        return BCD_NEG_PREF;
    }
}

static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
{
    uint8_t result;
    if (n & 1) {
        result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
    } else {
        result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
    }

    if (unlikely(result > 9)) {
        *invalid = true;
    }
    return result;
}

static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
{
    if (n & 1) {
        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
        bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
    } else {
        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
        bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
    }
}

static bool bcd_is_valid(ppc_avr_t *bcd)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(bcd) == 0) {
        return false;
    }

    for (i = 1; i < 32; i++) {
        bcd_get_digit(bcd, i, &invalid);
        if (unlikely(invalid)) {
            return false;
        }
    }
    return true;
}

static int bcd_cmp_zero(ppc_avr_t *bcd)
{
    if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
        return CRF_EQ;
    } else {
        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
    }
}

static uint16_t get_national_digit(ppc_avr_t *reg, int n)
{
    return reg->VsrH(7 - n);
}

static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
{
    reg->VsrH(7 - n) = val;
}

static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;
    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}

static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
            bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
    return is_zero;
}

static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                        int *overflow)
{
    int carry = 0;
    int i;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
            bcd_get_digit(b, i, invalid) + carry;
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
}
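
/*
 * bcdadd/bcdsub return the CR field value directly: LT/GT reflect the sign
 * of the result, EQ is set for a zero result, and SO is set when an operand
 * is invalid or the result overflows 31 digits.
 */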
uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{

    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    int zero = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? CRF_GT : CRF_LT;
        } else {
            int magnitude = bcd_cmp_mag(a, b);
            if (magnitude > 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
                bcd_sub_mag(&result, a, b, &invalid, &overflow);
                cr = (sgna > 0) ? CRF_GT : CRF_LT;
            } else if (magnitude < 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
                bcd_sub_mag(&result, b, a, &invalid, &overflow);
                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
            } else {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
                cr = CRF_EQ;
            }
        }
    }

    if (unlikely(invalid)) {
        result.VsrD(0) = result.VsrD(1) = -1;
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    } else if (zero) {
        cr |= CRF_EQ;
    }

    *r = result;

    return cr;
}

uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}
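/*
 * bcdcfn/bcdctn convert between packed BCD and the national decimal
 * format, where each of the seven digits is a 16-bit code in the range
 * 0x0030..0x0039 ('0'..'9') and the remaining halfword holds the sign,
 * 0x002B ('+') or 0x002D ('-'); anything else marks the source as
 * invalid and forces CRF_SO in the result CR image.
 */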
uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
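/*
 * bcdcfz/bcdctz handle the zoned decimal format: one digit per byte, with
 * the decimal value in the low nibble and a zone nibble (0x3, or 0xF when
 * PS = 1) in the high nibble.  The sign rides in the zone nibble of the
 * rightmost digit, which is why that byte's high nibble is treated
 * specially in the conversions below.
 */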
uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
        digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
            (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->VsrD(0) >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
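/*
 * bcdcfsq/bcdctsq convert between a signed 128-bit binary integer and
 * packed BCD.  bcdcfsq splits the magnitude into quotient and remainder
 * by 10^15 with divu128(), takes the low 15 digits from the remainder and
 * the upper digits from the quotient, and flags overflow when more than
 * 31 digits would be needed.  bcdctsq goes the other way, accumulating
 * the digits most-significant-first with a multiply-by-10 per step.
 */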
uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint64_t lo_value;
    uint64_t hi_value;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->VsrSD(0) < 0) {
        lo_value = -b->VsrSD(1);
        hi_value = ~b->VsrD(0) + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);
    } else {
        lo_value = b->VsrD(1);
        hi_value = b->VsrD(0);
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
    }

    if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
        lo_value > 9999999999999999ULL) {
        cr = CRF_SO;
    }

    for (i = 1; i < 16; hi_value /= 10, i++) {
        bcd_put_digit(&ret, hi_value % 10, i);
    }

    for (; i < 32; lo_value /= 10, i++) {
        bcd_put_digit(&ret, lo_value % 10, i);
    }

    cr |= bcd_cmp_zero(&ret);

    *r = ret;

    return cr;
}

uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->VsrSD(1) = -lo_value;
        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
    } else {
        r->VsrSD(1) = lo_value;
        r->VsrSD(0) = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);

    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}
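/*
 * The decimal shift, round and truncate helpers below (bcds, bcdus,
 * bcdsr, bcdtrunc, bcdutrunc) take the shift or truncation count from
 * VRA and move whole decimal digits, so a count of i becomes a shift of
 * i * 4 bits over the digit field; bcdsr additionally rounds up, adding
 * one to the least significant surviving digit when the most significant
 * discarded digit is 5 or greater.
 */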
uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i = a->VsrSB(7);
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSB(7);
    if (i >= 32) {
        ox_flag = true;
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i <= -32) {
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    int i = a->VsrSB(7);
    ppc_avr_t bcd_one;

    bcd_one.VsrD(0) = 0;
    bcd_one.VsrD(1) = 0x10;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);

        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}

uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
    int i = a->VsrSH(3) + 1;
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i >= 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}

uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSH(3);
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    } else if (i == 0) {
        if (ret.VsrD(0) || ret.VsrD(1)) {
            ox_flag = CRF_SO;
        }
        ret.VsrD(0) = ret.VsrD(1) = 0;
    }

    *r = ret;
    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}
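/*
 * AES acceleration helpers.  vcipher performs one full encryption round:
 * SubBytes, ShiftRows and MixColumns are folded into the AES_Te* lookup
 * tables, and the xor with VRB supplies AddRoundKey.  vcipherlast uses
 * the plain S-box and so omits MixColumns, matching the final AES round.
 * vncipher/vncipherlast are the inverse-cipher counterparts built on
 * AES_isbox and AES_imc.
 */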
void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.VsrW(i) = b->VsrW(i) ^
            (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}

void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
    }
    *r = result;
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07. The RTL is */
    /* incorrect and will be fixed in V2.07B. */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->VsrW(i) =
            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
            AES_imc[tmp.VsrB(4 * i + 3)][3];
    }
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
    }
    *r = result;
}

void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}

void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}
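/*
 * Worked example for the helper above, obtained by tracing the code (not
 * taken from the ISA text): with high = 0x61620063 and low = 0, the first
 * zero byte is the third byte of the high word, so the scan stops with
 * i = 3; the low seven bits of XER receive 3, and with update_Rc set the
 * high-word branch stores 0x4 into crf[0].
 */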