1 /* 2 * Loongson Multimedia Instruction emulation helpers for QEMU. 3 * 4 * Copyright (c) 2011 Richard Henderson <rth@twiddle.net> 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "cpu.h" 22 #include "exec/helper-proto.h" 23 24 /* 25 * If the byte ordering doesn't matter, i.e. all columns are treated 26 * identically, then this union can be used directly. If byte ordering 27 * does matter, we generally ignore dumping to memory. 28 */ 29 typedef union { 30 uint8_t ub[8]; 31 int8_t sb[8]; 32 uint16_t uh[4]; 33 int16_t sh[4]; 34 uint32_t uw[2]; 35 int32_t sw[2]; 36 uint64_t d; 37 } LMIValue; 38 39 /* Some byte ordering issues can be mitigated by XORing in the following. */ 40 #if HOST_BIG_ENDIAN 41 # define BYTE_ORDER_XOR(N) N 42 #else 43 # define BYTE_ORDER_XOR(N) 0 44 #endif 45 46 #define SATSB(x) (x < -0x80 ? -0x80 : x > 0x7f ? 0x7f : x) 47 #define SATUB(x) (x > 0xff ? 0xff : x) 48 49 #define SATSH(x) (x < -0x8000 ? -0x8000 : x > 0x7fff ? 0x7fff : x) 50 #define SATUH(x) (x > 0xffff ? 0xffff : x) 51 52 #define SATSW(x) \ 53 (x < -0x80000000ll ? -0x80000000ll : x > 0x7fffffff ? 0x7fffffff : x) 54 #define SATUW(x) (x > 0xffffffffull ? 0xffffffffull : x) 55 56 uint64_t helper_paddsb(uint64_t fs, uint64_t ft) 57 { 58 LMIValue vs, vt; 59 unsigned int i; 60 61 vs.d = fs; 62 vt.d = ft; 63 for (i = 0; i < 8; ++i) { 64 int r = vs.sb[i] + vt.sb[i]; 65 vs.sb[i] = SATSB(r); 66 } 67 return vs.d; 68 } 69 70 uint64_t helper_paddusb(uint64_t fs, uint64_t ft) 71 { 72 LMIValue vs, vt; 73 unsigned int i; 74 75 vs.d = fs; 76 vt.d = ft; 77 for (i = 0; i < 8; ++i) { 78 int r = vs.ub[i] + vt.ub[i]; 79 vs.ub[i] = SATUB(r); 80 } 81 return vs.d; 82 } 83 84 uint64_t helper_paddsh(uint64_t fs, uint64_t ft) 85 { 86 LMIValue vs, vt; 87 unsigned int i; 88 89 vs.d = fs; 90 vt.d = ft; 91 for (i = 0; i < 4; ++i) { 92 int r = vs.sh[i] + vt.sh[i]; 93 vs.sh[i] = SATSH(r); 94 } 95 return vs.d; 96 } 97 98 uint64_t helper_paddush(uint64_t fs, uint64_t ft) 99 { 100 LMIValue vs, vt; 101 unsigned int i; 102 103 vs.d = fs; 104 vt.d = ft; 105 for (i = 0; i < 4; ++i) { 106 int r = vs.uh[i] + vt.uh[i]; 107 vs.uh[i] = SATUH(r); 108 } 109 return vs.d; 110 } 111 112 uint64_t helper_paddb(uint64_t fs, uint64_t ft) 113 { 114 LMIValue vs, vt; 115 unsigned int i; 116 117 vs.d = fs; 118 vt.d = ft; 119 for (i = 0; i < 8; ++i) { 120 vs.ub[i] += vt.ub[i]; 121 } 122 return vs.d; 123 } 124 125 uint64_t helper_paddh(uint64_t fs, uint64_t ft) 126 { 127 LMIValue vs, vt; 128 unsigned int i; 129 130 vs.d = fs; 131 vt.d = ft; 132 for (i = 0; i < 4; ++i) { 133 vs.uh[i] += vt.uh[i]; 134 } 135 return vs.d; 136 } 137 138 uint64_t helper_paddw(uint64_t fs, uint64_t ft) 139 { 140 LMIValue vs, vt; 141 unsigned int i; 142 143 vs.d = fs; 144 vt.d = ft; 145 for (i = 0; i < 2; ++i) { 146 vs.uw[i] += vt.uw[i]; 147 } 148 return vs.d; 149 } 150 151 uint64_t helper_psubsb(uint64_t fs, uint64_t ft) 152 { 153 LMIValue vs, vt; 154 unsigned int i; 155 156 vs.d = fs; 157 vt.d = ft; 158 for (i = 0; i < 8; ++i) { 159 int r = vs.sb[i] - vt.sb[i]; 160 vs.sb[i] = SATSB(r); 161 } 162 return vs.d; 163 } 164 165 uint64_t helper_psubusb(uint64_t fs, uint64_t ft) 166 { 167 LMIValue vs, vt; 168 unsigned int i; 169 170 vs.d = fs; 171 vt.d = ft; 172 for (i = 0; i < 8; ++i) { 173 int r = vs.ub[i] - vt.ub[i]; 174 vs.ub[i] = SATUB(r); 175 } 176 return vs.d; 177 } 178 179 uint64_t helper_psubsh(uint64_t fs, uint64_t ft) 180 { 181 LMIValue vs, vt; 182 unsigned int i; 183 184 vs.d = fs; 185 vt.d = ft; 186 for (i = 0; i < 4; ++i) { 187 int r = vs.sh[i] - vt.sh[i]; 188 vs.sh[i] = SATSH(r); 189 } 190 return vs.d; 191 } 192 193 uint64_t helper_psubush(uint64_t fs, uint64_t ft) 194 { 195 LMIValue vs, vt; 196 unsigned int i; 197 198 vs.d = fs; 199 vt.d = ft; 200 for (i = 0; i < 4; ++i) { 201 int r = vs.uh[i] - vt.uh[i]; 202 vs.uh[i] = SATUH(r); 203 } 204 return vs.d; 205 } 206 207 uint64_t helper_psubb(uint64_t fs, uint64_t ft) 208 { 209 LMIValue vs, vt; 210 unsigned int i; 211 212 vs.d = fs; 213 vt.d = ft; 214 for (i = 0; i < 8; ++i) { 215 vs.ub[i] -= vt.ub[i]; 216 } 217 return vs.d; 218 } 219 220 uint64_t helper_psubh(uint64_t fs, uint64_t ft) 221 { 222 LMIValue vs, vt; 223 unsigned int i; 224 225 vs.d = fs; 226 vt.d = ft; 227 for (i = 0; i < 4; ++i) { 228 vs.uh[i] -= vt.uh[i]; 229 } 230 return vs.d; 231 } 232 233 uint64_t helper_psubw(uint64_t fs, uint64_t ft) 234 { 235 LMIValue vs, vt; 236 unsigned int i; 237 238 vs.d = fs; 239 vt.d = ft; 240 for (i = 0; i < 2; ++i) { 241 vs.uw[i] -= vt.uw[i]; 242 } 243 return vs.d; 244 } 245 246 uint64_t helper_pshufh(uint64_t fs, uint64_t ft) 247 { 248 unsigned host = BYTE_ORDER_XOR(3); 249 LMIValue vd, vs; 250 unsigned i; 251 252 vs.d = fs; 253 vd.d = 0; 254 for (i = 0; i < 4; i++, ft >>= 2) { 255 vd.uh[i ^ host] = vs.uh[(ft & 3) ^ host]; 256 } 257 return vd.d; 258 } 259 260 uint64_t helper_packsswh(uint64_t fs, uint64_t ft) 261 { 262 uint64_t fd = 0; 263 int64_t tmp; 264 265 tmp = (int32_t)(fs >> 0); 266 tmp = SATSH(tmp); 267 fd |= (tmp & 0xffff) << 0; 268 269 tmp = (int32_t)(fs >> 32); 270 tmp = SATSH(tmp); 271 fd |= (tmp & 0xffff) << 16; 272 273 tmp = (int32_t)(ft >> 0); 274 tmp = SATSH(tmp); 275 fd |= (tmp & 0xffff) << 32; 276 277 tmp = (int32_t)(ft >> 32); 278 tmp = SATSH(tmp); 279 fd |= (tmp & 0xffff) << 48; 280 281 return fd; 282 } 283 284 uint64_t helper_packsshb(uint64_t fs, uint64_t ft) 285 { 286 uint64_t fd = 0; 287 unsigned int i; 288 289 for (i = 0; i < 4; ++i) { 290 int16_t tmp = fs >> (i * 16); 291 tmp = SATSB(tmp); 292 fd |= (uint64_t)(tmp & 0xff) << (i * 8); 293 } 294 for (i = 0; i < 4; ++i) { 295 int16_t tmp = ft >> (i * 16); 296 tmp = SATSB(tmp); 297 fd |= (uint64_t)(tmp & 0xff) << (i * 8 + 32); 298 } 299 300 return fd; 301 } 302 303 uint64_t helper_packushb(uint64_t fs, uint64_t ft) 304 { 305 uint64_t fd = 0; 306 unsigned int i; 307 308 for (i = 0; i < 4; ++i) { 309 int16_t tmp = fs >> (i * 16); 310 tmp = SATUB(tmp); 311 fd |= (uint64_t)(tmp & 0xff) << (i * 8); 312 } 313 for (i = 0; i < 4; ++i) { 314 int16_t tmp = ft >> (i * 16); 315 tmp = SATUB(tmp); 316 fd |= (uint64_t)(tmp & 0xff) << (i * 8 + 32); 317 } 318 319 return fd; 320 } 321 322 uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft) 323 { 324 return (fs & 0xffffffff) | (ft << 32); 325 } 326 327 uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft) 328 { 329 return (fs >> 32) | (ft & ~0xffffffffull); 330 } 331 332 uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft) 333 { 334 unsigned host = BYTE_ORDER_XOR(3); 335 LMIValue vd, vs, vt; 336 337 vs.d = fs; 338 vt.d = ft; 339 vd.uh[0 ^ host] = vs.uh[0 ^ host]; 340 vd.uh[1 ^ host] = vt.uh[0 ^ host]; 341 vd.uh[2 ^ host] = vs.uh[1 ^ host]; 342 vd.uh[3 ^ host] = vt.uh[1 ^ host]; 343 344 return vd.d; 345 } 346 347 uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft) 348 { 349 unsigned host = BYTE_ORDER_XOR(3); 350 LMIValue vd, vs, vt; 351 352 vs.d = fs; 353 vt.d = ft; 354 vd.uh[0 ^ host] = vs.uh[2 ^ host]; 355 vd.uh[1 ^ host] = vt.uh[2 ^ host]; 356 vd.uh[2 ^ host] = vs.uh[3 ^ host]; 357 vd.uh[3 ^ host] = vt.uh[3 ^ host]; 358 359 return vd.d; 360 } 361 362 uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft) 363 { 364 unsigned host = BYTE_ORDER_XOR(7); 365 LMIValue vd, vs, vt; 366 367 vs.d = fs; 368 vt.d = ft; 369 vd.ub[0 ^ host] = vs.ub[0 ^ host]; 370 vd.ub[1 ^ host] = vt.ub[0 ^ host]; 371 vd.ub[2 ^ host] = vs.ub[1 ^ host]; 372 vd.ub[3 ^ host] = vt.ub[1 ^ host]; 373 vd.ub[4 ^ host] = vs.ub[2 ^ host]; 374 vd.ub[5 ^ host] = vt.ub[2 ^ host]; 375 vd.ub[6 ^ host] = vs.ub[3 ^ host]; 376 vd.ub[7 ^ host] = vt.ub[3 ^ host]; 377 378 return vd.d; 379 } 380 381 uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft) 382 { 383 unsigned host = BYTE_ORDER_XOR(7); 384 LMIValue vd, vs, vt; 385 386 vs.d = fs; 387 vt.d = ft; 388 vd.ub[0 ^ host] = vs.ub[4 ^ host]; 389 vd.ub[1 ^ host] = vt.ub[4 ^ host]; 390 vd.ub[2 ^ host] = vs.ub[5 ^ host]; 391 vd.ub[3 ^ host] = vt.ub[5 ^ host]; 392 vd.ub[4 ^ host] = vs.ub[6 ^ host]; 393 vd.ub[5 ^ host] = vt.ub[6 ^ host]; 394 vd.ub[6 ^ host] = vs.ub[7 ^ host]; 395 vd.ub[7 ^ host] = vt.ub[7 ^ host]; 396 397 return vd.d; 398 } 399 400 uint64_t helper_pavgh(uint64_t fs, uint64_t ft) 401 { 402 LMIValue vs, vt; 403 unsigned i; 404 405 vs.d = fs; 406 vt.d = ft; 407 for (i = 0; i < 4; i++) { 408 vs.uh[i] = (vs.uh[i] + vt.uh[i] + 1) >> 1; 409 } 410 return vs.d; 411 } 412 413 uint64_t helper_pavgb(uint64_t fs, uint64_t ft) 414 { 415 LMIValue vs, vt; 416 unsigned i; 417 418 vs.d = fs; 419 vt.d = ft; 420 for (i = 0; i < 8; i++) { 421 vs.ub[i] = (vs.ub[i] + vt.ub[i] + 1) >> 1; 422 } 423 return vs.d; 424 } 425 426 uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft) 427 { 428 LMIValue vs, vt; 429 unsigned i; 430 431 vs.d = fs; 432 vt.d = ft; 433 for (i = 0; i < 4; i++) { 434 vs.sh[i] = (vs.sh[i] >= vt.sh[i] ? vs.sh[i] : vt.sh[i]); 435 } 436 return vs.d; 437 } 438 439 uint64_t helper_pminsh(uint64_t fs, uint64_t ft) 440 { 441 LMIValue vs, vt; 442 unsigned i; 443 444 vs.d = fs; 445 vt.d = ft; 446 for (i = 0; i < 4; i++) { 447 vs.sh[i] = (vs.sh[i] <= vt.sh[i] ? vs.sh[i] : vt.sh[i]); 448 } 449 return vs.d; 450 } 451 452 uint64_t helper_pmaxub(uint64_t fs, uint64_t ft) 453 { 454 LMIValue vs, vt; 455 unsigned i; 456 457 vs.d = fs; 458 vt.d = ft; 459 for (i = 0; i < 4; i++) { 460 vs.ub[i] = (vs.ub[i] >= vt.ub[i] ? vs.ub[i] : vt.ub[i]); 461 } 462 return vs.d; 463 } 464 465 uint64_t helper_pminub(uint64_t fs, uint64_t ft) 466 { 467 LMIValue vs, vt; 468 unsigned i; 469 470 vs.d = fs; 471 vt.d = ft; 472 for (i = 0; i < 4; i++) { 473 vs.ub[i] = (vs.ub[i] <= vt.ub[i] ? vs.ub[i] : vt.ub[i]); 474 } 475 return vs.d; 476 } 477 478 uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft) 479 { 480 LMIValue vs, vt; 481 unsigned i; 482 483 vs.d = fs; 484 vt.d = ft; 485 for (i = 0; i < 2; i++) { 486 vs.uw[i] = -(vs.uw[i] == vt.uw[i]); 487 } 488 return vs.d; 489 } 490 491 uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft) 492 { 493 LMIValue vs, vt; 494 unsigned i; 495 496 vs.d = fs; 497 vt.d = ft; 498 for (i = 0; i < 2; i++) { 499 vs.uw[i] = -(vs.uw[i] > vt.uw[i]); 500 } 501 return vs.d; 502 } 503 504 uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft) 505 { 506 LMIValue vs, vt; 507 unsigned i; 508 509 vs.d = fs; 510 vt.d = ft; 511 for (i = 0; i < 4; i++) { 512 vs.uh[i] = -(vs.uh[i] == vt.uh[i]); 513 } 514 return vs.d; 515 } 516 517 uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft) 518 { 519 LMIValue vs, vt; 520 unsigned i; 521 522 vs.d = fs; 523 vt.d = ft; 524 for (i = 0; i < 4; i++) { 525 vs.uh[i] = -(vs.uh[i] > vt.uh[i]); 526 } 527 return vs.d; 528 } 529 530 uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft) 531 { 532 LMIValue vs, vt; 533 unsigned i; 534 535 vs.d = fs; 536 vt.d = ft; 537 for (i = 0; i < 8; i++) { 538 vs.ub[i] = -(vs.ub[i] == vt.ub[i]); 539 } 540 return vs.d; 541 } 542 543 uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft) 544 { 545 LMIValue vs, vt; 546 unsigned i; 547 548 vs.d = fs; 549 vt.d = ft; 550 for (i = 0; i < 8; i++) { 551 vs.ub[i] = -(vs.ub[i] > vt.ub[i]); 552 } 553 return vs.d; 554 } 555 556 uint64_t helper_psllw(uint64_t fs, uint64_t ft) 557 { 558 LMIValue vs; 559 unsigned i; 560 561 ft &= 0x7f; 562 if (ft > 31) { 563 return 0; 564 } 565 vs.d = fs; 566 for (i = 0; i < 2; ++i) { 567 vs.uw[i] <<= ft; 568 } 569 return vs.d; 570 } 571 572 uint64_t helper_psrlw(uint64_t fs, uint64_t ft) 573 { 574 LMIValue vs; 575 unsigned i; 576 577 ft &= 0x7f; 578 if (ft > 31) { 579 return 0; 580 } 581 vs.d = fs; 582 for (i = 0; i < 2; ++i) { 583 vs.uw[i] >>= ft; 584 } 585 return vs.d; 586 } 587 588 uint64_t helper_psraw(uint64_t fs, uint64_t ft) 589 { 590 LMIValue vs; 591 unsigned i; 592 593 ft &= 0x7f; 594 if (ft > 31) { 595 ft = 31; 596 } 597 vs.d = fs; 598 for (i = 0; i < 2; ++i) { 599 vs.sw[i] >>= ft; 600 } 601 return vs.d; 602 } 603 604 uint64_t helper_psllh(uint64_t fs, uint64_t ft) 605 { 606 LMIValue vs; 607 unsigned i; 608 609 ft &= 0x7f; 610 if (ft > 15) { 611 return 0; 612 } 613 vs.d = fs; 614 for (i = 0; i < 4; ++i) { 615 vs.uh[i] <<= ft; 616 } 617 return vs.d; 618 } 619 620 uint64_t helper_psrlh(uint64_t fs, uint64_t ft) 621 { 622 LMIValue vs; 623 unsigned i; 624 625 ft &= 0x7f; 626 if (ft > 15) { 627 return 0; 628 } 629 vs.d = fs; 630 for (i = 0; i < 4; ++i) { 631 vs.uh[i] >>= ft; 632 } 633 return vs.d; 634 } 635 636 uint64_t helper_psrah(uint64_t fs, uint64_t ft) 637 { 638 LMIValue vs; 639 unsigned i; 640 641 ft &= 0x7f; 642 if (ft > 15) { 643 ft = 15; 644 } 645 vs.d = fs; 646 for (i = 0; i < 4; ++i) { 647 vs.sh[i] >>= ft; 648 } 649 return vs.d; 650 } 651 652 uint64_t helper_pmullh(uint64_t fs, uint64_t ft) 653 { 654 LMIValue vs, vt; 655 unsigned i; 656 657 vs.d = fs; 658 vt.d = ft; 659 for (i = 0; i < 4; ++i) { 660 vs.sh[i] *= vt.sh[i]; 661 } 662 return vs.d; 663 } 664 665 uint64_t helper_pmulhh(uint64_t fs, uint64_t ft) 666 { 667 LMIValue vs, vt; 668 unsigned i; 669 670 vs.d = fs; 671 vt.d = ft; 672 for (i = 0; i < 4; ++i) { 673 int32_t r = vs.sh[i] * vt.sh[i]; 674 vs.sh[i] = r >> 16; 675 } 676 return vs.d; 677 } 678 679 uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft) 680 { 681 LMIValue vs, vt; 682 unsigned i; 683 684 vs.d = fs; 685 vt.d = ft; 686 for (i = 0; i < 4; ++i) { 687 uint32_t r = vs.uh[i] * vt.uh[i]; 688 vs.uh[i] = r >> 16; 689 } 690 return vs.d; 691 } 692 693 uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft) 694 { 695 unsigned host = BYTE_ORDER_XOR(3); 696 LMIValue vs, vt; 697 uint32_t p0, p1; 698 699 vs.d = fs; 700 vt.d = ft; 701 p0 = vs.sh[0 ^ host] * vt.sh[0 ^ host]; 702 p0 += vs.sh[1 ^ host] * vt.sh[1 ^ host]; 703 p1 = vs.sh[2 ^ host] * vt.sh[2 ^ host]; 704 p1 += vs.sh[3 ^ host] * vt.sh[3 ^ host]; 705 706 return ((uint64_t)p1 << 32) | p0; 707 } 708 709 uint64_t helper_pasubub(uint64_t fs, uint64_t ft) 710 { 711 LMIValue vs, vt; 712 unsigned i; 713 714 vs.d = fs; 715 vt.d = ft; 716 for (i = 0; i < 8; ++i) { 717 int r = vs.ub[i] - vt.ub[i]; 718 vs.ub[i] = (r < 0 ? -r : r); 719 } 720 return vs.d; 721 } 722 723 uint64_t helper_biadd(uint64_t fs) 724 { 725 unsigned i, fd; 726 727 for (i = fd = 0; i < 8; ++i) { 728 fd += (fs >> (i * 8)) & 0xff; 729 } 730 return fd & 0xffff; 731 } 732 733 uint64_t helper_pmovmskb(uint64_t fs) 734 { 735 unsigned fd = 0; 736 737 fd |= ((fs >> 7) & 1) << 0; 738 fd |= ((fs >> 15) & 1) << 1; 739 fd |= ((fs >> 23) & 1) << 2; 740 fd |= ((fs >> 31) & 1) << 3; 741 fd |= ((fs >> 39) & 1) << 4; 742 fd |= ((fs >> 47) & 1) << 5; 743 fd |= ((fs >> 55) & 1) << 6; 744 fd |= ((fs >> 63) & 1) << 7; 745 746 return fd & 0xff; 747 } 748