/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/int128.h"
#include "tcg/tcg-op-common.h"
#include "tcg-internal.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)

typedef struct TempOptInfo {
    bool is_const;
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    uint64_t val;
    uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
    uint64_t s_mask;  /* a left-aligned mask of clrsb(value) bits. */
} TempOptInfo;

typedef struct OptContext {
    TCGContext *tcg;
    TCGOp *prev_mb;
    TCGTempSet temps_used;

    /* In flight values from optimization. */
    uint64_t a_mask;  /* mask bit is 0 iff value identical to first input */
    uint64_t z_mask;  /* mask bit is 0 iff value bit is 0 */
    uint64_t s_mask;  /* mask of clrsb(value) bits */
    TCGType type;
} OptContext;

/* Calculate the smask for a specific value. */
static uint64_t smask_from_value(uint64_t value)
{
    int rep = clrsb64(value);
    return ~(~0ull >> rep);
}

/*
 * Calculate the smask for a given set of known-zeros.
 * If there are lots of zeros on the left, we can consider the remainder
 * an unsigned field, and thus the corresponding signed field is one bit
 * larger.
 */
static uint64_t smask_from_zmask(uint64_t zmask)
{
    /*
     * Only the 0 bits are significant for zmask, thus the msb itself
     * must be zero, else we have no sign information.
     */
    int rep = clz64(zmask);
    if (rep == 0) {
        return 0;
    }
    rep -= 1;
    return ~(~0ull >> rep);
}
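/*
 * Illustrative examples of the mask conventions above (derived from the
 * functions themselves, not an exhaustive specification): a value known
 * to fit in 8 unsigned bits has z_mask == 0xff, i.e. bits 63..8 are known
 * zero, so the signed field is at most 9 bits wide and at least 55 copies
 * of the sign bit are redundant:
 *
 *     smask_from_value(0x00ff) == 0xfffffffffffffe00   (clrsb64 == 55)
 *     smask_from_zmask(0x00ff) == 0xfffffffffffffe00
 *     smask_from_zmask(~0ull)  == 0    (msb not known zero: no sign info)
 */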
/*
 * Recreate a properly left-aligned smask after manipulation.
 * Some bit-shuffling, particularly shifts and rotates, may
 * retain sign bits on the left, but may scatter disconnected
 * sign bits on the right.  Retain only what remains to the left.
 */
static uint64_t smask_from_smask(int64_t smask)
{
    /* Only the 1 bits are significant for smask */
    return smask_from_zmask(~smask);
}

static inline TempOptInfo *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}

static inline TempOptInfo *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}

static inline bool ts_is_const(TCGTemp *ts)
{
    return ts_info(ts)->is_const;
}

static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}

static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}

/* Reset TEMP's state, possibly removing the temp from the list of copies. */
static void reset_ts(TCGTemp *ts)
{
    TempOptInfo *ti = ts_info(ts);
    TempOptInfo *pi = ts_info(ti->prev_copy);
    TempOptInfo *ni = ts_info(ti->next_copy);

    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->is_const = false;
    ti->z_mask = -1;
    ti->s_mask = 0;
}

static void reset_temp(TCGArg arg)
{
    reset_ts(arg_temp(arg));
}

/* Initialize and activate a temporary. */
static void init_ts_info(OptContext *ctx, TCGTemp *ts)
{
    size_t idx = temp_idx(ts);
    TempOptInfo *ti;

    if (test_bit(idx, ctx->temps_used.l)) {
        return;
    }
    set_bit(idx, ctx->temps_used.l);

    ti = ts->state_ptr;
    if (ti == NULL) {
        ti = tcg_malloc(sizeof(TempOptInfo));
        ts->state_ptr = ti;
    }

    ti->next_copy = ts;
    ti->prev_copy = ts;
    if (ts->kind == TEMP_CONST) {
        ti->is_const = true;
        ti->val = ts->val;
        ti->z_mask = ts->val;
        ti->s_mask = smask_from_value(ts->val);
    } else {
        ti->is_const = false;
        ti->z_mask = -1;
        ti->s_mask = 0;
    }
}

static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
{
    TCGTemp *i, *g, *l;

    /* If this is already readonly, we can't do better. */
    if (temp_readonly(ts)) {
        return ts;
    }

    g = l = NULL;
    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
        if (temp_readonly(i)) {
            return i;
        } else if (i->kind > ts->kind) {
            if (i->kind == TEMP_GLOBAL) {
                g = i;
            } else if (i->kind == TEMP_TB) {
                l = i;
            }
        }
    }

    /* If we didn't find a better representation, return the same temp. */
    return g ? g : l ? l : ts;
}
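/*
 * For illustration: after a sequence such as "mov t2, t1; mov t3, t1",
 * the temps t1, t2 and t3 sit on one circular copy list.  A later use of
 * t3 is rewritten by copy_propagate() below to use whichever member
 * find_better_copy() prefers: a read-only (constant) temp if present,
 * otherwise a global, otherwise a TB-lifetime temp, otherwise the
 * original temp.
 */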
static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
{
    TCGTemp *i;

    if (ts1 == ts2) {
        return true;
    }

    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
        return false;
    }

    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
        if (i == ts2) {
            return true;
        }
    }

    return false;
}

static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}

static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    TempOptInfo *di;
    TempOptInfo *si;
    TCGOpcode new_op;

    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(ctx->tcg, op);
        return true;
    }

    reset_ts(dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);

    switch (ctx->type) {
    case TCG_TYPE_I32:
        new_op = INDEX_op_mov_i32;
        break;
    case TCG_TYPE_I64:
        new_op = INDEX_op_mov_i64;
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
        new_op = INDEX_op_mov_vec;
        break;
    default:
        g_assert_not_reached();
    }
    op->opc = new_op;
    op->args[0] = dst;
    op->args[1] = src;

    di->z_mask = si->z_mask;
    di->s_mask = si->s_mask;

    if (src_ts->type == dst_ts->type) {
        TempOptInfo *ni = ts_info(si->next_copy);

        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;
    }
    return true;
}

static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
                             TCGArg dst, uint64_t val)
{
    TCGTemp *tv;

    if (ctx->type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    /* Convert movi to mov with constant temp.
*/ 292 tv = tcg_constant_internal(ctx->type, val); 293 init_ts_info(ctx, tv); 294 return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv)); 295 } 296 297 static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) 298 { 299 uint64_t l64, h64; 300 301 switch (op) { 302 CASE_OP_32_64(add): 303 return x + y; 304 305 CASE_OP_32_64(sub): 306 return x - y; 307 308 CASE_OP_32_64(mul): 309 return x * y; 310 311 CASE_OP_32_64_VEC(and): 312 return x & y; 313 314 CASE_OP_32_64_VEC(or): 315 return x | y; 316 317 CASE_OP_32_64_VEC(xor): 318 return x ^ y; 319 320 case INDEX_op_shl_i32: 321 return (uint32_t)x << (y & 31); 322 323 case INDEX_op_shl_i64: 324 return (uint64_t)x << (y & 63); 325 326 case INDEX_op_shr_i32: 327 return (uint32_t)x >> (y & 31); 328 329 case INDEX_op_shr_i64: 330 return (uint64_t)x >> (y & 63); 331 332 case INDEX_op_sar_i32: 333 return (int32_t)x >> (y & 31); 334 335 case INDEX_op_sar_i64: 336 return (int64_t)x >> (y & 63); 337 338 case INDEX_op_rotr_i32: 339 return ror32(x, y & 31); 340 341 case INDEX_op_rotr_i64: 342 return ror64(x, y & 63); 343 344 case INDEX_op_rotl_i32: 345 return rol32(x, y & 31); 346 347 case INDEX_op_rotl_i64: 348 return rol64(x, y & 63); 349 350 CASE_OP_32_64_VEC(not): 351 return ~x; 352 353 CASE_OP_32_64(neg): 354 return -x; 355 356 CASE_OP_32_64_VEC(andc): 357 return x & ~y; 358 359 CASE_OP_32_64_VEC(orc): 360 return x | ~y; 361 362 CASE_OP_32_64_VEC(eqv): 363 return ~(x ^ y); 364 365 CASE_OP_32_64_VEC(nand): 366 return ~(x & y); 367 368 CASE_OP_32_64_VEC(nor): 369 return ~(x | y); 370 371 case INDEX_op_clz_i32: 372 return (uint32_t)x ? clz32(x) : y; 373 374 case INDEX_op_clz_i64: 375 return x ? clz64(x) : y; 376 377 case INDEX_op_ctz_i32: 378 return (uint32_t)x ? ctz32(x) : y; 379 380 case INDEX_op_ctz_i64: 381 return x ? ctz64(x) : y; 382 383 case INDEX_op_ctpop_i32: 384 return ctpop32(x); 385 386 case INDEX_op_ctpop_i64: 387 return ctpop64(x); 388 389 CASE_OP_32_64(ext8s): 390 return (int8_t)x; 391 392 CASE_OP_32_64(ext16s): 393 return (int16_t)x; 394 395 CASE_OP_32_64(ext8u): 396 return (uint8_t)x; 397 398 CASE_OP_32_64(ext16u): 399 return (uint16_t)x; 400 401 CASE_OP_32_64(bswap16): 402 x = bswap16(x); 403 return y & TCG_BSWAP_OS ? (int16_t)x : x; 404 405 CASE_OP_32_64(bswap32): 406 x = bswap32(x); 407 return y & TCG_BSWAP_OS ? (int32_t)x : x; 408 409 case INDEX_op_bswap64_i64: 410 return bswap64(x); 411 412 case INDEX_op_ext_i32_i64: 413 case INDEX_op_ext32s_i64: 414 return (int32_t)x; 415 416 case INDEX_op_extu_i32_i64: 417 case INDEX_op_extrl_i64_i32: 418 case INDEX_op_ext32u_i64: 419 return (uint32_t)x; 420 421 case INDEX_op_extrh_i64_i32: 422 return (uint64_t)x >> 32; 423 424 case INDEX_op_muluh_i32: 425 return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; 426 case INDEX_op_mulsh_i32: 427 return ((int64_t)(int32_t)x * (int32_t)y) >> 32; 428 429 case INDEX_op_muluh_i64: 430 mulu64(&l64, &h64, x, y); 431 return h64; 432 case INDEX_op_mulsh_i64: 433 muls64(&l64, &h64, x, y); 434 return h64; 435 436 case INDEX_op_div_i32: 437 /* Avoid crashing on divide by zero, otherwise undefined. */ 438 return (int32_t)x / ((int32_t)y ? : 1); 439 case INDEX_op_divu_i32: 440 return (uint32_t)x / ((uint32_t)y ? : 1); 441 case INDEX_op_div_i64: 442 return (int64_t)x / ((int64_t)y ? : 1); 443 case INDEX_op_divu_i64: 444 return (uint64_t)x / ((uint64_t)y ? : 1); 445 446 case INDEX_op_rem_i32: 447 return (int32_t)x % ((int32_t)y ? : 1); 448 case INDEX_op_remu_i32: 449 return (uint32_t)x % ((uint32_t)y ? 
: 1); 450 case INDEX_op_rem_i64: 451 return (int64_t)x % ((int64_t)y ? : 1); 452 case INDEX_op_remu_i64: 453 return (uint64_t)x % ((uint64_t)y ? : 1); 454 455 default: 456 g_assert_not_reached(); 457 } 458 } 459 460 static uint64_t do_constant_folding(TCGOpcode op, TCGType type, 461 uint64_t x, uint64_t y) 462 { 463 uint64_t res = do_constant_folding_2(op, x, y); 464 if (type == TCG_TYPE_I32) { 465 res = (int32_t)res; 466 } 467 return res; 468 } 469 470 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) 471 { 472 switch (c) { 473 case TCG_COND_EQ: 474 return x == y; 475 case TCG_COND_NE: 476 return x != y; 477 case TCG_COND_LT: 478 return (int32_t)x < (int32_t)y; 479 case TCG_COND_GE: 480 return (int32_t)x >= (int32_t)y; 481 case TCG_COND_LE: 482 return (int32_t)x <= (int32_t)y; 483 case TCG_COND_GT: 484 return (int32_t)x > (int32_t)y; 485 case TCG_COND_LTU: 486 return x < y; 487 case TCG_COND_GEU: 488 return x >= y; 489 case TCG_COND_LEU: 490 return x <= y; 491 case TCG_COND_GTU: 492 return x > y; 493 default: 494 g_assert_not_reached(); 495 } 496 } 497 498 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) 499 { 500 switch (c) { 501 case TCG_COND_EQ: 502 return x == y; 503 case TCG_COND_NE: 504 return x != y; 505 case TCG_COND_LT: 506 return (int64_t)x < (int64_t)y; 507 case TCG_COND_GE: 508 return (int64_t)x >= (int64_t)y; 509 case TCG_COND_LE: 510 return (int64_t)x <= (int64_t)y; 511 case TCG_COND_GT: 512 return (int64_t)x > (int64_t)y; 513 case TCG_COND_LTU: 514 return x < y; 515 case TCG_COND_GEU: 516 return x >= y; 517 case TCG_COND_LEU: 518 return x <= y; 519 case TCG_COND_GTU: 520 return x > y; 521 default: 522 g_assert_not_reached(); 523 } 524 } 525 526 static bool do_constant_folding_cond_eq(TCGCond c) 527 { 528 switch (c) { 529 case TCG_COND_GT: 530 case TCG_COND_LTU: 531 case TCG_COND_LT: 532 case TCG_COND_GTU: 533 case TCG_COND_NE: 534 return 0; 535 case TCG_COND_GE: 536 case TCG_COND_GEU: 537 case TCG_COND_LE: 538 case TCG_COND_LEU: 539 case TCG_COND_EQ: 540 return 1; 541 default: 542 g_assert_not_reached(); 543 } 544 } 545 546 /* 547 * Return -1 if the condition can't be simplified, 548 * and the result of the condition (0 or 1) if it can. 549 */ 550 static int do_constant_folding_cond(TCGType type, TCGArg x, 551 TCGArg y, TCGCond c) 552 { 553 if (arg_is_const(x) && arg_is_const(y)) { 554 uint64_t xv = arg_info(x)->val; 555 uint64_t yv = arg_info(y)->val; 556 557 switch (type) { 558 case TCG_TYPE_I32: 559 return do_constant_folding_cond_32(xv, yv, c); 560 case TCG_TYPE_I64: 561 return do_constant_folding_cond_64(xv, yv, c); 562 default: 563 /* Only scalar comparisons are optimizable */ 564 return -1; 565 } 566 } else if (args_are_copies(x, y)) { 567 return do_constant_folding_cond_eq(c); 568 } else if (arg_is_const(y) && arg_info(y)->val == 0) { 569 switch (c) { 570 case TCG_COND_LTU: 571 return 0; 572 case TCG_COND_GEU: 573 return 1; 574 default: 575 return -1; 576 } 577 } 578 return -1; 579 } 580 581 /* 582 * Return -1 if the condition can't be simplified, 583 * and the result of the condition (0 or 1) if it can. 
 */
static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_info(bl)->val;
        tcg_target_ulong bhv = arg_info(bh)->val;
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_info(al)->val;
            tcg_target_ulong ahv = arg_info(ah)->val;
            uint64_t a = deposit64(alv, 32, 32, ahv);
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return -1;
}

/**
 * swap_commutative:
 * @dest: TCGArg of the destination argument, or NO_DEST.
 * @p1: first paired argument
 * @p2: second paired argument
 *
 * If *@p1 is a constant and *@p2 is not, swap.
 * If *@p2 matches @dest, swap.
 * Return true if a swap was performed.
 */

#define NO_DEST  temp_arg(NULL)

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += arg_is_const(a1);
    sum -= arg_is_const(a2);

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += arg_is_const(p1[0]);
    sum += arg_is_const(p1[1]);
    sum -= arg_is_const(p2[0]);
    sum -= arg_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
{
    for (int i = 0; i < nb_args; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        init_ts_info(ctx, ts);
    }
}

static void copy_propagate(OptContext *ctx, TCGOp *op,
                           int nb_oargs, int nb_iargs)
{
    TCGContext *s = ctx->tcg;

    for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (ts_is_copy(ts)) {
            op->args[i] = temp_arg(find_better_copy(s, ts));
        }
    }
}

static void finish_folding(OptContext *ctx, TCGOp *op)
{
    const TCGOpDef *def = &tcg_op_defs[op->opc];
    int i, nb_oargs;

    /*
     * For an opcode that ends a BB, reset all temp data.
     * We do no cross-BB optimization.
     */
    if (def->flags & TCG_OPF_BB_END) {
        memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
        ctx->prev_mb = NULL;
        return;
    }

    nb_oargs = def->nb_oargs;
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        reset_ts(ts);
        /*
         * Save the corresponding known-zero/sign bits mask for the
         * first output argument (only one supported so far).
         */
        if (i == 0) {
            ts_info(ts)->z_mask = ctx->z_mask;
            ts_info(ts)->s_mask = ctx->s_mask;
        }
    }
}
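/*
 * As an illustration of the canonicalization above: "add_i32 t0, $5, t1"
 * is rewritten as "add_i32 t0, t1, $5" (constant moved to the second
 * operand), and "and_i32 t0, t1, t0" becomes "and_i32 t0, t0, t1" so that
 * the destination matches the first source, which two-operand hosts can
 * encode directly.
 */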
/*
 * The fold_* functions return true when processing is complete,
 * usually by folding the operation to a constant or to a copy,
 * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
 * like collect information about the value produced, for use in
 * optimizing a subsequent operation.
 *
 * These first fold_* functions are all helpers, used by other
 * folders for more specific operations.
 */

static bool fold_const1(OptContext *ctx, TCGOp *op)
{
    if (arg_is_const(op->args[1])) {
        uint64_t t;

        t = arg_info(op->args[1])->val;
        t = do_constant_folding(op->opc, ctx->type, t, 0);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
    }
    return false;
}

static bool fold_const2(OptContext *ctx, TCGOp *op)
{
    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
        uint64_t t1 = arg_info(op->args[1])->val;
        uint64_t t2 = arg_info(op->args[2])->val;

        t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
    }
    return false;
}

static bool fold_commutative(OptContext *ctx, TCGOp *op)
{
    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
    return false;
}

static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
{
    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
    return fold_const2(ctx, op);
}

static bool fold_masks(OptContext *ctx, TCGOp *op)
{
    uint64_t a_mask = ctx->a_mask;
    uint64_t z_mask = ctx->z_mask;
    uint64_t s_mask = ctx->s_mask;

    /*
     * 32-bit ops generate 32-bit results, which for the purpose of
     * simplifying tcg are sign-extended.  Certainly that's how we
     * represent our constants elsewhere.  Note that the bits will
     * be reset properly for a 64-bit value when encountering the
     * type changing opcodes.
     */
    if (ctx->type == TCG_TYPE_I32) {
        a_mask = (int32_t)a_mask;
        z_mask = (int32_t)z_mask;
        s_mask |= MAKE_64BIT_MASK(32, 32);
        ctx->z_mask = z_mask;
        ctx->s_mask = s_mask;
    }

    if (z_mask == 0) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
    }
    if (a_mask == 0) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }
    return false;
}

/*
 * Convert @op to NOT, if NOT is supported by the host.
 * Return true if the conversion is successful, which will still
 * indicate that the processing is complete.
 */
static bool fold_not(OptContext *ctx, TCGOp *op);
static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
{
    TCGOpcode not_op;
    bool have_not;

    switch (ctx->type) {
    case TCG_TYPE_I32:
        not_op = INDEX_op_not_i32;
        have_not = TCG_TARGET_HAS_not_i32;
        break;
    case TCG_TYPE_I64:
        not_op = INDEX_op_not_i64;
        have_not = TCG_TARGET_HAS_not_i64;
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        not_op = INDEX_op_not_vec;
        have_not = TCG_TARGET_HAS_not_vec;
        break;
    default:
        g_assert_not_reached();
    }
    if (have_not) {
        op->opc = not_op;
        op->args[1] = op->args[idx];
        return fold_not(ctx, op);
    }
    return false;
}

/* If the binary operation has first argument @i, fold to @i. */
static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has first argument @i, fold to NOT.
*/ 839 static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i) 840 { 841 if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) { 842 return fold_to_not(ctx, op, 2); 843 } 844 return false; 845 } 846 847 /* If the binary operation has second argument @i, fold to @i. */ 848 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i) 849 { 850 if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) { 851 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 852 } 853 return false; 854 } 855 856 /* If the binary operation has second argument @i, fold to identity. */ 857 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i) 858 { 859 if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) { 860 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 861 } 862 return false; 863 } 864 865 /* If the binary operation has second argument @i, fold to NOT. */ 866 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i) 867 { 868 if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) { 869 return fold_to_not(ctx, op, 1); 870 } 871 return false; 872 } 873 874 /* If the binary operation has both arguments equal, fold to @i. */ 875 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i) 876 { 877 if (args_are_copies(op->args[1], op->args[2])) { 878 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 879 } 880 return false; 881 } 882 883 /* If the binary operation has both arguments equal, fold to identity. */ 884 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op) 885 { 886 if (args_are_copies(op->args[1], op->args[2])) { 887 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 888 } 889 return false; 890 } 891 892 /* 893 * These outermost fold_<op> functions are sorted alphabetically. 894 * 895 * The ordering of the transformations should be: 896 * 1) those that produce a constant 897 * 2) those that produce a copy 898 * 3) those that produce information about the result value. 899 */ 900 901 static bool fold_add(OptContext *ctx, TCGOp *op) 902 { 903 if (fold_const2_commutative(ctx, op) || 904 fold_xi_to_x(ctx, op, 0)) { 905 return true; 906 } 907 return false; 908 } 909 910 /* We cannot as yet do_constant_folding with vectors. */ 911 static bool fold_add_vec(OptContext *ctx, TCGOp *op) 912 { 913 if (fold_commutative(ctx, op) || 914 fold_xi_to_x(ctx, op, 0)) { 915 return true; 916 } 917 return false; 918 } 919 920 static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add) 921 { 922 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) && 923 arg_is_const(op->args[4]) && arg_is_const(op->args[5])) { 924 uint64_t al = arg_info(op->args[2])->val; 925 uint64_t ah = arg_info(op->args[3])->val; 926 uint64_t bl = arg_info(op->args[4])->val; 927 uint64_t bh = arg_info(op->args[5])->val; 928 TCGArg rl, rh; 929 TCGOp *op2; 930 931 if (ctx->type == TCG_TYPE_I32) { 932 uint64_t a = deposit64(al, 32, 32, ah); 933 uint64_t b = deposit64(bl, 32, 32, bh); 934 935 if (add) { 936 a += b; 937 } else { 938 a -= b; 939 } 940 941 al = sextract64(a, 0, 32); 942 ah = sextract64(a, 32, 32); 943 } else { 944 Int128 a = int128_make128(al, ah); 945 Int128 b = int128_make128(bl, bh); 946 947 if (add) { 948 a = int128_add(a, b); 949 } else { 950 a = int128_sub(a, b); 951 } 952 953 al = int128_getlo(a); 954 ah = int128_gethi(a); 955 } 956 957 rl = op->args[0]; 958 rh = op->args[1]; 959 960 /* The proper opcode is supplied by tcg_opt_gen_mov. 
*/ 961 op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); 962 963 tcg_opt_gen_movi(ctx, op, rl, al); 964 tcg_opt_gen_movi(ctx, op2, rh, ah); 965 return true; 966 } 967 return false; 968 } 969 970 static bool fold_add2(OptContext *ctx, TCGOp *op) 971 { 972 /* Note that the high and low parts may be independently swapped. */ 973 swap_commutative(op->args[0], &op->args[2], &op->args[4]); 974 swap_commutative(op->args[1], &op->args[3], &op->args[5]); 975 976 return fold_addsub2(ctx, op, true); 977 } 978 979 static bool fold_and(OptContext *ctx, TCGOp *op) 980 { 981 uint64_t z1, z2; 982 983 if (fold_const2_commutative(ctx, op) || 984 fold_xi_to_i(ctx, op, 0) || 985 fold_xi_to_x(ctx, op, -1) || 986 fold_xx_to_x(ctx, op)) { 987 return true; 988 } 989 990 z1 = arg_info(op->args[1])->z_mask; 991 z2 = arg_info(op->args[2])->z_mask; 992 ctx->z_mask = z1 & z2; 993 994 /* 995 * Sign repetitions are perforce all identical, whether they are 1 or 0. 996 * Bitwise operations preserve the relative quantity of the repetitions. 997 */ 998 ctx->s_mask = arg_info(op->args[1])->s_mask 999 & arg_info(op->args[2])->s_mask; 1000 1001 /* 1002 * Known-zeros does not imply known-ones. Therefore unless 1003 * arg2 is constant, we can't infer affected bits from it. 1004 */ 1005 if (arg_is_const(op->args[2])) { 1006 ctx->a_mask = z1 & ~z2; 1007 } 1008 1009 return fold_masks(ctx, op); 1010 } 1011 1012 static bool fold_andc(OptContext *ctx, TCGOp *op) 1013 { 1014 uint64_t z1; 1015 1016 if (fold_const2(ctx, op) || 1017 fold_xx_to_i(ctx, op, 0) || 1018 fold_xi_to_x(ctx, op, 0) || 1019 fold_ix_to_not(ctx, op, -1)) { 1020 return true; 1021 } 1022 1023 z1 = arg_info(op->args[1])->z_mask; 1024 1025 /* 1026 * Known-zeros does not imply known-ones. Therefore unless 1027 * arg2 is constant, we can't infer anything from it. 1028 */ 1029 if (arg_is_const(op->args[2])) { 1030 uint64_t z2 = ~arg_info(op->args[2])->z_mask; 1031 ctx->a_mask = z1 & ~z2; 1032 z1 &= z2; 1033 } 1034 ctx->z_mask = z1; 1035 1036 ctx->s_mask = arg_info(op->args[1])->s_mask 1037 & arg_info(op->args[2])->s_mask; 1038 return fold_masks(ctx, op); 1039 } 1040 1041 static bool fold_brcond(OptContext *ctx, TCGOp *op) 1042 { 1043 TCGCond cond = op->args[2]; 1044 int i; 1045 1046 if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) { 1047 op->args[2] = cond = tcg_swap_cond(cond); 1048 } 1049 1050 i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond); 1051 if (i == 0) { 1052 tcg_op_remove(ctx->tcg, op); 1053 return true; 1054 } 1055 if (i > 0) { 1056 op->opc = INDEX_op_br; 1057 op->args[0] = op->args[3]; 1058 } 1059 return false; 1060 } 1061 1062 static bool fold_brcond2(OptContext *ctx, TCGOp *op) 1063 { 1064 TCGCond cond = op->args[4]; 1065 TCGArg label = op->args[5]; 1066 int i, inv = 0; 1067 1068 if (swap_commutative2(&op->args[0], &op->args[2])) { 1069 op->args[4] = cond = tcg_swap_cond(cond); 1070 } 1071 1072 i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond); 1073 if (i >= 0) { 1074 goto do_brcond_const; 1075 } 1076 1077 switch (cond) { 1078 case TCG_COND_LT: 1079 case TCG_COND_GE: 1080 /* 1081 * Simplify LT/GE comparisons vs zero to a single compare 1082 * vs the high word of the input. 
1083 */ 1084 if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 && 1085 arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) { 1086 goto do_brcond_high; 1087 } 1088 break; 1089 1090 case TCG_COND_NE: 1091 inv = 1; 1092 QEMU_FALLTHROUGH; 1093 case TCG_COND_EQ: 1094 /* 1095 * Simplify EQ/NE comparisons where one of the pairs 1096 * can be simplified. 1097 */ 1098 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0], 1099 op->args[2], cond); 1100 switch (i ^ inv) { 1101 case 0: 1102 goto do_brcond_const; 1103 case 1: 1104 goto do_brcond_high; 1105 } 1106 1107 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1], 1108 op->args[3], cond); 1109 switch (i ^ inv) { 1110 case 0: 1111 goto do_brcond_const; 1112 case 1: 1113 op->opc = INDEX_op_brcond_i32; 1114 op->args[1] = op->args[2]; 1115 op->args[2] = cond; 1116 op->args[3] = label; 1117 break; 1118 } 1119 break; 1120 1121 default: 1122 break; 1123 1124 do_brcond_high: 1125 op->opc = INDEX_op_brcond_i32; 1126 op->args[0] = op->args[1]; 1127 op->args[1] = op->args[3]; 1128 op->args[2] = cond; 1129 op->args[3] = label; 1130 break; 1131 1132 do_brcond_const: 1133 if (i == 0) { 1134 tcg_op_remove(ctx->tcg, op); 1135 return true; 1136 } 1137 op->opc = INDEX_op_br; 1138 op->args[0] = label; 1139 break; 1140 } 1141 return false; 1142 } 1143 1144 static bool fold_bswap(OptContext *ctx, TCGOp *op) 1145 { 1146 uint64_t z_mask, s_mask, sign; 1147 1148 if (arg_is_const(op->args[1])) { 1149 uint64_t t = arg_info(op->args[1])->val; 1150 1151 t = do_constant_folding(op->opc, ctx->type, t, op->args[2]); 1152 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1153 } 1154 1155 z_mask = arg_info(op->args[1])->z_mask; 1156 1157 switch (op->opc) { 1158 case INDEX_op_bswap16_i32: 1159 case INDEX_op_bswap16_i64: 1160 z_mask = bswap16(z_mask); 1161 sign = INT16_MIN; 1162 break; 1163 case INDEX_op_bswap32_i32: 1164 case INDEX_op_bswap32_i64: 1165 z_mask = bswap32(z_mask); 1166 sign = INT32_MIN; 1167 break; 1168 case INDEX_op_bswap64_i64: 1169 z_mask = bswap64(z_mask); 1170 sign = INT64_MIN; 1171 break; 1172 default: 1173 g_assert_not_reached(); 1174 } 1175 s_mask = smask_from_zmask(z_mask); 1176 1177 switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) { 1178 case TCG_BSWAP_OZ: 1179 break; 1180 case TCG_BSWAP_OS: 1181 /* If the sign bit may be 1, force all the bits above to 1. */ 1182 if (z_mask & sign) { 1183 z_mask |= sign; 1184 s_mask = sign << 1; 1185 } 1186 break; 1187 default: 1188 /* The high bits are undefined: force all bits above the sign to 1. */ 1189 z_mask |= sign << 1; 1190 s_mask = 0; 1191 break; 1192 } 1193 ctx->z_mask = z_mask; 1194 ctx->s_mask = s_mask; 1195 1196 return fold_masks(ctx, op); 1197 } 1198 1199 static bool fold_call(OptContext *ctx, TCGOp *op) 1200 { 1201 TCGContext *s = ctx->tcg; 1202 int nb_oargs = TCGOP_CALLO(op); 1203 int nb_iargs = TCGOP_CALLI(op); 1204 int flags, i; 1205 1206 init_arguments(ctx, op, nb_oargs + nb_iargs); 1207 copy_propagate(ctx, op, nb_oargs, nb_iargs); 1208 1209 /* If the function reads or writes globals, reset temp data. */ 1210 flags = tcg_call_flags(op); 1211 if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) { 1212 int nb_globals = s->nb_globals; 1213 1214 for (i = 0; i < nb_globals; i++) { 1215 if (test_bit(i, ctx->temps_used.l)) { 1216 reset_ts(&ctx->tcg->temps[i]); 1217 } 1218 } 1219 } 1220 1221 /* Reset temp data for outputs. */ 1222 for (i = 0; i < nb_oargs; i++) { 1223 reset_temp(op->args[i]); 1224 } 1225 1226 /* Stop optimizing MB across calls. 
*/ 1227 ctx->prev_mb = NULL; 1228 return true; 1229 } 1230 1231 static bool fold_count_zeros(OptContext *ctx, TCGOp *op) 1232 { 1233 uint64_t z_mask; 1234 1235 if (arg_is_const(op->args[1])) { 1236 uint64_t t = arg_info(op->args[1])->val; 1237 1238 if (t != 0) { 1239 t = do_constant_folding(op->opc, ctx->type, t, 0); 1240 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1241 } 1242 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]); 1243 } 1244 1245 switch (ctx->type) { 1246 case TCG_TYPE_I32: 1247 z_mask = 31; 1248 break; 1249 case TCG_TYPE_I64: 1250 z_mask = 63; 1251 break; 1252 default: 1253 g_assert_not_reached(); 1254 } 1255 ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask; 1256 ctx->s_mask = smask_from_zmask(ctx->z_mask); 1257 return false; 1258 } 1259 1260 static bool fold_ctpop(OptContext *ctx, TCGOp *op) 1261 { 1262 if (fold_const1(ctx, op)) { 1263 return true; 1264 } 1265 1266 switch (ctx->type) { 1267 case TCG_TYPE_I32: 1268 ctx->z_mask = 32 | 31; 1269 break; 1270 case TCG_TYPE_I64: 1271 ctx->z_mask = 64 | 63; 1272 break; 1273 default: 1274 g_assert_not_reached(); 1275 } 1276 ctx->s_mask = smask_from_zmask(ctx->z_mask); 1277 return false; 1278 } 1279 1280 static bool fold_deposit(OptContext *ctx, TCGOp *op) 1281 { 1282 TCGOpcode and_opc; 1283 1284 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1285 uint64_t t1 = arg_info(op->args[1])->val; 1286 uint64_t t2 = arg_info(op->args[2])->val; 1287 1288 t1 = deposit64(t1, op->args[3], op->args[4], t2); 1289 return tcg_opt_gen_movi(ctx, op, op->args[0], t1); 1290 } 1291 1292 switch (ctx->type) { 1293 case TCG_TYPE_I32: 1294 and_opc = INDEX_op_and_i32; 1295 break; 1296 case TCG_TYPE_I64: 1297 and_opc = INDEX_op_and_i64; 1298 break; 1299 default: 1300 g_assert_not_reached(); 1301 } 1302 1303 /* Inserting a value into zero at offset 0. */ 1304 if (arg_is_const(op->args[1]) 1305 && arg_info(op->args[1])->val == 0 1306 && op->args[3] == 0) { 1307 uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]); 1308 1309 op->opc = and_opc; 1310 op->args[1] = op->args[2]; 1311 op->args[2] = temp_arg(tcg_constant_internal(ctx->type, mask)); 1312 ctx->z_mask = mask & arg_info(op->args[1])->z_mask; 1313 return false; 1314 } 1315 1316 /* Inserting zero into a value. 
*/ 1317 if (arg_is_const(op->args[2]) 1318 && arg_info(op->args[2])->val == 0) { 1319 uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0); 1320 1321 op->opc = and_opc; 1322 op->args[2] = temp_arg(tcg_constant_internal(ctx->type, mask)); 1323 ctx->z_mask = mask & arg_info(op->args[1])->z_mask; 1324 return false; 1325 } 1326 1327 ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask, 1328 op->args[3], op->args[4], 1329 arg_info(op->args[2])->z_mask); 1330 return false; 1331 } 1332 1333 static bool fold_divide(OptContext *ctx, TCGOp *op) 1334 { 1335 if (fold_const2(ctx, op) || 1336 fold_xi_to_x(ctx, op, 1)) { 1337 return true; 1338 } 1339 return false; 1340 } 1341 1342 static bool fold_dup(OptContext *ctx, TCGOp *op) 1343 { 1344 if (arg_is_const(op->args[1])) { 1345 uint64_t t = arg_info(op->args[1])->val; 1346 t = dup_const(TCGOP_VECE(op), t); 1347 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1348 } 1349 return false; 1350 } 1351 1352 static bool fold_dup2(OptContext *ctx, TCGOp *op) 1353 { 1354 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1355 uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32, 1356 arg_info(op->args[2])->val); 1357 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1358 } 1359 1360 if (args_are_copies(op->args[1], op->args[2])) { 1361 op->opc = INDEX_op_dup_vec; 1362 TCGOP_VECE(op) = MO_32; 1363 } 1364 return false; 1365 } 1366 1367 static bool fold_eqv(OptContext *ctx, TCGOp *op) 1368 { 1369 if (fold_const2_commutative(ctx, op) || 1370 fold_xi_to_x(ctx, op, -1) || 1371 fold_xi_to_not(ctx, op, 0)) { 1372 return true; 1373 } 1374 1375 ctx->s_mask = arg_info(op->args[1])->s_mask 1376 & arg_info(op->args[2])->s_mask; 1377 return false; 1378 } 1379 1380 static bool fold_extract(OptContext *ctx, TCGOp *op) 1381 { 1382 uint64_t z_mask_old, z_mask; 1383 int pos = op->args[2]; 1384 int len = op->args[3]; 1385 1386 if (arg_is_const(op->args[1])) { 1387 uint64_t t; 1388 1389 t = arg_info(op->args[1])->val; 1390 t = extract64(t, pos, len); 1391 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1392 } 1393 1394 z_mask_old = arg_info(op->args[1])->z_mask; 1395 z_mask = extract64(z_mask_old, pos, len); 1396 if (pos == 0) { 1397 ctx->a_mask = z_mask_old ^ z_mask; 1398 } 1399 ctx->z_mask = z_mask; 1400 ctx->s_mask = smask_from_zmask(z_mask); 1401 1402 return fold_masks(ctx, op); 1403 } 1404 1405 static bool fold_extract2(OptContext *ctx, TCGOp *op) 1406 { 1407 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1408 uint64_t v1 = arg_info(op->args[1])->val; 1409 uint64_t v2 = arg_info(op->args[2])->val; 1410 int shr = op->args[3]; 1411 1412 if (op->opc == INDEX_op_extract2_i64) { 1413 v1 >>= shr; 1414 v2 <<= 64 - shr; 1415 } else { 1416 v1 = (uint32_t)v1 >> shr; 1417 v2 = (uint64_t)((int32_t)v2 << (32 - shr)); 1418 } 1419 return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2); 1420 } 1421 return false; 1422 } 1423 1424 static bool fold_exts(OptContext *ctx, TCGOp *op) 1425 { 1426 uint64_t s_mask_old, s_mask, z_mask, sign; 1427 bool type_change = false; 1428 1429 if (fold_const1(ctx, op)) { 1430 return true; 1431 } 1432 1433 z_mask = arg_info(op->args[1])->z_mask; 1434 s_mask = arg_info(op->args[1])->s_mask; 1435 s_mask_old = s_mask; 1436 1437 switch (op->opc) { 1438 CASE_OP_32_64(ext8s): 1439 sign = INT8_MIN; 1440 z_mask = (uint8_t)z_mask; 1441 break; 1442 CASE_OP_32_64(ext16s): 1443 sign = INT16_MIN; 1444 z_mask = (uint16_t)z_mask; 1445 break; 1446 case INDEX_op_ext_i32_i64: 1447 type_change = true; 1448 QEMU_FALLTHROUGH; 1449 case 
INDEX_op_ext32s_i64: 1450 sign = INT32_MIN; 1451 z_mask = (uint32_t)z_mask; 1452 break; 1453 default: 1454 g_assert_not_reached(); 1455 } 1456 1457 if (z_mask & sign) { 1458 z_mask |= sign; 1459 } 1460 s_mask |= sign << 1; 1461 1462 ctx->z_mask = z_mask; 1463 ctx->s_mask = s_mask; 1464 if (!type_change) { 1465 ctx->a_mask = s_mask & ~s_mask_old; 1466 } 1467 1468 return fold_masks(ctx, op); 1469 } 1470 1471 static bool fold_extu(OptContext *ctx, TCGOp *op) 1472 { 1473 uint64_t z_mask_old, z_mask; 1474 bool type_change = false; 1475 1476 if (fold_const1(ctx, op)) { 1477 return true; 1478 } 1479 1480 z_mask_old = z_mask = arg_info(op->args[1])->z_mask; 1481 1482 switch (op->opc) { 1483 CASE_OP_32_64(ext8u): 1484 z_mask = (uint8_t)z_mask; 1485 break; 1486 CASE_OP_32_64(ext16u): 1487 z_mask = (uint16_t)z_mask; 1488 break; 1489 case INDEX_op_extrl_i64_i32: 1490 case INDEX_op_extu_i32_i64: 1491 type_change = true; 1492 QEMU_FALLTHROUGH; 1493 case INDEX_op_ext32u_i64: 1494 z_mask = (uint32_t)z_mask; 1495 break; 1496 case INDEX_op_extrh_i64_i32: 1497 type_change = true; 1498 z_mask >>= 32; 1499 break; 1500 default: 1501 g_assert_not_reached(); 1502 } 1503 1504 ctx->z_mask = z_mask; 1505 ctx->s_mask = smask_from_zmask(z_mask); 1506 if (!type_change) { 1507 ctx->a_mask = z_mask_old ^ z_mask; 1508 } 1509 return fold_masks(ctx, op); 1510 } 1511 1512 static bool fold_mb(OptContext *ctx, TCGOp *op) 1513 { 1514 /* Eliminate duplicate and redundant fence instructions. */ 1515 if (ctx->prev_mb) { 1516 /* 1517 * Merge two barriers of the same type into one, 1518 * or a weaker barrier into a stronger one, 1519 * or two weaker barriers into a stronger one. 1520 * mb X; mb Y => mb X|Y 1521 * mb; strl => mb; st 1522 * ldaq; mb => ld; mb 1523 * ldaq; strl => ld; mb; st 1524 * Other combinations are also merged into a strong 1525 * barrier. This is stricter than specified but for 1526 * the purposes of TCG is better than not optimizing. 1527 */ 1528 ctx->prev_mb->args[0] |= op->args[0]; 1529 tcg_op_remove(ctx->tcg, op); 1530 } else { 1531 ctx->prev_mb = op; 1532 } 1533 return true; 1534 } 1535 1536 static bool fold_mov(OptContext *ctx, TCGOp *op) 1537 { 1538 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 1539 } 1540 1541 static bool fold_movcond(OptContext *ctx, TCGOp *op) 1542 { 1543 TCGCond cond = op->args[5]; 1544 int i; 1545 1546 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) { 1547 op->args[5] = cond = tcg_swap_cond(cond); 1548 } 1549 /* 1550 * Canonicalize the "false" input reg to match the destination reg so 1551 * that the tcg backend can implement a "move if true" operation. 
1552 */ 1553 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) { 1554 op->args[5] = cond = tcg_invert_cond(cond); 1555 } 1556 1557 i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond); 1558 if (i >= 0) { 1559 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]); 1560 } 1561 1562 ctx->z_mask = arg_info(op->args[3])->z_mask 1563 | arg_info(op->args[4])->z_mask; 1564 ctx->s_mask = arg_info(op->args[3])->s_mask 1565 & arg_info(op->args[4])->s_mask; 1566 1567 if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) { 1568 uint64_t tv = arg_info(op->args[3])->val; 1569 uint64_t fv = arg_info(op->args[4])->val; 1570 TCGOpcode opc, negopc = 0; 1571 1572 switch (ctx->type) { 1573 case TCG_TYPE_I32: 1574 opc = INDEX_op_setcond_i32; 1575 if (TCG_TARGET_HAS_negsetcond_i32) { 1576 negopc = INDEX_op_negsetcond_i32; 1577 } 1578 tv = (int32_t)tv; 1579 fv = (int32_t)fv; 1580 break; 1581 case TCG_TYPE_I64: 1582 opc = INDEX_op_setcond_i64; 1583 if (TCG_TARGET_HAS_negsetcond_i64) { 1584 negopc = INDEX_op_negsetcond_i64; 1585 } 1586 break; 1587 default: 1588 g_assert_not_reached(); 1589 } 1590 1591 if (tv == 1 && fv == 0) { 1592 op->opc = opc; 1593 op->args[3] = cond; 1594 } else if (fv == 1 && tv == 0) { 1595 op->opc = opc; 1596 op->args[3] = tcg_invert_cond(cond); 1597 } else if (negopc) { 1598 if (tv == -1 && fv == 0) { 1599 op->opc = negopc; 1600 op->args[3] = cond; 1601 } else if (fv == -1 && tv == 0) { 1602 op->opc = negopc; 1603 op->args[3] = tcg_invert_cond(cond); 1604 } 1605 } 1606 } 1607 return false; 1608 } 1609 1610 static bool fold_mul(OptContext *ctx, TCGOp *op) 1611 { 1612 if (fold_const2(ctx, op) || 1613 fold_xi_to_i(ctx, op, 0) || 1614 fold_xi_to_x(ctx, op, 1)) { 1615 return true; 1616 } 1617 return false; 1618 } 1619 1620 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op) 1621 { 1622 if (fold_const2_commutative(ctx, op) || 1623 fold_xi_to_i(ctx, op, 0)) { 1624 return true; 1625 } 1626 return false; 1627 } 1628 1629 static bool fold_multiply2(OptContext *ctx, TCGOp *op) 1630 { 1631 swap_commutative(op->args[0], &op->args[2], &op->args[3]); 1632 1633 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { 1634 uint64_t a = arg_info(op->args[2])->val; 1635 uint64_t b = arg_info(op->args[3])->val; 1636 uint64_t h, l; 1637 TCGArg rl, rh; 1638 TCGOp *op2; 1639 1640 switch (op->opc) { 1641 case INDEX_op_mulu2_i32: 1642 l = (uint64_t)(uint32_t)a * (uint32_t)b; 1643 h = (int32_t)(l >> 32); 1644 l = (int32_t)l; 1645 break; 1646 case INDEX_op_muls2_i32: 1647 l = (int64_t)(int32_t)a * (int32_t)b; 1648 h = l >> 32; 1649 l = (int32_t)l; 1650 break; 1651 case INDEX_op_mulu2_i64: 1652 mulu64(&l, &h, a, b); 1653 break; 1654 case INDEX_op_muls2_i64: 1655 muls64(&l, &h, a, b); 1656 break; 1657 default: 1658 g_assert_not_reached(); 1659 } 1660 1661 rl = op->args[0]; 1662 rh = op->args[1]; 1663 1664 /* The proper opcode is supplied by tcg_opt_gen_mov. 
*/ 1665 op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); 1666 1667 tcg_opt_gen_movi(ctx, op, rl, l); 1668 tcg_opt_gen_movi(ctx, op2, rh, h); 1669 return true; 1670 } 1671 return false; 1672 } 1673 1674 static bool fold_nand(OptContext *ctx, TCGOp *op) 1675 { 1676 if (fold_const2_commutative(ctx, op) || 1677 fold_xi_to_not(ctx, op, -1)) { 1678 return true; 1679 } 1680 1681 ctx->s_mask = arg_info(op->args[1])->s_mask 1682 & arg_info(op->args[2])->s_mask; 1683 return false; 1684 } 1685 1686 static bool fold_neg(OptContext *ctx, TCGOp *op) 1687 { 1688 uint64_t z_mask; 1689 1690 if (fold_const1(ctx, op)) { 1691 return true; 1692 } 1693 1694 /* Set to 1 all bits to the left of the rightmost. */ 1695 z_mask = arg_info(op->args[1])->z_mask; 1696 ctx->z_mask = -(z_mask & -z_mask); 1697 1698 /* 1699 * Because of fold_sub_to_neg, we want to always return true, 1700 * via finish_folding. 1701 */ 1702 finish_folding(ctx, op); 1703 return true; 1704 } 1705 1706 static bool fold_nor(OptContext *ctx, TCGOp *op) 1707 { 1708 if (fold_const2_commutative(ctx, op) || 1709 fold_xi_to_not(ctx, op, 0)) { 1710 return true; 1711 } 1712 1713 ctx->s_mask = arg_info(op->args[1])->s_mask 1714 & arg_info(op->args[2])->s_mask; 1715 return false; 1716 } 1717 1718 static bool fold_not(OptContext *ctx, TCGOp *op) 1719 { 1720 if (fold_const1(ctx, op)) { 1721 return true; 1722 } 1723 1724 ctx->s_mask = arg_info(op->args[1])->s_mask; 1725 1726 /* Because of fold_to_not, we want to always return true, via finish. */ 1727 finish_folding(ctx, op); 1728 return true; 1729 } 1730 1731 static bool fold_or(OptContext *ctx, TCGOp *op) 1732 { 1733 if (fold_const2_commutative(ctx, op) || 1734 fold_xi_to_x(ctx, op, 0) || 1735 fold_xx_to_x(ctx, op)) { 1736 return true; 1737 } 1738 1739 ctx->z_mask = arg_info(op->args[1])->z_mask 1740 | arg_info(op->args[2])->z_mask; 1741 ctx->s_mask = arg_info(op->args[1])->s_mask 1742 & arg_info(op->args[2])->s_mask; 1743 return fold_masks(ctx, op); 1744 } 1745 1746 static bool fold_orc(OptContext *ctx, TCGOp *op) 1747 { 1748 if (fold_const2(ctx, op) || 1749 fold_xx_to_i(ctx, op, -1) || 1750 fold_xi_to_x(ctx, op, -1) || 1751 fold_ix_to_not(ctx, op, 0)) { 1752 return true; 1753 } 1754 1755 ctx->s_mask = arg_info(op->args[1])->s_mask 1756 & arg_info(op->args[2])->s_mask; 1757 return false; 1758 } 1759 1760 static bool fold_qemu_ld(OptContext *ctx, TCGOp *op) 1761 { 1762 const TCGOpDef *def = &tcg_op_defs[op->opc]; 1763 MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs]; 1764 MemOp mop = get_memop(oi); 1765 int width = 8 * memop_size(mop); 1766 1767 if (width < 64) { 1768 ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width); 1769 if (!(mop & MO_SIGN)) { 1770 ctx->z_mask = MAKE_64BIT_MASK(0, width); 1771 ctx->s_mask <<= 1; 1772 } 1773 } 1774 1775 /* Opcodes that touch guest memory stop the mb optimization. */ 1776 ctx->prev_mb = NULL; 1777 return false; 1778 } 1779 1780 static bool fold_qemu_st(OptContext *ctx, TCGOp *op) 1781 { 1782 /* Opcodes that touch guest memory stop the mb optimization. 
*/ 1783 ctx->prev_mb = NULL; 1784 return false; 1785 } 1786 1787 static bool fold_remainder(OptContext *ctx, TCGOp *op) 1788 { 1789 if (fold_const2(ctx, op) || 1790 fold_xx_to_i(ctx, op, 0)) { 1791 return true; 1792 } 1793 return false; 1794 } 1795 1796 static bool fold_setcond(OptContext *ctx, TCGOp *op) 1797 { 1798 TCGCond cond = op->args[3]; 1799 int i; 1800 1801 if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) { 1802 op->args[3] = cond = tcg_swap_cond(cond); 1803 } 1804 1805 i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond); 1806 if (i >= 0) { 1807 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 1808 } 1809 1810 ctx->z_mask = 1; 1811 ctx->s_mask = smask_from_zmask(1); 1812 return false; 1813 } 1814 1815 static bool fold_negsetcond(OptContext *ctx, TCGOp *op) 1816 { 1817 TCGCond cond = op->args[3]; 1818 int i; 1819 1820 if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) { 1821 op->args[3] = cond = tcg_swap_cond(cond); 1822 } 1823 1824 i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond); 1825 if (i >= 0) { 1826 return tcg_opt_gen_movi(ctx, op, op->args[0], -i); 1827 } 1828 1829 /* Value is {0,-1} so all bits are repetitions of the sign. */ 1830 ctx->s_mask = -1; 1831 return false; 1832 } 1833 1834 1835 static bool fold_setcond2(OptContext *ctx, TCGOp *op) 1836 { 1837 TCGCond cond = op->args[5]; 1838 int i, inv = 0; 1839 1840 if (swap_commutative2(&op->args[1], &op->args[3])) { 1841 op->args[5] = cond = tcg_swap_cond(cond); 1842 } 1843 1844 i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond); 1845 if (i >= 0) { 1846 goto do_setcond_const; 1847 } 1848 1849 switch (cond) { 1850 case TCG_COND_LT: 1851 case TCG_COND_GE: 1852 /* 1853 * Simplify LT/GE comparisons vs zero to a single compare 1854 * vs the high word of the input. 1855 */ 1856 if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 && 1857 arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) { 1858 goto do_setcond_high; 1859 } 1860 break; 1861 1862 case TCG_COND_NE: 1863 inv = 1; 1864 QEMU_FALLTHROUGH; 1865 case TCG_COND_EQ: 1866 /* 1867 * Simplify EQ/NE comparisons where one of the pairs 1868 * can be simplified. 
         */
        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
                                     op->args[3], cond);
        switch (i ^ inv) {
        case 0:
            goto do_setcond_const;
        case 1:
            goto do_setcond_high;
        }

        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
                                     op->args[4], cond);
        switch (i ^ inv) {
        case 0:
            goto do_setcond_const;
        case 1:
            op->args[2] = op->args[3];
            op->args[3] = cond;
            op->opc = INDEX_op_setcond_i32;
            break;
        }
        break;

    default:
        break;

    do_setcond_high:
        op->args[1] = op->args[2];
        op->args[2] = op->args[4];
        op->args[3] = cond;
        op->opc = INDEX_op_setcond_i32;
        break;
    }

    ctx->z_mask = 1;
    ctx->s_mask = smask_from_zmask(1);
    return false;

 do_setcond_const:
    return tcg_opt_gen_movi(ctx, op, op->args[0], i);
}

static bool fold_sextract(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask, s_mask, s_mask_old;
    int pos = op->args[2];
    int len = op->args[3];

    if (arg_is_const(op->args[1])) {
        uint64_t t;

        t = arg_info(op->args[1])->val;
        t = sextract64(t, pos, len);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
    }

    z_mask = arg_info(op->args[1])->z_mask;
    z_mask = sextract64(z_mask, pos, len);
    ctx->z_mask = z_mask;

    s_mask_old = arg_info(op->args[1])->s_mask;
    s_mask = sextract64(s_mask_old, pos, len);
    s_mask |= MAKE_64BIT_MASK(len, 64 - len);
    ctx->s_mask = s_mask;

    if (pos == 0) {
        ctx->a_mask = s_mask & ~s_mask_old;
    }

    return fold_masks(ctx, op);
}

static bool fold_shift(OptContext *ctx, TCGOp *op)
{
    uint64_t s_mask, z_mask, sign;

    if (fold_const2(ctx, op) ||
        fold_ix_to_i(ctx, op, 0) ||
        fold_xi_to_x(ctx, op, 0)) {
        return true;
    }

    s_mask = arg_info(op->args[1])->s_mask;
    z_mask = arg_info(op->args[1])->z_mask;

    if (arg_is_const(op->args[2])) {
        int sh = arg_info(op->args[2])->val;

        ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);

        s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
        ctx->s_mask = smask_from_smask(s_mask);

        return fold_masks(ctx, op);
    }

    switch (op->opc) {
    CASE_OP_32_64(sar):
        /*
         * Arithmetic right shift will not reduce the number of
         * input sign repetitions.
         */
        ctx->s_mask = s_mask;
        break;
    CASE_OP_32_64(shr):
        /*
         * If the sign bit is known zero, then logical right shift
         * will not reduce the number of input sign repetitions.
1977 */ 1978 sign = (s_mask & -s_mask) >> 1; 1979 if (!(z_mask & sign)) { 1980 ctx->s_mask = s_mask; 1981 } 1982 break; 1983 default: 1984 break; 1985 } 1986 1987 return false; 1988 } 1989 1990 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op) 1991 { 1992 TCGOpcode neg_op; 1993 bool have_neg; 1994 1995 if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) { 1996 return false; 1997 } 1998 1999 switch (ctx->type) { 2000 case TCG_TYPE_I32: 2001 neg_op = INDEX_op_neg_i32; 2002 have_neg = TCG_TARGET_HAS_neg_i32; 2003 break; 2004 case TCG_TYPE_I64: 2005 neg_op = INDEX_op_neg_i64; 2006 have_neg = TCG_TARGET_HAS_neg_i64; 2007 break; 2008 case TCG_TYPE_V64: 2009 case TCG_TYPE_V128: 2010 case TCG_TYPE_V256: 2011 neg_op = INDEX_op_neg_vec; 2012 have_neg = (TCG_TARGET_HAS_neg_vec && 2013 tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0); 2014 break; 2015 default: 2016 g_assert_not_reached(); 2017 } 2018 if (have_neg) { 2019 op->opc = neg_op; 2020 op->args[1] = op->args[2]; 2021 return fold_neg(ctx, op); 2022 } 2023 return false; 2024 } 2025 2026 /* We cannot as yet do_constant_folding with vectors. */ 2027 static bool fold_sub_vec(OptContext *ctx, TCGOp *op) 2028 { 2029 if (fold_xx_to_i(ctx, op, 0) || 2030 fold_xi_to_x(ctx, op, 0) || 2031 fold_sub_to_neg(ctx, op)) { 2032 return true; 2033 } 2034 return false; 2035 } 2036 2037 static bool fold_sub(OptContext *ctx, TCGOp *op) 2038 { 2039 return fold_const2(ctx, op) || fold_sub_vec(ctx, op); 2040 } 2041 2042 static bool fold_sub2(OptContext *ctx, TCGOp *op) 2043 { 2044 return fold_addsub2(ctx, op, false); 2045 } 2046 2047 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op) 2048 { 2049 /* We can't do any folding with a load, but we can record bits. */ 2050 switch (op->opc) { 2051 CASE_OP_32_64(ld8s): 2052 ctx->s_mask = MAKE_64BIT_MASK(8, 56); 2053 break; 2054 CASE_OP_32_64(ld8u): 2055 ctx->z_mask = MAKE_64BIT_MASK(0, 8); 2056 ctx->s_mask = MAKE_64BIT_MASK(9, 55); 2057 break; 2058 CASE_OP_32_64(ld16s): 2059 ctx->s_mask = MAKE_64BIT_MASK(16, 48); 2060 break; 2061 CASE_OP_32_64(ld16u): 2062 ctx->z_mask = MAKE_64BIT_MASK(0, 16); 2063 ctx->s_mask = MAKE_64BIT_MASK(17, 47); 2064 break; 2065 case INDEX_op_ld32s_i64: 2066 ctx->s_mask = MAKE_64BIT_MASK(32, 32); 2067 break; 2068 case INDEX_op_ld32u_i64: 2069 ctx->z_mask = MAKE_64BIT_MASK(0, 32); 2070 ctx->s_mask = MAKE_64BIT_MASK(33, 31); 2071 break; 2072 default: 2073 g_assert_not_reached(); 2074 } 2075 return false; 2076 } 2077 2078 static bool fold_xor(OptContext *ctx, TCGOp *op) 2079 { 2080 if (fold_const2_commutative(ctx, op) || 2081 fold_xx_to_i(ctx, op, 0) || 2082 fold_xi_to_x(ctx, op, 0) || 2083 fold_xi_to_not(ctx, op, -1)) { 2084 return true; 2085 } 2086 2087 ctx->z_mask = arg_info(op->args[1])->z_mask 2088 | arg_info(op->args[2])->z_mask; 2089 ctx->s_mask = arg_info(op->args[1])->s_mask 2090 & arg_info(op->args[2])->s_mask; 2091 return fold_masks(ctx, op); 2092 } 2093 2094 /* Propagate constants and copies, fold constant expressions. */ 2095 void tcg_optimize(TCGContext *s) 2096 { 2097 int nb_temps, i; 2098 TCGOp *op, *op_next; 2099 OptContext ctx = { .tcg = s }; 2100 2101 /* Array VALS has an element for each temp. 2102 If this temp holds a constant then its value is kept in VALS' element. 2103 If this temp is a copy of other ones then the other copies are 2104 available through the doubly linked circular list. 
*/ 2105 2106 nb_temps = s->nb_temps; 2107 for (i = 0; i < nb_temps; ++i) { 2108 s->temps[i].state_ptr = NULL; 2109 } 2110 2111 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2112 TCGOpcode opc = op->opc; 2113 const TCGOpDef *def; 2114 bool done = false; 2115 2116 /* Calls are special. */ 2117 if (opc == INDEX_op_call) { 2118 fold_call(&ctx, op); 2119 continue; 2120 } 2121 2122 def = &tcg_op_defs[opc]; 2123 init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs); 2124 copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs); 2125 2126 /* Pre-compute the type of the operation. */ 2127 if (def->flags & TCG_OPF_VECTOR) { 2128 ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op); 2129 } else if (def->flags & TCG_OPF_64BIT) { 2130 ctx.type = TCG_TYPE_I64; 2131 } else { 2132 ctx.type = TCG_TYPE_I32; 2133 } 2134 2135 /* Assume all bits affected, no bits known zero, no sign reps. */ 2136 ctx.a_mask = -1; 2137 ctx.z_mask = -1; 2138 ctx.s_mask = 0; 2139 2140 /* 2141 * Process each opcode. 2142 * Sorted alphabetically by opcode as much as possible. 2143 */ 2144 switch (opc) { 2145 CASE_OP_32_64(add): 2146 done = fold_add(&ctx, op); 2147 break; 2148 case INDEX_op_add_vec: 2149 done = fold_add_vec(&ctx, op); 2150 break; 2151 CASE_OP_32_64(add2): 2152 done = fold_add2(&ctx, op); 2153 break; 2154 CASE_OP_32_64_VEC(and): 2155 done = fold_and(&ctx, op); 2156 break; 2157 CASE_OP_32_64_VEC(andc): 2158 done = fold_andc(&ctx, op); 2159 break; 2160 CASE_OP_32_64(brcond): 2161 done = fold_brcond(&ctx, op); 2162 break; 2163 case INDEX_op_brcond2_i32: 2164 done = fold_brcond2(&ctx, op); 2165 break; 2166 CASE_OP_32_64(bswap16): 2167 CASE_OP_32_64(bswap32): 2168 case INDEX_op_bswap64_i64: 2169 done = fold_bswap(&ctx, op); 2170 break; 2171 CASE_OP_32_64(clz): 2172 CASE_OP_32_64(ctz): 2173 done = fold_count_zeros(&ctx, op); 2174 break; 2175 CASE_OP_32_64(ctpop): 2176 done = fold_ctpop(&ctx, op); 2177 break; 2178 CASE_OP_32_64(deposit): 2179 done = fold_deposit(&ctx, op); 2180 break; 2181 CASE_OP_32_64(div): 2182 CASE_OP_32_64(divu): 2183 done = fold_divide(&ctx, op); 2184 break; 2185 case INDEX_op_dup_vec: 2186 done = fold_dup(&ctx, op); 2187 break; 2188 case INDEX_op_dup2_vec: 2189 done = fold_dup2(&ctx, op); 2190 break; 2191 CASE_OP_32_64_VEC(eqv): 2192 done = fold_eqv(&ctx, op); 2193 break; 2194 CASE_OP_32_64(extract): 2195 done = fold_extract(&ctx, op); 2196 break; 2197 CASE_OP_32_64(extract2): 2198 done = fold_extract2(&ctx, op); 2199 break; 2200 CASE_OP_32_64(ext8s): 2201 CASE_OP_32_64(ext16s): 2202 case INDEX_op_ext32s_i64: 2203 case INDEX_op_ext_i32_i64: 2204 done = fold_exts(&ctx, op); 2205 break; 2206 CASE_OP_32_64(ext8u): 2207 CASE_OP_32_64(ext16u): 2208 case INDEX_op_ext32u_i64: 2209 case INDEX_op_extu_i32_i64: 2210 case INDEX_op_extrl_i64_i32: 2211 case INDEX_op_extrh_i64_i32: 2212 done = fold_extu(&ctx, op); 2213 break; 2214 CASE_OP_32_64(ld8s): 2215 CASE_OP_32_64(ld8u): 2216 CASE_OP_32_64(ld16s): 2217 CASE_OP_32_64(ld16u): 2218 case INDEX_op_ld32s_i64: 2219 case INDEX_op_ld32u_i64: 2220 done = fold_tcg_ld(&ctx, op); 2221 break; 2222 case INDEX_op_mb: 2223 done = fold_mb(&ctx, op); 2224 break; 2225 CASE_OP_32_64_VEC(mov): 2226 done = fold_mov(&ctx, op); 2227 break; 2228 CASE_OP_32_64(movcond): 2229 done = fold_movcond(&ctx, op); 2230 break; 2231 CASE_OP_32_64(mul): 2232 done = fold_mul(&ctx, op); 2233 break; 2234 CASE_OP_32_64(mulsh): 2235 CASE_OP_32_64(muluh): 2236 done = fold_mul_highpart(&ctx, op); 2237 break; 2238 CASE_OP_32_64(muls2): 2239 CASE_OP_32_64(mulu2): 2240 done = fold_multiply2(&ctx, op); 2241 break; 
        CASE_OP_32_64_VEC(nand):
            done = fold_nand(&ctx, op);
            break;
        CASE_OP_32_64(neg):
            done = fold_neg(&ctx, op);
            break;
        CASE_OP_32_64_VEC(nor):
            done = fold_nor(&ctx, op);
            break;
        CASE_OP_32_64_VEC(not):
            done = fold_not(&ctx, op);
            break;
        CASE_OP_32_64_VEC(or):
            done = fold_or(&ctx, op);
            break;
        CASE_OP_32_64_VEC(orc):
            done = fold_orc(&ctx, op);
            break;
        case INDEX_op_qemu_ld_a32_i32:
        case INDEX_op_qemu_ld_a64_i32:
        case INDEX_op_qemu_ld_a32_i64:
        case INDEX_op_qemu_ld_a64_i64:
        case INDEX_op_qemu_ld_a32_i128:
        case INDEX_op_qemu_ld_a64_i128:
            done = fold_qemu_ld(&ctx, op);
            break;
        case INDEX_op_qemu_st8_a32_i32:
        case INDEX_op_qemu_st8_a64_i32:
        case INDEX_op_qemu_st_a32_i32:
        case INDEX_op_qemu_st_a64_i32:
        case INDEX_op_qemu_st_a32_i64:
        case INDEX_op_qemu_st_a64_i64:
        case INDEX_op_qemu_st_a32_i128:
        case INDEX_op_qemu_st_a64_i128:
            done = fold_qemu_st(&ctx, op);
            break;
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            done = fold_remainder(&ctx, op);
            break;
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
            done = fold_shift(&ctx, op);
            break;
        CASE_OP_32_64(setcond):
            done = fold_setcond(&ctx, op);
            break;
        CASE_OP_32_64(negsetcond):
            done = fold_negsetcond(&ctx, op);
            break;
        case INDEX_op_setcond2_i32:
            done = fold_setcond2(&ctx, op);
            break;
        CASE_OP_32_64(sextract):
            done = fold_sextract(&ctx, op);
            break;
        CASE_OP_32_64(sub):
            done = fold_sub(&ctx, op);
            break;
        case INDEX_op_sub_vec:
            done = fold_sub_vec(&ctx, op);
            break;
        CASE_OP_32_64(sub2):
            done = fold_sub2(&ctx, op);
            break;
        CASE_OP_32_64_VEC(xor):
            done = fold_xor(&ctx, op);
            break;
        default:
            break;
        }

        if (!done) {
            finish_folding(&ctx, op);
        }
    }
}
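/*
 * Rough end-to-end illustration of the pass (not a test case): with t1
 * and t2 known to hold the constants 3 and 5, "add_i32 t3, t1, t2" is
 * folded by fold_add/fold_const2 into a mov from the constant temp for 8,
 * and t3 is recorded as a copy of that constant; a later "mov_i32 t4, t3"
 * merely extends the copy list, so subsequent uses of t4 can be replaced
 * by the constant as well.
 */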