1 /* 2 * Optimizations for Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2010 Samsung Electronics. 5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include "qemu/osdep.h" 27 #include "qemu/int128.h" 28 #include "tcg/tcg-op.h" 29 #include "tcg-internal.h" 30 31 #define CASE_OP_32_64(x) \ 32 glue(glue(case INDEX_op_, x), _i32): \ 33 glue(glue(case INDEX_op_, x), _i64) 34 35 #define CASE_OP_32_64_VEC(x) \ 36 glue(glue(case INDEX_op_, x), _i32): \ 37 glue(glue(case INDEX_op_, x), _i64): \ 38 glue(glue(case INDEX_op_, x), _vec) 39 40 typedef struct TempOptInfo { 41 bool is_const; 42 TCGTemp *prev_copy; 43 TCGTemp *next_copy; 44 uint64_t val; 45 uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */ 46 uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */ 47 } TempOptInfo; 48 49 typedef struct OptContext { 50 TCGContext *tcg; 51 TCGOp *prev_mb; 52 TCGTempSet temps_used; 53 54 /* In flight values from optimization. */ 55 uint64_t a_mask; /* mask bit is 0 iff value identical to first input */ 56 uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */ 57 uint64_t s_mask; /* mask of clrsb(value) bits */ 58 TCGType type; 59 } OptContext; 60 61 /* Calculate the smask for a specific value. */ 62 static uint64_t smask_from_value(uint64_t value) 63 { 64 int rep = clrsb64(value); 65 return ~(~0ull >> rep); 66 } 67 68 /* 69 * Calculate the smask for a given set of known-zeros. 70 * If there are lots of zeros on the left, we can consider the remainder 71 * an unsigned field, and thus the corresponding signed field is one bit 72 * larger. 73 */ 74 static uint64_t smask_from_zmask(uint64_t zmask) 75 { 76 /* 77 * Only the 0 bits are significant for zmask, thus the msb itself 78 * must be zero, else we have no sign information. 79 */ 80 int rep = clz64(zmask); 81 if (rep == 0) { 82 return 0; 83 } 84 rep -= 1; 85 return ~(~0ull >> rep); 86 } 87 88 /* 89 * Recreate a properly left-aligned smask after manipulation. 90 * Some bit-shuffling, particularly shifts and rotates, may 91 * retain sign bits on the left, but may scatter disconnected 92 * sign bits on the right. Retain only what remains to the left. 93 */ 94 static uint64_t smask_from_smask(int64_t smask) 95 { 96 /* Only the 1 bits are significant for smask */ 97 return smask_from_zmask(~smask); 98 } 99 100 static inline TempOptInfo *ts_info(TCGTemp *ts) 101 { 102 return ts->state_ptr; 103 } 104 105 static inline TempOptInfo *arg_info(TCGArg arg) 106 { 107 return ts_info(arg_temp(arg)); 108 } 109 110 static inline bool ts_is_const(TCGTemp *ts) 111 { 112 return ts_info(ts)->is_const; 113 } 114 115 static inline bool arg_is_const(TCGArg arg) 116 { 117 return ts_is_const(arg_temp(arg)); 118 } 119 120 static inline bool ts_is_copy(TCGTemp *ts) 121 { 122 return ts_info(ts)->next_copy != ts; 123 } 124 125 /* Reset TEMP's state, possibly removing the temp for the list of copies. */ 126 static void reset_ts(TCGTemp *ts) 127 { 128 TempOptInfo *ti = ts_info(ts); 129 TempOptInfo *pi = ts_info(ti->prev_copy); 130 TempOptInfo *ni = ts_info(ti->next_copy); 131 132 ni->prev_copy = ti->prev_copy; 133 pi->next_copy = ti->next_copy; 134 ti->next_copy = ts; 135 ti->prev_copy = ts; 136 ti->is_const = false; 137 ti->z_mask = -1; 138 ti->s_mask = 0; 139 } 140 141 static void reset_temp(TCGArg arg) 142 { 143 reset_ts(arg_temp(arg)); 144 } 145 146 /* Initialize and activate a temporary. */ 147 static void init_ts_info(OptContext *ctx, TCGTemp *ts) 148 { 149 size_t idx = temp_idx(ts); 150 TempOptInfo *ti; 151 152 if (test_bit(idx, ctx->temps_used.l)) { 153 return; 154 } 155 set_bit(idx, ctx->temps_used.l); 156 157 ti = ts->state_ptr; 158 if (ti == NULL) { 159 ti = tcg_malloc(sizeof(TempOptInfo)); 160 ts->state_ptr = ti; 161 } 162 163 ti->next_copy = ts; 164 ti->prev_copy = ts; 165 if (ts->kind == TEMP_CONST) { 166 ti->is_const = true; 167 ti->val = ts->val; 168 ti->z_mask = ts->val; 169 ti->s_mask = smask_from_value(ts->val); 170 } else { 171 ti->is_const = false; 172 ti->z_mask = -1; 173 ti->s_mask = 0; 174 } 175 } 176 177 static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts) 178 { 179 TCGTemp *i, *g, *l; 180 181 /* If this is already readonly, we can't do better. */ 182 if (temp_readonly(ts)) { 183 return ts; 184 } 185 186 g = l = NULL; 187 for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) { 188 if (temp_readonly(i)) { 189 return i; 190 } else if (i->kind > ts->kind) { 191 if (i->kind == TEMP_GLOBAL) { 192 g = i; 193 } else if (i->kind == TEMP_LOCAL) { 194 l = i; 195 } 196 } 197 } 198 199 /* If we didn't find a better representation, return the same temp. */ 200 return g ? g : l ? l : ts; 201 } 202 203 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2) 204 { 205 TCGTemp *i; 206 207 if (ts1 == ts2) { 208 return true; 209 } 210 211 if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) { 212 return false; 213 } 214 215 for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) { 216 if (i == ts2) { 217 return true; 218 } 219 } 220 221 return false; 222 } 223 224 static bool args_are_copies(TCGArg arg1, TCGArg arg2) 225 { 226 return ts_are_copies(arg_temp(arg1), arg_temp(arg2)); 227 } 228 229 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) 230 { 231 TCGTemp *dst_ts = arg_temp(dst); 232 TCGTemp *src_ts = arg_temp(src); 233 TempOptInfo *di; 234 TempOptInfo *si; 235 TCGOpcode new_op; 236 237 if (ts_are_copies(dst_ts, src_ts)) { 238 tcg_op_remove(ctx->tcg, op); 239 return true; 240 } 241 242 reset_ts(dst_ts); 243 di = ts_info(dst_ts); 244 si = ts_info(src_ts); 245 246 switch (ctx->type) { 247 case TCG_TYPE_I32: 248 new_op = INDEX_op_mov_i32; 249 break; 250 case TCG_TYPE_I64: 251 new_op = INDEX_op_mov_i64; 252 break; 253 case TCG_TYPE_V64: 254 case TCG_TYPE_V128: 255 case TCG_TYPE_V256: 256 /* TCGOP_VECL and TCGOP_VECE remain unchanged. */ 257 new_op = INDEX_op_mov_vec; 258 break; 259 default: 260 g_assert_not_reached(); 261 } 262 op->opc = new_op; 263 op->args[0] = dst; 264 op->args[1] = src; 265 266 di->z_mask = si->z_mask; 267 di->s_mask = si->s_mask; 268 269 if (src_ts->type == dst_ts->type) { 270 TempOptInfo *ni = ts_info(si->next_copy); 271 272 di->next_copy = si->next_copy; 273 di->prev_copy = src_ts; 274 ni->prev_copy = dst_ts; 275 si->next_copy = dst_ts; 276 di->is_const = si->is_const; 277 di->val = si->val; 278 } 279 return true; 280 } 281 282 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op, 283 TCGArg dst, uint64_t val) 284 { 285 TCGTemp *tv; 286 287 if (ctx->type == TCG_TYPE_I32) { 288 val = (int32_t)val; 289 } 290 291 /* Convert movi to mov with constant temp. */ 292 tv = tcg_constant_internal(ctx->type, val); 293 init_ts_info(ctx, tv); 294 return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv)); 295 } 296 297 static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) 298 { 299 uint64_t l64, h64; 300 301 switch (op) { 302 CASE_OP_32_64(add): 303 return x + y; 304 305 CASE_OP_32_64(sub): 306 return x - y; 307 308 CASE_OP_32_64(mul): 309 return x * y; 310 311 CASE_OP_32_64_VEC(and): 312 return x & y; 313 314 CASE_OP_32_64_VEC(or): 315 return x | y; 316 317 CASE_OP_32_64_VEC(xor): 318 return x ^ y; 319 320 case INDEX_op_shl_i32: 321 return (uint32_t)x << (y & 31); 322 323 case INDEX_op_shl_i64: 324 return (uint64_t)x << (y & 63); 325 326 case INDEX_op_shr_i32: 327 return (uint32_t)x >> (y & 31); 328 329 case INDEX_op_shr_i64: 330 return (uint64_t)x >> (y & 63); 331 332 case INDEX_op_sar_i32: 333 return (int32_t)x >> (y & 31); 334 335 case INDEX_op_sar_i64: 336 return (int64_t)x >> (y & 63); 337 338 case INDEX_op_rotr_i32: 339 return ror32(x, y & 31); 340 341 case INDEX_op_rotr_i64: 342 return ror64(x, y & 63); 343 344 case INDEX_op_rotl_i32: 345 return rol32(x, y & 31); 346 347 case INDEX_op_rotl_i64: 348 return rol64(x, y & 63); 349 350 CASE_OP_32_64_VEC(not): 351 return ~x; 352 353 CASE_OP_32_64(neg): 354 return -x; 355 356 CASE_OP_32_64_VEC(andc): 357 return x & ~y; 358 359 CASE_OP_32_64_VEC(orc): 360 return x | ~y; 361 362 CASE_OP_32_64(eqv): 363 return ~(x ^ y); 364 365 CASE_OP_32_64(nand): 366 return ~(x & y); 367 368 CASE_OP_32_64(nor): 369 return ~(x | y); 370 371 case INDEX_op_clz_i32: 372 return (uint32_t)x ? clz32(x) : y; 373 374 case INDEX_op_clz_i64: 375 return x ? clz64(x) : y; 376 377 case INDEX_op_ctz_i32: 378 return (uint32_t)x ? ctz32(x) : y; 379 380 case INDEX_op_ctz_i64: 381 return x ? ctz64(x) : y; 382 383 case INDEX_op_ctpop_i32: 384 return ctpop32(x); 385 386 case INDEX_op_ctpop_i64: 387 return ctpop64(x); 388 389 CASE_OP_32_64(ext8s): 390 return (int8_t)x; 391 392 CASE_OP_32_64(ext16s): 393 return (int16_t)x; 394 395 CASE_OP_32_64(ext8u): 396 return (uint8_t)x; 397 398 CASE_OP_32_64(ext16u): 399 return (uint16_t)x; 400 401 CASE_OP_32_64(bswap16): 402 x = bswap16(x); 403 return y & TCG_BSWAP_OS ? (int16_t)x : x; 404 405 CASE_OP_32_64(bswap32): 406 x = bswap32(x); 407 return y & TCG_BSWAP_OS ? (int32_t)x : x; 408 409 case INDEX_op_bswap64_i64: 410 return bswap64(x); 411 412 case INDEX_op_ext_i32_i64: 413 case INDEX_op_ext32s_i64: 414 return (int32_t)x; 415 416 case INDEX_op_extu_i32_i64: 417 case INDEX_op_extrl_i64_i32: 418 case INDEX_op_ext32u_i64: 419 return (uint32_t)x; 420 421 case INDEX_op_extrh_i64_i32: 422 return (uint64_t)x >> 32; 423 424 case INDEX_op_muluh_i32: 425 return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; 426 case INDEX_op_mulsh_i32: 427 return ((int64_t)(int32_t)x * (int32_t)y) >> 32; 428 429 case INDEX_op_muluh_i64: 430 mulu64(&l64, &h64, x, y); 431 return h64; 432 case INDEX_op_mulsh_i64: 433 muls64(&l64, &h64, x, y); 434 return h64; 435 436 case INDEX_op_div_i32: 437 /* Avoid crashing on divide by zero, otherwise undefined. */ 438 return (int32_t)x / ((int32_t)y ? : 1); 439 case INDEX_op_divu_i32: 440 return (uint32_t)x / ((uint32_t)y ? : 1); 441 case INDEX_op_div_i64: 442 return (int64_t)x / ((int64_t)y ? : 1); 443 case INDEX_op_divu_i64: 444 return (uint64_t)x / ((uint64_t)y ? : 1); 445 446 case INDEX_op_rem_i32: 447 return (int32_t)x % ((int32_t)y ? : 1); 448 case INDEX_op_remu_i32: 449 return (uint32_t)x % ((uint32_t)y ? : 1); 450 case INDEX_op_rem_i64: 451 return (int64_t)x % ((int64_t)y ? : 1); 452 case INDEX_op_remu_i64: 453 return (uint64_t)x % ((uint64_t)y ? : 1); 454 455 default: 456 fprintf(stderr, 457 "Unrecognized operation %d in do_constant_folding.\n", op); 458 tcg_abort(); 459 } 460 } 461 462 static uint64_t do_constant_folding(TCGOpcode op, TCGType type, 463 uint64_t x, uint64_t y) 464 { 465 uint64_t res = do_constant_folding_2(op, x, y); 466 if (type == TCG_TYPE_I32) { 467 res = (int32_t)res; 468 } 469 return res; 470 } 471 472 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) 473 { 474 switch (c) { 475 case TCG_COND_EQ: 476 return x == y; 477 case TCG_COND_NE: 478 return x != y; 479 case TCG_COND_LT: 480 return (int32_t)x < (int32_t)y; 481 case TCG_COND_GE: 482 return (int32_t)x >= (int32_t)y; 483 case TCG_COND_LE: 484 return (int32_t)x <= (int32_t)y; 485 case TCG_COND_GT: 486 return (int32_t)x > (int32_t)y; 487 case TCG_COND_LTU: 488 return x < y; 489 case TCG_COND_GEU: 490 return x >= y; 491 case TCG_COND_LEU: 492 return x <= y; 493 case TCG_COND_GTU: 494 return x > y; 495 default: 496 tcg_abort(); 497 } 498 } 499 500 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) 501 { 502 switch (c) { 503 case TCG_COND_EQ: 504 return x == y; 505 case TCG_COND_NE: 506 return x != y; 507 case TCG_COND_LT: 508 return (int64_t)x < (int64_t)y; 509 case TCG_COND_GE: 510 return (int64_t)x >= (int64_t)y; 511 case TCG_COND_LE: 512 return (int64_t)x <= (int64_t)y; 513 case TCG_COND_GT: 514 return (int64_t)x > (int64_t)y; 515 case TCG_COND_LTU: 516 return x < y; 517 case TCG_COND_GEU: 518 return x >= y; 519 case TCG_COND_LEU: 520 return x <= y; 521 case TCG_COND_GTU: 522 return x > y; 523 default: 524 tcg_abort(); 525 } 526 } 527 528 static bool do_constant_folding_cond_eq(TCGCond c) 529 { 530 switch (c) { 531 case TCG_COND_GT: 532 case TCG_COND_LTU: 533 case TCG_COND_LT: 534 case TCG_COND_GTU: 535 case TCG_COND_NE: 536 return 0; 537 case TCG_COND_GE: 538 case TCG_COND_GEU: 539 case TCG_COND_LE: 540 case TCG_COND_LEU: 541 case TCG_COND_EQ: 542 return 1; 543 default: 544 tcg_abort(); 545 } 546 } 547 548 /* 549 * Return -1 if the condition can't be simplified, 550 * and the result of the condition (0 or 1) if it can. 551 */ 552 static int do_constant_folding_cond(TCGType type, TCGArg x, 553 TCGArg y, TCGCond c) 554 { 555 uint64_t xv = arg_info(x)->val; 556 uint64_t yv = arg_info(y)->val; 557 558 if (arg_is_const(x) && arg_is_const(y)) { 559 switch (type) { 560 case TCG_TYPE_I32: 561 return do_constant_folding_cond_32(xv, yv, c); 562 case TCG_TYPE_I64: 563 return do_constant_folding_cond_64(xv, yv, c); 564 default: 565 /* Only scalar comparisons are optimizable */ 566 return -1; 567 } 568 } else if (args_are_copies(x, y)) { 569 return do_constant_folding_cond_eq(c); 570 } else if (arg_is_const(y) && yv == 0) { 571 switch (c) { 572 case TCG_COND_LTU: 573 return 0; 574 case TCG_COND_GEU: 575 return 1; 576 default: 577 return -1; 578 } 579 } 580 return -1; 581 } 582 583 /* 584 * Return -1 if the condition can't be simplified, 585 * and the result of the condition (0 or 1) if it can. 586 */ 587 static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c) 588 { 589 TCGArg al = p1[0], ah = p1[1]; 590 TCGArg bl = p2[0], bh = p2[1]; 591 592 if (arg_is_const(bl) && arg_is_const(bh)) { 593 tcg_target_ulong blv = arg_info(bl)->val; 594 tcg_target_ulong bhv = arg_info(bh)->val; 595 uint64_t b = deposit64(blv, 32, 32, bhv); 596 597 if (arg_is_const(al) && arg_is_const(ah)) { 598 tcg_target_ulong alv = arg_info(al)->val; 599 tcg_target_ulong ahv = arg_info(ah)->val; 600 uint64_t a = deposit64(alv, 32, 32, ahv); 601 return do_constant_folding_cond_64(a, b, c); 602 } 603 if (b == 0) { 604 switch (c) { 605 case TCG_COND_LTU: 606 return 0; 607 case TCG_COND_GEU: 608 return 1; 609 default: 610 break; 611 } 612 } 613 } 614 if (args_are_copies(al, bl) && args_are_copies(ah, bh)) { 615 return do_constant_folding_cond_eq(c); 616 } 617 return -1; 618 } 619 620 /** 621 * swap_commutative: 622 * @dest: TCGArg of the destination argument, or NO_DEST. 623 * @p1: first paired argument 624 * @p2: second paired argument 625 * 626 * If *@p1 is a constant and *@p2 is not, swap. 627 * If *@p2 matches @dest, swap. 628 * Return true if a swap was performed. 629 */ 630 631 #define NO_DEST temp_arg(NULL) 632 633 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) 634 { 635 TCGArg a1 = *p1, a2 = *p2; 636 int sum = 0; 637 sum += arg_is_const(a1); 638 sum -= arg_is_const(a2); 639 640 /* Prefer the constant in second argument, and then the form 641 op a, a, b, which is better handled on non-RISC hosts. */ 642 if (sum > 0 || (sum == 0 && dest == a2)) { 643 *p1 = a2; 644 *p2 = a1; 645 return true; 646 } 647 return false; 648 } 649 650 static bool swap_commutative2(TCGArg *p1, TCGArg *p2) 651 { 652 int sum = 0; 653 sum += arg_is_const(p1[0]); 654 sum += arg_is_const(p1[1]); 655 sum -= arg_is_const(p2[0]); 656 sum -= arg_is_const(p2[1]); 657 if (sum > 0) { 658 TCGArg t; 659 t = p1[0], p1[0] = p2[0], p2[0] = t; 660 t = p1[1], p1[1] = p2[1], p2[1] = t; 661 return true; 662 } 663 return false; 664 } 665 666 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args) 667 { 668 for (int i = 0; i < nb_args; i++) { 669 TCGTemp *ts = arg_temp(op->args[i]); 670 if (ts) { 671 init_ts_info(ctx, ts); 672 } 673 } 674 } 675 676 static void copy_propagate(OptContext *ctx, TCGOp *op, 677 int nb_oargs, int nb_iargs) 678 { 679 TCGContext *s = ctx->tcg; 680 681 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 682 TCGTemp *ts = arg_temp(op->args[i]); 683 if (ts && ts_is_copy(ts)) { 684 op->args[i] = temp_arg(find_better_copy(s, ts)); 685 } 686 } 687 } 688 689 static void finish_folding(OptContext *ctx, TCGOp *op) 690 { 691 const TCGOpDef *def = &tcg_op_defs[op->opc]; 692 int i, nb_oargs; 693 694 /* 695 * For an opcode that ends a BB, reset all temp data. 696 * We do no cross-BB optimization. 697 */ 698 if (def->flags & TCG_OPF_BB_END) { 699 memset(&ctx->temps_used, 0, sizeof(ctx->temps_used)); 700 ctx->prev_mb = NULL; 701 return; 702 } 703 704 nb_oargs = def->nb_oargs; 705 for (i = 0; i < nb_oargs; i++) { 706 TCGTemp *ts = arg_temp(op->args[i]); 707 reset_ts(ts); 708 /* 709 * Save the corresponding known-zero/sign bits mask for the 710 * first output argument (only one supported so far). 711 */ 712 if (i == 0) { 713 ts_info(ts)->z_mask = ctx->z_mask; 714 ts_info(ts)->s_mask = ctx->s_mask; 715 } 716 } 717 } 718 719 /* 720 * The fold_* functions return true when processing is complete, 721 * usually by folding the operation to a constant or to a copy, 722 * and calling tcg_opt_gen_{mov,movi}. They may do other things, 723 * like collect information about the value produced, for use in 724 * optimizing a subsequent operation. 725 * 726 * These first fold_* functions are all helpers, used by other 727 * folders for more specific operations. 728 */ 729 730 static bool fold_const1(OptContext *ctx, TCGOp *op) 731 { 732 if (arg_is_const(op->args[1])) { 733 uint64_t t; 734 735 t = arg_info(op->args[1])->val; 736 t = do_constant_folding(op->opc, ctx->type, t, 0); 737 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 738 } 739 return false; 740 } 741 742 static bool fold_const2(OptContext *ctx, TCGOp *op) 743 { 744 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 745 uint64_t t1 = arg_info(op->args[1])->val; 746 uint64_t t2 = arg_info(op->args[2])->val; 747 748 t1 = do_constant_folding(op->opc, ctx->type, t1, t2); 749 return tcg_opt_gen_movi(ctx, op, op->args[0], t1); 750 } 751 return false; 752 } 753 754 static bool fold_commutative(OptContext *ctx, TCGOp *op) 755 { 756 swap_commutative(op->args[0], &op->args[1], &op->args[2]); 757 return false; 758 } 759 760 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op) 761 { 762 swap_commutative(op->args[0], &op->args[1], &op->args[2]); 763 return fold_const2(ctx, op); 764 } 765 766 static bool fold_masks(OptContext *ctx, TCGOp *op) 767 { 768 uint64_t a_mask = ctx->a_mask; 769 uint64_t z_mask = ctx->z_mask; 770 uint64_t s_mask = ctx->s_mask; 771 772 /* 773 * 32-bit ops generate 32-bit results, which for the purpose of 774 * simplifying tcg are sign-extended. Certainly that's how we 775 * represent our constants elsewhere. Note that the bits will 776 * be reset properly for a 64-bit value when encountering the 777 * type changing opcodes. 778 */ 779 if (ctx->type == TCG_TYPE_I32) { 780 a_mask = (int32_t)a_mask; 781 z_mask = (int32_t)z_mask; 782 s_mask |= MAKE_64BIT_MASK(32, 32); 783 ctx->z_mask = z_mask; 784 ctx->s_mask = s_mask; 785 } 786 787 if (z_mask == 0) { 788 return tcg_opt_gen_movi(ctx, op, op->args[0], 0); 789 } 790 if (a_mask == 0) { 791 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 792 } 793 return false; 794 } 795 796 /* 797 * Convert @op to NOT, if NOT is supported by the host. 798 * Return true f the conversion is successful, which will still 799 * indicate that the processing is complete. 800 */ 801 static bool fold_not(OptContext *ctx, TCGOp *op); 802 static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx) 803 { 804 TCGOpcode not_op; 805 bool have_not; 806 807 switch (ctx->type) { 808 case TCG_TYPE_I32: 809 not_op = INDEX_op_not_i32; 810 have_not = TCG_TARGET_HAS_not_i32; 811 break; 812 case TCG_TYPE_I64: 813 not_op = INDEX_op_not_i64; 814 have_not = TCG_TARGET_HAS_not_i64; 815 break; 816 case TCG_TYPE_V64: 817 case TCG_TYPE_V128: 818 case TCG_TYPE_V256: 819 not_op = INDEX_op_not_vec; 820 have_not = TCG_TARGET_HAS_not_vec; 821 break; 822 default: 823 g_assert_not_reached(); 824 } 825 if (have_not) { 826 op->opc = not_op; 827 op->args[1] = op->args[idx]; 828 return fold_not(ctx, op); 829 } 830 return false; 831 } 832 833 /* If the binary operation has first argument @i, fold to @i. */ 834 static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i) 835 { 836 if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) { 837 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 838 } 839 return false; 840 } 841 842 /* If the binary operation has first argument @i, fold to NOT. */ 843 static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i) 844 { 845 if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) { 846 return fold_to_not(ctx, op, 2); 847 } 848 return false; 849 } 850 851 /* If the binary operation has second argument @i, fold to @i. */ 852 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i) 853 { 854 if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) { 855 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 856 } 857 return false; 858 } 859 860 /* If the binary operation has second argument @i, fold to identity. */ 861 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i) 862 { 863 if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) { 864 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 865 } 866 return false; 867 } 868 869 /* If the binary operation has second argument @i, fold to NOT. */ 870 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i) 871 { 872 if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) { 873 return fold_to_not(ctx, op, 1); 874 } 875 return false; 876 } 877 878 /* If the binary operation has both arguments equal, fold to @i. */ 879 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i) 880 { 881 if (args_are_copies(op->args[1], op->args[2])) { 882 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 883 } 884 return false; 885 } 886 887 /* If the binary operation has both arguments equal, fold to identity. */ 888 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op) 889 { 890 if (args_are_copies(op->args[1], op->args[2])) { 891 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 892 } 893 return false; 894 } 895 896 /* 897 * These outermost fold_<op> functions are sorted alphabetically. 898 * 899 * The ordering of the transformations should be: 900 * 1) those that produce a constant 901 * 2) those that produce a copy 902 * 3) those that produce information about the result value. 903 */ 904 905 static bool fold_add(OptContext *ctx, TCGOp *op) 906 { 907 if (fold_const2_commutative(ctx, op) || 908 fold_xi_to_x(ctx, op, 0)) { 909 return true; 910 } 911 return false; 912 } 913 914 /* We cannot as yet do_constant_folding with vectors. */ 915 static bool fold_add_vec(OptContext *ctx, TCGOp *op) 916 { 917 if (fold_commutative(ctx, op) || 918 fold_xi_to_x(ctx, op, 0)) { 919 return true; 920 } 921 return false; 922 } 923 924 static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add) 925 { 926 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) && 927 arg_is_const(op->args[4]) && arg_is_const(op->args[5])) { 928 uint64_t al = arg_info(op->args[2])->val; 929 uint64_t ah = arg_info(op->args[3])->val; 930 uint64_t bl = arg_info(op->args[4])->val; 931 uint64_t bh = arg_info(op->args[5])->val; 932 TCGArg rl, rh; 933 TCGOp *op2; 934 935 if (ctx->type == TCG_TYPE_I32) { 936 uint64_t a = deposit64(al, 32, 32, ah); 937 uint64_t b = deposit64(bl, 32, 32, bh); 938 939 if (add) { 940 a += b; 941 } else { 942 a -= b; 943 } 944 945 al = sextract64(a, 0, 32); 946 ah = sextract64(a, 32, 32); 947 } else { 948 Int128 a = int128_make128(al, ah); 949 Int128 b = int128_make128(bl, bh); 950 951 if (add) { 952 a = int128_add(a, b); 953 } else { 954 a = int128_sub(a, b); 955 } 956 957 al = int128_getlo(a); 958 ah = int128_gethi(a); 959 } 960 961 rl = op->args[0]; 962 rh = op->args[1]; 963 964 /* The proper opcode is supplied by tcg_opt_gen_mov. */ 965 op2 = tcg_op_insert_before(ctx->tcg, op, 0); 966 967 tcg_opt_gen_movi(ctx, op, rl, al); 968 tcg_opt_gen_movi(ctx, op2, rh, ah); 969 return true; 970 } 971 return false; 972 } 973 974 static bool fold_add2(OptContext *ctx, TCGOp *op) 975 { 976 /* Note that the high and low parts may be independently swapped. */ 977 swap_commutative(op->args[0], &op->args[2], &op->args[4]); 978 swap_commutative(op->args[1], &op->args[3], &op->args[5]); 979 980 return fold_addsub2(ctx, op, true); 981 } 982 983 static bool fold_and(OptContext *ctx, TCGOp *op) 984 { 985 uint64_t z1, z2; 986 987 if (fold_const2_commutative(ctx, op) || 988 fold_xi_to_i(ctx, op, 0) || 989 fold_xi_to_x(ctx, op, -1) || 990 fold_xx_to_x(ctx, op)) { 991 return true; 992 } 993 994 z1 = arg_info(op->args[1])->z_mask; 995 z2 = arg_info(op->args[2])->z_mask; 996 ctx->z_mask = z1 & z2; 997 998 /* 999 * Sign repetitions are perforce all identical, whether they are 1 or 0. 1000 * Bitwise operations preserve the relative quantity of the repetitions. 1001 */ 1002 ctx->s_mask = arg_info(op->args[1])->s_mask 1003 & arg_info(op->args[2])->s_mask; 1004 1005 /* 1006 * Known-zeros does not imply known-ones. Therefore unless 1007 * arg2 is constant, we can't infer affected bits from it. 1008 */ 1009 if (arg_is_const(op->args[2])) { 1010 ctx->a_mask = z1 & ~z2; 1011 } 1012 1013 return fold_masks(ctx, op); 1014 } 1015 1016 static bool fold_andc(OptContext *ctx, TCGOp *op) 1017 { 1018 uint64_t z1; 1019 1020 if (fold_const2(ctx, op) || 1021 fold_xx_to_i(ctx, op, 0) || 1022 fold_xi_to_x(ctx, op, 0) || 1023 fold_ix_to_not(ctx, op, -1)) { 1024 return true; 1025 } 1026 1027 z1 = arg_info(op->args[1])->z_mask; 1028 1029 /* 1030 * Known-zeros does not imply known-ones. Therefore unless 1031 * arg2 is constant, we can't infer anything from it. 1032 */ 1033 if (arg_is_const(op->args[2])) { 1034 uint64_t z2 = ~arg_info(op->args[2])->z_mask; 1035 ctx->a_mask = z1 & ~z2; 1036 z1 &= z2; 1037 } 1038 ctx->z_mask = z1; 1039 1040 ctx->s_mask = arg_info(op->args[1])->s_mask 1041 & arg_info(op->args[2])->s_mask; 1042 return fold_masks(ctx, op); 1043 } 1044 1045 static bool fold_brcond(OptContext *ctx, TCGOp *op) 1046 { 1047 TCGCond cond = op->args[2]; 1048 int i; 1049 1050 if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) { 1051 op->args[2] = cond = tcg_swap_cond(cond); 1052 } 1053 1054 i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond); 1055 if (i == 0) { 1056 tcg_op_remove(ctx->tcg, op); 1057 return true; 1058 } 1059 if (i > 0) { 1060 op->opc = INDEX_op_br; 1061 op->args[0] = op->args[3]; 1062 } 1063 return false; 1064 } 1065 1066 static bool fold_brcond2(OptContext *ctx, TCGOp *op) 1067 { 1068 TCGCond cond = op->args[4]; 1069 TCGArg label = op->args[5]; 1070 int i, inv = 0; 1071 1072 if (swap_commutative2(&op->args[0], &op->args[2])) { 1073 op->args[4] = cond = tcg_swap_cond(cond); 1074 } 1075 1076 i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond); 1077 if (i >= 0) { 1078 goto do_brcond_const; 1079 } 1080 1081 switch (cond) { 1082 case TCG_COND_LT: 1083 case TCG_COND_GE: 1084 /* 1085 * Simplify LT/GE comparisons vs zero to a single compare 1086 * vs the high word of the input. 1087 */ 1088 if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 && 1089 arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) { 1090 goto do_brcond_high; 1091 } 1092 break; 1093 1094 case TCG_COND_NE: 1095 inv = 1; 1096 QEMU_FALLTHROUGH; 1097 case TCG_COND_EQ: 1098 /* 1099 * Simplify EQ/NE comparisons where one of the pairs 1100 * can be simplified. 1101 */ 1102 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0], 1103 op->args[2], cond); 1104 switch (i ^ inv) { 1105 case 0: 1106 goto do_brcond_const; 1107 case 1: 1108 goto do_brcond_high; 1109 } 1110 1111 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1], 1112 op->args[3], cond); 1113 switch (i ^ inv) { 1114 case 0: 1115 goto do_brcond_const; 1116 case 1: 1117 op->opc = INDEX_op_brcond_i32; 1118 op->args[1] = op->args[2]; 1119 op->args[2] = cond; 1120 op->args[3] = label; 1121 break; 1122 } 1123 break; 1124 1125 default: 1126 break; 1127 1128 do_brcond_high: 1129 op->opc = INDEX_op_brcond_i32; 1130 op->args[0] = op->args[1]; 1131 op->args[1] = op->args[3]; 1132 op->args[2] = cond; 1133 op->args[3] = label; 1134 break; 1135 1136 do_brcond_const: 1137 if (i == 0) { 1138 tcg_op_remove(ctx->tcg, op); 1139 return true; 1140 } 1141 op->opc = INDEX_op_br; 1142 op->args[0] = label; 1143 break; 1144 } 1145 return false; 1146 } 1147 1148 static bool fold_bswap(OptContext *ctx, TCGOp *op) 1149 { 1150 uint64_t z_mask, s_mask, sign; 1151 1152 if (arg_is_const(op->args[1])) { 1153 uint64_t t = arg_info(op->args[1])->val; 1154 1155 t = do_constant_folding(op->opc, ctx->type, t, op->args[2]); 1156 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1157 } 1158 1159 z_mask = arg_info(op->args[1])->z_mask; 1160 1161 switch (op->opc) { 1162 case INDEX_op_bswap16_i32: 1163 case INDEX_op_bswap16_i64: 1164 z_mask = bswap16(z_mask); 1165 sign = INT16_MIN; 1166 break; 1167 case INDEX_op_bswap32_i32: 1168 case INDEX_op_bswap32_i64: 1169 z_mask = bswap32(z_mask); 1170 sign = INT32_MIN; 1171 break; 1172 case INDEX_op_bswap64_i64: 1173 z_mask = bswap64(z_mask); 1174 sign = INT64_MIN; 1175 break; 1176 default: 1177 g_assert_not_reached(); 1178 } 1179 s_mask = smask_from_zmask(z_mask); 1180 1181 switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) { 1182 case TCG_BSWAP_OZ: 1183 break; 1184 case TCG_BSWAP_OS: 1185 /* If the sign bit may be 1, force all the bits above to 1. */ 1186 if (z_mask & sign) { 1187 z_mask |= sign; 1188 s_mask = sign << 1; 1189 } 1190 break; 1191 default: 1192 /* The high bits are undefined: force all bits above the sign to 1. */ 1193 z_mask |= sign << 1; 1194 s_mask = 0; 1195 break; 1196 } 1197 ctx->z_mask = z_mask; 1198 ctx->s_mask = s_mask; 1199 1200 return fold_masks(ctx, op); 1201 } 1202 1203 static bool fold_call(OptContext *ctx, TCGOp *op) 1204 { 1205 TCGContext *s = ctx->tcg; 1206 int nb_oargs = TCGOP_CALLO(op); 1207 int nb_iargs = TCGOP_CALLI(op); 1208 int flags, i; 1209 1210 init_arguments(ctx, op, nb_oargs + nb_iargs); 1211 copy_propagate(ctx, op, nb_oargs, nb_iargs); 1212 1213 /* If the function reads or writes globals, reset temp data. */ 1214 flags = tcg_call_flags(op); 1215 if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) { 1216 int nb_globals = s->nb_globals; 1217 1218 for (i = 0; i < nb_globals; i++) { 1219 if (test_bit(i, ctx->temps_used.l)) { 1220 reset_ts(&ctx->tcg->temps[i]); 1221 } 1222 } 1223 } 1224 1225 /* Reset temp data for outputs. */ 1226 for (i = 0; i < nb_oargs; i++) { 1227 reset_temp(op->args[i]); 1228 } 1229 1230 /* Stop optimizing MB across calls. */ 1231 ctx->prev_mb = NULL; 1232 return true; 1233 } 1234 1235 static bool fold_count_zeros(OptContext *ctx, TCGOp *op) 1236 { 1237 uint64_t z_mask; 1238 1239 if (arg_is_const(op->args[1])) { 1240 uint64_t t = arg_info(op->args[1])->val; 1241 1242 if (t != 0) { 1243 t = do_constant_folding(op->opc, ctx->type, t, 0); 1244 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1245 } 1246 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]); 1247 } 1248 1249 switch (ctx->type) { 1250 case TCG_TYPE_I32: 1251 z_mask = 31; 1252 break; 1253 case TCG_TYPE_I64: 1254 z_mask = 63; 1255 break; 1256 default: 1257 g_assert_not_reached(); 1258 } 1259 ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask; 1260 ctx->s_mask = smask_from_zmask(ctx->z_mask); 1261 return false; 1262 } 1263 1264 static bool fold_ctpop(OptContext *ctx, TCGOp *op) 1265 { 1266 if (fold_const1(ctx, op)) { 1267 return true; 1268 } 1269 1270 switch (ctx->type) { 1271 case TCG_TYPE_I32: 1272 ctx->z_mask = 32 | 31; 1273 break; 1274 case TCG_TYPE_I64: 1275 ctx->z_mask = 64 | 63; 1276 break; 1277 default: 1278 g_assert_not_reached(); 1279 } 1280 ctx->s_mask = smask_from_zmask(ctx->z_mask); 1281 return false; 1282 } 1283 1284 static bool fold_deposit(OptContext *ctx, TCGOp *op) 1285 { 1286 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1287 uint64_t t1 = arg_info(op->args[1])->val; 1288 uint64_t t2 = arg_info(op->args[2])->val; 1289 1290 t1 = deposit64(t1, op->args[3], op->args[4], t2); 1291 return tcg_opt_gen_movi(ctx, op, op->args[0], t1); 1292 } 1293 1294 ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask, 1295 op->args[3], op->args[4], 1296 arg_info(op->args[2])->z_mask); 1297 return false; 1298 } 1299 1300 static bool fold_divide(OptContext *ctx, TCGOp *op) 1301 { 1302 if (fold_const2(ctx, op) || 1303 fold_xi_to_x(ctx, op, 1)) { 1304 return true; 1305 } 1306 return false; 1307 } 1308 1309 static bool fold_dup(OptContext *ctx, TCGOp *op) 1310 { 1311 if (arg_is_const(op->args[1])) { 1312 uint64_t t = arg_info(op->args[1])->val; 1313 t = dup_const(TCGOP_VECE(op), t); 1314 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1315 } 1316 return false; 1317 } 1318 1319 static bool fold_dup2(OptContext *ctx, TCGOp *op) 1320 { 1321 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1322 uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32, 1323 arg_info(op->args[2])->val); 1324 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1325 } 1326 1327 if (args_are_copies(op->args[1], op->args[2])) { 1328 op->opc = INDEX_op_dup_vec; 1329 TCGOP_VECE(op) = MO_32; 1330 } 1331 return false; 1332 } 1333 1334 static bool fold_eqv(OptContext *ctx, TCGOp *op) 1335 { 1336 if (fold_const2_commutative(ctx, op) || 1337 fold_xi_to_x(ctx, op, -1) || 1338 fold_xi_to_not(ctx, op, 0)) { 1339 return true; 1340 } 1341 1342 ctx->s_mask = arg_info(op->args[1])->s_mask 1343 & arg_info(op->args[2])->s_mask; 1344 return false; 1345 } 1346 1347 static bool fold_extract(OptContext *ctx, TCGOp *op) 1348 { 1349 uint64_t z_mask_old, z_mask; 1350 int pos = op->args[2]; 1351 int len = op->args[3]; 1352 1353 if (arg_is_const(op->args[1])) { 1354 uint64_t t; 1355 1356 t = arg_info(op->args[1])->val; 1357 t = extract64(t, pos, len); 1358 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1359 } 1360 1361 z_mask_old = arg_info(op->args[1])->z_mask; 1362 z_mask = extract64(z_mask_old, pos, len); 1363 if (pos == 0) { 1364 ctx->a_mask = z_mask_old ^ z_mask; 1365 } 1366 ctx->z_mask = z_mask; 1367 ctx->s_mask = smask_from_zmask(z_mask); 1368 1369 return fold_masks(ctx, op); 1370 } 1371 1372 static bool fold_extract2(OptContext *ctx, TCGOp *op) 1373 { 1374 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1375 uint64_t v1 = arg_info(op->args[1])->val; 1376 uint64_t v2 = arg_info(op->args[2])->val; 1377 int shr = op->args[3]; 1378 1379 if (op->opc == INDEX_op_extract2_i64) { 1380 v1 >>= shr; 1381 v2 <<= 64 - shr; 1382 } else { 1383 v1 = (uint32_t)v1 >> shr; 1384 v2 = (uint64_t)((int32_t)v2 << (32 - shr)); 1385 } 1386 return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2); 1387 } 1388 return false; 1389 } 1390 1391 static bool fold_exts(OptContext *ctx, TCGOp *op) 1392 { 1393 uint64_t s_mask_old, s_mask, z_mask, sign; 1394 bool type_change = false; 1395 1396 if (fold_const1(ctx, op)) { 1397 return true; 1398 } 1399 1400 z_mask = arg_info(op->args[1])->z_mask; 1401 s_mask = arg_info(op->args[1])->s_mask; 1402 s_mask_old = s_mask; 1403 1404 switch (op->opc) { 1405 CASE_OP_32_64(ext8s): 1406 sign = INT8_MIN; 1407 z_mask = (uint8_t)z_mask; 1408 break; 1409 CASE_OP_32_64(ext16s): 1410 sign = INT16_MIN; 1411 z_mask = (uint16_t)z_mask; 1412 break; 1413 case INDEX_op_ext_i32_i64: 1414 type_change = true; 1415 QEMU_FALLTHROUGH; 1416 case INDEX_op_ext32s_i64: 1417 sign = INT32_MIN; 1418 z_mask = (uint32_t)z_mask; 1419 break; 1420 default: 1421 g_assert_not_reached(); 1422 } 1423 1424 if (z_mask & sign) { 1425 z_mask |= sign; 1426 } 1427 s_mask |= sign << 1; 1428 1429 ctx->z_mask = z_mask; 1430 ctx->s_mask = s_mask; 1431 if (!type_change) { 1432 ctx->a_mask = s_mask & ~s_mask_old; 1433 } 1434 1435 return fold_masks(ctx, op); 1436 } 1437 1438 static bool fold_extu(OptContext *ctx, TCGOp *op) 1439 { 1440 uint64_t z_mask_old, z_mask; 1441 bool type_change = false; 1442 1443 if (fold_const1(ctx, op)) { 1444 return true; 1445 } 1446 1447 z_mask_old = z_mask = arg_info(op->args[1])->z_mask; 1448 1449 switch (op->opc) { 1450 CASE_OP_32_64(ext8u): 1451 z_mask = (uint8_t)z_mask; 1452 break; 1453 CASE_OP_32_64(ext16u): 1454 z_mask = (uint16_t)z_mask; 1455 break; 1456 case INDEX_op_extrl_i64_i32: 1457 case INDEX_op_extu_i32_i64: 1458 type_change = true; 1459 QEMU_FALLTHROUGH; 1460 case INDEX_op_ext32u_i64: 1461 z_mask = (uint32_t)z_mask; 1462 break; 1463 case INDEX_op_extrh_i64_i32: 1464 type_change = true; 1465 z_mask >>= 32; 1466 break; 1467 default: 1468 g_assert_not_reached(); 1469 } 1470 1471 ctx->z_mask = z_mask; 1472 ctx->s_mask = smask_from_zmask(z_mask); 1473 if (!type_change) { 1474 ctx->a_mask = z_mask_old ^ z_mask; 1475 } 1476 return fold_masks(ctx, op); 1477 } 1478 1479 static bool fold_mb(OptContext *ctx, TCGOp *op) 1480 { 1481 /* Eliminate duplicate and redundant fence instructions. */ 1482 if (ctx->prev_mb) { 1483 /* 1484 * Merge two barriers of the same type into one, 1485 * or a weaker barrier into a stronger one, 1486 * or two weaker barriers into a stronger one. 1487 * mb X; mb Y => mb X|Y 1488 * mb; strl => mb; st 1489 * ldaq; mb => ld; mb 1490 * ldaq; strl => ld; mb; st 1491 * Other combinations are also merged into a strong 1492 * barrier. This is stricter than specified but for 1493 * the purposes of TCG is better than not optimizing. 1494 */ 1495 ctx->prev_mb->args[0] |= op->args[0]; 1496 tcg_op_remove(ctx->tcg, op); 1497 } else { 1498 ctx->prev_mb = op; 1499 } 1500 return true; 1501 } 1502 1503 static bool fold_mov(OptContext *ctx, TCGOp *op) 1504 { 1505 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 1506 } 1507 1508 static bool fold_movcond(OptContext *ctx, TCGOp *op) 1509 { 1510 TCGCond cond = op->args[5]; 1511 int i; 1512 1513 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) { 1514 op->args[5] = cond = tcg_swap_cond(cond); 1515 } 1516 /* 1517 * Canonicalize the "false" input reg to match the destination reg so 1518 * that the tcg backend can implement a "move if true" operation. 1519 */ 1520 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) { 1521 op->args[5] = cond = tcg_invert_cond(cond); 1522 } 1523 1524 i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond); 1525 if (i >= 0) { 1526 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]); 1527 } 1528 1529 ctx->z_mask = arg_info(op->args[3])->z_mask 1530 | arg_info(op->args[4])->z_mask; 1531 ctx->s_mask = arg_info(op->args[3])->s_mask 1532 & arg_info(op->args[4])->s_mask; 1533 1534 if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) { 1535 uint64_t tv = arg_info(op->args[3])->val; 1536 uint64_t fv = arg_info(op->args[4])->val; 1537 TCGOpcode opc; 1538 1539 switch (ctx->type) { 1540 case TCG_TYPE_I32: 1541 opc = INDEX_op_setcond_i32; 1542 break; 1543 case TCG_TYPE_I64: 1544 opc = INDEX_op_setcond_i64; 1545 break; 1546 default: 1547 g_assert_not_reached(); 1548 } 1549 1550 if (tv == 1 && fv == 0) { 1551 op->opc = opc; 1552 op->args[3] = cond; 1553 } else if (fv == 1 && tv == 0) { 1554 op->opc = opc; 1555 op->args[3] = tcg_invert_cond(cond); 1556 } 1557 } 1558 return false; 1559 } 1560 1561 static bool fold_mul(OptContext *ctx, TCGOp *op) 1562 { 1563 if (fold_const2(ctx, op) || 1564 fold_xi_to_i(ctx, op, 0) || 1565 fold_xi_to_x(ctx, op, 1)) { 1566 return true; 1567 } 1568 return false; 1569 } 1570 1571 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op) 1572 { 1573 if (fold_const2_commutative(ctx, op) || 1574 fold_xi_to_i(ctx, op, 0)) { 1575 return true; 1576 } 1577 return false; 1578 } 1579 1580 static bool fold_multiply2(OptContext *ctx, TCGOp *op) 1581 { 1582 swap_commutative(op->args[0], &op->args[2], &op->args[3]); 1583 1584 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { 1585 uint64_t a = arg_info(op->args[2])->val; 1586 uint64_t b = arg_info(op->args[3])->val; 1587 uint64_t h, l; 1588 TCGArg rl, rh; 1589 TCGOp *op2; 1590 1591 switch (op->opc) { 1592 case INDEX_op_mulu2_i32: 1593 l = (uint64_t)(uint32_t)a * (uint32_t)b; 1594 h = (int32_t)(l >> 32); 1595 l = (int32_t)l; 1596 break; 1597 case INDEX_op_muls2_i32: 1598 l = (int64_t)(int32_t)a * (int32_t)b; 1599 h = l >> 32; 1600 l = (int32_t)l; 1601 break; 1602 case INDEX_op_mulu2_i64: 1603 mulu64(&l, &h, a, b); 1604 break; 1605 case INDEX_op_muls2_i64: 1606 muls64(&l, &h, a, b); 1607 break; 1608 default: 1609 g_assert_not_reached(); 1610 } 1611 1612 rl = op->args[0]; 1613 rh = op->args[1]; 1614 1615 /* The proper opcode is supplied by tcg_opt_gen_mov. */ 1616 op2 = tcg_op_insert_before(ctx->tcg, op, 0); 1617 1618 tcg_opt_gen_movi(ctx, op, rl, l); 1619 tcg_opt_gen_movi(ctx, op2, rh, h); 1620 return true; 1621 } 1622 return false; 1623 } 1624 1625 static bool fold_nand(OptContext *ctx, TCGOp *op) 1626 { 1627 if (fold_const2_commutative(ctx, op) || 1628 fold_xi_to_not(ctx, op, -1)) { 1629 return true; 1630 } 1631 1632 ctx->s_mask = arg_info(op->args[1])->s_mask 1633 & arg_info(op->args[2])->s_mask; 1634 return false; 1635 } 1636 1637 static bool fold_neg(OptContext *ctx, TCGOp *op) 1638 { 1639 uint64_t z_mask; 1640 1641 if (fold_const1(ctx, op)) { 1642 return true; 1643 } 1644 1645 /* Set to 1 all bits to the left of the rightmost. */ 1646 z_mask = arg_info(op->args[1])->z_mask; 1647 ctx->z_mask = -(z_mask & -z_mask); 1648 1649 /* 1650 * Because of fold_sub_to_neg, we want to always return true, 1651 * via finish_folding. 1652 */ 1653 finish_folding(ctx, op); 1654 return true; 1655 } 1656 1657 static bool fold_nor(OptContext *ctx, TCGOp *op) 1658 { 1659 if (fold_const2_commutative(ctx, op) || 1660 fold_xi_to_not(ctx, op, 0)) { 1661 return true; 1662 } 1663 1664 ctx->s_mask = arg_info(op->args[1])->s_mask 1665 & arg_info(op->args[2])->s_mask; 1666 return false; 1667 } 1668 1669 static bool fold_not(OptContext *ctx, TCGOp *op) 1670 { 1671 if (fold_const1(ctx, op)) { 1672 return true; 1673 } 1674 1675 ctx->s_mask = arg_info(op->args[1])->s_mask; 1676 1677 /* Because of fold_to_not, we want to always return true, via finish. */ 1678 finish_folding(ctx, op); 1679 return true; 1680 } 1681 1682 static bool fold_or(OptContext *ctx, TCGOp *op) 1683 { 1684 if (fold_const2_commutative(ctx, op) || 1685 fold_xi_to_x(ctx, op, 0) || 1686 fold_xx_to_x(ctx, op)) { 1687 return true; 1688 } 1689 1690 ctx->z_mask = arg_info(op->args[1])->z_mask 1691 | arg_info(op->args[2])->z_mask; 1692 ctx->s_mask = arg_info(op->args[1])->s_mask 1693 & arg_info(op->args[2])->s_mask; 1694 return fold_masks(ctx, op); 1695 } 1696 1697 static bool fold_orc(OptContext *ctx, TCGOp *op) 1698 { 1699 if (fold_const2(ctx, op) || 1700 fold_xx_to_i(ctx, op, -1) || 1701 fold_xi_to_x(ctx, op, -1) || 1702 fold_ix_to_not(ctx, op, 0)) { 1703 return true; 1704 } 1705 1706 ctx->s_mask = arg_info(op->args[1])->s_mask 1707 & arg_info(op->args[2])->s_mask; 1708 return false; 1709 } 1710 1711 static bool fold_qemu_ld(OptContext *ctx, TCGOp *op) 1712 { 1713 const TCGOpDef *def = &tcg_op_defs[op->opc]; 1714 MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs]; 1715 MemOp mop = get_memop(oi); 1716 int width = 8 * memop_size(mop); 1717 1718 if (width < 64) { 1719 ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width); 1720 if (!(mop & MO_SIGN)) { 1721 ctx->z_mask = MAKE_64BIT_MASK(0, width); 1722 ctx->s_mask <<= 1; 1723 } 1724 } 1725 1726 /* Opcodes that touch guest memory stop the mb optimization. */ 1727 ctx->prev_mb = NULL; 1728 return false; 1729 } 1730 1731 static bool fold_qemu_st(OptContext *ctx, TCGOp *op) 1732 { 1733 /* Opcodes that touch guest memory stop the mb optimization. */ 1734 ctx->prev_mb = NULL; 1735 return false; 1736 } 1737 1738 static bool fold_remainder(OptContext *ctx, TCGOp *op) 1739 { 1740 if (fold_const2(ctx, op) || 1741 fold_xx_to_i(ctx, op, 0)) { 1742 return true; 1743 } 1744 return false; 1745 } 1746 1747 static bool fold_setcond(OptContext *ctx, TCGOp *op) 1748 { 1749 TCGCond cond = op->args[3]; 1750 int i; 1751 1752 if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) { 1753 op->args[3] = cond = tcg_swap_cond(cond); 1754 } 1755 1756 i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond); 1757 if (i >= 0) { 1758 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 1759 } 1760 1761 ctx->z_mask = 1; 1762 ctx->s_mask = smask_from_zmask(1); 1763 return false; 1764 } 1765 1766 static bool fold_setcond2(OptContext *ctx, TCGOp *op) 1767 { 1768 TCGCond cond = op->args[5]; 1769 int i, inv = 0; 1770 1771 if (swap_commutative2(&op->args[1], &op->args[3])) { 1772 op->args[5] = cond = tcg_swap_cond(cond); 1773 } 1774 1775 i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond); 1776 if (i >= 0) { 1777 goto do_setcond_const; 1778 } 1779 1780 switch (cond) { 1781 case TCG_COND_LT: 1782 case TCG_COND_GE: 1783 /* 1784 * Simplify LT/GE comparisons vs zero to a single compare 1785 * vs the high word of the input. 1786 */ 1787 if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 && 1788 arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) { 1789 goto do_setcond_high; 1790 } 1791 break; 1792 1793 case TCG_COND_NE: 1794 inv = 1; 1795 QEMU_FALLTHROUGH; 1796 case TCG_COND_EQ: 1797 /* 1798 * Simplify EQ/NE comparisons where one of the pairs 1799 * can be simplified. 1800 */ 1801 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1], 1802 op->args[3], cond); 1803 switch (i ^ inv) { 1804 case 0: 1805 goto do_setcond_const; 1806 case 1: 1807 goto do_setcond_high; 1808 } 1809 1810 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2], 1811 op->args[4], cond); 1812 switch (i ^ inv) { 1813 case 0: 1814 goto do_setcond_const; 1815 case 1: 1816 op->args[2] = op->args[3]; 1817 op->args[3] = cond; 1818 op->opc = INDEX_op_setcond_i32; 1819 break; 1820 } 1821 break; 1822 1823 default: 1824 break; 1825 1826 do_setcond_high: 1827 op->args[1] = op->args[2]; 1828 op->args[2] = op->args[4]; 1829 op->args[3] = cond; 1830 op->opc = INDEX_op_setcond_i32; 1831 break; 1832 } 1833 1834 ctx->z_mask = 1; 1835 ctx->s_mask = smask_from_zmask(1); 1836 return false; 1837 1838 do_setcond_const: 1839 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 1840 } 1841 1842 static bool fold_sextract(OptContext *ctx, TCGOp *op) 1843 { 1844 uint64_t z_mask, s_mask, s_mask_old; 1845 int pos = op->args[2]; 1846 int len = op->args[3]; 1847 1848 if (arg_is_const(op->args[1])) { 1849 uint64_t t; 1850 1851 t = arg_info(op->args[1])->val; 1852 t = sextract64(t, pos, len); 1853 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1854 } 1855 1856 z_mask = arg_info(op->args[1])->z_mask; 1857 z_mask = sextract64(z_mask, pos, len); 1858 ctx->z_mask = z_mask; 1859 1860 s_mask_old = arg_info(op->args[1])->s_mask; 1861 s_mask = sextract64(s_mask_old, pos, len); 1862 s_mask |= MAKE_64BIT_MASK(len, 64 - len); 1863 ctx->s_mask = s_mask; 1864 1865 if (pos == 0) { 1866 ctx->a_mask = s_mask & ~s_mask_old; 1867 } 1868 1869 return fold_masks(ctx, op); 1870 } 1871 1872 static bool fold_shift(OptContext *ctx, TCGOp *op) 1873 { 1874 uint64_t s_mask, z_mask, sign; 1875 1876 if (fold_const2(ctx, op) || 1877 fold_ix_to_i(ctx, op, 0) || 1878 fold_xi_to_x(ctx, op, 0)) { 1879 return true; 1880 } 1881 1882 s_mask = arg_info(op->args[1])->s_mask; 1883 z_mask = arg_info(op->args[1])->z_mask; 1884 1885 if (arg_is_const(op->args[2])) { 1886 int sh = arg_info(op->args[2])->val; 1887 1888 ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh); 1889 1890 s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh); 1891 ctx->s_mask = smask_from_smask(s_mask); 1892 1893 return fold_masks(ctx, op); 1894 } 1895 1896 switch (op->opc) { 1897 CASE_OP_32_64(sar): 1898 /* 1899 * Arithmetic right shift will not reduce the number of 1900 * input sign repetitions. 1901 */ 1902 ctx->s_mask = s_mask; 1903 break; 1904 CASE_OP_32_64(shr): 1905 /* 1906 * If the sign bit is known zero, then logical right shift 1907 * will not reduced the number of input sign repetitions. 1908 */ 1909 sign = (s_mask & -s_mask) >> 1; 1910 if (!(z_mask & sign)) { 1911 ctx->s_mask = s_mask; 1912 } 1913 break; 1914 default: 1915 break; 1916 } 1917 1918 return false; 1919 } 1920 1921 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op) 1922 { 1923 TCGOpcode neg_op; 1924 bool have_neg; 1925 1926 if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) { 1927 return false; 1928 } 1929 1930 switch (ctx->type) { 1931 case TCG_TYPE_I32: 1932 neg_op = INDEX_op_neg_i32; 1933 have_neg = TCG_TARGET_HAS_neg_i32; 1934 break; 1935 case TCG_TYPE_I64: 1936 neg_op = INDEX_op_neg_i64; 1937 have_neg = TCG_TARGET_HAS_neg_i64; 1938 break; 1939 case TCG_TYPE_V64: 1940 case TCG_TYPE_V128: 1941 case TCG_TYPE_V256: 1942 neg_op = INDEX_op_neg_vec; 1943 have_neg = (TCG_TARGET_HAS_neg_vec && 1944 tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0); 1945 break; 1946 default: 1947 g_assert_not_reached(); 1948 } 1949 if (have_neg) { 1950 op->opc = neg_op; 1951 op->args[1] = op->args[2]; 1952 return fold_neg(ctx, op); 1953 } 1954 return false; 1955 } 1956 1957 /* We cannot as yet do_constant_folding with vectors. */ 1958 static bool fold_sub_vec(OptContext *ctx, TCGOp *op) 1959 { 1960 if (fold_xx_to_i(ctx, op, 0) || 1961 fold_xi_to_x(ctx, op, 0) || 1962 fold_sub_to_neg(ctx, op)) { 1963 return true; 1964 } 1965 return false; 1966 } 1967 1968 static bool fold_sub(OptContext *ctx, TCGOp *op) 1969 { 1970 return fold_const2(ctx, op) || fold_sub_vec(ctx, op); 1971 } 1972 1973 static bool fold_sub2(OptContext *ctx, TCGOp *op) 1974 { 1975 return fold_addsub2(ctx, op, false); 1976 } 1977 1978 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op) 1979 { 1980 /* We can't do any folding with a load, but we can record bits. */ 1981 switch (op->opc) { 1982 CASE_OP_32_64(ld8s): 1983 ctx->s_mask = MAKE_64BIT_MASK(8, 56); 1984 break; 1985 CASE_OP_32_64(ld8u): 1986 ctx->z_mask = MAKE_64BIT_MASK(0, 8); 1987 ctx->s_mask = MAKE_64BIT_MASK(9, 55); 1988 break; 1989 CASE_OP_32_64(ld16s): 1990 ctx->s_mask = MAKE_64BIT_MASK(16, 48); 1991 break; 1992 CASE_OP_32_64(ld16u): 1993 ctx->z_mask = MAKE_64BIT_MASK(0, 16); 1994 ctx->s_mask = MAKE_64BIT_MASK(17, 47); 1995 break; 1996 case INDEX_op_ld32s_i64: 1997 ctx->s_mask = MAKE_64BIT_MASK(32, 32); 1998 break; 1999 case INDEX_op_ld32u_i64: 2000 ctx->z_mask = MAKE_64BIT_MASK(0, 32); 2001 ctx->s_mask = MAKE_64BIT_MASK(33, 31); 2002 break; 2003 default: 2004 g_assert_not_reached(); 2005 } 2006 return false; 2007 } 2008 2009 static bool fold_xor(OptContext *ctx, TCGOp *op) 2010 { 2011 if (fold_const2_commutative(ctx, op) || 2012 fold_xx_to_i(ctx, op, 0) || 2013 fold_xi_to_x(ctx, op, 0) || 2014 fold_xi_to_not(ctx, op, -1)) { 2015 return true; 2016 } 2017 2018 ctx->z_mask = arg_info(op->args[1])->z_mask 2019 | arg_info(op->args[2])->z_mask; 2020 ctx->s_mask = arg_info(op->args[1])->s_mask 2021 & arg_info(op->args[2])->s_mask; 2022 return fold_masks(ctx, op); 2023 } 2024 2025 /* Propagate constants and copies, fold constant expressions. */ 2026 void tcg_optimize(TCGContext *s) 2027 { 2028 int nb_temps, i; 2029 TCGOp *op, *op_next; 2030 OptContext ctx = { .tcg = s }; 2031 2032 /* Array VALS has an element for each temp. 2033 If this temp holds a constant then its value is kept in VALS' element. 2034 If this temp is a copy of other ones then the other copies are 2035 available through the doubly linked circular list. */ 2036 2037 nb_temps = s->nb_temps; 2038 for (i = 0; i < nb_temps; ++i) { 2039 s->temps[i].state_ptr = NULL; 2040 } 2041 2042 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2043 TCGOpcode opc = op->opc; 2044 const TCGOpDef *def; 2045 bool done = false; 2046 2047 /* Calls are special. */ 2048 if (opc == INDEX_op_call) { 2049 fold_call(&ctx, op); 2050 continue; 2051 } 2052 2053 def = &tcg_op_defs[opc]; 2054 init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs); 2055 copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs); 2056 2057 /* Pre-compute the type of the operation. */ 2058 if (def->flags & TCG_OPF_VECTOR) { 2059 ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op); 2060 } else if (def->flags & TCG_OPF_64BIT) { 2061 ctx.type = TCG_TYPE_I64; 2062 } else { 2063 ctx.type = TCG_TYPE_I32; 2064 } 2065 2066 /* Assume all bits affected, no bits known zero, no sign reps. */ 2067 ctx.a_mask = -1; 2068 ctx.z_mask = -1; 2069 ctx.s_mask = 0; 2070 2071 /* 2072 * Process each opcode. 2073 * Sorted alphabetically by opcode as much as possible. 2074 */ 2075 switch (opc) { 2076 CASE_OP_32_64(add): 2077 done = fold_add(&ctx, op); 2078 break; 2079 case INDEX_op_add_vec: 2080 done = fold_add_vec(&ctx, op); 2081 break; 2082 CASE_OP_32_64(add2): 2083 done = fold_add2(&ctx, op); 2084 break; 2085 CASE_OP_32_64_VEC(and): 2086 done = fold_and(&ctx, op); 2087 break; 2088 CASE_OP_32_64_VEC(andc): 2089 done = fold_andc(&ctx, op); 2090 break; 2091 CASE_OP_32_64(brcond): 2092 done = fold_brcond(&ctx, op); 2093 break; 2094 case INDEX_op_brcond2_i32: 2095 done = fold_brcond2(&ctx, op); 2096 break; 2097 CASE_OP_32_64(bswap16): 2098 CASE_OP_32_64(bswap32): 2099 case INDEX_op_bswap64_i64: 2100 done = fold_bswap(&ctx, op); 2101 break; 2102 CASE_OP_32_64(clz): 2103 CASE_OP_32_64(ctz): 2104 done = fold_count_zeros(&ctx, op); 2105 break; 2106 CASE_OP_32_64(ctpop): 2107 done = fold_ctpop(&ctx, op); 2108 break; 2109 CASE_OP_32_64(deposit): 2110 done = fold_deposit(&ctx, op); 2111 break; 2112 CASE_OP_32_64(div): 2113 CASE_OP_32_64(divu): 2114 done = fold_divide(&ctx, op); 2115 break; 2116 case INDEX_op_dup_vec: 2117 done = fold_dup(&ctx, op); 2118 break; 2119 case INDEX_op_dup2_vec: 2120 done = fold_dup2(&ctx, op); 2121 break; 2122 CASE_OP_32_64(eqv): 2123 done = fold_eqv(&ctx, op); 2124 break; 2125 CASE_OP_32_64(extract): 2126 done = fold_extract(&ctx, op); 2127 break; 2128 CASE_OP_32_64(extract2): 2129 done = fold_extract2(&ctx, op); 2130 break; 2131 CASE_OP_32_64(ext8s): 2132 CASE_OP_32_64(ext16s): 2133 case INDEX_op_ext32s_i64: 2134 case INDEX_op_ext_i32_i64: 2135 done = fold_exts(&ctx, op); 2136 break; 2137 CASE_OP_32_64(ext8u): 2138 CASE_OP_32_64(ext16u): 2139 case INDEX_op_ext32u_i64: 2140 case INDEX_op_extu_i32_i64: 2141 case INDEX_op_extrl_i64_i32: 2142 case INDEX_op_extrh_i64_i32: 2143 done = fold_extu(&ctx, op); 2144 break; 2145 CASE_OP_32_64(ld8s): 2146 CASE_OP_32_64(ld8u): 2147 CASE_OP_32_64(ld16s): 2148 CASE_OP_32_64(ld16u): 2149 case INDEX_op_ld32s_i64: 2150 case INDEX_op_ld32u_i64: 2151 done = fold_tcg_ld(&ctx, op); 2152 break; 2153 case INDEX_op_mb: 2154 done = fold_mb(&ctx, op); 2155 break; 2156 CASE_OP_32_64_VEC(mov): 2157 done = fold_mov(&ctx, op); 2158 break; 2159 CASE_OP_32_64(movcond): 2160 done = fold_movcond(&ctx, op); 2161 break; 2162 CASE_OP_32_64(mul): 2163 done = fold_mul(&ctx, op); 2164 break; 2165 CASE_OP_32_64(mulsh): 2166 CASE_OP_32_64(muluh): 2167 done = fold_mul_highpart(&ctx, op); 2168 break; 2169 CASE_OP_32_64(muls2): 2170 CASE_OP_32_64(mulu2): 2171 done = fold_multiply2(&ctx, op); 2172 break; 2173 CASE_OP_32_64(nand): 2174 done = fold_nand(&ctx, op); 2175 break; 2176 CASE_OP_32_64(neg): 2177 done = fold_neg(&ctx, op); 2178 break; 2179 CASE_OP_32_64(nor): 2180 done = fold_nor(&ctx, op); 2181 break; 2182 CASE_OP_32_64_VEC(not): 2183 done = fold_not(&ctx, op); 2184 break; 2185 CASE_OP_32_64_VEC(or): 2186 done = fold_or(&ctx, op); 2187 break; 2188 CASE_OP_32_64_VEC(orc): 2189 done = fold_orc(&ctx, op); 2190 break; 2191 case INDEX_op_qemu_ld_i32: 2192 case INDEX_op_qemu_ld_i64: 2193 done = fold_qemu_ld(&ctx, op); 2194 break; 2195 case INDEX_op_qemu_st_i32: 2196 case INDEX_op_qemu_st8_i32: 2197 case INDEX_op_qemu_st_i64: 2198 done = fold_qemu_st(&ctx, op); 2199 break; 2200 CASE_OP_32_64(rem): 2201 CASE_OP_32_64(remu): 2202 done = fold_remainder(&ctx, op); 2203 break; 2204 CASE_OP_32_64(rotl): 2205 CASE_OP_32_64(rotr): 2206 CASE_OP_32_64(sar): 2207 CASE_OP_32_64(shl): 2208 CASE_OP_32_64(shr): 2209 done = fold_shift(&ctx, op); 2210 break; 2211 CASE_OP_32_64(setcond): 2212 done = fold_setcond(&ctx, op); 2213 break; 2214 case INDEX_op_setcond2_i32: 2215 done = fold_setcond2(&ctx, op); 2216 break; 2217 CASE_OP_32_64(sextract): 2218 done = fold_sextract(&ctx, op); 2219 break; 2220 CASE_OP_32_64(sub): 2221 done = fold_sub(&ctx, op); 2222 break; 2223 case INDEX_op_sub_vec: 2224 done = fold_sub_vec(&ctx, op); 2225 break; 2226 CASE_OP_32_64(sub2): 2227 done = fold_sub2(&ctx, op); 2228 break; 2229 CASE_OP_32_64_VEC(xor): 2230 done = fold_xor(&ctx, op); 2231 break; 2232 default: 2233 break; 2234 } 2235 2236 if (!done) { 2237 finish_folding(&ctx, op); 2238 } 2239 } 2240 } 2241