/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/int128.h"
#include "qemu/interval-tree.h"
#include "tcg/tcg-op-common.h"
#include "tcg-internal.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)

typedef struct MemCopyInfo {
    IntervalTreeNode itree;
    QSIMPLEQ_ENTRY(MemCopyInfo) next;
    TCGTemp *ts;
    TCGType type;
} MemCopyInfo;

typedef struct TempOptInfo {
    bool is_const;
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
    uint64_t val;
    uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
    uint64_t s_mask;  /* a left-aligned mask of clrsb(value) bits. */
} TempOptInfo;

typedef struct OptContext {
    TCGContext *tcg;
    TCGOp *prev_mb;
    TCGTempSet temps_used;

    IntervalTreeRoot mem_copy;
    QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;

    /* In flight values from optimization. */
    uint64_t a_mask;  /* mask bit is 0 iff value identical to first input */
    uint64_t z_mask;  /* mask bit is 0 iff value bit is 0 */
    uint64_t s_mask;  /* mask of clrsb(value) bits */
    TCGType type;
} OptContext;

/* Calculate the smask for a specific value. */
static uint64_t smask_from_value(uint64_t value)
{
    int rep = clrsb64(value);
    return ~(~0ull >> rep);
}

/*
 * Calculate the smask for a given set of known-zeros.
 * If there are lots of zeros on the left, we can consider the remainder
 * an unsigned field, and thus the corresponding signed field is one bit
 * larger.
 */
static uint64_t smask_from_zmask(uint64_t zmask)
{
    /*
     * Only the 0 bits are significant for zmask, thus the msb itself
     * must be zero, else we have no sign information.
     */
    int rep = clz64(zmask);
    if (rep == 0) {
        return 0;
    }
    rep -= 1;
    return ~(~0ull >> rep);
}
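
/*
 * Worked example: a value known to fit in 8 unsigned bits has
 * z_mask 0xff.  clz64(0xff) = 56, so smask_from_zmask(0xff) returns
 * a mask of the top 55 bits: bit 8 is a guaranteed zero sign bit and
 * bits 63..9 repeat it.  This matches the MAKE_64BIT_MASK(9, 55)
 * used for ld8u in fold_tcg_ld() below.
 */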

/*
 * Recreate a properly left-aligned smask after manipulation.
 * Some bit-shuffling, particularly shifts and rotates, may
 * retain sign bits on the left, but may scatter disconnected
 * sign bits on the right.  Retain only what remains to the left.
 */
static uint64_t smask_from_smask(int64_t smask)
{
    /* Only the 1 bits are significant for smask. */
    return smask_from_zmask(~smask);
}

static inline TempOptInfo *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}

static inline TempOptInfo *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}

static inline bool ts_is_const(TCGTemp *ts)
{
    return ts_info(ts)->is_const;
}

static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
{
    TempOptInfo *ti = ts_info(ts);
    return ti->is_const && ti->val == val;
}

static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}

static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
{
    return ts_is_const_val(arg_temp(arg), val);
}

static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}

static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
{
    return a->kind < b->kind ? b : a;
}

/* Initialize and activate a temporary. */
static void init_ts_info(OptContext *ctx, TCGTemp *ts)
{
    size_t idx = temp_idx(ts);
    TempOptInfo *ti;

    if (test_bit(idx, ctx->temps_used.l)) {
        return;
    }
    set_bit(idx, ctx->temps_used.l);

    ti = ts->state_ptr;
    if (ti == NULL) {
        ti = tcg_malloc(sizeof(TempOptInfo));
        ts->state_ptr = ti;
    }

    ti->next_copy = ts;
    ti->prev_copy = ts;
    QSIMPLEQ_INIT(&ti->mem_copy);
    if (ts->kind == TEMP_CONST) {
        ti->is_const = true;
        ti->val = ts->val;
        ti->z_mask = ts->val;
        ti->s_mask = smask_from_value(ts->val);
    } else {
        ti->is_const = false;
        ti->z_mask = -1;
        ti->s_mask = 0;
    }
}

static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
{
    IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
    return r ? container_of(r, MemCopyInfo, itree) : NULL;
}

static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
{
    IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l);
    return r ? container_of(r, MemCopyInfo, itree) : NULL;
}

static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
{
    TCGTemp *ts = mc->ts;
    TempOptInfo *ti = ts_info(ts);

    interval_tree_remove(&mc->itree, &ctx->mem_copy);
    QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next);
    QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
}

static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
{
    while (true) {
        MemCopyInfo *mc = mem_copy_first(ctx, s, l);
        if (!mc) {
            break;
        }
        remove_mem_copy(ctx, mc);
    }
}

static void remove_mem_copy_all(OptContext *ctx)
{
    remove_mem_copy_in(ctx, 0, -1);
    tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
}

static TCGTemp *find_better_copy(TCGTemp *ts)
{
    TCGTemp *i, *ret;

    /* If this is already readonly, we can't do better. */
    if (temp_readonly(ts)) {
        return ts;
    }

    ret = ts;
    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
        ret = cmp_better_copy(ret, i);
    }
    return ret;
}

static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
{
    TempOptInfo *si = ts_info(src_ts);
    TempOptInfo *di = ts_info(dst_ts);
    MemCopyInfo *mc;

    QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
        tcg_debug_assert(mc->ts == src_ts);
        mc->ts = dst_ts;
    }
    QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
}

/* Reset TEMP's state, possibly removing the temp from the list of copies. */
static void reset_ts(OptContext *ctx, TCGTemp *ts)
{
    TempOptInfo *ti = ts_info(ts);
    TCGTemp *pts = ti->prev_copy;
    TCGTemp *nts = ti->next_copy;
    TempOptInfo *pi = ts_info(pts);
    TempOptInfo *ni = ts_info(nts);

    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->is_const = false;
    ti->z_mask = -1;
    ti->s_mask = 0;

    if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
        if (ts == nts) {
            /* Last temp copy being removed, the mem copies die. */
            MemCopyInfo *mc;
            QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
                interval_tree_remove(&mc->itree, &ctx->mem_copy);
            }
            QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
        } else {
            move_mem_copies(find_better_copy(nts), ts);
        }
    }
}

static void reset_temp(OptContext *ctx, TCGArg arg)
{
    reset_ts(ctx, arg_temp(arg));
}

static void record_mem_copy(OptContext *ctx, TCGType type,
                            TCGTemp *ts, intptr_t start, intptr_t last)
{
    MemCopyInfo *mc;
    TempOptInfo *ti;

    mc = QSIMPLEQ_FIRST(&ctx->mem_free);
    if (mc) {
        QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
    } else {
        mc = tcg_malloc(sizeof(*mc));
    }

    memset(mc, 0, sizeof(*mc));
    mc->itree.start = start;
    mc->itree.last = last;
    mc->type = type;
    interval_tree_insert(&mc->itree, &ctx->mem_copy);

    ts = find_better_copy(ts);
    ti = ts_info(ts);
    mc->ts = ts;
    QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next);
}

static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
{
    TCGTemp *i;

    if (ts1 == ts2) {
        return true;
    }

    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
        return false;
    }

    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
        if (i == ts2) {
            return true;
        }
    }

    return false;
}

static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}

static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s)
{
    MemCopyInfo *mc;

    for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) {
        if (mc->itree.start == s && mc->type == type) {
            return find_better_copy(mc->ts);
        }
    }
    return NULL;
}

static TCGArg arg_new_constant(OptContext *ctx, uint64_t val)
{
    TCGType type = ctx->type;
    TCGTemp *ts;

    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    ts = tcg_constant_internal(type, val);
    init_ts_info(ctx, ts);

    return temp_arg(ts);
}

static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    TempOptInfo *di;
    TempOptInfo *si;
    TCGOpcode new_op;
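
    /* A mov between temps already known to be copies is a no-op. */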
    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(ctx->tcg, op);
        return true;
    }

    reset_ts(ctx, dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);

    switch (ctx->type) {
    case TCG_TYPE_I32:
        new_op = INDEX_op_mov_i32;
        break;
    case TCG_TYPE_I64:
        new_op = INDEX_op_mov_i64;
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
        new_op = INDEX_op_mov_vec;
        break;
    default:
        g_assert_not_reached();
    }
    op->opc = new_op;
    op->args[0] = dst;
    op->args[1] = src;

    di->z_mask = si->z_mask;
    di->s_mask = si->s_mask;

    if (src_ts->type == dst_ts->type) {
        TempOptInfo *ni = ts_info(si->next_copy);

        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;

        if (!QSIMPLEQ_EMPTY(&si->mem_copy)
            && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
            move_mem_copies(dst_ts, src_ts);
        }
    }
    return true;
}

static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
                             TCGArg dst, uint64_t val)
{
    /* Convert movi to mov with constant temp. */
    return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
}

static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64_VEC(and):
        return x & y;

    CASE_OP_32_64_VEC(or):
        return x | y;

    CASE_OP_32_64_VEC(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64_VEC(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64_VEC(andc):
        return x & ~y;

    CASE_OP_32_64_VEC(orc):
        return x | ~y;

    CASE_OP_32_64_VEC(eqv):
        return ~(x ^ y);

    CASE_OP_32_64_VEC(nand):
        return ~(x & y);

    CASE_OP_32_64_VEC(nor):
        return ~(x | y);

    case INDEX_op_clz_i32:
        return (uint32_t)x ? clz32(x) : y;

    case INDEX_op_clz_i64:
        return x ? clz64(x) : y;

    case INDEX_op_ctz_i32:
        return (uint32_t)x ? ctz32(x) : y;

    case INDEX_op_ctz_i64:
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop_i32:
        return ctpop32(x);

    case INDEX_op_ctpop_i64:
        return ctpop64(x);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;
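
    /*
     * For the bswap cases below, the second folding operand is not a
     * value but the TCG_BSWAP_* flags word; TCG_BSWAP_OS requests a
     * sign-extended result.
     */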
    CASE_OP_32_64(bswap16):
        x = bswap16(x);
        return y & TCG_BSWAP_OS ? (int16_t)x : x;

    CASE_OP_32_64(bswap32):
        x = bswap32(x);
        return y & TCG_BSWAP_OS ? (int32_t)x : x;

    case INDEX_op_bswap64_i64:
        return bswap64(x);

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined. */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        g_assert_not_reached();
    }
}

static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
                                    uint64_t x, uint64_t y)
{
    uint64_t res = do_constant_folding_2(op, x, y);
    if (type == TCG_TYPE_I32) {
        res = (int32_t)res;
    }
    return res;
}
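
/*
 * Note that 32-bit results are sign-extended to 64 bits, matching the
 * representation of 32-bit constants elsewhere: e.g. folding
 * "add_i32 0x7fffffff, 1" yields 0xffffffff80000000.
 */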

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        g_assert_not_reached();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        g_assert_not_reached();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        g_assert_not_reached();
    }
}

/*
 * Return -1 if the condition can't be simplified,
 * and the result of the condition (0 or 1) if it can.
 */
static int do_constant_folding_cond(TCGType type, TCGArg x,
                                    TCGArg y, TCGCond c)
{
    if (arg_is_const(x) && arg_is_const(y)) {
        uint64_t xv = arg_info(x)->val;
        uint64_t yv = arg_info(y)->val;

        switch (type) {
        case TCG_TYPE_I32:
            return do_constant_folding_cond_32(xv, yv, c);
        case TCG_TYPE_I64:
            return do_constant_folding_cond_64(xv, yv, c);
        default:
            /* Only scalar comparisons are optimizable. */
            return -1;
        }
    } else if (args_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (arg_is_const_val(y, 0)) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return -1;
        }
    }
    return -1;
}

/**
 * swap_commutative:
 * @dest: TCGArg of the destination argument, or NO_DEST.
 * @p1: first paired argument
 * @p2: second paired argument
 *
 * If *@p1 is a constant and *@p2 is not, swap.
 * If *@p2 matches @dest, swap.
 * Return true if a swap was performed.
 */

#define NO_DEST  temp_arg(NULL)

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += arg_is_const(a1);
    sum -= arg_is_const(a2);

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += arg_is_const(p1[0]);
    sum += arg_is_const(p1[1]);
    sum -= arg_is_const(p2[0]);
    sum -= arg_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}
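
/*
 * E.g. "add_i32 t0, $5, t1" becomes "add_i32 t0, t1, $5", and
 * "and_i32 t0, t1, t0" becomes "and_i32 t0, t0, t1": the constant
 * moves into the immediate position and the destination aliases the
 * first source where possible.
 */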

/*
 * Return -1 if the condition can't be simplified,
 * and the result of the condition (0 or 1) if it can.
 */
static int do_constant_folding_cond1(OptContext *ctx, TCGArg dest,
                                     TCGArg *p1, TCGArg *p2, TCGArg *pcond)
{
    TCGCond cond;
    bool swap;
    int r;

    swap = swap_commutative(dest, p1, p2);
    cond = *pcond;
    if (swap) {
        *pcond = cond = tcg_swap_cond(cond);
    }

    r = do_constant_folding_cond(ctx->type, *p1, *p2, cond);
    return r;
}

static int do_constant_folding_cond2(OptContext *ctx, TCGArg *args)
{
    TCGArg al, ah, bl, bh;
    TCGCond c;
    bool swap;

    swap = swap_commutative2(args, args + 2);
    c = args[4];
    if (swap) {
        args[4] = c = tcg_swap_cond(c);
    }

    al = args[0];
    ah = args[1];
    bl = args[2];
    bh = args[3];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_info(bl)->val;
        tcg_target_ulong bhv = arg_info(bh)->val;
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_info(al)->val;
            tcg_target_ulong ahv = arg_info(ah)->val;
            uint64_t a = deposit64(alv, 32, 32, ahv);
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return -1;
}

static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
{
    for (int i = 0; i < nb_args; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        init_ts_info(ctx, ts);
    }
}

static void copy_propagate(OptContext *ctx, TCGOp *op,
                           int nb_oargs, int nb_iargs)
{
    for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (ts_is_copy(ts)) {
            op->args[i] = temp_arg(find_better_copy(ts));
        }
    }
}

static void finish_folding(OptContext *ctx, TCGOp *op)
{
    const TCGOpDef *def = &tcg_op_defs[op->opc];
    int i, nb_oargs;

    /*
     * We only optimize extended basic blocks.  If the opcode ends a BB
     * and is not a conditional branch, reset all temp data.
     */
    if (def->flags & TCG_OPF_BB_END) {
        ctx->prev_mb = NULL;
        if (!(def->flags & TCG_OPF_COND_BRANCH)) {
            memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
            remove_mem_copy_all(ctx);
        }
        return;
    }

    nb_oargs = def->nb_oargs;
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        reset_ts(ctx, ts);
        /*
         * Save the corresponding known-zero/sign bits mask for the
         * first output argument (only one supported so far).
         */
        if (i == 0) {
            ts_info(ts)->z_mask = ctx->z_mask;
            ts_info(ts)->s_mask = ctx->s_mask;
        }
    }
}
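
/*
 * For instance, after an ext8u is processed, ctx->z_mask is at most
 * 0xff; finish_folding copies that into the output temp's info, so a
 * later "and" of that temp with 0xff can be recognized as redundant.
 */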

/*
 * The fold_* functions return true when processing is complete,
 * usually by folding the operation to a constant or to a copy,
 * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
 * like collect information about the value produced, for use in
 * optimizing a subsequent operation.
 *
 * These first fold_* functions are all helpers, used by other
 * folders for more specific operations.
 */

static bool fold_const1(OptContext *ctx, TCGOp *op)
{
    if (arg_is_const(op->args[1])) {
        uint64_t t;

        t = arg_info(op->args[1])->val;
        t = do_constant_folding(op->opc, ctx->type, t, 0);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
    }
    return false;
}

static bool fold_const2(OptContext *ctx, TCGOp *op)
{
    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
        uint64_t t1 = arg_info(op->args[1])->val;
        uint64_t t2 = arg_info(op->args[2])->val;

        t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
    }
    return false;
}

static bool fold_commutative(OptContext *ctx, TCGOp *op)
{
    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
    return false;
}

static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
{
    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
    return fold_const2(ctx, op);
}

static bool fold_masks(OptContext *ctx, TCGOp *op)
{
    uint64_t a_mask = ctx->a_mask;
    uint64_t z_mask = ctx->z_mask;
    uint64_t s_mask = ctx->s_mask;

    /*
     * 32-bit ops generate 32-bit results, which for the purpose of
     * simplifying tcg are sign-extended.  Certainly that's how we
     * represent our constants elsewhere.  Note that the bits will
     * be reset properly for a 64-bit value when encountering the
     * type changing opcodes.
     */
    if (ctx->type == TCG_TYPE_I32) {
        a_mask = (int32_t)a_mask;
        z_mask = (int32_t)z_mask;
        s_mask |= MAKE_64BIT_MASK(32, 32);
        ctx->z_mask = z_mask;
        ctx->s_mask = s_mask;
    }

    if (z_mask == 0) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
    }
    if (a_mask == 0) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }
    return false;
}

/*
 * Convert @op to NOT, if NOT is supported by the host.
 * Return true if the conversion is successful, which will still
 * indicate that the processing is complete.
 */
static bool fold_not(OptContext *ctx, TCGOp *op);
static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
{
    TCGOpcode not_op;
    bool have_not;

    switch (ctx->type) {
    case TCG_TYPE_I32:
        not_op = INDEX_op_not_i32;
        have_not = TCG_TARGET_HAS_not_i32;
        break;
    case TCG_TYPE_I64:
        not_op = INDEX_op_not_i64;
        have_not = TCG_TARGET_HAS_not_i64;
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        not_op = INDEX_op_not_vec;
        have_not = TCG_TARGET_HAS_not_vec;
        break;
    default:
        g_assert_not_reached();
    }
    if (have_not) {
        op->opc = not_op;
        op->args[1] = op->args[idx];
        return fold_not(ctx, op);
    }
    return false;
}

/* If the binary operation has first argument @i, fold to @i. */
static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[1], i)) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has first argument @i, fold to NOT. */
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[1], i)) {
        return fold_to_not(ctx, op, 2);
    }
    return false;
}
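
/*
 * In these helper names, "x" denotes a variable operand and "i" the
 * constant @i: fold_xi_to_i matches "op r, x, i" while fold_ix_to_i
 * matches "op r, i, x".
 */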

/* If the binary operation has second argument @i, fold to @i. */
static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[2], i)) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to identity. */
static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[2], i)) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to NOT. */
static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[2], i)) {
        return fold_to_not(ctx, op, 1);
    }
    return false;
}

/* If the binary operation has both arguments equal, fold to @i. */
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (args_are_copies(op->args[1], op->args[2])) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has both arguments equal, fold to identity. */
static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
{
    if (args_are_copies(op->args[1], op->args[2])) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }
    return false;
}

/*
 * These outermost fold_<op> functions are sorted alphabetically.
 *
 * The ordering of the transformations should be:
 *   1) those that produce a constant
 *   2) those that produce a copy
 *   3) those that produce information about the result value.
 */

static bool fold_add(OptContext *ctx, TCGOp *op)
{
    if (fold_const2_commutative(ctx, op) ||
        fold_xi_to_x(ctx, op, 0)) {
        return true;
    }
    return false;
}

/* We cannot as yet do_constant_folding with vectors. */
static bool fold_add_vec(OptContext *ctx, TCGOp *op)
{
    if (fold_commutative(ctx, op) ||
        fold_xi_to_x(ctx, op, 0)) {
        return true;
    }
    return false;
}

static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
{
    bool a_const = arg_is_const(op->args[2]) && arg_is_const(op->args[3]);
    bool b_const = arg_is_const(op->args[4]) && arg_is_const(op->args[5]);

    if (a_const && b_const) {
        uint64_t al = arg_info(op->args[2])->val;
        uint64_t ah = arg_info(op->args[3])->val;
        uint64_t bl = arg_info(op->args[4])->val;
        uint64_t bh = arg_info(op->args[5])->val;
        TCGArg rl, rh;
        TCGOp *op2;

        if (ctx->type == TCG_TYPE_I32) {
            uint64_t a = deposit64(al, 32, 32, ah);
            uint64_t b = deposit64(bl, 32, 32, bh);

            if (add) {
                a += b;
            } else {
                a -= b;
            }

            al = sextract64(a, 0, 32);
            ah = sextract64(a, 32, 32);
        } else {
            Int128 a = int128_make128(al, ah);
            Int128 b = int128_make128(bl, bh);

            if (add) {
                a = int128_add(a, b);
            } else {
                a = int128_sub(a, b);
            }

            al = int128_getlo(a);
            ah = int128_gethi(a);
        }

        rl = op->args[0];
        rh = op->args[1];
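
        /*
         * add2/sub2 produce two outputs: fold the low part into this
         * op and insert a second op before it for the high part.
         */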
        /* The proper opcode is supplied by tcg_opt_gen_mov. */
        op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);

        tcg_opt_gen_movi(ctx, op, rl, al);
        tcg_opt_gen_movi(ctx, op2, rh, ah);
        return true;
    }

    /* Fold sub2 r,x,i to add2 r,x,-i */
    if (!add && b_const) {
        uint64_t bl = arg_info(op->args[4])->val;
        uint64_t bh = arg_info(op->args[5])->val;

        /* Negate the two parts without assembling and disassembling. */
        bl = -bl;
        bh = ~bh + !bl;

        op->opc = (ctx->type == TCG_TYPE_I32
                   ? INDEX_op_add2_i32 : INDEX_op_add2_i64);
        op->args[4] = arg_new_constant(ctx, bl);
        op->args[5] = arg_new_constant(ctx, bh);
    }
    return false;
}

static bool fold_add2(OptContext *ctx, TCGOp *op)
{
    /* Note that the high and low parts may be independently swapped. */
    swap_commutative(op->args[0], &op->args[2], &op->args[4]);
    swap_commutative(op->args[1], &op->args[3], &op->args[5]);

    return fold_addsub2(ctx, op, true);
}

static bool fold_and(OptContext *ctx, TCGOp *op)
{
    uint64_t z1, z2;

    if (fold_const2_commutative(ctx, op) ||
        fold_xi_to_i(ctx, op, 0) ||
        fold_xi_to_x(ctx, op, -1) ||
        fold_xx_to_x(ctx, op)) {
        return true;
    }

    z1 = arg_info(op->args[1])->z_mask;
    z2 = arg_info(op->args[2])->z_mask;
    ctx->z_mask = z1 & z2;

    /*
     * Sign repetitions are perforce all identical, whether they are 1 or 0.
     * Bitwise operations preserve the relative quantity of the repetitions.
     */
    ctx->s_mask = arg_info(op->args[1])->s_mask
                & arg_info(op->args[2])->s_mask;

    /*
     * Known-zeros does not imply known-ones.  Therefore unless
     * arg2 is constant, we can't infer affected bits from it.
     */
    if (arg_is_const(op->args[2])) {
        ctx->a_mask = z1 & ~z2;
    }

    return fold_masks(ctx, op);
}

static bool fold_andc(OptContext *ctx, TCGOp *op)
{
    uint64_t z1;

    if (fold_const2(ctx, op) ||
        fold_xx_to_i(ctx, op, 0) ||
        fold_xi_to_x(ctx, op, 0) ||
        fold_ix_to_not(ctx, op, -1)) {
        return true;
    }

    z1 = arg_info(op->args[1])->z_mask;

    /*
     * Known-zeros does not imply known-ones.  Therefore unless
     * arg2 is constant, we can't infer anything from it.
     */
    if (arg_is_const(op->args[2])) {
        uint64_t z2 = ~arg_info(op->args[2])->z_mask;
        ctx->a_mask = z1 & ~z2;
        z1 &= z2;
    }
    ctx->z_mask = z1;

    ctx->s_mask = arg_info(op->args[1])->s_mask
                & arg_info(op->args[2])->s_mask;
    return fold_masks(ctx, op);
}

static bool fold_brcond(OptContext *ctx, TCGOp *op)
{
    int i = do_constant_folding_cond1(ctx, NO_DEST, &op->args[0],
                                      &op->args[1], &op->args[2]);
    if (i == 0) {
        tcg_op_remove(ctx->tcg, op);
        return true;
    }
    if (i > 0) {
        op->opc = INDEX_op_br;
        op->args[0] = op->args[3];
    }
    return false;
}
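
/*
 * E.g. "brcond_i32 t0, t0, eq, L" always branches and is rewritten
 * as "br L", while the same operands with ne never branch and the
 * op is removed outright.
 */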

static bool fold_brcond2(OptContext *ctx, TCGOp *op)
{
    TCGCond cond;
    TCGArg label;
    int i, inv = 0;

    i = do_constant_folding_cond2(ctx, &op->args[0]);
    cond = op->args[4];
    label = op->args[5];
    if (i >= 0) {
        goto do_brcond_const;
    }

    switch (cond) {
    case TCG_COND_LT:
    case TCG_COND_GE:
        /*
         * Simplify LT/GE comparisons vs zero to a single compare
         * vs the high word of the input.
         */
        if (arg_is_const_val(op->args[2], 0) &&
            arg_is_const_val(op->args[3], 0)) {
            goto do_brcond_high;
        }
        break;

    case TCG_COND_NE:
        inv = 1;
        QEMU_FALLTHROUGH;
    case TCG_COND_EQ:
        /*
         * Simplify EQ/NE comparisons where one of the pairs
         * can be simplified.
         */
        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
                                     op->args[2], cond);
        switch (i ^ inv) {
        case 0:
            goto do_brcond_const;
        case 1:
            goto do_brcond_high;
        }

        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
                                     op->args[3], cond);
        switch (i ^ inv) {
        case 0:
            goto do_brcond_const;
        case 1:
            op->opc = INDEX_op_brcond_i32;
            op->args[1] = op->args[2];
            op->args[2] = cond;
            op->args[3] = label;
            break;
        }
        break;

    default:
        break;

    do_brcond_high:
        op->opc = INDEX_op_brcond_i32;
        op->args[0] = op->args[1];
        op->args[1] = op->args[3];
        op->args[2] = cond;
        op->args[3] = label;
        break;

    do_brcond_const:
        if (i == 0) {
            tcg_op_remove(ctx->tcg, op);
            return true;
        }
        op->opc = INDEX_op_br;
        op->args[0] = label;
        break;
    }
    return false;
}

static bool fold_bswap(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask, s_mask, sign;

    if (arg_is_const(op->args[1])) {
        uint64_t t = arg_info(op->args[1])->val;

        t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
    }

    z_mask = arg_info(op->args[1])->z_mask;

    switch (op->opc) {
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap16_i64:
        z_mask = bswap16(z_mask);
        sign = INT16_MIN;
        break;
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap32_i64:
        z_mask = bswap32(z_mask);
        sign = INT32_MIN;
        break;
    case INDEX_op_bswap64_i64:
        z_mask = bswap64(z_mask);
        sign = INT64_MIN;
        break;
    default:
        g_assert_not_reached();
    }
    s_mask = smask_from_zmask(z_mask);

    switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
    case TCG_BSWAP_OZ:
        break;
    case TCG_BSWAP_OS:
        /* If the sign bit may be 1, force all the bits above to 1. */
        if (z_mask & sign) {
            z_mask |= sign;
            s_mask = sign << 1;
        }
        break;
    default:
        /* The high bits are undefined: force all bits above the sign to 1. */
        z_mask |= sign << 1;
        s_mask = 0;
        break;
    }
    ctx->z_mask = z_mask;
    ctx->s_mask = s_mask;

    return fold_masks(ctx, op);
}
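
/*
 * E.g. a bswap32 input known to fit in 16 bits (z_mask 0x0000ffff)
 * yields z_mask 0xffff0000; since bit 31 may then be set,
 * TCG_BSWAP_OS extends the possibly-set bits through bit 63.
 */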

static bool fold_call(OptContext *ctx, TCGOp *op)
{
    TCGContext *s = ctx->tcg;
    int nb_oargs = TCGOP_CALLO(op);
    int nb_iargs = TCGOP_CALLI(op);
    int flags, i;

    init_arguments(ctx, op, nb_oargs + nb_iargs);
    copy_propagate(ctx, op, nb_oargs, nb_iargs);

    /* If the function reads or writes globals, reset temp data. */
    flags = tcg_call_flags(op);
    if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
        int nb_globals = s->nb_globals;

        for (i = 0; i < nb_globals; i++) {
            if (test_bit(i, ctx->temps_used.l)) {
                reset_ts(ctx, &ctx->tcg->temps[i]);
            }
        }
    }

    /* If the function has side effects, reset mem data. */
    if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        remove_mem_copy_all(ctx);
    }

    /* Reset temp data for outputs. */
    for (i = 0; i < nb_oargs; i++) {
        reset_temp(ctx, op->args[i]);
    }

    /* Stop optimizing MB across calls. */
    ctx->prev_mb = NULL;
    return true;
}

static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask;

    if (arg_is_const(op->args[1])) {
        uint64_t t = arg_info(op->args[1])->val;

        if (t != 0) {
            t = do_constant_folding(op->opc, ctx->type, t, 0);
            return tcg_opt_gen_movi(ctx, op, op->args[0], t);
        }
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
    }

    switch (ctx->type) {
    case TCG_TYPE_I32:
        z_mask = 31;
        break;
    case TCG_TYPE_I64:
        z_mask = 63;
        break;
    default:
        g_assert_not_reached();
    }
    ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
    ctx->s_mask = smask_from_zmask(ctx->z_mask);
    return false;
}

static bool fold_ctpop(OptContext *ctx, TCGOp *op)
{
    if (fold_const1(ctx, op)) {
        return true;
    }

    switch (ctx->type) {
    case TCG_TYPE_I32:
        ctx->z_mask = 32 | 31;
        break;
    case TCG_TYPE_I64:
        ctx->z_mask = 64 | 63;
        break;
    default:
        g_assert_not_reached();
    }
    ctx->s_mask = smask_from_zmask(ctx->z_mask);
    return false;
}

static bool fold_deposit(OptContext *ctx, TCGOp *op)
{
    TCGOpcode and_opc;

    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
        uint64_t t1 = arg_info(op->args[1])->val;
        uint64_t t2 = arg_info(op->args[2])->val;

        t1 = deposit64(t1, op->args[3], op->args[4], t2);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
    }

    switch (ctx->type) {
    case TCG_TYPE_I32:
        and_opc = INDEX_op_and_i32;
        break;
    case TCG_TYPE_I64:
        and_opc = INDEX_op_and_i64;
        break;
    default:
        g_assert_not_reached();
    }

    /* Inserting a value into zero at offset 0. */
    if (arg_is_const_val(op->args[1], 0) && op->args[3] == 0) {
        uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);

        op->opc = and_opc;
        op->args[1] = op->args[2];
        op->args[2] = arg_new_constant(ctx, mask);
        ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
        return false;
    }

    /* Inserting zero into a value. */
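    /* E.g. "deposit r, a, $0, pos=8, len=8" becomes "and r, a, $~0xff00". */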
    if (arg_is_const_val(op->args[2], 0)) {
        uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0);

        op->opc = and_opc;
        op->args[2] = arg_new_constant(ctx, mask);
        ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
        return false;
    }

    ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
                            op->args[3], op->args[4],
                            arg_info(op->args[2])->z_mask);
    return false;
}

static bool fold_divide(OptContext *ctx, TCGOp *op)
{
    if (fold_const2(ctx, op) ||
        fold_xi_to_x(ctx, op, 1)) {
        return true;
    }
    return false;
}

static bool fold_dup(OptContext *ctx, TCGOp *op)
{
    if (arg_is_const(op->args[1])) {
        uint64_t t = arg_info(op->args[1])->val;
        t = dup_const(TCGOP_VECE(op), t);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
    }
    return false;
}

static bool fold_dup2(OptContext *ctx, TCGOp *op)
{
    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
        uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
                               arg_info(op->args[2])->val);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
    }

    if (args_are_copies(op->args[1], op->args[2])) {
        op->opc = INDEX_op_dup_vec;
        TCGOP_VECE(op) = MO_32;
    }
    return false;
}

static bool fold_eqv(OptContext *ctx, TCGOp *op)
{
    if (fold_const2_commutative(ctx, op) ||
        fold_xi_to_x(ctx, op, -1) ||
        fold_xi_to_not(ctx, op, 0)) {
        return true;
    }

    ctx->s_mask = arg_info(op->args[1])->s_mask
                & arg_info(op->args[2])->s_mask;
    return false;
}

static bool fold_extract(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask_old, z_mask;
    int pos = op->args[2];
    int len = op->args[3];

    if (arg_is_const(op->args[1])) {
        uint64_t t;

        t = arg_info(op->args[1])->val;
        t = extract64(t, pos, len);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
    }

    z_mask_old = arg_info(op->args[1])->z_mask;
    z_mask = extract64(z_mask_old, pos, len);
    if (pos == 0) {
        ctx->a_mask = z_mask_old ^ z_mask;
    }
    ctx->z_mask = z_mask;
    ctx->s_mask = smask_from_zmask(z_mask);

    return fold_masks(ctx, op);
}

static bool fold_extract2(OptContext *ctx, TCGOp *op)
{
    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
        uint64_t v1 = arg_info(op->args[1])->val;
        uint64_t v2 = arg_info(op->args[2])->val;
        int shr = op->args[3];

        if (op->opc == INDEX_op_extract2_i64) {
            v1 >>= shr;
            v2 <<= 64 - shr;
        } else {
            v1 = (uint32_t)v1 >> shr;
            v2 = (uint64_t)((int32_t)v2 << (32 - shr));
        }
        return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
    }
    return false;
}
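
/*
 * For the sign extensions below, a_mask = s_mask & ~s_mask_old covers
 * only sign bits not previously known to be repetitions, so an input
 * that is already sign-extended folds to a plain mov.
 */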

static bool fold_exts(OptContext *ctx, TCGOp *op)
{
    uint64_t s_mask_old, s_mask, z_mask, sign;
    bool type_change = false;

    if (fold_const1(ctx, op)) {
        return true;
    }

    z_mask = arg_info(op->args[1])->z_mask;
    s_mask = arg_info(op->args[1])->s_mask;
    s_mask_old = s_mask;

    switch (op->opc) {
    CASE_OP_32_64(ext8s):
        sign = INT8_MIN;
        z_mask = (uint8_t)z_mask;
        break;
    CASE_OP_32_64(ext16s):
        sign = INT16_MIN;
        z_mask = (uint16_t)z_mask;
        break;
    case INDEX_op_ext_i32_i64:
        type_change = true;
        QEMU_FALLTHROUGH;
    case INDEX_op_ext32s_i64:
        sign = INT32_MIN;
        z_mask = (uint32_t)z_mask;
        break;
    default:
        g_assert_not_reached();
    }

    if (z_mask & sign) {
        z_mask |= sign;
    }
    s_mask |= sign << 1;

    ctx->z_mask = z_mask;
    ctx->s_mask = s_mask;
    if (!type_change) {
        ctx->a_mask = s_mask & ~s_mask_old;
    }

    return fold_masks(ctx, op);
}

static bool fold_extu(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask_old, z_mask;
    bool type_change = false;

    if (fold_const1(ctx, op)) {
        return true;
    }

    z_mask_old = z_mask = arg_info(op->args[1])->z_mask;

    switch (op->opc) {
    CASE_OP_32_64(ext8u):
        z_mask = (uint8_t)z_mask;
        break;
    CASE_OP_32_64(ext16u):
        z_mask = (uint16_t)z_mask;
        break;
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_extu_i32_i64:
        type_change = true;
        QEMU_FALLTHROUGH;
    case INDEX_op_ext32u_i64:
        z_mask = (uint32_t)z_mask;
        break;
    case INDEX_op_extrh_i64_i32:
        type_change = true;
        z_mask >>= 32;
        break;
    default:
        g_assert_not_reached();
    }

    ctx->z_mask = z_mask;
    ctx->s_mask = smask_from_zmask(z_mask);
    if (!type_change) {
        ctx->a_mask = z_mask_old ^ z_mask;
    }
    return fold_masks(ctx, op);
}

static bool fold_mb(OptContext *ctx, TCGOp *op)
{
    /* Eliminate duplicate and redundant fence instructions. */
    if (ctx->prev_mb) {
        /*
         * Merge two barriers of the same type into one,
         * or a weaker barrier into a stronger one,
         * or two weaker barriers into a stronger one.
         *   mb X; mb Y => mb X|Y
         *   mb; strl => mb; st
         *   ldaq; mb => ld; mb
         *   ldaq; strl => ld; mb; st
         * Other combinations are also merged into a strong
         * barrier.  This is stricter than specified but for
         * the purposes of TCG is better than not optimizing.
         */
        ctx->prev_mb->args[0] |= op->args[0];
        tcg_op_remove(ctx->tcg, op);
    } else {
        ctx->prev_mb = op;
    }
    return true;
}

static bool fold_mov(OptContext *ctx, TCGOp *op)
{
    return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
}
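
/*
 * Below, a movcond whose arms are the constants {1,0} degenerates to
 * setcond, {0,1} to setcond with inverted condition, and {-1,0} or
 * {0,-1} to negsetcond when the host supports it.
 */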

static bool fold_movcond(OptContext *ctx, TCGOp *op)
{
    int i;

    /*
     * Canonicalize the "false" input reg to match the destination reg so
     * that the tcg backend can implement a "move if true" operation.
     */
    if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
        op->args[5] = tcg_invert_cond(op->args[5]);
    }

    i = do_constant_folding_cond1(ctx, NO_DEST, &op->args[1],
                                  &op->args[2], &op->args[5]);
    if (i >= 0) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
    }

    ctx->z_mask = arg_info(op->args[3])->z_mask
                | arg_info(op->args[4])->z_mask;
    ctx->s_mask = arg_info(op->args[3])->s_mask
                & arg_info(op->args[4])->s_mask;

    if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
        uint64_t tv = arg_info(op->args[3])->val;
        uint64_t fv = arg_info(op->args[4])->val;
        TCGOpcode opc, negopc = 0;
        TCGCond cond = op->args[5];

        switch (ctx->type) {
        case TCG_TYPE_I32:
            opc = INDEX_op_setcond_i32;
            if (TCG_TARGET_HAS_negsetcond_i32) {
                negopc = INDEX_op_negsetcond_i32;
            }
            tv = (int32_t)tv;
            fv = (int32_t)fv;
            break;
        case TCG_TYPE_I64:
            opc = INDEX_op_setcond_i64;
            if (TCG_TARGET_HAS_negsetcond_i64) {
                negopc = INDEX_op_negsetcond_i64;
            }
            break;
        default:
            g_assert_not_reached();
        }

        if (tv == 1 && fv == 0) {
            op->opc = opc;
            op->args[3] = cond;
        } else if (fv == 1 && tv == 0) {
            op->opc = opc;
            op->args[3] = tcg_invert_cond(cond);
        } else if (negopc) {
            if (tv == -1 && fv == 0) {
                op->opc = negopc;
                op->args[3] = cond;
            } else if (fv == -1 && tv == 0) {
                op->opc = negopc;
                op->args[3] = tcg_invert_cond(cond);
            }
        }
    }
    return false;
}

static bool fold_mul(OptContext *ctx, TCGOp *op)
{
    if (fold_const2(ctx, op) ||
        fold_xi_to_i(ctx, op, 0) ||
        fold_xi_to_x(ctx, op, 1)) {
        return true;
    }
    return false;
}

static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
{
    if (fold_const2_commutative(ctx, op) ||
        fold_xi_to_i(ctx, op, 0)) {
        return true;
    }
    return false;
}

static bool fold_multiply2(OptContext *ctx, TCGOp *op)
{
    swap_commutative(op->args[0], &op->args[2], &op->args[3]);

    if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
        uint64_t a = arg_info(op->args[2])->val;
        uint64_t b = arg_info(op->args[3])->val;
        uint64_t h, l;
        TCGArg rl, rh;
        TCGOp *op2;

        switch (op->opc) {
        case INDEX_op_mulu2_i32:
            l = (uint64_t)(uint32_t)a * (uint32_t)b;
            h = (int32_t)(l >> 32);
            l = (int32_t)l;
            break;
        case INDEX_op_muls2_i32:
            l = (int64_t)(int32_t)a * (int32_t)b;
            h = l >> 32;
            l = (int32_t)l;
            break;
        case INDEX_op_mulu2_i64:
            mulu64(&l, &h, a, b);
            break;
        case INDEX_op_muls2_i64:
            muls64(&l, &h, a, b);
            break;
        default:
            g_assert_not_reached();
        }

        rl = op->args[0];
        rh = op->args[1];
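
        /* As in fold_addsub2, the second output needs its own op. */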
        /* The proper opcode is supplied by tcg_opt_gen_mov. */
        op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);

        tcg_opt_gen_movi(ctx, op, rl, l);
        tcg_opt_gen_movi(ctx, op2, rh, h);
        return true;
    }
    return false;
}

static bool fold_nand(OptContext *ctx, TCGOp *op)
{
    if (fold_const2_commutative(ctx, op) ||
        fold_xi_to_not(ctx, op, -1)) {
        return true;
    }

    ctx->s_mask = arg_info(op->args[1])->s_mask
                & arg_info(op->args[2])->s_mask;
    return false;
}

static bool fold_neg(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask;

    if (fold_const1(ctx, op)) {
        return true;
    }

    /* Set to 1 all bits to the left of the rightmost. */
    z_mask = arg_info(op->args[1])->z_mask;
    ctx->z_mask = -(z_mask & -z_mask);

    /*
     * Because of fold_sub_to_neg, we want to always return true,
     * via finish_folding.
     */
    finish_folding(ctx, op);
    return true;
}

static bool fold_nor(OptContext *ctx, TCGOp *op)
{
    if (fold_const2_commutative(ctx, op) ||
        fold_xi_to_not(ctx, op, 0)) {
        return true;
    }

    ctx->s_mask = arg_info(op->args[1])->s_mask
                & arg_info(op->args[2])->s_mask;
    return false;
}

static bool fold_not(OptContext *ctx, TCGOp *op)
{
    if (fold_const1(ctx, op)) {
        return true;
    }

    ctx->s_mask = arg_info(op->args[1])->s_mask;

    /* Because of fold_to_not, we want to always return true, via finish. */
    finish_folding(ctx, op);
    return true;
}

static bool fold_or(OptContext *ctx, TCGOp *op)
{
    if (fold_const2_commutative(ctx, op) ||
        fold_xi_to_x(ctx, op, 0) ||
        fold_xx_to_x(ctx, op)) {
        return true;
    }

    ctx->z_mask = arg_info(op->args[1])->z_mask
                | arg_info(op->args[2])->z_mask;
    ctx->s_mask = arg_info(op->args[1])->s_mask
                & arg_info(op->args[2])->s_mask;
    return fold_masks(ctx, op);
}

static bool fold_orc(OptContext *ctx, TCGOp *op)
{
    if (fold_const2(ctx, op) ||
        fold_xx_to_i(ctx, op, -1) ||
        fold_xi_to_x(ctx, op, -1) ||
        fold_ix_to_not(ctx, op, 0)) {
        return true;
    }

    ctx->s_mask = arg_info(op->args[1])->s_mask
                & arg_info(op->args[2])->s_mask;
    return false;
}

static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
{
    const TCGOpDef *def = &tcg_op_defs[op->opc];
    MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
    MemOp mop = get_memop(oi);
    int width = 8 * memop_size(mop);

    if (width < 64) {
        ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
        if (!(mop & MO_SIGN)) {
            ctx->z_mask = MAKE_64BIT_MASK(0, width);
            ctx->s_mask <<= 1;
        }
    }

    /* Opcodes that touch guest memory stop the mb optimization. */
    ctx->prev_mb = NULL;
    return false;
}
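
/*
 * Merging barriers across a guest memory access would change what the
 * barrier orders, so guest loads and stores reset prev_mb as well.
 */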

static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
{
    /* Opcodes that touch guest memory stop the mb optimization. */
    ctx->prev_mb = NULL;
    return false;
}

static bool fold_remainder(OptContext *ctx, TCGOp *op)
{
    if (fold_const2(ctx, op) ||
        fold_xx_to_i(ctx, op, 0)) {
        return true;
    }
    return false;
}

static bool fold_setcond(OptContext *ctx, TCGOp *op)
{
    int i = do_constant_folding_cond1(ctx, op->args[0], &op->args[1],
                                      &op->args[2], &op->args[3]);
    if (i >= 0) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }

    ctx->z_mask = 1;
    ctx->s_mask = smask_from_zmask(1);
    return false;
}

static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
{
    int i = do_constant_folding_cond1(ctx, op->args[0], &op->args[1],
                                      &op->args[2], &op->args[3]);
    if (i >= 0) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
    }

    /* Value is {0,-1} so all bits are repetitions of the sign. */
    ctx->s_mask = -1;
    return false;
}

static bool fold_setcond2(OptContext *ctx, TCGOp *op)
{
    TCGCond cond;
    int i, inv = 0;

    i = do_constant_folding_cond2(ctx, &op->args[1]);
    cond = op->args[5];
    if (i >= 0) {
        goto do_setcond_const;
    }

    switch (cond) {
    case TCG_COND_LT:
    case TCG_COND_GE:
        /*
         * Simplify LT/GE comparisons vs zero to a single compare
         * vs the high word of the input.
         */
        if (arg_is_const_val(op->args[3], 0) &&
            arg_is_const_val(op->args[4], 0)) {
            goto do_setcond_high;
        }
        break;

    case TCG_COND_NE:
        inv = 1;
        QEMU_FALLTHROUGH;
    case TCG_COND_EQ:
        /*
         * Simplify EQ/NE comparisons where one of the pairs
         * can be simplified.
         */
        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
                                     op->args[3], cond);
        switch (i ^ inv) {
        case 0:
            goto do_setcond_const;
        case 1:
            goto do_setcond_high;
        }

        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
                                     op->args[4], cond);
        switch (i ^ inv) {
        case 0:
            goto do_setcond_const;
        case 1:
            op->args[2] = op->args[3];
            op->args[3] = cond;
            op->opc = INDEX_op_setcond_i32;
            break;
        }
        break;

    default:
        break;

    do_setcond_high:
        op->args[1] = op->args[2];
        op->args[2] = op->args[4];
        op->args[3] = cond;
        op->opc = INDEX_op_setcond_i32;
        break;
    }

    ctx->z_mask = 1;
    ctx->s_mask = smask_from_zmask(1);
    return false;

 do_setcond_const:
    return tcg_opt_gen_movi(ctx, op, op->args[0], i);
}

static bool fold_sextract(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask, s_mask, s_mask_old;
    int pos = op->args[2];
    int len = op->args[3];

    if (arg_is_const(op->args[1])) {
        uint64_t t;

        t = arg_info(op->args[1])->val;
        t = sextract64(t, pos, len);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
    }

    z_mask = arg_info(op->args[1])->z_mask;
    z_mask = sextract64(z_mask, pos, len);
    ctx->z_mask = z_mask;

    s_mask_old = arg_info(op->args[1])->s_mask;
    s_mask = sextract64(s_mask_old, pos, len);
    s_mask |= MAKE_64BIT_MASK(len, 64 - len);
    ctx->s_mask = s_mask;

    if (pos == 0) {
        ctx->a_mask = s_mask & ~s_mask_old;
    }

    return fold_masks(ctx, op);
}
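
/*
 * For the shifts below, "shl r, $0, x" folds to 0 via fold_ix_to_i
 * and "shl r, x, $0" folds to x via fold_xi_to_x; likewise for
 * shr, sar and the rotates.
 */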

static bool fold_shift(OptContext *ctx, TCGOp *op)
{
    uint64_t s_mask, z_mask, sign;

    if (fold_const2(ctx, op) ||
        fold_ix_to_i(ctx, op, 0) ||
        fold_xi_to_x(ctx, op, 0)) {
        return true;
    }

    s_mask = arg_info(op->args[1])->s_mask;
    z_mask = arg_info(op->args[1])->z_mask;

    if (arg_is_const(op->args[2])) {
        int sh = arg_info(op->args[2])->val;

        ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);

        s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
        ctx->s_mask = smask_from_smask(s_mask);

        return fold_masks(ctx, op);
    }

    switch (op->opc) {
    CASE_OP_32_64(sar):
        /*
         * Arithmetic right shift will not reduce the number of
         * input sign repetitions.
         */
        ctx->s_mask = s_mask;
        break;
    CASE_OP_32_64(shr):
        /*
         * If the sign bit is known zero, then logical right shift
         * will not reduce the number of input sign repetitions.
         */
        sign = (s_mask & -s_mask) >> 1;
        if (!(z_mask & sign)) {
            ctx->s_mask = s_mask;
        }
        break;
    default:
        break;
    }

    return false;
}

static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
{
    TCGOpcode neg_op;
    bool have_neg;

    if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
        return false;
    }

    switch (ctx->type) {
    case TCG_TYPE_I32:
        neg_op = INDEX_op_neg_i32;
        have_neg = true;
        break;
    case TCG_TYPE_I64:
        neg_op = INDEX_op_neg_i64;
        have_neg = true;
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        neg_op = INDEX_op_neg_vec;
        have_neg = (TCG_TARGET_HAS_neg_vec &&
                    tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
        break;
    default:
        g_assert_not_reached();
    }
    if (have_neg) {
        op->opc = neg_op;
        op->args[1] = op->args[2];
        return fold_neg(ctx, op);
    }
    return false;
}

/* We cannot as yet do_constant_folding with vectors. */
static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
{
    if (fold_xx_to_i(ctx, op, 0) ||
        fold_xi_to_x(ctx, op, 0) ||
        fold_sub_to_neg(ctx, op)) {
        return true;
    }
    return false;
}

static bool fold_sub(OptContext *ctx, TCGOp *op)
{
    if (fold_const2(ctx, op) || fold_sub_vec(ctx, op)) {
        return true;
    }

    /* Fold sub r,x,i to add r,x,-i */
    if (arg_is_const(op->args[2])) {
        uint64_t val = arg_info(op->args[2])->val;

        op->opc = (ctx->type == TCG_TYPE_I32
                   ? INDEX_op_add_i32 : INDEX_op_add_i64);
        op->args[2] = arg_new_constant(ctx, -val);
    }
    return false;
}

static bool fold_sub2(OptContext *ctx, TCGOp *op)
{
    return fold_addsub2(ctx, op, false);
}
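
/* E.g. fold_sub_to_neg above rewrites "sub r, $0, x" as "neg r, x". */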
/* We can't do any folding with a load, but we can record bits. */
static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
{
    switch (op->opc) {
    CASE_OP_32_64(ld8s):
        ctx->s_mask = MAKE_64BIT_MASK(8, 56);
        break;
    CASE_OP_32_64(ld8u):
        ctx->z_mask = MAKE_64BIT_MASK(0, 8);
        ctx->s_mask = MAKE_64BIT_MASK(9, 55);
        break;
    CASE_OP_32_64(ld16s):
        ctx->s_mask = MAKE_64BIT_MASK(16, 48);
        break;
    CASE_OP_32_64(ld16u):
        ctx->z_mask = MAKE_64BIT_MASK(0, 16);
        ctx->s_mask = MAKE_64BIT_MASK(17, 47);
        break;
    case INDEX_op_ld32s_i64:
        ctx->s_mask = MAKE_64BIT_MASK(32, 32);
        break;
    case INDEX_op_ld32u_i64:
        ctx->z_mask = MAKE_64BIT_MASK(0, 32);
        ctx->s_mask = MAKE_64BIT_MASK(33, 31);
        break;
    default:
        g_assert_not_reached();
    }
    return false;
}

static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
{
    TCGTemp *dst, *src;
    intptr_t ofs;
    TCGType type;

    if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
        return false;
    }

    type = ctx->type;
    ofs = op->args[2];
    dst = arg_temp(op->args[0]);

    src = find_mem_copy_for(ctx, type, ofs);
    if (src && src->base_type == type) {
        return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src));
    }

    reset_ts(ctx, dst);
    record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1);
    return true;
}

static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
{
    intptr_t ofs = op->args[2];
    intptr_t lm1;

    if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
        remove_mem_copy_all(ctx);
        return false;
    }

    switch (op->opc) {
    CASE_OP_32_64(st8):
        lm1 = 0;
        break;
    CASE_OP_32_64(st16):
        lm1 = 1;
        break;
    case INDEX_op_st32_i64:
    case INDEX_op_st_i32:
        lm1 = 3;
        break;
    case INDEX_op_st_i64:
        lm1 = 7;
        break;
    case INDEX_op_st_vec:
        lm1 = tcg_type_size(ctx->type) - 1;
        break;
    default:
        g_assert_not_reached();
    }
    remove_mem_copy_in(ctx, ofs, ofs + lm1);
    return false;
}

static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
{
    TCGTemp *src;
    intptr_t ofs, last;
    TCGType type;

    if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
        fold_tcg_st(ctx, op);
        return false;
    }

    src = arg_temp(op->args[0]);
    ofs = op->args[2];
    type = ctx->type;

    /*
     * Eliminate duplicate stores of a constant.
     * This happens frequently when the target ISA zero-extends.
     */
    if (ts_is_const(src)) {
        TCGTemp *prev = find_mem_copy_for(ctx, type, ofs);
        if (src == prev) {
            tcg_op_remove(ctx->tcg, op);
            return true;
        }
    }

    last = ofs + tcg_type_size(type) - 1;
    remove_mem_copy_in(ctx, ofs, last);
    record_mem_copy(ctx, type, src, ofs, last);
    return false;
}

static bool fold_xor(OptContext *ctx, TCGOp *op)
{
    if (fold_const2_commutative(ctx, op) ||
        fold_xx_to_i(ctx, op, 0) ||
        fold_xi_to_x(ctx, op, 0) ||
        fold_xi_to_not(ctx, op, -1)) {
        return true;
    }

    ctx->z_mask = arg_info(op->args[1])->z_mask
                | arg_info(op->args[2])->z_mask;
    ctx->s_mask = arg_info(op->args[1])->s_mask
                & arg_info(op->args[2])->s_mask;
    return fold_masks(ctx, op);
}
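/*
 * A minimal standalone sketch of the mask algebra fold_xor uses above;
 * illustrative only, nothing in this file calls it.  An XOR result bit
 * may be nonzero iff either input bit may be nonzero, while a sign-bit
 * repetition survives only if both inputs repeat it.
 */
static inline uint64_t xor_z_mask_sketch(uint64_t z1, uint64_t z2)
{
    /* Union of possibly-nonzero bits. */
    return z1 | z2;
}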
/* Propagate constants and copies, fold constant expressions. */
void tcg_optimize(TCGContext *s)
{
    int nb_temps, i;
    TCGOp *op, *op_next;
    OptContext ctx = { .tcg = s };

    QSIMPLEQ_INIT(&ctx.mem_free);

    /* Array VALS has an element for each temp.
       If this temp holds a constant then its value is kept in VALS' element.
       If this temp is a copy of other ones then the other copies are
       available through the doubly linked circular list. */

    nb_temps = s->nb_temps;
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def;
        bool done = false;

        /* Calls are special. */
        if (opc == INDEX_op_call) {
            fold_call(&ctx, op);
            continue;
        }

        def = &tcg_op_defs[opc];
        init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
        copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);

        /* Pre-compute the type of the operation. */
        if (def->flags & TCG_OPF_VECTOR) {
            ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
        } else if (def->flags & TCG_OPF_64BIT) {
            ctx.type = TCG_TYPE_I64;
        } else {
            ctx.type = TCG_TYPE_I32;
        }

        /* Assume all bits affected, no bits known zero, no sign reps. */
        ctx.a_mask = -1;
        ctx.z_mask = -1;
        ctx.s_mask = 0;
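        /*
         * Illustration of the defaults above: a_mask = -1 means no output
         * bit is known to match the first input, z_mask = -1 means no bit
         * is known zero, and s_mask = 0 means no known sign repetitions;
         * the fold_* helpers below tighten these when they can.
         */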
        /*
         * Process each opcode.
         * Sorted alphabetically by opcode as much as possible.
         */
        switch (opc) {
        CASE_OP_32_64(add):
            done = fold_add(&ctx, op);
            break;
        case INDEX_op_add_vec:
            done = fold_add_vec(&ctx, op);
            break;
        CASE_OP_32_64(add2):
            done = fold_add2(&ctx, op);
            break;
        CASE_OP_32_64_VEC(and):
            done = fold_and(&ctx, op);
            break;
        CASE_OP_32_64_VEC(andc):
            done = fold_andc(&ctx, op);
            break;
        CASE_OP_32_64(brcond):
            done = fold_brcond(&ctx, op);
            break;
        case INDEX_op_brcond2_i32:
            done = fold_brcond2(&ctx, op);
            break;
        CASE_OP_32_64(bswap16):
        CASE_OP_32_64(bswap32):
        case INDEX_op_bswap64_i64:
            done = fold_bswap(&ctx, op);
            break;
        CASE_OP_32_64(clz):
        CASE_OP_32_64(ctz):
            done = fold_count_zeros(&ctx, op);
            break;
        CASE_OP_32_64(ctpop):
            done = fold_ctpop(&ctx, op);
            break;
        CASE_OP_32_64(deposit):
            done = fold_deposit(&ctx, op);
            break;
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
            done = fold_divide(&ctx, op);
            break;
        case INDEX_op_dup_vec:
            done = fold_dup(&ctx, op);
            break;
        case INDEX_op_dup2_vec:
            done = fold_dup2(&ctx, op);
            break;
        CASE_OP_32_64_VEC(eqv):
            done = fold_eqv(&ctx, op);
            break;
        CASE_OP_32_64(extract):
            done = fold_extract(&ctx, op);
            break;
        CASE_OP_32_64(extract2):
            done = fold_extract2(&ctx, op);
            break;
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext16s):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext_i32_i64:
            done = fold_exts(&ctx, op);
            break;
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16u):
        case INDEX_op_ext32u_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            done = fold_extu(&ctx, op);
            break;
        CASE_OP_32_64(ld8s):
        CASE_OP_32_64(ld8u):
        CASE_OP_32_64(ld16s):
        CASE_OP_32_64(ld16u):
        case INDEX_op_ld32s_i64:
        case INDEX_op_ld32u_i64:
            done = fold_tcg_ld(&ctx, op);
            break;
        case INDEX_op_ld_i32:
        case INDEX_op_ld_i64:
        case INDEX_op_ld_vec:
            done = fold_tcg_ld_memcopy(&ctx, op);
            break;
        CASE_OP_32_64(st8):
        CASE_OP_32_64(st16):
        case INDEX_op_st32_i64:
            done = fold_tcg_st(&ctx, op);
            break;
        case INDEX_op_st_i32:
        case INDEX_op_st_i64:
        case INDEX_op_st_vec:
            done = fold_tcg_st_memcopy(&ctx, op);
            break;
        case INDEX_op_mb:
            done = fold_mb(&ctx, op);
            break;
        CASE_OP_32_64_VEC(mov):
            done = fold_mov(&ctx, op);
            break;
        CASE_OP_32_64(movcond):
            done = fold_movcond(&ctx, op);
            break;
        CASE_OP_32_64(mul):
            done = fold_mul(&ctx, op);
            break;
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(muluh):
            done = fold_mul_highpart(&ctx, op);
            break;
        CASE_OP_32_64(muls2):
        CASE_OP_32_64(mulu2):
            done = fold_multiply2(&ctx, op);
            break;
        CASE_OP_32_64_VEC(nand):
            done = fold_nand(&ctx, op);
            break;
        CASE_OP_32_64(neg):
            done = fold_neg(&ctx, op);
            break;
        CASE_OP_32_64_VEC(nor):
            done = fold_nor(&ctx, op);
            break;
        CASE_OP_32_64_VEC(not):
            done = fold_not(&ctx, op);
            break;
        CASE_OP_32_64_VEC(or):
            done = fold_or(&ctx, op);
            break;
        CASE_OP_32_64_VEC(orc):
            done = fold_orc(&ctx, op);
            break;
        case INDEX_op_qemu_ld_a32_i32:
        case INDEX_op_qemu_ld_a64_i32:
        case INDEX_op_qemu_ld_a32_i64:
        case INDEX_op_qemu_ld_a64_i64:
        case INDEX_op_qemu_ld_a32_i128:
        case INDEX_op_qemu_ld_a64_i128:
            done = fold_qemu_ld(&ctx, op);
            break;
        case INDEX_op_qemu_st8_a32_i32:
        case INDEX_op_qemu_st8_a64_i32:
        case INDEX_op_qemu_st_a32_i32:
        case INDEX_op_qemu_st_a64_i32:
        case INDEX_op_qemu_st_a32_i64:
        case INDEX_op_qemu_st_a64_i64:
        case INDEX_op_qemu_st_a32_i128:
        case INDEX_op_qemu_st_a64_i128:
            done = fold_qemu_st(&ctx, op);
            break;
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            done = fold_remainder(&ctx, op);
            break;
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
            done = fold_shift(&ctx, op);
            break;
        CASE_OP_32_64(setcond):
            done = fold_setcond(&ctx, op);
            break;
        CASE_OP_32_64(negsetcond):
            done = fold_negsetcond(&ctx, op);
            break;
        case INDEX_op_setcond2_i32:
            done = fold_setcond2(&ctx, op);
            break;
        CASE_OP_32_64(sextract):
            done = fold_sextract(&ctx, op);
            break;
        CASE_OP_32_64(sub):
            done = fold_sub(&ctx, op);
            break;
        case INDEX_op_sub_vec:
            done = fold_sub_vec(&ctx, op);
            break;
        CASE_OP_32_64(sub2):
            done = fold_sub2(&ctx, op);
            break;
        CASE_OP_32_64_VEC(xor):
            done = fold_xor(&ctx, op);
            break;
        default:
            break;
        }

        if (!done) {
            finish_folding(&ctx, op);
        }
    }
}
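/*
 * End-to-end illustration, not part of the pass: given
 *     add_i32 t1, $0x4, $0x4
 *     setcond_i32 t2, t1, $0x8, eq
 * constant folding replaces the add with a mov of constant $0x8 into t1
 * and the setcond with a mov of constant $1 into t2 (tcg_opt_gen_movi);
 * if t1 then has no remaining uses, the later liveness pass removes the
 * dead mov.
 */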