1 /* 2 * Optimizations for Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2010 Samsung Electronics. 5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include "qemu/osdep.h" 27 #include "qemu/int128.h" 28 #include "qemu/interval-tree.h" 29 #include "tcg/tcg-op-common.h" 30 #include "tcg-internal.h" 31 32 #define CASE_OP_32_64(x) \ 33 glue(glue(case INDEX_op_, x), _i32): \ 34 glue(glue(case INDEX_op_, x), _i64) 35 36 #define CASE_OP_32_64_VEC(x) \ 37 glue(glue(case INDEX_op_, x), _i32): \ 38 glue(glue(case INDEX_op_, x), _i64): \ 39 glue(glue(case INDEX_op_, x), _vec) 40 41 typedef struct MemCopyInfo { 42 IntervalTreeNode itree; 43 QSIMPLEQ_ENTRY (MemCopyInfo) next; 44 TCGTemp *ts; 45 TCGType type; 46 } MemCopyInfo; 47 48 typedef struct TempOptInfo { 49 bool is_const; 50 TCGTemp *prev_copy; 51 TCGTemp *next_copy; 52 QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy; 53 uint64_t val; 54 uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */ 55 uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */ 56 } TempOptInfo; 57 58 typedef struct OptContext { 59 TCGContext *tcg; 60 TCGOp *prev_mb; 61 TCGTempSet temps_used; 62 63 IntervalTreeRoot mem_copy; 64 QSIMPLEQ_HEAD(, MemCopyInfo) mem_free; 65 66 /* In flight values from optimization. */ 67 uint64_t a_mask; /* mask bit is 0 iff value identical to first input */ 68 uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */ 69 uint64_t s_mask; /* mask of clrsb(value) bits */ 70 TCGType type; 71 } OptContext; 72 73 /* Calculate the smask for a specific value. */ 74 static uint64_t smask_from_value(uint64_t value) 75 { 76 int rep = clrsb64(value); 77 return ~(~0ull >> rep); 78 } 79 80 /* 81 * Calculate the smask for a given set of known-zeros. 82 * If there are lots of zeros on the left, we can consider the remainder 83 * an unsigned field, and thus the corresponding signed field is one bit 84 * larger. 85 */ 86 static uint64_t smask_from_zmask(uint64_t zmask) 87 { 88 /* 89 * Only the 0 bits are significant for zmask, thus the msb itself 90 * must be zero, else we have no sign information. 91 */ 92 int rep = clz64(zmask); 93 if (rep == 0) { 94 return 0; 95 } 96 rep -= 1; 97 return ~(~0ull >> rep); 98 } 99 100 /* 101 * Recreate a properly left-aligned smask after manipulation. 102 * Some bit-shuffling, particularly shifts and rotates, may 103 * retain sign bits on the left, but may scatter disconnected 104 * sign bits on the right. Retain only what remains to the left. 105 */ 106 static uint64_t smask_from_smask(int64_t smask) 107 { 108 /* Only the 1 bits are significant for smask */ 109 return smask_from_zmask(~smask); 110 } 111 112 static inline TempOptInfo *ts_info(TCGTemp *ts) 113 { 114 return ts->state_ptr; 115 } 116 117 static inline TempOptInfo *arg_info(TCGArg arg) 118 { 119 return ts_info(arg_temp(arg)); 120 } 121 122 static inline bool ts_is_const(TCGTemp *ts) 123 { 124 return ts_info(ts)->is_const; 125 } 126 127 static inline bool arg_is_const(TCGArg arg) 128 { 129 return ts_is_const(arg_temp(arg)); 130 } 131 132 static inline bool ts_is_copy(TCGTemp *ts) 133 { 134 return ts_info(ts)->next_copy != ts; 135 } 136 137 static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b) 138 { 139 return a->kind < b->kind ? b : a; 140 } 141 142 /* Initialize and activate a temporary. */ 143 static void init_ts_info(OptContext *ctx, TCGTemp *ts) 144 { 145 size_t idx = temp_idx(ts); 146 TempOptInfo *ti; 147 148 if (test_bit(idx, ctx->temps_used.l)) { 149 return; 150 } 151 set_bit(idx, ctx->temps_used.l); 152 153 ti = ts->state_ptr; 154 if (ti == NULL) { 155 ti = tcg_malloc(sizeof(TempOptInfo)); 156 ts->state_ptr = ti; 157 } 158 159 ti->next_copy = ts; 160 ti->prev_copy = ts; 161 QSIMPLEQ_INIT(&ti->mem_copy); 162 if (ts->kind == TEMP_CONST) { 163 ti->is_const = true; 164 ti->val = ts->val; 165 ti->z_mask = ts->val; 166 ti->s_mask = smask_from_value(ts->val); 167 } else { 168 ti->is_const = false; 169 ti->z_mask = -1; 170 ti->s_mask = 0; 171 } 172 } 173 174 static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l) 175 { 176 IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l); 177 return r ? container_of(r, MemCopyInfo, itree) : NULL; 178 } 179 180 static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l) 181 { 182 IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l); 183 return r ? container_of(r, MemCopyInfo, itree) : NULL; 184 } 185 186 static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc) 187 { 188 TCGTemp *ts = mc->ts; 189 TempOptInfo *ti = ts_info(ts); 190 191 interval_tree_remove(&mc->itree, &ctx->mem_copy); 192 QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next); 193 QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next); 194 } 195 196 static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l) 197 { 198 while (true) { 199 MemCopyInfo *mc = mem_copy_first(ctx, s, l); 200 if (!mc) { 201 break; 202 } 203 remove_mem_copy(ctx, mc); 204 } 205 } 206 207 static void remove_mem_copy_all(OptContext *ctx) 208 { 209 remove_mem_copy_in(ctx, 0, -1); 210 tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy)); 211 } 212 213 static TCGTemp *find_better_copy(TCGTemp *ts) 214 { 215 TCGTemp *i, *ret; 216 217 /* If this is already readonly, we can't do better. */ 218 if (temp_readonly(ts)) { 219 return ts; 220 } 221 222 ret = ts; 223 for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) { 224 ret = cmp_better_copy(ret, i); 225 } 226 return ret; 227 } 228 229 static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts) 230 { 231 TempOptInfo *si = ts_info(src_ts); 232 TempOptInfo *di = ts_info(dst_ts); 233 MemCopyInfo *mc; 234 235 QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) { 236 tcg_debug_assert(mc->ts == src_ts); 237 mc->ts = dst_ts; 238 } 239 QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy); 240 } 241 242 /* Reset TEMP's state, possibly removing the temp for the list of copies. */ 243 static void reset_ts(OptContext *ctx, TCGTemp *ts) 244 { 245 TempOptInfo *ti = ts_info(ts); 246 TCGTemp *pts = ti->prev_copy; 247 TCGTemp *nts = ti->next_copy; 248 TempOptInfo *pi = ts_info(pts); 249 TempOptInfo *ni = ts_info(nts); 250 251 ni->prev_copy = ti->prev_copy; 252 pi->next_copy = ti->next_copy; 253 ti->next_copy = ts; 254 ti->prev_copy = ts; 255 ti->is_const = false; 256 ti->z_mask = -1; 257 ti->s_mask = 0; 258 259 if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) { 260 if (ts == nts) { 261 /* Last temp copy being removed, the mem copies die. */ 262 MemCopyInfo *mc; 263 QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) { 264 interval_tree_remove(&mc->itree, &ctx->mem_copy); 265 } 266 QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy); 267 } else { 268 move_mem_copies(find_better_copy(nts), ts); 269 } 270 } 271 } 272 273 static void reset_temp(OptContext *ctx, TCGArg arg) 274 { 275 reset_ts(ctx, arg_temp(arg)); 276 } 277 278 static void record_mem_copy(OptContext *ctx, TCGType type, 279 TCGTemp *ts, intptr_t start, intptr_t last) 280 { 281 MemCopyInfo *mc; 282 TempOptInfo *ti; 283 284 mc = QSIMPLEQ_FIRST(&ctx->mem_free); 285 if (mc) { 286 QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next); 287 } else { 288 mc = tcg_malloc(sizeof(*mc)); 289 } 290 291 memset(mc, 0, sizeof(*mc)); 292 mc->itree.start = start; 293 mc->itree.last = last; 294 mc->type = type; 295 interval_tree_insert(&mc->itree, &ctx->mem_copy); 296 297 ts = find_better_copy(ts); 298 ti = ts_info(ts); 299 mc->ts = ts; 300 QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next); 301 } 302 303 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2) 304 { 305 TCGTemp *i; 306 307 if (ts1 == ts2) { 308 return true; 309 } 310 311 if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) { 312 return false; 313 } 314 315 for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) { 316 if (i == ts2) { 317 return true; 318 } 319 } 320 321 return false; 322 } 323 324 static bool args_are_copies(TCGArg arg1, TCGArg arg2) 325 { 326 return ts_are_copies(arg_temp(arg1), arg_temp(arg2)); 327 } 328 329 static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s) 330 { 331 MemCopyInfo *mc; 332 333 for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) { 334 if (mc->itree.start == s && mc->type == type) { 335 return find_better_copy(mc->ts); 336 } 337 } 338 return NULL; 339 } 340 341 static TCGArg arg_new_constant(OptContext *ctx, uint64_t val) 342 { 343 TCGType type = ctx->type; 344 TCGTemp *ts; 345 346 if (type == TCG_TYPE_I32) { 347 val = (int32_t)val; 348 } 349 350 ts = tcg_constant_internal(type, val); 351 init_ts_info(ctx, ts); 352 353 return temp_arg(ts); 354 } 355 356 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) 357 { 358 TCGTemp *dst_ts = arg_temp(dst); 359 TCGTemp *src_ts = arg_temp(src); 360 TempOptInfo *di; 361 TempOptInfo *si; 362 TCGOpcode new_op; 363 364 if (ts_are_copies(dst_ts, src_ts)) { 365 tcg_op_remove(ctx->tcg, op); 366 return true; 367 } 368 369 reset_ts(ctx, dst_ts); 370 di = ts_info(dst_ts); 371 si = ts_info(src_ts); 372 373 switch (ctx->type) { 374 case TCG_TYPE_I32: 375 new_op = INDEX_op_mov_i32; 376 break; 377 case TCG_TYPE_I64: 378 new_op = INDEX_op_mov_i64; 379 break; 380 case TCG_TYPE_V64: 381 case TCG_TYPE_V128: 382 case TCG_TYPE_V256: 383 /* TCGOP_VECL and TCGOP_VECE remain unchanged. */ 384 new_op = INDEX_op_mov_vec; 385 break; 386 default: 387 g_assert_not_reached(); 388 } 389 op->opc = new_op; 390 op->args[0] = dst; 391 op->args[1] = src; 392 393 di->z_mask = si->z_mask; 394 di->s_mask = si->s_mask; 395 396 if (src_ts->type == dst_ts->type) { 397 TempOptInfo *ni = ts_info(si->next_copy); 398 399 di->next_copy = si->next_copy; 400 di->prev_copy = src_ts; 401 ni->prev_copy = dst_ts; 402 si->next_copy = dst_ts; 403 di->is_const = si->is_const; 404 di->val = si->val; 405 406 if (!QSIMPLEQ_EMPTY(&si->mem_copy) 407 && cmp_better_copy(src_ts, dst_ts) == dst_ts) { 408 move_mem_copies(dst_ts, src_ts); 409 } 410 } 411 return true; 412 } 413 414 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op, 415 TCGArg dst, uint64_t val) 416 { 417 /* Convert movi to mov with constant temp. */ 418 return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val)); 419 } 420 421 static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) 422 { 423 uint64_t l64, h64; 424 425 switch (op) { 426 CASE_OP_32_64(add): 427 return x + y; 428 429 CASE_OP_32_64(sub): 430 return x - y; 431 432 CASE_OP_32_64(mul): 433 return x * y; 434 435 CASE_OP_32_64_VEC(and): 436 return x & y; 437 438 CASE_OP_32_64_VEC(or): 439 return x | y; 440 441 CASE_OP_32_64_VEC(xor): 442 return x ^ y; 443 444 case INDEX_op_shl_i32: 445 return (uint32_t)x << (y & 31); 446 447 case INDEX_op_shl_i64: 448 return (uint64_t)x << (y & 63); 449 450 case INDEX_op_shr_i32: 451 return (uint32_t)x >> (y & 31); 452 453 case INDEX_op_shr_i64: 454 return (uint64_t)x >> (y & 63); 455 456 case INDEX_op_sar_i32: 457 return (int32_t)x >> (y & 31); 458 459 case INDEX_op_sar_i64: 460 return (int64_t)x >> (y & 63); 461 462 case INDEX_op_rotr_i32: 463 return ror32(x, y & 31); 464 465 case INDEX_op_rotr_i64: 466 return ror64(x, y & 63); 467 468 case INDEX_op_rotl_i32: 469 return rol32(x, y & 31); 470 471 case INDEX_op_rotl_i64: 472 return rol64(x, y & 63); 473 474 CASE_OP_32_64_VEC(not): 475 return ~x; 476 477 CASE_OP_32_64(neg): 478 return -x; 479 480 CASE_OP_32_64_VEC(andc): 481 return x & ~y; 482 483 CASE_OP_32_64_VEC(orc): 484 return x | ~y; 485 486 CASE_OP_32_64_VEC(eqv): 487 return ~(x ^ y); 488 489 CASE_OP_32_64_VEC(nand): 490 return ~(x & y); 491 492 CASE_OP_32_64_VEC(nor): 493 return ~(x | y); 494 495 case INDEX_op_clz_i32: 496 return (uint32_t)x ? clz32(x) : y; 497 498 case INDEX_op_clz_i64: 499 return x ? clz64(x) : y; 500 501 case INDEX_op_ctz_i32: 502 return (uint32_t)x ? ctz32(x) : y; 503 504 case INDEX_op_ctz_i64: 505 return x ? ctz64(x) : y; 506 507 case INDEX_op_ctpop_i32: 508 return ctpop32(x); 509 510 case INDEX_op_ctpop_i64: 511 return ctpop64(x); 512 513 CASE_OP_32_64(ext8s): 514 return (int8_t)x; 515 516 CASE_OP_32_64(ext16s): 517 return (int16_t)x; 518 519 CASE_OP_32_64(ext8u): 520 return (uint8_t)x; 521 522 CASE_OP_32_64(ext16u): 523 return (uint16_t)x; 524 525 CASE_OP_32_64(bswap16): 526 x = bswap16(x); 527 return y & TCG_BSWAP_OS ? (int16_t)x : x; 528 529 CASE_OP_32_64(bswap32): 530 x = bswap32(x); 531 return y & TCG_BSWAP_OS ? (int32_t)x : x; 532 533 case INDEX_op_bswap64_i64: 534 return bswap64(x); 535 536 case INDEX_op_ext_i32_i64: 537 case INDEX_op_ext32s_i64: 538 return (int32_t)x; 539 540 case INDEX_op_extu_i32_i64: 541 case INDEX_op_extrl_i64_i32: 542 case INDEX_op_ext32u_i64: 543 return (uint32_t)x; 544 545 case INDEX_op_extrh_i64_i32: 546 return (uint64_t)x >> 32; 547 548 case INDEX_op_muluh_i32: 549 return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; 550 case INDEX_op_mulsh_i32: 551 return ((int64_t)(int32_t)x * (int32_t)y) >> 32; 552 553 case INDEX_op_muluh_i64: 554 mulu64(&l64, &h64, x, y); 555 return h64; 556 case INDEX_op_mulsh_i64: 557 muls64(&l64, &h64, x, y); 558 return h64; 559 560 case INDEX_op_div_i32: 561 /* Avoid crashing on divide by zero, otherwise undefined. */ 562 return (int32_t)x / ((int32_t)y ? : 1); 563 case INDEX_op_divu_i32: 564 return (uint32_t)x / ((uint32_t)y ? : 1); 565 case INDEX_op_div_i64: 566 return (int64_t)x / ((int64_t)y ? : 1); 567 case INDEX_op_divu_i64: 568 return (uint64_t)x / ((uint64_t)y ? : 1); 569 570 case INDEX_op_rem_i32: 571 return (int32_t)x % ((int32_t)y ? : 1); 572 case INDEX_op_remu_i32: 573 return (uint32_t)x % ((uint32_t)y ? : 1); 574 case INDEX_op_rem_i64: 575 return (int64_t)x % ((int64_t)y ? : 1); 576 case INDEX_op_remu_i64: 577 return (uint64_t)x % ((uint64_t)y ? : 1); 578 579 default: 580 g_assert_not_reached(); 581 } 582 } 583 584 static uint64_t do_constant_folding(TCGOpcode op, TCGType type, 585 uint64_t x, uint64_t y) 586 { 587 uint64_t res = do_constant_folding_2(op, x, y); 588 if (type == TCG_TYPE_I32) { 589 res = (int32_t)res; 590 } 591 return res; 592 } 593 594 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) 595 { 596 switch (c) { 597 case TCG_COND_EQ: 598 return x == y; 599 case TCG_COND_NE: 600 return x != y; 601 case TCG_COND_LT: 602 return (int32_t)x < (int32_t)y; 603 case TCG_COND_GE: 604 return (int32_t)x >= (int32_t)y; 605 case TCG_COND_LE: 606 return (int32_t)x <= (int32_t)y; 607 case TCG_COND_GT: 608 return (int32_t)x > (int32_t)y; 609 case TCG_COND_LTU: 610 return x < y; 611 case TCG_COND_GEU: 612 return x >= y; 613 case TCG_COND_LEU: 614 return x <= y; 615 case TCG_COND_GTU: 616 return x > y; 617 default: 618 g_assert_not_reached(); 619 } 620 } 621 622 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) 623 { 624 switch (c) { 625 case TCG_COND_EQ: 626 return x == y; 627 case TCG_COND_NE: 628 return x != y; 629 case TCG_COND_LT: 630 return (int64_t)x < (int64_t)y; 631 case TCG_COND_GE: 632 return (int64_t)x >= (int64_t)y; 633 case TCG_COND_LE: 634 return (int64_t)x <= (int64_t)y; 635 case TCG_COND_GT: 636 return (int64_t)x > (int64_t)y; 637 case TCG_COND_LTU: 638 return x < y; 639 case TCG_COND_GEU: 640 return x >= y; 641 case TCG_COND_LEU: 642 return x <= y; 643 case TCG_COND_GTU: 644 return x > y; 645 default: 646 g_assert_not_reached(); 647 } 648 } 649 650 static bool do_constant_folding_cond_eq(TCGCond c) 651 { 652 switch (c) { 653 case TCG_COND_GT: 654 case TCG_COND_LTU: 655 case TCG_COND_LT: 656 case TCG_COND_GTU: 657 case TCG_COND_NE: 658 return 0; 659 case TCG_COND_GE: 660 case TCG_COND_GEU: 661 case TCG_COND_LE: 662 case TCG_COND_LEU: 663 case TCG_COND_EQ: 664 return 1; 665 default: 666 g_assert_not_reached(); 667 } 668 } 669 670 /* 671 * Return -1 if the condition can't be simplified, 672 * and the result of the condition (0 or 1) if it can. 673 */ 674 static int do_constant_folding_cond(TCGType type, TCGArg x, 675 TCGArg y, TCGCond c) 676 { 677 if (arg_is_const(x) && arg_is_const(y)) { 678 uint64_t xv = arg_info(x)->val; 679 uint64_t yv = arg_info(y)->val; 680 681 switch (type) { 682 case TCG_TYPE_I32: 683 return do_constant_folding_cond_32(xv, yv, c); 684 case TCG_TYPE_I64: 685 return do_constant_folding_cond_64(xv, yv, c); 686 default: 687 /* Only scalar comparisons are optimizable */ 688 return -1; 689 } 690 } else if (args_are_copies(x, y)) { 691 return do_constant_folding_cond_eq(c); 692 } else if (arg_is_const(y) && arg_info(y)->val == 0) { 693 switch (c) { 694 case TCG_COND_LTU: 695 return 0; 696 case TCG_COND_GEU: 697 return 1; 698 default: 699 return -1; 700 } 701 } 702 return -1; 703 } 704 705 /* 706 * Return -1 if the condition can't be simplified, 707 * and the result of the condition (0 or 1) if it can. 708 */ 709 static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c) 710 { 711 TCGArg al = p1[0], ah = p1[1]; 712 TCGArg bl = p2[0], bh = p2[1]; 713 714 if (arg_is_const(bl) && arg_is_const(bh)) { 715 tcg_target_ulong blv = arg_info(bl)->val; 716 tcg_target_ulong bhv = arg_info(bh)->val; 717 uint64_t b = deposit64(blv, 32, 32, bhv); 718 719 if (arg_is_const(al) && arg_is_const(ah)) { 720 tcg_target_ulong alv = arg_info(al)->val; 721 tcg_target_ulong ahv = arg_info(ah)->val; 722 uint64_t a = deposit64(alv, 32, 32, ahv); 723 return do_constant_folding_cond_64(a, b, c); 724 } 725 if (b == 0) { 726 switch (c) { 727 case TCG_COND_LTU: 728 return 0; 729 case TCG_COND_GEU: 730 return 1; 731 default: 732 break; 733 } 734 } 735 } 736 if (args_are_copies(al, bl) && args_are_copies(ah, bh)) { 737 return do_constant_folding_cond_eq(c); 738 } 739 return -1; 740 } 741 742 /** 743 * swap_commutative: 744 * @dest: TCGArg of the destination argument, or NO_DEST. 745 * @p1: first paired argument 746 * @p2: second paired argument 747 * 748 * If *@p1 is a constant and *@p2 is not, swap. 749 * If *@p2 matches @dest, swap. 750 * Return true if a swap was performed. 751 */ 752 753 #define NO_DEST temp_arg(NULL) 754 755 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) 756 { 757 TCGArg a1 = *p1, a2 = *p2; 758 int sum = 0; 759 sum += arg_is_const(a1); 760 sum -= arg_is_const(a2); 761 762 /* Prefer the constant in second argument, and then the form 763 op a, a, b, which is better handled on non-RISC hosts. */ 764 if (sum > 0 || (sum == 0 && dest == a2)) { 765 *p1 = a2; 766 *p2 = a1; 767 return true; 768 } 769 return false; 770 } 771 772 static bool swap_commutative2(TCGArg *p1, TCGArg *p2) 773 { 774 int sum = 0; 775 sum += arg_is_const(p1[0]); 776 sum += arg_is_const(p1[1]); 777 sum -= arg_is_const(p2[0]); 778 sum -= arg_is_const(p2[1]); 779 if (sum > 0) { 780 TCGArg t; 781 t = p1[0], p1[0] = p2[0], p2[0] = t; 782 t = p1[1], p1[1] = p2[1], p2[1] = t; 783 return true; 784 } 785 return false; 786 } 787 788 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args) 789 { 790 for (int i = 0; i < nb_args; i++) { 791 TCGTemp *ts = arg_temp(op->args[i]); 792 init_ts_info(ctx, ts); 793 } 794 } 795 796 static void copy_propagate(OptContext *ctx, TCGOp *op, 797 int nb_oargs, int nb_iargs) 798 { 799 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 800 TCGTemp *ts = arg_temp(op->args[i]); 801 if (ts_is_copy(ts)) { 802 op->args[i] = temp_arg(find_better_copy(ts)); 803 } 804 } 805 } 806 807 static void finish_folding(OptContext *ctx, TCGOp *op) 808 { 809 const TCGOpDef *def = &tcg_op_defs[op->opc]; 810 int i, nb_oargs; 811 812 /* 813 * We only optimize extended basic blocks. If the opcode ends a BB 814 * and is not a conditional branch, reset all temp data. 815 */ 816 if (def->flags & TCG_OPF_BB_END) { 817 ctx->prev_mb = NULL; 818 if (!(def->flags & TCG_OPF_COND_BRANCH)) { 819 memset(&ctx->temps_used, 0, sizeof(ctx->temps_used)); 820 remove_mem_copy_all(ctx); 821 } 822 return; 823 } 824 825 nb_oargs = def->nb_oargs; 826 for (i = 0; i < nb_oargs; i++) { 827 TCGTemp *ts = arg_temp(op->args[i]); 828 reset_ts(ctx, ts); 829 /* 830 * Save the corresponding known-zero/sign bits mask for the 831 * first output argument (only one supported so far). 832 */ 833 if (i == 0) { 834 ts_info(ts)->z_mask = ctx->z_mask; 835 ts_info(ts)->s_mask = ctx->s_mask; 836 } 837 } 838 } 839 840 /* 841 * The fold_* functions return true when processing is complete, 842 * usually by folding the operation to a constant or to a copy, 843 * and calling tcg_opt_gen_{mov,movi}. They may do other things, 844 * like collect information about the value produced, for use in 845 * optimizing a subsequent operation. 846 * 847 * These first fold_* functions are all helpers, used by other 848 * folders for more specific operations. 849 */ 850 851 static bool fold_const1(OptContext *ctx, TCGOp *op) 852 { 853 if (arg_is_const(op->args[1])) { 854 uint64_t t; 855 856 t = arg_info(op->args[1])->val; 857 t = do_constant_folding(op->opc, ctx->type, t, 0); 858 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 859 } 860 return false; 861 } 862 863 static bool fold_const2(OptContext *ctx, TCGOp *op) 864 { 865 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 866 uint64_t t1 = arg_info(op->args[1])->val; 867 uint64_t t2 = arg_info(op->args[2])->val; 868 869 t1 = do_constant_folding(op->opc, ctx->type, t1, t2); 870 return tcg_opt_gen_movi(ctx, op, op->args[0], t1); 871 } 872 return false; 873 } 874 875 static bool fold_commutative(OptContext *ctx, TCGOp *op) 876 { 877 swap_commutative(op->args[0], &op->args[1], &op->args[2]); 878 return false; 879 } 880 881 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op) 882 { 883 swap_commutative(op->args[0], &op->args[1], &op->args[2]); 884 return fold_const2(ctx, op); 885 } 886 887 static bool fold_masks(OptContext *ctx, TCGOp *op) 888 { 889 uint64_t a_mask = ctx->a_mask; 890 uint64_t z_mask = ctx->z_mask; 891 uint64_t s_mask = ctx->s_mask; 892 893 /* 894 * 32-bit ops generate 32-bit results, which for the purpose of 895 * simplifying tcg are sign-extended. Certainly that's how we 896 * represent our constants elsewhere. Note that the bits will 897 * be reset properly for a 64-bit value when encountering the 898 * type changing opcodes. 899 */ 900 if (ctx->type == TCG_TYPE_I32) { 901 a_mask = (int32_t)a_mask; 902 z_mask = (int32_t)z_mask; 903 s_mask |= MAKE_64BIT_MASK(32, 32); 904 ctx->z_mask = z_mask; 905 ctx->s_mask = s_mask; 906 } 907 908 if (z_mask == 0) { 909 return tcg_opt_gen_movi(ctx, op, op->args[0], 0); 910 } 911 if (a_mask == 0) { 912 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 913 } 914 return false; 915 } 916 917 /* 918 * Convert @op to NOT, if NOT is supported by the host. 919 * Return true f the conversion is successful, which will still 920 * indicate that the processing is complete. 921 */ 922 static bool fold_not(OptContext *ctx, TCGOp *op); 923 static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx) 924 { 925 TCGOpcode not_op; 926 bool have_not; 927 928 switch (ctx->type) { 929 case TCG_TYPE_I32: 930 not_op = INDEX_op_not_i32; 931 have_not = TCG_TARGET_HAS_not_i32; 932 break; 933 case TCG_TYPE_I64: 934 not_op = INDEX_op_not_i64; 935 have_not = TCG_TARGET_HAS_not_i64; 936 break; 937 case TCG_TYPE_V64: 938 case TCG_TYPE_V128: 939 case TCG_TYPE_V256: 940 not_op = INDEX_op_not_vec; 941 have_not = TCG_TARGET_HAS_not_vec; 942 break; 943 default: 944 g_assert_not_reached(); 945 } 946 if (have_not) { 947 op->opc = not_op; 948 op->args[1] = op->args[idx]; 949 return fold_not(ctx, op); 950 } 951 return false; 952 } 953 954 /* If the binary operation has first argument @i, fold to @i. */ 955 static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i) 956 { 957 if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) { 958 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 959 } 960 return false; 961 } 962 963 /* If the binary operation has first argument @i, fold to NOT. */ 964 static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i) 965 { 966 if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) { 967 return fold_to_not(ctx, op, 2); 968 } 969 return false; 970 } 971 972 /* If the binary operation has second argument @i, fold to @i. */ 973 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i) 974 { 975 if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) { 976 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 977 } 978 return false; 979 } 980 981 /* If the binary operation has second argument @i, fold to identity. */ 982 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i) 983 { 984 if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) { 985 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 986 } 987 return false; 988 } 989 990 /* If the binary operation has second argument @i, fold to NOT. */ 991 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i) 992 { 993 if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) { 994 return fold_to_not(ctx, op, 1); 995 } 996 return false; 997 } 998 999 /* If the binary operation has both arguments equal, fold to @i. */ 1000 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i) 1001 { 1002 if (args_are_copies(op->args[1], op->args[2])) { 1003 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 1004 } 1005 return false; 1006 } 1007 1008 /* If the binary operation has both arguments equal, fold to identity. */ 1009 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op) 1010 { 1011 if (args_are_copies(op->args[1], op->args[2])) { 1012 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 1013 } 1014 return false; 1015 } 1016 1017 /* 1018 * These outermost fold_<op> functions are sorted alphabetically. 1019 * 1020 * The ordering of the transformations should be: 1021 * 1) those that produce a constant 1022 * 2) those that produce a copy 1023 * 3) those that produce information about the result value. 1024 */ 1025 1026 static bool fold_add(OptContext *ctx, TCGOp *op) 1027 { 1028 if (fold_const2_commutative(ctx, op) || 1029 fold_xi_to_x(ctx, op, 0)) { 1030 return true; 1031 } 1032 return false; 1033 } 1034 1035 /* We cannot as yet do_constant_folding with vectors. */ 1036 static bool fold_add_vec(OptContext *ctx, TCGOp *op) 1037 { 1038 if (fold_commutative(ctx, op) || 1039 fold_xi_to_x(ctx, op, 0)) { 1040 return true; 1041 } 1042 return false; 1043 } 1044 1045 static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add) 1046 { 1047 bool a_const = arg_is_const(op->args[2]) && arg_is_const(op->args[3]); 1048 bool b_const = arg_is_const(op->args[4]) && arg_is_const(op->args[5]); 1049 1050 if (a_const && b_const) { 1051 uint64_t al = arg_info(op->args[2])->val; 1052 uint64_t ah = arg_info(op->args[3])->val; 1053 uint64_t bl = arg_info(op->args[4])->val; 1054 uint64_t bh = arg_info(op->args[5])->val; 1055 TCGArg rl, rh; 1056 TCGOp *op2; 1057 1058 if (ctx->type == TCG_TYPE_I32) { 1059 uint64_t a = deposit64(al, 32, 32, ah); 1060 uint64_t b = deposit64(bl, 32, 32, bh); 1061 1062 if (add) { 1063 a += b; 1064 } else { 1065 a -= b; 1066 } 1067 1068 al = sextract64(a, 0, 32); 1069 ah = sextract64(a, 32, 32); 1070 } else { 1071 Int128 a = int128_make128(al, ah); 1072 Int128 b = int128_make128(bl, bh); 1073 1074 if (add) { 1075 a = int128_add(a, b); 1076 } else { 1077 a = int128_sub(a, b); 1078 } 1079 1080 al = int128_getlo(a); 1081 ah = int128_gethi(a); 1082 } 1083 1084 rl = op->args[0]; 1085 rh = op->args[1]; 1086 1087 /* The proper opcode is supplied by tcg_opt_gen_mov. */ 1088 op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); 1089 1090 tcg_opt_gen_movi(ctx, op, rl, al); 1091 tcg_opt_gen_movi(ctx, op2, rh, ah); 1092 return true; 1093 } 1094 1095 /* Fold sub2 r,x,i to add2 r,x,-i */ 1096 if (!add && b_const) { 1097 uint64_t bl = arg_info(op->args[4])->val; 1098 uint64_t bh = arg_info(op->args[5])->val; 1099 1100 /* Negate the two parts without assembling and disassembling. */ 1101 bl = -bl; 1102 bh = ~bh + !bl; 1103 1104 op->opc = (ctx->type == TCG_TYPE_I32 1105 ? INDEX_op_add2_i32 : INDEX_op_add2_i64); 1106 op->args[4] = arg_new_constant(ctx, bl); 1107 op->args[5] = arg_new_constant(ctx, bh); 1108 } 1109 return false; 1110 } 1111 1112 static bool fold_add2(OptContext *ctx, TCGOp *op) 1113 { 1114 /* Note that the high and low parts may be independently swapped. */ 1115 swap_commutative(op->args[0], &op->args[2], &op->args[4]); 1116 swap_commutative(op->args[1], &op->args[3], &op->args[5]); 1117 1118 return fold_addsub2(ctx, op, true); 1119 } 1120 1121 static bool fold_and(OptContext *ctx, TCGOp *op) 1122 { 1123 uint64_t z1, z2; 1124 1125 if (fold_const2_commutative(ctx, op) || 1126 fold_xi_to_i(ctx, op, 0) || 1127 fold_xi_to_x(ctx, op, -1) || 1128 fold_xx_to_x(ctx, op)) { 1129 return true; 1130 } 1131 1132 z1 = arg_info(op->args[1])->z_mask; 1133 z2 = arg_info(op->args[2])->z_mask; 1134 ctx->z_mask = z1 & z2; 1135 1136 /* 1137 * Sign repetitions are perforce all identical, whether they are 1 or 0. 1138 * Bitwise operations preserve the relative quantity of the repetitions. 1139 */ 1140 ctx->s_mask = arg_info(op->args[1])->s_mask 1141 & arg_info(op->args[2])->s_mask; 1142 1143 /* 1144 * Known-zeros does not imply known-ones. Therefore unless 1145 * arg2 is constant, we can't infer affected bits from it. 1146 */ 1147 if (arg_is_const(op->args[2])) { 1148 ctx->a_mask = z1 & ~z2; 1149 } 1150 1151 return fold_masks(ctx, op); 1152 } 1153 1154 static bool fold_andc(OptContext *ctx, TCGOp *op) 1155 { 1156 uint64_t z1; 1157 1158 if (fold_const2(ctx, op) || 1159 fold_xx_to_i(ctx, op, 0) || 1160 fold_xi_to_x(ctx, op, 0) || 1161 fold_ix_to_not(ctx, op, -1)) { 1162 return true; 1163 } 1164 1165 z1 = arg_info(op->args[1])->z_mask; 1166 1167 /* 1168 * Known-zeros does not imply known-ones. Therefore unless 1169 * arg2 is constant, we can't infer anything from it. 1170 */ 1171 if (arg_is_const(op->args[2])) { 1172 uint64_t z2 = ~arg_info(op->args[2])->z_mask; 1173 ctx->a_mask = z1 & ~z2; 1174 z1 &= z2; 1175 } 1176 ctx->z_mask = z1; 1177 1178 ctx->s_mask = arg_info(op->args[1])->s_mask 1179 & arg_info(op->args[2])->s_mask; 1180 return fold_masks(ctx, op); 1181 } 1182 1183 static bool fold_brcond(OptContext *ctx, TCGOp *op) 1184 { 1185 TCGCond cond = op->args[2]; 1186 int i; 1187 1188 if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) { 1189 op->args[2] = cond = tcg_swap_cond(cond); 1190 } 1191 1192 i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond); 1193 if (i == 0) { 1194 tcg_op_remove(ctx->tcg, op); 1195 return true; 1196 } 1197 if (i > 0) { 1198 op->opc = INDEX_op_br; 1199 op->args[0] = op->args[3]; 1200 } 1201 return false; 1202 } 1203 1204 static bool fold_brcond2(OptContext *ctx, TCGOp *op) 1205 { 1206 TCGCond cond = op->args[4]; 1207 TCGArg label = op->args[5]; 1208 int i, inv = 0; 1209 1210 if (swap_commutative2(&op->args[0], &op->args[2])) { 1211 op->args[4] = cond = tcg_swap_cond(cond); 1212 } 1213 1214 i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond); 1215 if (i >= 0) { 1216 goto do_brcond_const; 1217 } 1218 1219 switch (cond) { 1220 case TCG_COND_LT: 1221 case TCG_COND_GE: 1222 /* 1223 * Simplify LT/GE comparisons vs zero to a single compare 1224 * vs the high word of the input. 1225 */ 1226 if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 && 1227 arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) { 1228 goto do_brcond_high; 1229 } 1230 break; 1231 1232 case TCG_COND_NE: 1233 inv = 1; 1234 QEMU_FALLTHROUGH; 1235 case TCG_COND_EQ: 1236 /* 1237 * Simplify EQ/NE comparisons where one of the pairs 1238 * can be simplified. 1239 */ 1240 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0], 1241 op->args[2], cond); 1242 switch (i ^ inv) { 1243 case 0: 1244 goto do_brcond_const; 1245 case 1: 1246 goto do_brcond_high; 1247 } 1248 1249 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1], 1250 op->args[3], cond); 1251 switch (i ^ inv) { 1252 case 0: 1253 goto do_brcond_const; 1254 case 1: 1255 op->opc = INDEX_op_brcond_i32; 1256 op->args[1] = op->args[2]; 1257 op->args[2] = cond; 1258 op->args[3] = label; 1259 break; 1260 } 1261 break; 1262 1263 default: 1264 break; 1265 1266 do_brcond_high: 1267 op->opc = INDEX_op_brcond_i32; 1268 op->args[0] = op->args[1]; 1269 op->args[1] = op->args[3]; 1270 op->args[2] = cond; 1271 op->args[3] = label; 1272 break; 1273 1274 do_brcond_const: 1275 if (i == 0) { 1276 tcg_op_remove(ctx->tcg, op); 1277 return true; 1278 } 1279 op->opc = INDEX_op_br; 1280 op->args[0] = label; 1281 break; 1282 } 1283 return false; 1284 } 1285 1286 static bool fold_bswap(OptContext *ctx, TCGOp *op) 1287 { 1288 uint64_t z_mask, s_mask, sign; 1289 1290 if (arg_is_const(op->args[1])) { 1291 uint64_t t = arg_info(op->args[1])->val; 1292 1293 t = do_constant_folding(op->opc, ctx->type, t, op->args[2]); 1294 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1295 } 1296 1297 z_mask = arg_info(op->args[1])->z_mask; 1298 1299 switch (op->opc) { 1300 case INDEX_op_bswap16_i32: 1301 case INDEX_op_bswap16_i64: 1302 z_mask = bswap16(z_mask); 1303 sign = INT16_MIN; 1304 break; 1305 case INDEX_op_bswap32_i32: 1306 case INDEX_op_bswap32_i64: 1307 z_mask = bswap32(z_mask); 1308 sign = INT32_MIN; 1309 break; 1310 case INDEX_op_bswap64_i64: 1311 z_mask = bswap64(z_mask); 1312 sign = INT64_MIN; 1313 break; 1314 default: 1315 g_assert_not_reached(); 1316 } 1317 s_mask = smask_from_zmask(z_mask); 1318 1319 switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) { 1320 case TCG_BSWAP_OZ: 1321 break; 1322 case TCG_BSWAP_OS: 1323 /* If the sign bit may be 1, force all the bits above to 1. */ 1324 if (z_mask & sign) { 1325 z_mask |= sign; 1326 s_mask = sign << 1; 1327 } 1328 break; 1329 default: 1330 /* The high bits are undefined: force all bits above the sign to 1. */ 1331 z_mask |= sign << 1; 1332 s_mask = 0; 1333 break; 1334 } 1335 ctx->z_mask = z_mask; 1336 ctx->s_mask = s_mask; 1337 1338 return fold_masks(ctx, op); 1339 } 1340 1341 static bool fold_call(OptContext *ctx, TCGOp *op) 1342 { 1343 TCGContext *s = ctx->tcg; 1344 int nb_oargs = TCGOP_CALLO(op); 1345 int nb_iargs = TCGOP_CALLI(op); 1346 int flags, i; 1347 1348 init_arguments(ctx, op, nb_oargs + nb_iargs); 1349 copy_propagate(ctx, op, nb_oargs, nb_iargs); 1350 1351 /* If the function reads or writes globals, reset temp data. */ 1352 flags = tcg_call_flags(op); 1353 if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) { 1354 int nb_globals = s->nb_globals; 1355 1356 for (i = 0; i < nb_globals; i++) { 1357 if (test_bit(i, ctx->temps_used.l)) { 1358 reset_ts(ctx, &ctx->tcg->temps[i]); 1359 } 1360 } 1361 } 1362 1363 /* If the function has side effects, reset mem data. */ 1364 if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) { 1365 remove_mem_copy_all(ctx); 1366 } 1367 1368 /* Reset temp data for outputs. */ 1369 for (i = 0; i < nb_oargs; i++) { 1370 reset_temp(ctx, op->args[i]); 1371 } 1372 1373 /* Stop optimizing MB across calls. */ 1374 ctx->prev_mb = NULL; 1375 return true; 1376 } 1377 1378 static bool fold_count_zeros(OptContext *ctx, TCGOp *op) 1379 { 1380 uint64_t z_mask; 1381 1382 if (arg_is_const(op->args[1])) { 1383 uint64_t t = arg_info(op->args[1])->val; 1384 1385 if (t != 0) { 1386 t = do_constant_folding(op->opc, ctx->type, t, 0); 1387 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1388 } 1389 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]); 1390 } 1391 1392 switch (ctx->type) { 1393 case TCG_TYPE_I32: 1394 z_mask = 31; 1395 break; 1396 case TCG_TYPE_I64: 1397 z_mask = 63; 1398 break; 1399 default: 1400 g_assert_not_reached(); 1401 } 1402 ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask; 1403 ctx->s_mask = smask_from_zmask(ctx->z_mask); 1404 return false; 1405 } 1406 1407 static bool fold_ctpop(OptContext *ctx, TCGOp *op) 1408 { 1409 if (fold_const1(ctx, op)) { 1410 return true; 1411 } 1412 1413 switch (ctx->type) { 1414 case TCG_TYPE_I32: 1415 ctx->z_mask = 32 | 31; 1416 break; 1417 case TCG_TYPE_I64: 1418 ctx->z_mask = 64 | 63; 1419 break; 1420 default: 1421 g_assert_not_reached(); 1422 } 1423 ctx->s_mask = smask_from_zmask(ctx->z_mask); 1424 return false; 1425 } 1426 1427 static bool fold_deposit(OptContext *ctx, TCGOp *op) 1428 { 1429 TCGOpcode and_opc; 1430 1431 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1432 uint64_t t1 = arg_info(op->args[1])->val; 1433 uint64_t t2 = arg_info(op->args[2])->val; 1434 1435 t1 = deposit64(t1, op->args[3], op->args[4], t2); 1436 return tcg_opt_gen_movi(ctx, op, op->args[0], t1); 1437 } 1438 1439 switch (ctx->type) { 1440 case TCG_TYPE_I32: 1441 and_opc = INDEX_op_and_i32; 1442 break; 1443 case TCG_TYPE_I64: 1444 and_opc = INDEX_op_and_i64; 1445 break; 1446 default: 1447 g_assert_not_reached(); 1448 } 1449 1450 /* Inserting a value into zero at offset 0. */ 1451 if (arg_is_const(op->args[1]) 1452 && arg_info(op->args[1])->val == 0 1453 && op->args[3] == 0) { 1454 uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]); 1455 1456 op->opc = and_opc; 1457 op->args[1] = op->args[2]; 1458 op->args[2] = arg_new_constant(ctx, mask); 1459 ctx->z_mask = mask & arg_info(op->args[1])->z_mask; 1460 return false; 1461 } 1462 1463 /* Inserting zero into a value. */ 1464 if (arg_is_const(op->args[2]) 1465 && arg_info(op->args[2])->val == 0) { 1466 uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0); 1467 1468 op->opc = and_opc; 1469 op->args[2] = arg_new_constant(ctx, mask); 1470 ctx->z_mask = mask & arg_info(op->args[1])->z_mask; 1471 return false; 1472 } 1473 1474 ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask, 1475 op->args[3], op->args[4], 1476 arg_info(op->args[2])->z_mask); 1477 return false; 1478 } 1479 1480 static bool fold_divide(OptContext *ctx, TCGOp *op) 1481 { 1482 if (fold_const2(ctx, op) || 1483 fold_xi_to_x(ctx, op, 1)) { 1484 return true; 1485 } 1486 return false; 1487 } 1488 1489 static bool fold_dup(OptContext *ctx, TCGOp *op) 1490 { 1491 if (arg_is_const(op->args[1])) { 1492 uint64_t t = arg_info(op->args[1])->val; 1493 t = dup_const(TCGOP_VECE(op), t); 1494 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1495 } 1496 return false; 1497 } 1498 1499 static bool fold_dup2(OptContext *ctx, TCGOp *op) 1500 { 1501 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1502 uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32, 1503 arg_info(op->args[2])->val); 1504 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1505 } 1506 1507 if (args_are_copies(op->args[1], op->args[2])) { 1508 op->opc = INDEX_op_dup_vec; 1509 TCGOP_VECE(op) = MO_32; 1510 } 1511 return false; 1512 } 1513 1514 static bool fold_eqv(OptContext *ctx, TCGOp *op) 1515 { 1516 if (fold_const2_commutative(ctx, op) || 1517 fold_xi_to_x(ctx, op, -1) || 1518 fold_xi_to_not(ctx, op, 0)) { 1519 return true; 1520 } 1521 1522 ctx->s_mask = arg_info(op->args[1])->s_mask 1523 & arg_info(op->args[2])->s_mask; 1524 return false; 1525 } 1526 1527 static bool fold_extract(OptContext *ctx, TCGOp *op) 1528 { 1529 uint64_t z_mask_old, z_mask; 1530 int pos = op->args[2]; 1531 int len = op->args[3]; 1532 1533 if (arg_is_const(op->args[1])) { 1534 uint64_t t; 1535 1536 t = arg_info(op->args[1])->val; 1537 t = extract64(t, pos, len); 1538 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1539 } 1540 1541 z_mask_old = arg_info(op->args[1])->z_mask; 1542 z_mask = extract64(z_mask_old, pos, len); 1543 if (pos == 0) { 1544 ctx->a_mask = z_mask_old ^ z_mask; 1545 } 1546 ctx->z_mask = z_mask; 1547 ctx->s_mask = smask_from_zmask(z_mask); 1548 1549 return fold_masks(ctx, op); 1550 } 1551 1552 static bool fold_extract2(OptContext *ctx, TCGOp *op) 1553 { 1554 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1555 uint64_t v1 = arg_info(op->args[1])->val; 1556 uint64_t v2 = arg_info(op->args[2])->val; 1557 int shr = op->args[3]; 1558 1559 if (op->opc == INDEX_op_extract2_i64) { 1560 v1 >>= shr; 1561 v2 <<= 64 - shr; 1562 } else { 1563 v1 = (uint32_t)v1 >> shr; 1564 v2 = (uint64_t)((int32_t)v2 << (32 - shr)); 1565 } 1566 return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2); 1567 } 1568 return false; 1569 } 1570 1571 static bool fold_exts(OptContext *ctx, TCGOp *op) 1572 { 1573 uint64_t s_mask_old, s_mask, z_mask, sign; 1574 bool type_change = false; 1575 1576 if (fold_const1(ctx, op)) { 1577 return true; 1578 } 1579 1580 z_mask = arg_info(op->args[1])->z_mask; 1581 s_mask = arg_info(op->args[1])->s_mask; 1582 s_mask_old = s_mask; 1583 1584 switch (op->opc) { 1585 CASE_OP_32_64(ext8s): 1586 sign = INT8_MIN; 1587 z_mask = (uint8_t)z_mask; 1588 break; 1589 CASE_OP_32_64(ext16s): 1590 sign = INT16_MIN; 1591 z_mask = (uint16_t)z_mask; 1592 break; 1593 case INDEX_op_ext_i32_i64: 1594 type_change = true; 1595 QEMU_FALLTHROUGH; 1596 case INDEX_op_ext32s_i64: 1597 sign = INT32_MIN; 1598 z_mask = (uint32_t)z_mask; 1599 break; 1600 default: 1601 g_assert_not_reached(); 1602 } 1603 1604 if (z_mask & sign) { 1605 z_mask |= sign; 1606 } 1607 s_mask |= sign << 1; 1608 1609 ctx->z_mask = z_mask; 1610 ctx->s_mask = s_mask; 1611 if (!type_change) { 1612 ctx->a_mask = s_mask & ~s_mask_old; 1613 } 1614 1615 return fold_masks(ctx, op); 1616 } 1617 1618 static bool fold_extu(OptContext *ctx, TCGOp *op) 1619 { 1620 uint64_t z_mask_old, z_mask; 1621 bool type_change = false; 1622 1623 if (fold_const1(ctx, op)) { 1624 return true; 1625 } 1626 1627 z_mask_old = z_mask = arg_info(op->args[1])->z_mask; 1628 1629 switch (op->opc) { 1630 CASE_OP_32_64(ext8u): 1631 z_mask = (uint8_t)z_mask; 1632 break; 1633 CASE_OP_32_64(ext16u): 1634 z_mask = (uint16_t)z_mask; 1635 break; 1636 case INDEX_op_extrl_i64_i32: 1637 case INDEX_op_extu_i32_i64: 1638 type_change = true; 1639 QEMU_FALLTHROUGH; 1640 case INDEX_op_ext32u_i64: 1641 z_mask = (uint32_t)z_mask; 1642 break; 1643 case INDEX_op_extrh_i64_i32: 1644 type_change = true; 1645 z_mask >>= 32; 1646 break; 1647 default: 1648 g_assert_not_reached(); 1649 } 1650 1651 ctx->z_mask = z_mask; 1652 ctx->s_mask = smask_from_zmask(z_mask); 1653 if (!type_change) { 1654 ctx->a_mask = z_mask_old ^ z_mask; 1655 } 1656 return fold_masks(ctx, op); 1657 } 1658 1659 static bool fold_mb(OptContext *ctx, TCGOp *op) 1660 { 1661 /* Eliminate duplicate and redundant fence instructions. */ 1662 if (ctx->prev_mb) { 1663 /* 1664 * Merge two barriers of the same type into one, 1665 * or a weaker barrier into a stronger one, 1666 * or two weaker barriers into a stronger one. 1667 * mb X; mb Y => mb X|Y 1668 * mb; strl => mb; st 1669 * ldaq; mb => ld; mb 1670 * ldaq; strl => ld; mb; st 1671 * Other combinations are also merged into a strong 1672 * barrier. This is stricter than specified but for 1673 * the purposes of TCG is better than not optimizing. 1674 */ 1675 ctx->prev_mb->args[0] |= op->args[0]; 1676 tcg_op_remove(ctx->tcg, op); 1677 } else { 1678 ctx->prev_mb = op; 1679 } 1680 return true; 1681 } 1682 1683 static bool fold_mov(OptContext *ctx, TCGOp *op) 1684 { 1685 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 1686 } 1687 1688 static bool fold_movcond(OptContext *ctx, TCGOp *op) 1689 { 1690 TCGCond cond = op->args[5]; 1691 int i; 1692 1693 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) { 1694 op->args[5] = cond = tcg_swap_cond(cond); 1695 } 1696 /* 1697 * Canonicalize the "false" input reg to match the destination reg so 1698 * that the tcg backend can implement a "move if true" operation. 1699 */ 1700 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) { 1701 op->args[5] = cond = tcg_invert_cond(cond); 1702 } 1703 1704 i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond); 1705 if (i >= 0) { 1706 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]); 1707 } 1708 1709 ctx->z_mask = arg_info(op->args[3])->z_mask 1710 | arg_info(op->args[4])->z_mask; 1711 ctx->s_mask = arg_info(op->args[3])->s_mask 1712 & arg_info(op->args[4])->s_mask; 1713 1714 if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) { 1715 uint64_t tv = arg_info(op->args[3])->val; 1716 uint64_t fv = arg_info(op->args[4])->val; 1717 TCGOpcode opc, negopc = 0; 1718 1719 switch (ctx->type) { 1720 case TCG_TYPE_I32: 1721 opc = INDEX_op_setcond_i32; 1722 if (TCG_TARGET_HAS_negsetcond_i32) { 1723 negopc = INDEX_op_negsetcond_i32; 1724 } 1725 tv = (int32_t)tv; 1726 fv = (int32_t)fv; 1727 break; 1728 case TCG_TYPE_I64: 1729 opc = INDEX_op_setcond_i64; 1730 if (TCG_TARGET_HAS_negsetcond_i64) { 1731 negopc = INDEX_op_negsetcond_i64; 1732 } 1733 break; 1734 default: 1735 g_assert_not_reached(); 1736 } 1737 1738 if (tv == 1 && fv == 0) { 1739 op->opc = opc; 1740 op->args[3] = cond; 1741 } else if (fv == 1 && tv == 0) { 1742 op->opc = opc; 1743 op->args[3] = tcg_invert_cond(cond); 1744 } else if (negopc) { 1745 if (tv == -1 && fv == 0) { 1746 op->opc = negopc; 1747 op->args[3] = cond; 1748 } else if (fv == -1 && tv == 0) { 1749 op->opc = negopc; 1750 op->args[3] = tcg_invert_cond(cond); 1751 } 1752 } 1753 } 1754 return false; 1755 } 1756 1757 static bool fold_mul(OptContext *ctx, TCGOp *op) 1758 { 1759 if (fold_const2(ctx, op) || 1760 fold_xi_to_i(ctx, op, 0) || 1761 fold_xi_to_x(ctx, op, 1)) { 1762 return true; 1763 } 1764 return false; 1765 } 1766 1767 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op) 1768 { 1769 if (fold_const2_commutative(ctx, op) || 1770 fold_xi_to_i(ctx, op, 0)) { 1771 return true; 1772 } 1773 return false; 1774 } 1775 1776 static bool fold_multiply2(OptContext *ctx, TCGOp *op) 1777 { 1778 swap_commutative(op->args[0], &op->args[2], &op->args[3]); 1779 1780 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { 1781 uint64_t a = arg_info(op->args[2])->val; 1782 uint64_t b = arg_info(op->args[3])->val; 1783 uint64_t h, l; 1784 TCGArg rl, rh; 1785 TCGOp *op2; 1786 1787 switch (op->opc) { 1788 case INDEX_op_mulu2_i32: 1789 l = (uint64_t)(uint32_t)a * (uint32_t)b; 1790 h = (int32_t)(l >> 32); 1791 l = (int32_t)l; 1792 break; 1793 case INDEX_op_muls2_i32: 1794 l = (int64_t)(int32_t)a * (int32_t)b; 1795 h = l >> 32; 1796 l = (int32_t)l; 1797 break; 1798 case INDEX_op_mulu2_i64: 1799 mulu64(&l, &h, a, b); 1800 break; 1801 case INDEX_op_muls2_i64: 1802 muls64(&l, &h, a, b); 1803 break; 1804 default: 1805 g_assert_not_reached(); 1806 } 1807 1808 rl = op->args[0]; 1809 rh = op->args[1]; 1810 1811 /* The proper opcode is supplied by tcg_opt_gen_mov. */ 1812 op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); 1813 1814 tcg_opt_gen_movi(ctx, op, rl, l); 1815 tcg_opt_gen_movi(ctx, op2, rh, h); 1816 return true; 1817 } 1818 return false; 1819 } 1820 1821 static bool fold_nand(OptContext *ctx, TCGOp *op) 1822 { 1823 if (fold_const2_commutative(ctx, op) || 1824 fold_xi_to_not(ctx, op, -1)) { 1825 return true; 1826 } 1827 1828 ctx->s_mask = arg_info(op->args[1])->s_mask 1829 & arg_info(op->args[2])->s_mask; 1830 return false; 1831 } 1832 1833 static bool fold_neg(OptContext *ctx, TCGOp *op) 1834 { 1835 uint64_t z_mask; 1836 1837 if (fold_const1(ctx, op)) { 1838 return true; 1839 } 1840 1841 /* Set to 1 all bits to the left of the rightmost. */ 1842 z_mask = arg_info(op->args[1])->z_mask; 1843 ctx->z_mask = -(z_mask & -z_mask); 1844 1845 /* 1846 * Because of fold_sub_to_neg, we want to always return true, 1847 * via finish_folding. 1848 */ 1849 finish_folding(ctx, op); 1850 return true; 1851 } 1852 1853 static bool fold_nor(OptContext *ctx, TCGOp *op) 1854 { 1855 if (fold_const2_commutative(ctx, op) || 1856 fold_xi_to_not(ctx, op, 0)) { 1857 return true; 1858 } 1859 1860 ctx->s_mask = arg_info(op->args[1])->s_mask 1861 & arg_info(op->args[2])->s_mask; 1862 return false; 1863 } 1864 1865 static bool fold_not(OptContext *ctx, TCGOp *op) 1866 { 1867 if (fold_const1(ctx, op)) { 1868 return true; 1869 } 1870 1871 ctx->s_mask = arg_info(op->args[1])->s_mask; 1872 1873 /* Because of fold_to_not, we want to always return true, via finish. */ 1874 finish_folding(ctx, op); 1875 return true; 1876 } 1877 1878 static bool fold_or(OptContext *ctx, TCGOp *op) 1879 { 1880 if (fold_const2_commutative(ctx, op) || 1881 fold_xi_to_x(ctx, op, 0) || 1882 fold_xx_to_x(ctx, op)) { 1883 return true; 1884 } 1885 1886 ctx->z_mask = arg_info(op->args[1])->z_mask 1887 | arg_info(op->args[2])->z_mask; 1888 ctx->s_mask = arg_info(op->args[1])->s_mask 1889 & arg_info(op->args[2])->s_mask; 1890 return fold_masks(ctx, op); 1891 } 1892 1893 static bool fold_orc(OptContext *ctx, TCGOp *op) 1894 { 1895 if (fold_const2(ctx, op) || 1896 fold_xx_to_i(ctx, op, -1) || 1897 fold_xi_to_x(ctx, op, -1) || 1898 fold_ix_to_not(ctx, op, 0)) { 1899 return true; 1900 } 1901 1902 ctx->s_mask = arg_info(op->args[1])->s_mask 1903 & arg_info(op->args[2])->s_mask; 1904 return false; 1905 } 1906 1907 static bool fold_qemu_ld(OptContext *ctx, TCGOp *op) 1908 { 1909 const TCGOpDef *def = &tcg_op_defs[op->opc]; 1910 MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs]; 1911 MemOp mop = get_memop(oi); 1912 int width = 8 * memop_size(mop); 1913 1914 if (width < 64) { 1915 ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width); 1916 if (!(mop & MO_SIGN)) { 1917 ctx->z_mask = MAKE_64BIT_MASK(0, width); 1918 ctx->s_mask <<= 1; 1919 } 1920 } 1921 1922 /* Opcodes that touch guest memory stop the mb optimization. */ 1923 ctx->prev_mb = NULL; 1924 return false; 1925 } 1926 1927 static bool fold_qemu_st(OptContext *ctx, TCGOp *op) 1928 { 1929 /* Opcodes that touch guest memory stop the mb optimization. */ 1930 ctx->prev_mb = NULL; 1931 return false; 1932 } 1933 1934 static bool fold_remainder(OptContext *ctx, TCGOp *op) 1935 { 1936 if (fold_const2(ctx, op) || 1937 fold_xx_to_i(ctx, op, 0)) { 1938 return true; 1939 } 1940 return false; 1941 } 1942 1943 static bool fold_setcond(OptContext *ctx, TCGOp *op) 1944 { 1945 TCGCond cond = op->args[3]; 1946 int i; 1947 1948 if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) { 1949 op->args[3] = cond = tcg_swap_cond(cond); 1950 } 1951 1952 i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond); 1953 if (i >= 0) { 1954 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 1955 } 1956 1957 ctx->z_mask = 1; 1958 ctx->s_mask = smask_from_zmask(1); 1959 return false; 1960 } 1961 1962 static bool fold_negsetcond(OptContext *ctx, TCGOp *op) 1963 { 1964 TCGCond cond = op->args[3]; 1965 int i; 1966 1967 if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) { 1968 op->args[3] = cond = tcg_swap_cond(cond); 1969 } 1970 1971 i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond); 1972 if (i >= 0) { 1973 return tcg_opt_gen_movi(ctx, op, op->args[0], -i); 1974 } 1975 1976 /* Value is {0,-1} so all bits are repetitions of the sign. */ 1977 ctx->s_mask = -1; 1978 return false; 1979 } 1980 1981 1982 static bool fold_setcond2(OptContext *ctx, TCGOp *op) 1983 { 1984 TCGCond cond = op->args[5]; 1985 int i, inv = 0; 1986 1987 if (swap_commutative2(&op->args[1], &op->args[3])) { 1988 op->args[5] = cond = tcg_swap_cond(cond); 1989 } 1990 1991 i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond); 1992 if (i >= 0) { 1993 goto do_setcond_const; 1994 } 1995 1996 switch (cond) { 1997 case TCG_COND_LT: 1998 case TCG_COND_GE: 1999 /* 2000 * Simplify LT/GE comparisons vs zero to a single compare 2001 * vs the high word of the input. 2002 */ 2003 if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 && 2004 arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) { 2005 goto do_setcond_high; 2006 } 2007 break; 2008 2009 case TCG_COND_NE: 2010 inv = 1; 2011 QEMU_FALLTHROUGH; 2012 case TCG_COND_EQ: 2013 /* 2014 * Simplify EQ/NE comparisons where one of the pairs 2015 * can be simplified. 2016 */ 2017 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1], 2018 op->args[3], cond); 2019 switch (i ^ inv) { 2020 case 0: 2021 goto do_setcond_const; 2022 case 1: 2023 goto do_setcond_high; 2024 } 2025 2026 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2], 2027 op->args[4], cond); 2028 switch (i ^ inv) { 2029 case 0: 2030 goto do_setcond_const; 2031 case 1: 2032 op->args[2] = op->args[3]; 2033 op->args[3] = cond; 2034 op->opc = INDEX_op_setcond_i32; 2035 break; 2036 } 2037 break; 2038 2039 default: 2040 break; 2041 2042 do_setcond_high: 2043 op->args[1] = op->args[2]; 2044 op->args[2] = op->args[4]; 2045 op->args[3] = cond; 2046 op->opc = INDEX_op_setcond_i32; 2047 break; 2048 } 2049 2050 ctx->z_mask = 1; 2051 ctx->s_mask = smask_from_zmask(1); 2052 return false; 2053 2054 do_setcond_const: 2055 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 2056 } 2057 2058 static bool fold_sextract(OptContext *ctx, TCGOp *op) 2059 { 2060 uint64_t z_mask, s_mask, s_mask_old; 2061 int pos = op->args[2]; 2062 int len = op->args[3]; 2063 2064 if (arg_is_const(op->args[1])) { 2065 uint64_t t; 2066 2067 t = arg_info(op->args[1])->val; 2068 t = sextract64(t, pos, len); 2069 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 2070 } 2071 2072 z_mask = arg_info(op->args[1])->z_mask; 2073 z_mask = sextract64(z_mask, pos, len); 2074 ctx->z_mask = z_mask; 2075 2076 s_mask_old = arg_info(op->args[1])->s_mask; 2077 s_mask = sextract64(s_mask_old, pos, len); 2078 s_mask |= MAKE_64BIT_MASK(len, 64 - len); 2079 ctx->s_mask = s_mask; 2080 2081 if (pos == 0) { 2082 ctx->a_mask = s_mask & ~s_mask_old; 2083 } 2084 2085 return fold_masks(ctx, op); 2086 } 2087 2088 static bool fold_shift(OptContext *ctx, TCGOp *op) 2089 { 2090 uint64_t s_mask, z_mask, sign; 2091 2092 if (fold_const2(ctx, op) || 2093 fold_ix_to_i(ctx, op, 0) || 2094 fold_xi_to_x(ctx, op, 0)) { 2095 return true; 2096 } 2097 2098 s_mask = arg_info(op->args[1])->s_mask; 2099 z_mask = arg_info(op->args[1])->z_mask; 2100 2101 if (arg_is_const(op->args[2])) { 2102 int sh = arg_info(op->args[2])->val; 2103 2104 ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh); 2105 2106 s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh); 2107 ctx->s_mask = smask_from_smask(s_mask); 2108 2109 return fold_masks(ctx, op); 2110 } 2111 2112 switch (op->opc) { 2113 CASE_OP_32_64(sar): 2114 /* 2115 * Arithmetic right shift will not reduce the number of 2116 * input sign repetitions. 2117 */ 2118 ctx->s_mask = s_mask; 2119 break; 2120 CASE_OP_32_64(shr): 2121 /* 2122 * If the sign bit is known zero, then logical right shift 2123 * will not reduced the number of input sign repetitions. 2124 */ 2125 sign = (s_mask & -s_mask) >> 1; 2126 if (!(z_mask & sign)) { 2127 ctx->s_mask = s_mask; 2128 } 2129 break; 2130 default: 2131 break; 2132 } 2133 2134 return false; 2135 } 2136 2137 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op) 2138 { 2139 TCGOpcode neg_op; 2140 bool have_neg; 2141 2142 if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) { 2143 return false; 2144 } 2145 2146 switch (ctx->type) { 2147 case TCG_TYPE_I32: 2148 neg_op = INDEX_op_neg_i32; 2149 have_neg = true; 2150 break; 2151 case TCG_TYPE_I64: 2152 neg_op = INDEX_op_neg_i64; 2153 have_neg = true; 2154 break; 2155 case TCG_TYPE_V64: 2156 case TCG_TYPE_V128: 2157 case TCG_TYPE_V256: 2158 neg_op = INDEX_op_neg_vec; 2159 have_neg = (TCG_TARGET_HAS_neg_vec && 2160 tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0); 2161 break; 2162 default: 2163 g_assert_not_reached(); 2164 } 2165 if (have_neg) { 2166 op->opc = neg_op; 2167 op->args[1] = op->args[2]; 2168 return fold_neg(ctx, op); 2169 } 2170 return false; 2171 } 2172 2173 /* We cannot as yet do_constant_folding with vectors. */ 2174 static bool fold_sub_vec(OptContext *ctx, TCGOp *op) 2175 { 2176 if (fold_xx_to_i(ctx, op, 0) || 2177 fold_xi_to_x(ctx, op, 0) || 2178 fold_sub_to_neg(ctx, op)) { 2179 return true; 2180 } 2181 return false; 2182 } 2183 2184 static bool fold_sub(OptContext *ctx, TCGOp *op) 2185 { 2186 if (fold_const2(ctx, op) || fold_sub_vec(ctx, op)) { 2187 return true; 2188 } 2189 2190 /* Fold sub r,x,i to add r,x,-i */ 2191 if (arg_is_const(op->args[2])) { 2192 uint64_t val = arg_info(op->args[2])->val; 2193 2194 op->opc = (ctx->type == TCG_TYPE_I32 2195 ? INDEX_op_add_i32 : INDEX_op_add_i64); 2196 op->args[2] = arg_new_constant(ctx, -val); 2197 } 2198 return false; 2199 } 2200 2201 static bool fold_sub2(OptContext *ctx, TCGOp *op) 2202 { 2203 return fold_addsub2(ctx, op, false); 2204 } 2205 2206 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op) 2207 { 2208 /* We can't do any folding with a load, but we can record bits. */ 2209 switch (op->opc) { 2210 CASE_OP_32_64(ld8s): 2211 ctx->s_mask = MAKE_64BIT_MASK(8, 56); 2212 break; 2213 CASE_OP_32_64(ld8u): 2214 ctx->z_mask = MAKE_64BIT_MASK(0, 8); 2215 ctx->s_mask = MAKE_64BIT_MASK(9, 55); 2216 break; 2217 CASE_OP_32_64(ld16s): 2218 ctx->s_mask = MAKE_64BIT_MASK(16, 48); 2219 break; 2220 CASE_OP_32_64(ld16u): 2221 ctx->z_mask = MAKE_64BIT_MASK(0, 16); 2222 ctx->s_mask = MAKE_64BIT_MASK(17, 47); 2223 break; 2224 case INDEX_op_ld32s_i64: 2225 ctx->s_mask = MAKE_64BIT_MASK(32, 32); 2226 break; 2227 case INDEX_op_ld32u_i64: 2228 ctx->z_mask = MAKE_64BIT_MASK(0, 32); 2229 ctx->s_mask = MAKE_64BIT_MASK(33, 31); 2230 break; 2231 default: 2232 g_assert_not_reached(); 2233 } 2234 return false; 2235 } 2236 2237 static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op) 2238 { 2239 TCGTemp *dst, *src; 2240 intptr_t ofs; 2241 TCGType type; 2242 2243 if (op->args[1] != tcgv_ptr_arg(tcg_env)) { 2244 return false; 2245 } 2246 2247 type = ctx->type; 2248 ofs = op->args[2]; 2249 dst = arg_temp(op->args[0]); 2250 src = find_mem_copy_for(ctx, type, ofs); 2251 if (src && src->base_type == type) { 2252 return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src)); 2253 } 2254 2255 reset_ts(ctx, dst); 2256 record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1); 2257 return true; 2258 } 2259 2260 static bool fold_tcg_st(OptContext *ctx, TCGOp *op) 2261 { 2262 intptr_t ofs = op->args[2]; 2263 intptr_t lm1; 2264 2265 if (op->args[1] != tcgv_ptr_arg(tcg_env)) { 2266 remove_mem_copy_all(ctx); 2267 return false; 2268 } 2269 2270 switch (op->opc) { 2271 CASE_OP_32_64(st8): 2272 lm1 = 0; 2273 break; 2274 CASE_OP_32_64(st16): 2275 lm1 = 1; 2276 break; 2277 case INDEX_op_st32_i64: 2278 case INDEX_op_st_i32: 2279 lm1 = 3; 2280 break; 2281 case INDEX_op_st_i64: 2282 lm1 = 7; 2283 break; 2284 case INDEX_op_st_vec: 2285 lm1 = tcg_type_size(ctx->type) - 1; 2286 break; 2287 default: 2288 g_assert_not_reached(); 2289 } 2290 remove_mem_copy_in(ctx, ofs, ofs + lm1); 2291 return false; 2292 } 2293 2294 static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op) 2295 { 2296 TCGTemp *src; 2297 intptr_t ofs, last; 2298 TCGType type; 2299 2300 if (op->args[1] != tcgv_ptr_arg(tcg_env)) { 2301 fold_tcg_st(ctx, op); 2302 return false; 2303 } 2304 2305 src = arg_temp(op->args[0]); 2306 ofs = op->args[2]; 2307 type = ctx->type; 2308 2309 /* 2310 * Eliminate duplicate stores of a constant. 2311 * This happens frequently when the target ISA zero-extends. 2312 */ 2313 if (ts_is_const(src)) { 2314 TCGTemp *prev = find_mem_copy_for(ctx, type, ofs); 2315 if (src == prev) { 2316 tcg_op_remove(ctx->tcg, op); 2317 return true; 2318 } 2319 } 2320 2321 last = ofs + tcg_type_size(type) - 1; 2322 remove_mem_copy_in(ctx, ofs, last); 2323 record_mem_copy(ctx, type, src, ofs, last); 2324 return false; 2325 } 2326 2327 static bool fold_xor(OptContext *ctx, TCGOp *op) 2328 { 2329 if (fold_const2_commutative(ctx, op) || 2330 fold_xx_to_i(ctx, op, 0) || 2331 fold_xi_to_x(ctx, op, 0) || 2332 fold_xi_to_not(ctx, op, -1)) { 2333 return true; 2334 } 2335 2336 ctx->z_mask = arg_info(op->args[1])->z_mask 2337 | arg_info(op->args[2])->z_mask; 2338 ctx->s_mask = arg_info(op->args[1])->s_mask 2339 & arg_info(op->args[2])->s_mask; 2340 return fold_masks(ctx, op); 2341 } 2342 2343 /* Propagate constants and copies, fold constant expressions. */ 2344 void tcg_optimize(TCGContext *s) 2345 { 2346 int nb_temps, i; 2347 TCGOp *op, *op_next; 2348 OptContext ctx = { .tcg = s }; 2349 2350 QSIMPLEQ_INIT(&ctx.mem_free); 2351 2352 /* Array VALS has an element for each temp. 2353 If this temp holds a constant then its value is kept in VALS' element. 2354 If this temp is a copy of other ones then the other copies are 2355 available through the doubly linked circular list. */ 2356 2357 nb_temps = s->nb_temps; 2358 for (i = 0; i < nb_temps; ++i) { 2359 s->temps[i].state_ptr = NULL; 2360 } 2361 2362 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2363 TCGOpcode opc = op->opc; 2364 const TCGOpDef *def; 2365 bool done = false; 2366 2367 /* Calls are special. */ 2368 if (opc == INDEX_op_call) { 2369 fold_call(&ctx, op); 2370 continue; 2371 } 2372 2373 def = &tcg_op_defs[opc]; 2374 init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs); 2375 copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs); 2376 2377 /* Pre-compute the type of the operation. */ 2378 if (def->flags & TCG_OPF_VECTOR) { 2379 ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op); 2380 } else if (def->flags & TCG_OPF_64BIT) { 2381 ctx.type = TCG_TYPE_I64; 2382 } else { 2383 ctx.type = TCG_TYPE_I32; 2384 } 2385 2386 /* Assume all bits affected, no bits known zero, no sign reps. */ 2387 ctx.a_mask = -1; 2388 ctx.z_mask = -1; 2389 ctx.s_mask = 0; 2390 2391 /* 2392 * Process each opcode. 2393 * Sorted alphabetically by opcode as much as possible. 2394 */ 2395 switch (opc) { 2396 CASE_OP_32_64(add): 2397 done = fold_add(&ctx, op); 2398 break; 2399 case INDEX_op_add_vec: 2400 done = fold_add_vec(&ctx, op); 2401 break; 2402 CASE_OP_32_64(add2): 2403 done = fold_add2(&ctx, op); 2404 break; 2405 CASE_OP_32_64_VEC(and): 2406 done = fold_and(&ctx, op); 2407 break; 2408 CASE_OP_32_64_VEC(andc): 2409 done = fold_andc(&ctx, op); 2410 break; 2411 CASE_OP_32_64(brcond): 2412 done = fold_brcond(&ctx, op); 2413 break; 2414 case INDEX_op_brcond2_i32: 2415 done = fold_brcond2(&ctx, op); 2416 break; 2417 CASE_OP_32_64(bswap16): 2418 CASE_OP_32_64(bswap32): 2419 case INDEX_op_bswap64_i64: 2420 done = fold_bswap(&ctx, op); 2421 break; 2422 CASE_OP_32_64(clz): 2423 CASE_OP_32_64(ctz): 2424 done = fold_count_zeros(&ctx, op); 2425 break; 2426 CASE_OP_32_64(ctpop): 2427 done = fold_ctpop(&ctx, op); 2428 break; 2429 CASE_OP_32_64(deposit): 2430 done = fold_deposit(&ctx, op); 2431 break; 2432 CASE_OP_32_64(div): 2433 CASE_OP_32_64(divu): 2434 done = fold_divide(&ctx, op); 2435 break; 2436 case INDEX_op_dup_vec: 2437 done = fold_dup(&ctx, op); 2438 break; 2439 case INDEX_op_dup2_vec: 2440 done = fold_dup2(&ctx, op); 2441 break; 2442 CASE_OP_32_64_VEC(eqv): 2443 done = fold_eqv(&ctx, op); 2444 break; 2445 CASE_OP_32_64(extract): 2446 done = fold_extract(&ctx, op); 2447 break; 2448 CASE_OP_32_64(extract2): 2449 done = fold_extract2(&ctx, op); 2450 break; 2451 CASE_OP_32_64(ext8s): 2452 CASE_OP_32_64(ext16s): 2453 case INDEX_op_ext32s_i64: 2454 case INDEX_op_ext_i32_i64: 2455 done = fold_exts(&ctx, op); 2456 break; 2457 CASE_OP_32_64(ext8u): 2458 CASE_OP_32_64(ext16u): 2459 case INDEX_op_ext32u_i64: 2460 case INDEX_op_extu_i32_i64: 2461 case INDEX_op_extrl_i64_i32: 2462 case INDEX_op_extrh_i64_i32: 2463 done = fold_extu(&ctx, op); 2464 break; 2465 CASE_OP_32_64(ld8s): 2466 CASE_OP_32_64(ld8u): 2467 CASE_OP_32_64(ld16s): 2468 CASE_OP_32_64(ld16u): 2469 case INDEX_op_ld32s_i64: 2470 case INDEX_op_ld32u_i64: 2471 done = fold_tcg_ld(&ctx, op); 2472 break; 2473 case INDEX_op_ld_i32: 2474 case INDEX_op_ld_i64: 2475 case INDEX_op_ld_vec: 2476 done = fold_tcg_ld_memcopy(&ctx, op); 2477 break; 2478 CASE_OP_32_64(st8): 2479 CASE_OP_32_64(st16): 2480 case INDEX_op_st32_i64: 2481 done = fold_tcg_st(&ctx, op); 2482 break; 2483 case INDEX_op_st_i32: 2484 case INDEX_op_st_i64: 2485 case INDEX_op_st_vec: 2486 done = fold_tcg_st_memcopy(&ctx, op); 2487 break; 2488 case INDEX_op_mb: 2489 done = fold_mb(&ctx, op); 2490 break; 2491 CASE_OP_32_64_VEC(mov): 2492 done = fold_mov(&ctx, op); 2493 break; 2494 CASE_OP_32_64(movcond): 2495 done = fold_movcond(&ctx, op); 2496 break; 2497 CASE_OP_32_64(mul): 2498 done = fold_mul(&ctx, op); 2499 break; 2500 CASE_OP_32_64(mulsh): 2501 CASE_OP_32_64(muluh): 2502 done = fold_mul_highpart(&ctx, op); 2503 break; 2504 CASE_OP_32_64(muls2): 2505 CASE_OP_32_64(mulu2): 2506 done = fold_multiply2(&ctx, op); 2507 break; 2508 CASE_OP_32_64_VEC(nand): 2509 done = fold_nand(&ctx, op); 2510 break; 2511 CASE_OP_32_64(neg): 2512 done = fold_neg(&ctx, op); 2513 break; 2514 CASE_OP_32_64_VEC(nor): 2515 done = fold_nor(&ctx, op); 2516 break; 2517 CASE_OP_32_64_VEC(not): 2518 done = fold_not(&ctx, op); 2519 break; 2520 CASE_OP_32_64_VEC(or): 2521 done = fold_or(&ctx, op); 2522 break; 2523 CASE_OP_32_64_VEC(orc): 2524 done = fold_orc(&ctx, op); 2525 break; 2526 case INDEX_op_qemu_ld_a32_i32: 2527 case INDEX_op_qemu_ld_a64_i32: 2528 case INDEX_op_qemu_ld_a32_i64: 2529 case INDEX_op_qemu_ld_a64_i64: 2530 case INDEX_op_qemu_ld_a32_i128: 2531 case INDEX_op_qemu_ld_a64_i128: 2532 done = fold_qemu_ld(&ctx, op); 2533 break; 2534 case INDEX_op_qemu_st8_a32_i32: 2535 case INDEX_op_qemu_st8_a64_i32: 2536 case INDEX_op_qemu_st_a32_i32: 2537 case INDEX_op_qemu_st_a64_i32: 2538 case INDEX_op_qemu_st_a32_i64: 2539 case INDEX_op_qemu_st_a64_i64: 2540 case INDEX_op_qemu_st_a32_i128: 2541 case INDEX_op_qemu_st_a64_i128: 2542 done = fold_qemu_st(&ctx, op); 2543 break; 2544 CASE_OP_32_64(rem): 2545 CASE_OP_32_64(remu): 2546 done = fold_remainder(&ctx, op); 2547 break; 2548 CASE_OP_32_64(rotl): 2549 CASE_OP_32_64(rotr): 2550 CASE_OP_32_64(sar): 2551 CASE_OP_32_64(shl): 2552 CASE_OP_32_64(shr): 2553 done = fold_shift(&ctx, op); 2554 break; 2555 CASE_OP_32_64(setcond): 2556 done = fold_setcond(&ctx, op); 2557 break; 2558 CASE_OP_32_64(negsetcond): 2559 done = fold_negsetcond(&ctx, op); 2560 break; 2561 case INDEX_op_setcond2_i32: 2562 done = fold_setcond2(&ctx, op); 2563 break; 2564 CASE_OP_32_64(sextract): 2565 done = fold_sextract(&ctx, op); 2566 break; 2567 CASE_OP_32_64(sub): 2568 done = fold_sub(&ctx, op); 2569 break; 2570 case INDEX_op_sub_vec: 2571 done = fold_sub_vec(&ctx, op); 2572 break; 2573 CASE_OP_32_64(sub2): 2574 done = fold_sub2(&ctx, op); 2575 break; 2576 CASE_OP_32_64_VEC(xor): 2577 done = fold_xor(&ctx, op); 2578 break; 2579 default: 2580 break; 2581 } 2582 2583 if (!done) { 2584 finish_folding(&ctx, op); 2585 } 2586 } 2587 } 2588