/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/int128.h"
#include "qemu/interval-tree.h"
#include "tcg/tcg-op-common.h"
#include "tcg-internal.h"
#include "tcg-has.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)

typedef struct MemCopyInfo {
    IntervalTreeNode itree;
    QSIMPLEQ_ENTRY (MemCopyInfo) next;
    TCGTemp *ts;
    TCGType type;
} MemCopyInfo;

typedef struct TempOptInfo {
    bool is_const;
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
    uint64_t val;
    uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
    uint64_t s_mask;  /* mask bit is 1 if value bit matches msb */
} TempOptInfo;

typedef struct OptContext {
    TCGContext *tcg;
    TCGOp *prev_mb;
    TCGTempSet temps_used;

    IntervalTreeRoot mem_copy;
    QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;

    /* In flight values from optimization.
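     * These describe the opcode currently being folded rather than any
     * temporary: its TCGType, and the carry produced by a preceding
     * add/sub-with-carry opcode, if known.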
*/ 68 TCGType type; 69 int carry_state; /* -1 = non-constant, {0,1} = constant carry-in */ 70 } OptContext; 71 72 static inline TempOptInfo *ts_info(TCGTemp *ts) 73 { 74 return ts->state_ptr; 75 } 76 77 static inline TempOptInfo *arg_info(TCGArg arg) 78 { 79 return ts_info(arg_temp(arg)); 80 } 81 82 static inline bool ti_is_const(TempOptInfo *ti) 83 { 84 return ti->is_const; 85 } 86 87 static inline uint64_t ti_const_val(TempOptInfo *ti) 88 { 89 return ti->val; 90 } 91 92 static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val) 93 { 94 return ti_is_const(ti) && ti_const_val(ti) == val; 95 } 96 97 static inline bool ts_is_const(TCGTemp *ts) 98 { 99 return ti_is_const(ts_info(ts)); 100 } 101 102 static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val) 103 { 104 return ti_is_const_val(ts_info(ts), val); 105 } 106 107 static inline bool arg_is_const(TCGArg arg) 108 { 109 return ts_is_const(arg_temp(arg)); 110 } 111 112 static inline bool arg_is_const_val(TCGArg arg, uint64_t val) 113 { 114 return ts_is_const_val(arg_temp(arg), val); 115 } 116 117 static inline bool ts_is_copy(TCGTemp *ts) 118 { 119 return ts_info(ts)->next_copy != ts; 120 } 121 122 static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b) 123 { 124 return a->kind < b->kind ? b : a; 125 } 126 127 /* Initialize and activate a temporary. */ 128 static void init_ts_info(OptContext *ctx, TCGTemp *ts) 129 { 130 size_t idx = temp_idx(ts); 131 TempOptInfo *ti; 132 133 if (test_bit(idx, ctx->temps_used.l)) { 134 return; 135 } 136 set_bit(idx, ctx->temps_used.l); 137 138 ti = ts->state_ptr; 139 if (ti == NULL) { 140 ti = tcg_malloc(sizeof(TempOptInfo)); 141 ts->state_ptr = ti; 142 } 143 144 ti->next_copy = ts; 145 ti->prev_copy = ts; 146 QSIMPLEQ_INIT(&ti->mem_copy); 147 if (ts->kind == TEMP_CONST) { 148 ti->is_const = true; 149 ti->val = ts->val; 150 ti->z_mask = ts->val; 151 ti->s_mask = INT64_MIN >> clrsb64(ts->val); 152 } else { 153 ti->is_const = false; 154 ti->z_mask = -1; 155 ti->s_mask = 0; 156 } 157 } 158 159 static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l) 160 { 161 IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l); 162 return r ? container_of(r, MemCopyInfo, itree) : NULL; 163 } 164 165 static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l) 166 { 167 IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l); 168 return r ? container_of(r, MemCopyInfo, itree) : NULL; 169 } 170 171 static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc) 172 { 173 TCGTemp *ts = mc->ts; 174 TempOptInfo *ti = ts_info(ts); 175 176 interval_tree_remove(&mc->itree, &ctx->mem_copy); 177 QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next); 178 QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next); 179 } 180 181 static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l) 182 { 183 while (true) { 184 MemCopyInfo *mc = mem_copy_first(ctx, s, l); 185 if (!mc) { 186 break; 187 } 188 remove_mem_copy(ctx, mc); 189 } 190 } 191 192 static void remove_mem_copy_all(OptContext *ctx) 193 { 194 remove_mem_copy_in(ctx, 0, -1); 195 tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy)); 196 } 197 198 static TCGTemp *find_better_copy(TCGTemp *ts) 199 { 200 TCGTemp *i, *ret; 201 202 /* If this is already readonly, we can't do better. 
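     * (cmp_better_copy prefers the copy with the higher-ranked kind,
     *  e.g. a global or constant temp over an ebb or tb temp.)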
*/ 203 if (temp_readonly(ts)) { 204 return ts; 205 } 206 207 ret = ts; 208 for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) { 209 ret = cmp_better_copy(ret, i); 210 } 211 return ret; 212 } 213 214 static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts) 215 { 216 TempOptInfo *si = ts_info(src_ts); 217 TempOptInfo *di = ts_info(dst_ts); 218 MemCopyInfo *mc; 219 220 QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) { 221 tcg_debug_assert(mc->ts == src_ts); 222 mc->ts = dst_ts; 223 } 224 QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy); 225 } 226 227 /* Reset TEMP's state, possibly removing the temp for the list of copies. */ 228 static void reset_ts(OptContext *ctx, TCGTemp *ts) 229 { 230 TempOptInfo *ti = ts_info(ts); 231 TCGTemp *pts = ti->prev_copy; 232 TCGTemp *nts = ti->next_copy; 233 TempOptInfo *pi = ts_info(pts); 234 TempOptInfo *ni = ts_info(nts); 235 236 ni->prev_copy = ti->prev_copy; 237 pi->next_copy = ti->next_copy; 238 ti->next_copy = ts; 239 ti->prev_copy = ts; 240 ti->is_const = false; 241 ti->z_mask = -1; 242 ti->s_mask = 0; 243 244 if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) { 245 if (ts == nts) { 246 /* Last temp copy being removed, the mem copies die. */ 247 MemCopyInfo *mc; 248 QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) { 249 interval_tree_remove(&mc->itree, &ctx->mem_copy); 250 } 251 QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy); 252 } else { 253 move_mem_copies(find_better_copy(nts), ts); 254 } 255 } 256 } 257 258 static void reset_temp(OptContext *ctx, TCGArg arg) 259 { 260 reset_ts(ctx, arg_temp(arg)); 261 } 262 263 static void record_mem_copy(OptContext *ctx, TCGType type, 264 TCGTemp *ts, intptr_t start, intptr_t last) 265 { 266 MemCopyInfo *mc; 267 TempOptInfo *ti; 268 269 mc = QSIMPLEQ_FIRST(&ctx->mem_free); 270 if (mc) { 271 QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next); 272 } else { 273 mc = tcg_malloc(sizeof(*mc)); 274 } 275 276 memset(mc, 0, sizeof(*mc)); 277 mc->itree.start = start; 278 mc->itree.last = last; 279 mc->type = type; 280 interval_tree_insert(&mc->itree, &ctx->mem_copy); 281 282 ts = find_better_copy(ts); 283 ti = ts_info(ts); 284 mc->ts = ts; 285 QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next); 286 } 287 288 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2) 289 { 290 TCGTemp *i; 291 292 if (ts1 == ts2) { 293 return true; 294 } 295 296 if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) { 297 return false; 298 } 299 300 for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) { 301 if (i == ts2) { 302 return true; 303 } 304 } 305 306 return false; 307 } 308 309 static bool args_are_copies(TCGArg arg1, TCGArg arg2) 310 { 311 return ts_are_copies(arg_temp(arg1), arg_temp(arg2)); 312 } 313 314 static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s) 315 { 316 MemCopyInfo *mc; 317 318 for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) { 319 if (mc->itree.start == s && mc->type == type) { 320 return find_better_copy(mc->ts); 321 } 322 } 323 return NULL; 324 } 325 326 static TCGArg arg_new_constant(OptContext *ctx, uint64_t val) 327 { 328 TCGType type = ctx->type; 329 TCGTemp *ts; 330 331 if (type == TCG_TYPE_I32) { 332 val = (int32_t)val; 333 } 334 335 ts = tcg_constant_internal(type, val); 336 init_ts_info(ctx, ts); 337 338 return temp_arg(ts); 339 } 340 341 static TCGArg arg_new_temp(OptContext *ctx) 342 { 343 TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB); 344 init_ts_info(ctx, ts); 345 return temp_arg(ts); 346 } 347 348 static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op, 349 
TCGOpcode opc, unsigned narg) 350 { 351 return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg); 352 } 353 354 static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op, 355 TCGOpcode opc, unsigned narg) 356 { 357 return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg); 358 } 359 360 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) 361 { 362 TCGTemp *dst_ts = arg_temp(dst); 363 TCGTemp *src_ts = arg_temp(src); 364 TempOptInfo *di; 365 TempOptInfo *si; 366 TCGOpcode new_op; 367 368 if (ts_are_copies(dst_ts, src_ts)) { 369 tcg_op_remove(ctx->tcg, op); 370 return true; 371 } 372 373 reset_ts(ctx, dst_ts); 374 di = ts_info(dst_ts); 375 si = ts_info(src_ts); 376 377 switch (ctx->type) { 378 case TCG_TYPE_I32: 379 case TCG_TYPE_I64: 380 new_op = INDEX_op_mov; 381 break; 382 case TCG_TYPE_V64: 383 case TCG_TYPE_V128: 384 case TCG_TYPE_V256: 385 /* TCGOP_TYPE and TCGOP_VECE remain unchanged. */ 386 new_op = INDEX_op_mov_vec; 387 break; 388 default: 389 g_assert_not_reached(); 390 } 391 op->opc = new_op; 392 op->args[0] = dst; 393 op->args[1] = src; 394 395 di->z_mask = si->z_mask; 396 di->s_mask = si->s_mask; 397 398 if (src_ts->type == dst_ts->type) { 399 TempOptInfo *ni = ts_info(si->next_copy); 400 401 di->next_copy = si->next_copy; 402 di->prev_copy = src_ts; 403 ni->prev_copy = dst_ts; 404 si->next_copy = dst_ts; 405 di->is_const = si->is_const; 406 di->val = si->val; 407 408 if (!QSIMPLEQ_EMPTY(&si->mem_copy) 409 && cmp_better_copy(src_ts, dst_ts) == dst_ts) { 410 move_mem_copies(dst_ts, src_ts); 411 } 412 } 413 return true; 414 } 415 416 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op, 417 TCGArg dst, uint64_t val) 418 { 419 /* Convert movi to mov with constant temp. */ 420 return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val)); 421 } 422 423 static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, 424 uint64_t x, uint64_t y) 425 { 426 uint64_t l64, h64; 427 428 switch (op) { 429 case INDEX_op_add: 430 return x + y; 431 432 case INDEX_op_sub: 433 return x - y; 434 435 case INDEX_op_mul: 436 return x * y; 437 438 case INDEX_op_and: 439 case INDEX_op_and_vec: 440 return x & y; 441 442 case INDEX_op_or: 443 case INDEX_op_or_vec: 444 return x | y; 445 446 case INDEX_op_xor: 447 case INDEX_op_xor_vec: 448 return x ^ y; 449 450 case INDEX_op_shl: 451 if (type == TCG_TYPE_I32) { 452 return (uint32_t)x << (y & 31); 453 } 454 return (uint64_t)x << (y & 63); 455 456 case INDEX_op_shr: 457 if (type == TCG_TYPE_I32) { 458 return (uint32_t)x >> (y & 31); 459 } 460 return (uint64_t)x >> (y & 63); 461 462 case INDEX_op_sar: 463 if (type == TCG_TYPE_I32) { 464 return (int32_t)x >> (y & 31); 465 } 466 return (int64_t)x >> (y & 63); 467 468 case INDEX_op_rotr: 469 if (type == TCG_TYPE_I32) { 470 return ror32(x, y & 31); 471 } 472 return ror64(x, y & 63); 473 474 case INDEX_op_rotl: 475 if (type == TCG_TYPE_I32) { 476 return rol32(x, y & 31); 477 } 478 return rol64(x, y & 63); 479 480 case INDEX_op_not: 481 case INDEX_op_not_vec: 482 return ~x; 483 484 case INDEX_op_neg: 485 return -x; 486 487 case INDEX_op_andc: 488 case INDEX_op_andc_vec: 489 return x & ~y; 490 491 case INDEX_op_orc: 492 case INDEX_op_orc_vec: 493 return x | ~y; 494 495 case INDEX_op_eqv: 496 case INDEX_op_eqv_vec: 497 return ~(x ^ y); 498 499 case INDEX_op_nand: 500 case INDEX_op_nand_vec: 501 return ~(x & y); 502 503 case INDEX_op_nor: 504 case INDEX_op_nor_vec: 505 return ~(x | y); 506 507 case INDEX_op_clz: 508 if (type == TCG_TYPE_I32) { 509 return (uint32_t)x ? 
clz32(x) : y; 510 } 511 return x ? clz64(x) : y; 512 513 case INDEX_op_ctz: 514 if (type == TCG_TYPE_I32) { 515 return (uint32_t)x ? ctz32(x) : y; 516 } 517 return x ? ctz64(x) : y; 518 519 case INDEX_op_ctpop: 520 return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x); 521 522 case INDEX_op_bswap16: 523 x = bswap16(x); 524 return y & TCG_BSWAP_OS ? (int16_t)x : x; 525 526 case INDEX_op_bswap32: 527 x = bswap32(x); 528 return y & TCG_BSWAP_OS ? (int32_t)x : x; 529 530 case INDEX_op_bswap64: 531 return bswap64(x); 532 533 case INDEX_op_ext_i32_i64: 534 return (int32_t)x; 535 536 case INDEX_op_extu_i32_i64: 537 case INDEX_op_extrl_i64_i32: 538 return (uint32_t)x; 539 540 case INDEX_op_extrh_i64_i32: 541 return (uint64_t)x >> 32; 542 543 case INDEX_op_muluh: 544 if (type == TCG_TYPE_I32) { 545 return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; 546 } 547 mulu64(&l64, &h64, x, y); 548 return h64; 549 550 case INDEX_op_mulsh: 551 if (type == TCG_TYPE_I32) { 552 return ((int64_t)(int32_t)x * (int32_t)y) >> 32; 553 } 554 muls64(&l64, &h64, x, y); 555 return h64; 556 557 case INDEX_op_divs: 558 /* Avoid crashing on divide by zero, otherwise undefined. */ 559 if (type == TCG_TYPE_I32) { 560 return (int32_t)x / ((int32_t)y ? : 1); 561 } 562 return (int64_t)x / ((int64_t)y ? : 1); 563 564 case INDEX_op_divu: 565 if (type == TCG_TYPE_I32) { 566 return (uint32_t)x / ((uint32_t)y ? : 1); 567 } 568 return (uint64_t)x / ((uint64_t)y ? : 1); 569 570 case INDEX_op_rems: 571 if (type == TCG_TYPE_I32) { 572 return (int32_t)x % ((int32_t)y ? : 1); 573 } 574 return (int64_t)x % ((int64_t)y ? : 1); 575 576 case INDEX_op_remu: 577 if (type == TCG_TYPE_I32) { 578 return (uint32_t)x % ((uint32_t)y ? : 1); 579 } 580 return (uint64_t)x % ((uint64_t)y ? : 1); 581 582 default: 583 g_assert_not_reached(); 584 } 585 } 586 587 static uint64_t do_constant_folding(TCGOpcode op, TCGType type, 588 uint64_t x, uint64_t y) 589 { 590 uint64_t res = do_constant_folding_2(op, type, x, y); 591 if (type == TCG_TYPE_I32) { 592 res = (int32_t)res; 593 } 594 return res; 595 } 596 597 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) 598 { 599 switch (c) { 600 case TCG_COND_EQ: 601 return x == y; 602 case TCG_COND_NE: 603 return x != y; 604 case TCG_COND_LT: 605 return (int32_t)x < (int32_t)y; 606 case TCG_COND_GE: 607 return (int32_t)x >= (int32_t)y; 608 case TCG_COND_LE: 609 return (int32_t)x <= (int32_t)y; 610 case TCG_COND_GT: 611 return (int32_t)x > (int32_t)y; 612 case TCG_COND_LTU: 613 return x < y; 614 case TCG_COND_GEU: 615 return x >= y; 616 case TCG_COND_LEU: 617 return x <= y; 618 case TCG_COND_GTU: 619 return x > y; 620 case TCG_COND_TSTEQ: 621 return (x & y) == 0; 622 case TCG_COND_TSTNE: 623 return (x & y) != 0; 624 case TCG_COND_ALWAYS: 625 case TCG_COND_NEVER: 626 break; 627 } 628 g_assert_not_reached(); 629 } 630 631 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) 632 { 633 switch (c) { 634 case TCG_COND_EQ: 635 return x == y; 636 case TCG_COND_NE: 637 return x != y; 638 case TCG_COND_LT: 639 return (int64_t)x < (int64_t)y; 640 case TCG_COND_GE: 641 return (int64_t)x >= (int64_t)y; 642 case TCG_COND_LE: 643 return (int64_t)x <= (int64_t)y; 644 case TCG_COND_GT: 645 return (int64_t)x > (int64_t)y; 646 case TCG_COND_LTU: 647 return x < y; 648 case TCG_COND_GEU: 649 return x >= y; 650 case TCG_COND_LEU: 651 return x <= y; 652 case TCG_COND_GTU: 653 return x > y; 654 case TCG_COND_TSTEQ: 655 return (x & y) == 0; 656 case TCG_COND_TSTNE: 657 return (x & y) != 0; 658 case 
TCG_COND_ALWAYS: 659 case TCG_COND_NEVER: 660 break; 661 } 662 g_assert_not_reached(); 663 } 664 665 static int do_constant_folding_cond_eq(TCGCond c) 666 { 667 switch (c) { 668 case TCG_COND_GT: 669 case TCG_COND_LTU: 670 case TCG_COND_LT: 671 case TCG_COND_GTU: 672 case TCG_COND_NE: 673 return 0; 674 case TCG_COND_GE: 675 case TCG_COND_GEU: 676 case TCG_COND_LE: 677 case TCG_COND_LEU: 678 case TCG_COND_EQ: 679 return 1; 680 case TCG_COND_TSTEQ: 681 case TCG_COND_TSTNE: 682 return -1; 683 case TCG_COND_ALWAYS: 684 case TCG_COND_NEVER: 685 break; 686 } 687 g_assert_not_reached(); 688 } 689 690 /* 691 * Return -1 if the condition can't be simplified, 692 * and the result of the condition (0 or 1) if it can. 693 */ 694 static int do_constant_folding_cond(TCGType type, TCGArg x, 695 TCGArg y, TCGCond c) 696 { 697 if (arg_is_const(x) && arg_is_const(y)) { 698 uint64_t xv = arg_info(x)->val; 699 uint64_t yv = arg_info(y)->val; 700 701 switch (type) { 702 case TCG_TYPE_I32: 703 return do_constant_folding_cond_32(xv, yv, c); 704 case TCG_TYPE_I64: 705 return do_constant_folding_cond_64(xv, yv, c); 706 default: 707 /* Only scalar comparisons are optimizable */ 708 return -1; 709 } 710 } else if (args_are_copies(x, y)) { 711 return do_constant_folding_cond_eq(c); 712 } else if (arg_is_const_val(y, 0)) { 713 switch (c) { 714 case TCG_COND_LTU: 715 case TCG_COND_TSTNE: 716 return 0; 717 case TCG_COND_GEU: 718 case TCG_COND_TSTEQ: 719 return 1; 720 default: 721 return -1; 722 } 723 } 724 return -1; 725 } 726 727 /** 728 * swap_commutative: 729 * @dest: TCGArg of the destination argument, or NO_DEST. 730 * @p1: first paired argument 731 * @p2: second paired argument 732 * 733 * If *@p1 is a constant and *@p2 is not, swap. 734 * If *@p2 matches @dest, swap. 735 * Return true if a swap was performed. 736 */ 737 738 #define NO_DEST temp_arg(NULL) 739 740 static int pref_commutative(TempOptInfo *ti) 741 { 742 /* Slight preference for non-zero constants second. */ 743 return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 3 : 2; 744 } 745 746 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) 747 { 748 TCGArg a1 = *p1, a2 = *p2; 749 int sum = 0; 750 sum += pref_commutative(arg_info(a1)); 751 sum -= pref_commutative(arg_info(a2)); 752 753 /* Prefer the constant in second argument, and then the form 754 op a, a, b, which is better handled on non-RISC hosts. */ 755 if (sum > 0 || (sum == 0 && dest == a2)) { 756 *p1 = a2; 757 *p2 = a1; 758 return true; 759 } 760 return false; 761 } 762 763 static bool swap_commutative2(TCGArg *p1, TCGArg *p2) 764 { 765 int sum = 0; 766 sum += pref_commutative(arg_info(p1[0])); 767 sum += pref_commutative(arg_info(p1[1])); 768 sum -= pref_commutative(arg_info(p2[0])); 769 sum -= pref_commutative(arg_info(p2[1])); 770 if (sum > 0) { 771 TCGArg t; 772 t = p1[0], p1[0] = p2[0], p2[0] = t; 773 t = p1[1], p1[1] = p2[1], p2[1] = t; 774 return true; 775 } 776 return false; 777 } 778 779 /* 780 * Return -1 if the condition can't be simplified, 781 * and the result of the condition (0 or 1) if it can. 
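 * Even when -1 is returned, the operands may have been rewritten in
 * place, e.g. TSTNE x,x is canonicalized to NE x,0.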
782 */ 783 static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest, 784 TCGArg *p1, TCGArg *p2, TCGArg *pcond) 785 { 786 TCGCond cond; 787 TempOptInfo *i1; 788 bool swap; 789 int r; 790 791 swap = swap_commutative(dest, p1, p2); 792 cond = *pcond; 793 if (swap) { 794 *pcond = cond = tcg_swap_cond(cond); 795 } 796 797 r = do_constant_folding_cond(ctx->type, *p1, *p2, cond); 798 if (r >= 0) { 799 return r; 800 } 801 if (!is_tst_cond(cond)) { 802 return -1; 803 } 804 805 i1 = arg_info(*p1); 806 807 /* 808 * TSTNE x,x -> NE x,0 809 * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x 810 */ 811 if (args_are_copies(*p1, *p2) || 812 (arg_is_const(*p2) && (i1->z_mask & ~arg_info(*p2)->val) == 0)) { 813 *p2 = arg_new_constant(ctx, 0); 814 *pcond = tcg_tst_eqne_cond(cond); 815 return -1; 816 } 817 818 /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */ 819 if (arg_is_const(*p2) && (arg_info(*p2)->val & ~i1->s_mask) == 0) { 820 *p2 = arg_new_constant(ctx, 0); 821 *pcond = tcg_tst_ltge_cond(cond); 822 return -1; 823 } 824 825 /* Expand to AND with a temporary if no backend support. */ 826 if (!TCG_TARGET_HAS_tst) { 827 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3); 828 TCGArg tmp = arg_new_temp(ctx); 829 830 op2->args[0] = tmp; 831 op2->args[1] = *p1; 832 op2->args[2] = *p2; 833 834 *p1 = tmp; 835 *p2 = arg_new_constant(ctx, 0); 836 *pcond = tcg_tst_eqne_cond(cond); 837 } 838 return -1; 839 } 840 841 static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args) 842 { 843 TCGArg al, ah, bl, bh; 844 TCGCond c; 845 bool swap; 846 int r; 847 848 swap = swap_commutative2(args, args + 2); 849 c = args[4]; 850 if (swap) { 851 args[4] = c = tcg_swap_cond(c); 852 } 853 854 al = args[0]; 855 ah = args[1]; 856 bl = args[2]; 857 bh = args[3]; 858 859 if (arg_is_const(bl) && arg_is_const(bh)) { 860 tcg_target_ulong blv = arg_info(bl)->val; 861 tcg_target_ulong bhv = arg_info(bh)->val; 862 uint64_t b = deposit64(blv, 32, 32, bhv); 863 864 if (arg_is_const(al) && arg_is_const(ah)) { 865 tcg_target_ulong alv = arg_info(al)->val; 866 tcg_target_ulong ahv = arg_info(ah)->val; 867 uint64_t a = deposit64(alv, 32, 32, ahv); 868 869 r = do_constant_folding_cond_64(a, b, c); 870 if (r >= 0) { 871 return r; 872 } 873 } 874 875 if (b == 0) { 876 switch (c) { 877 case TCG_COND_LTU: 878 case TCG_COND_TSTNE: 879 return 0; 880 case TCG_COND_GEU: 881 case TCG_COND_TSTEQ: 882 return 1; 883 default: 884 break; 885 } 886 } 887 888 /* TSTNE x,-1 -> NE x,0 */ 889 if (b == -1 && is_tst_cond(c)) { 890 args[3] = args[2] = arg_new_constant(ctx, 0); 891 args[4] = tcg_tst_eqne_cond(c); 892 return -1; 893 } 894 895 /* TSTNE x,sign -> LT x,0 */ 896 if (b == INT64_MIN && is_tst_cond(c)) { 897 /* bl must be 0, so copy that to bh */ 898 args[3] = bl; 899 args[4] = tcg_tst_ltge_cond(c); 900 return -1; 901 } 902 } 903 904 if (args_are_copies(al, bl) && args_are_copies(ah, bh)) { 905 r = do_constant_folding_cond_eq(c); 906 if (r >= 0) { 907 return r; 908 } 909 910 /* TSTNE x,x -> NE x,0 */ 911 if (is_tst_cond(c)) { 912 args[3] = args[2] = arg_new_constant(ctx, 0); 913 args[4] = tcg_tst_eqne_cond(c); 914 return -1; 915 } 916 } 917 918 /* Expand to AND with a temporary if no backend support. 
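     * e.g. a double-word TSTNE al,ah,bl,bh becomes
     *      and t1,al,bl ; and t2,ah,bh ; NE t1,t2,0,0.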
*/ 919 if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) { 920 TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3); 921 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3); 922 TCGArg t1 = arg_new_temp(ctx); 923 TCGArg t2 = arg_new_temp(ctx); 924 925 op1->args[0] = t1; 926 op1->args[1] = al; 927 op1->args[2] = bl; 928 op2->args[0] = t2; 929 op2->args[1] = ah; 930 op2->args[2] = bh; 931 932 args[0] = t1; 933 args[1] = t2; 934 args[3] = args[2] = arg_new_constant(ctx, 0); 935 args[4] = tcg_tst_eqne_cond(c); 936 } 937 return -1; 938 } 939 940 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args) 941 { 942 for (int i = 0; i < nb_args; i++) { 943 TCGTemp *ts = arg_temp(op->args[i]); 944 init_ts_info(ctx, ts); 945 } 946 } 947 948 static void copy_propagate(OptContext *ctx, TCGOp *op, 949 int nb_oargs, int nb_iargs) 950 { 951 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 952 TCGTemp *ts = arg_temp(op->args[i]); 953 if (ts_is_copy(ts)) { 954 op->args[i] = temp_arg(find_better_copy(ts)); 955 } 956 } 957 } 958 959 static void finish_bb(OptContext *ctx) 960 { 961 /* We only optimize memory barriers across basic blocks. */ 962 ctx->prev_mb = NULL; 963 } 964 965 static void finish_ebb(OptContext *ctx) 966 { 967 finish_bb(ctx); 968 /* We only optimize across extended basic blocks. */ 969 memset(&ctx->temps_used, 0, sizeof(ctx->temps_used)); 970 remove_mem_copy_all(ctx); 971 } 972 973 static bool finish_folding(OptContext *ctx, TCGOp *op) 974 { 975 const TCGOpDef *def = &tcg_op_defs[op->opc]; 976 int i, nb_oargs; 977 978 nb_oargs = def->nb_oargs; 979 for (i = 0; i < nb_oargs; i++) { 980 TCGTemp *ts = arg_temp(op->args[i]); 981 reset_ts(ctx, ts); 982 } 983 return true; 984 } 985 986 /* 987 * The fold_* functions return true when processing is complete, 988 * usually by folding the operation to a constant or to a copy, 989 * and calling tcg_opt_gen_{mov,movi}. They may do other things, 990 * like collect information about the value produced, for use in 991 * optimizing a subsequent operation. 992 * 993 * These first fold_* functions are all helpers, used by other 994 * folders for more specific operations. 995 */ 996 997 static bool fold_const1(OptContext *ctx, TCGOp *op) 998 { 999 if (arg_is_const(op->args[1])) { 1000 uint64_t t; 1001 1002 t = arg_info(op->args[1])->val; 1003 t = do_constant_folding(op->opc, ctx->type, t, 0); 1004 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1005 } 1006 return false; 1007 } 1008 1009 static bool fold_const2(OptContext *ctx, TCGOp *op) 1010 { 1011 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1012 uint64_t t1 = arg_info(op->args[1])->val; 1013 uint64_t t2 = arg_info(op->args[2])->val; 1014 1015 t1 = do_constant_folding(op->opc, ctx->type, t1, t2); 1016 return tcg_opt_gen_movi(ctx, op, op->args[0], t1); 1017 } 1018 return false; 1019 } 1020 1021 static bool fold_commutative(OptContext *ctx, TCGOp *op) 1022 { 1023 swap_commutative(op->args[0], &op->args[1], &op->args[2]); 1024 return false; 1025 } 1026 1027 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op) 1028 { 1029 swap_commutative(op->args[0], &op->args[1], &op->args[2]); 1030 return fold_const2(ctx, op); 1031 } 1032 1033 /* 1034 * Record "zero" and "sign" masks for the single output of @op. 1035 * See TempOptInfo definition of z_mask and s_mask. 1036 * If z_mask allows, fold the output to constant zero. 1037 * The passed s_mask may be augmented by z_mask. 
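 * For example, a z_mask with only the low 8 bits set yields a canonical
 * s_mask with (at least) the top 56 bits set, even if 0 was passed in.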
 */
static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
                          uint64_t z_mask, int64_t s_mask)
{
    const TCGOpDef *def = &tcg_op_defs[op->opc];
    TCGTemp *ts;
    TempOptInfo *ti;
    int rep;

    /* Only single-output opcodes are supported here. */
    tcg_debug_assert(def->nb_oargs == 1);

    /*
     * 32-bit ops generate 32-bit results, which for the purpose of
     * simplifying tcg are sign-extended. Certainly that's how we
     * represent our constants elsewhere. Note that the bits will
     * be reset properly for a 64-bit value when encountering the
     * type changing opcodes.
     */
    if (ctx->type == TCG_TYPE_I32) {
        z_mask = (int32_t)z_mask;
        s_mask |= INT32_MIN;
    }

    if (z_mask == 0) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
    }

    ts = arg_temp(op->args[0]);
    reset_ts(ctx, ts);

    ti = ts_info(ts);
    ti->z_mask = z_mask;

    /* Canonicalize s_mask and incorporate data from z_mask. */
    rep = clz64(~s_mask);
    rep = MAX(rep, clz64(z_mask));
    rep = MAX(rep - 1, 0);
    ti->s_mask = INT64_MIN >> rep;

    return true;
}

static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
{
    return fold_masks_zs(ctx, op, z_mask, 0);
}

static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
{
    return fold_masks_zs(ctx, op, -1, s_mask);
}

/*
 * An "affected" mask bit is 0 if and only if the result is identical
 * to the first input. Thus if the entire mask is 0, the operation
 * is equivalent to a copy.
 */
static bool fold_affected_mask(OptContext *ctx, TCGOp *op, uint64_t a_mask)
{
    if (ctx->type == TCG_TYPE_I32) {
        a_mask = (uint32_t)a_mask;
    }
    if (a_mask == 0) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }
    return false;
}

/*
 * Convert @op to NOT, if NOT is supported by the host.
 * Return true if the conversion is successful, which will still
 * indicate that the processing is complete.
 */
static bool fold_not(OptContext *ctx, TCGOp *op);
static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
{
    TCGOpcode not_op;
    bool have_not;

    switch (ctx->type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        not_op = INDEX_op_not;
        have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0);
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        not_op = INDEX_op_not_vec;
        have_not = TCG_TARGET_HAS_not_vec;
        break;
    default:
        g_assert_not_reached();
    }
    if (have_not) {
        op->opc = not_op;
        op->args[1] = op->args[idx];
        return fold_not(ctx, op);
    }
    return false;
}

/* If the binary operation has first argument @i, fold to @i. */
static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[1], i)) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has first argument @i, fold to NOT. */
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[1], i)) {
        return fold_to_not(ctx, op, 2);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to @i.
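 * e.g. both "and r,x,0" and "mul r,x,0" fold to "movi r,0".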
*/ 1160 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i) 1161 { 1162 if (arg_is_const_val(op->args[2], i)) { 1163 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 1164 } 1165 return false; 1166 } 1167 1168 /* If the binary operation has second argument @i, fold to identity. */ 1169 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i) 1170 { 1171 if (arg_is_const_val(op->args[2], i)) { 1172 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 1173 } 1174 return false; 1175 } 1176 1177 /* If the binary operation has second argument @i, fold to NOT. */ 1178 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i) 1179 { 1180 if (arg_is_const_val(op->args[2], i)) { 1181 return fold_to_not(ctx, op, 1); 1182 } 1183 return false; 1184 } 1185 1186 /* If the binary operation has both arguments equal, fold to @i. */ 1187 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i) 1188 { 1189 if (args_are_copies(op->args[1], op->args[2])) { 1190 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 1191 } 1192 return false; 1193 } 1194 1195 /* If the binary operation has both arguments equal, fold to identity. */ 1196 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op) 1197 { 1198 if (args_are_copies(op->args[1], op->args[2])) { 1199 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 1200 } 1201 return false; 1202 } 1203 1204 /* 1205 * These outermost fold_<op> functions are sorted alphabetically. 1206 * 1207 * The ordering of the transformations should be: 1208 * 1) those that produce a constant 1209 * 2) those that produce a copy 1210 * 3) those that produce information about the result value. 1211 */ 1212 1213 static bool fold_addco(OptContext *ctx, TCGOp *op); 1214 static bool fold_or(OptContext *ctx, TCGOp *op); 1215 static bool fold_orc(OptContext *ctx, TCGOp *op); 1216 static bool fold_subbo(OptContext *ctx, TCGOp *op); 1217 static bool fold_xor(OptContext *ctx, TCGOp *op); 1218 1219 static bool fold_add(OptContext *ctx, TCGOp *op) 1220 { 1221 if (fold_const2_commutative(ctx, op) || 1222 fold_xi_to_x(ctx, op, 0)) { 1223 return true; 1224 } 1225 return finish_folding(ctx, op); 1226 } 1227 1228 /* We cannot as yet do_constant_folding with vectors. */ 1229 static bool fold_add_vec(OptContext *ctx, TCGOp *op) 1230 { 1231 if (fold_commutative(ctx, op) || 1232 fold_xi_to_x(ctx, op, 0)) { 1233 return true; 1234 } 1235 return finish_folding(ctx, op); 1236 } 1237 1238 static void squash_prev_carryout(OptContext *ctx, TCGOp *op) 1239 { 1240 TempOptInfo *t2; 1241 1242 op = QTAILQ_PREV(op, link); 1243 switch (op->opc) { 1244 case INDEX_op_addco: 1245 op->opc = INDEX_op_add; 1246 fold_add(ctx, op); 1247 break; 1248 case INDEX_op_addcio: 1249 op->opc = INDEX_op_addci; 1250 break; 1251 case INDEX_op_addc1o: 1252 op->opc = INDEX_op_add; 1253 t2 = arg_info(op->args[2]); 1254 if (ti_is_const(t2)) { 1255 op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1); 1256 /* Perform other constant folding, if needed. 
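             * e.g. if the original constant was -1, the new constant is 0
             * and fold_add removes the addition entirely.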
*/ 1257 fold_add(ctx, op); 1258 } else { 1259 TCGArg ret = op->args[0]; 1260 op = opt_insert_after(ctx, op, INDEX_op_add, 3); 1261 op->args[0] = ret; 1262 op->args[1] = ret; 1263 op->args[2] = arg_new_constant(ctx, 1); 1264 } 1265 break; 1266 default: 1267 g_assert_not_reached(); 1268 } 1269 } 1270 1271 static bool fold_addci(OptContext *ctx, TCGOp *op) 1272 { 1273 fold_commutative(ctx, op); 1274 1275 if (ctx->carry_state < 0) { 1276 return finish_folding(ctx, op); 1277 } 1278 1279 squash_prev_carryout(ctx, op); 1280 op->opc = INDEX_op_add; 1281 1282 if (ctx->carry_state > 0) { 1283 TempOptInfo *t2 = arg_info(op->args[2]); 1284 1285 /* 1286 * Propagate the known carry-in into a constant, if possible. 1287 * Otherwise emit a second add +1. 1288 */ 1289 if (ti_is_const(t2)) { 1290 op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1); 1291 } else { 1292 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3); 1293 1294 op2->args[0] = op->args[0]; 1295 op2->args[1] = op->args[1]; 1296 op2->args[2] = op->args[2]; 1297 fold_add(ctx, op2); 1298 1299 op->args[1] = op->args[0]; 1300 op->args[2] = arg_new_constant(ctx, 1); 1301 } 1302 } 1303 1304 ctx->carry_state = -1; 1305 return fold_add(ctx, op); 1306 } 1307 1308 static bool fold_addcio(OptContext *ctx, TCGOp *op) 1309 { 1310 TempOptInfo *t1, *t2; 1311 int carry_out = -1; 1312 uint64_t sum, max; 1313 1314 fold_commutative(ctx, op); 1315 t1 = arg_info(op->args[1]); 1316 t2 = arg_info(op->args[2]); 1317 1318 /* 1319 * The z_mask value is >= the maximum value that can be represented 1320 * with the known zero bits. So adding the z_mask values will not 1321 * overflow if and only if the true values cannot overflow. 1322 */ 1323 if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) && 1324 !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) { 1325 carry_out = 0; 1326 } 1327 1328 if (ctx->carry_state < 0) { 1329 ctx->carry_state = carry_out; 1330 return finish_folding(ctx, op); 1331 } 1332 1333 squash_prev_carryout(ctx, op); 1334 if (ctx->carry_state == 0) { 1335 goto do_addco; 1336 } 1337 1338 /* Propagate the known carry-in into a constant, if possible. */ 1339 max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX; 1340 if (ti_is_const(t2)) { 1341 uint64_t v = ti_const_val(t2) & max; 1342 if (v < max) { 1343 op->args[2] = arg_new_constant(ctx, v + 1); 1344 goto do_addco; 1345 } 1346 /* max + known carry in produces known carry out. */ 1347 carry_out = 1; 1348 } 1349 if (ti_is_const(t1)) { 1350 uint64_t v = ti_const_val(t1) & max; 1351 if (v < max) { 1352 op->args[1] = arg_new_constant(ctx, v + 1); 1353 goto do_addco; 1354 } 1355 carry_out = 1; 1356 } 1357 1358 /* Adjust the opcode to remember the known carry-in. */ 1359 op->opc = INDEX_op_addc1o; 1360 ctx->carry_state = carry_out; 1361 return finish_folding(ctx, op); 1362 1363 do_addco: 1364 op->opc = INDEX_op_addco; 1365 return fold_addco(ctx, op); 1366 } 1367 1368 static bool fold_addco(OptContext *ctx, TCGOp *op) 1369 { 1370 TempOptInfo *t1, *t2; 1371 int carry_out = -1; 1372 uint64_t ign; 1373 1374 fold_commutative(ctx, op); 1375 t1 = arg_info(op->args[1]); 1376 t2 = arg_info(op->args[2]); 1377 1378 if (ti_is_const(t2)) { 1379 uint64_t v2 = ti_const_val(t2); 1380 1381 if (ti_is_const(t1)) { 1382 uint64_t v1 = ti_const_val(t1); 1383 /* Given sign-extension of z_mask for I32, we need not truncate. 
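             * A carry out of bit 31 of the 32-bit values coincides with a
             * carry out of bit 63 of their sign-extended forms.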
*/ 1384 carry_out = uadd64_overflow(v1, v2, &ign); 1385 } else if (v2 == 0) { 1386 carry_out = 0; 1387 } 1388 } else { 1389 /* 1390 * The z_mask value is >= the maximum value that can be represented 1391 * with the known zero bits. So adding the z_mask values will not 1392 * overflow if and only if the true values cannot overflow. 1393 */ 1394 if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) { 1395 carry_out = 0; 1396 } 1397 } 1398 ctx->carry_state = carry_out; 1399 return finish_folding(ctx, op); 1400 } 1401 1402 static bool fold_and(OptContext *ctx, TCGOp *op) 1403 { 1404 uint64_t z1, z2, z_mask, s_mask; 1405 TempOptInfo *t1, *t2; 1406 1407 if (fold_const2_commutative(ctx, op) || 1408 fold_xi_to_i(ctx, op, 0) || 1409 fold_xi_to_x(ctx, op, -1) || 1410 fold_xx_to_x(ctx, op)) { 1411 return true; 1412 } 1413 1414 t1 = arg_info(op->args[1]); 1415 t2 = arg_info(op->args[2]); 1416 z1 = t1->z_mask; 1417 z2 = t2->z_mask; 1418 1419 /* 1420 * Known-zeros does not imply known-ones. Therefore unless 1421 * arg2 is constant, we can't infer affected bits from it. 1422 */ 1423 if (ti_is_const(t2) && fold_affected_mask(ctx, op, z1 & ~z2)) { 1424 return true; 1425 } 1426 1427 z_mask = z1 & z2; 1428 1429 /* 1430 * Sign repetitions are perforce all identical, whether they are 1 or 0. 1431 * Bitwise operations preserve the relative quantity of the repetitions. 1432 */ 1433 s_mask = t1->s_mask & t2->s_mask; 1434 1435 return fold_masks_zs(ctx, op, z_mask, s_mask); 1436 } 1437 1438 static bool fold_andc(OptContext *ctx, TCGOp *op) 1439 { 1440 uint64_t z_mask, s_mask; 1441 TempOptInfo *t1, *t2; 1442 1443 if (fold_const2(ctx, op) || 1444 fold_xx_to_i(ctx, op, 0) || 1445 fold_xi_to_x(ctx, op, 0) || 1446 fold_ix_to_not(ctx, op, -1)) { 1447 return true; 1448 } 1449 1450 t1 = arg_info(op->args[1]); 1451 t2 = arg_info(op->args[2]); 1452 z_mask = t1->z_mask; 1453 1454 if (ti_is_const(t2)) { 1455 /* Fold andc r,x,i to and r,x,~i. */ 1456 switch (ctx->type) { 1457 case TCG_TYPE_I32: 1458 case TCG_TYPE_I64: 1459 op->opc = INDEX_op_and; 1460 break; 1461 case TCG_TYPE_V64: 1462 case TCG_TYPE_V128: 1463 case TCG_TYPE_V256: 1464 op->opc = INDEX_op_and_vec; 1465 break; 1466 default: 1467 g_assert_not_reached(); 1468 } 1469 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); 1470 return fold_and(ctx, op); 1471 } 1472 1473 /* 1474 * Known-zeros does not imply known-ones. Therefore unless 1475 * arg2 is constant, we can't infer anything from it. 1476 */ 1477 if (ti_is_const(t2)) { 1478 uint64_t v2 = ti_const_val(t2); 1479 if (fold_affected_mask(ctx, op, z_mask & v2)) { 1480 return true; 1481 } 1482 z_mask &= ~v2; 1483 } 1484 1485 s_mask = t1->s_mask & t2->s_mask; 1486 return fold_masks_zs(ctx, op, z_mask, s_mask); 1487 } 1488 1489 static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op) 1490 { 1491 /* If true and false values are the same, eliminate the cmp. 
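     * (bitsel computes (sel & tv) | (~sel & fv), which is simply tv
     *  whenever tv == fv, regardless of the selector.)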
*/ 1492 if (args_are_copies(op->args[2], op->args[3])) { 1493 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]); 1494 } 1495 1496 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { 1497 uint64_t tv = arg_info(op->args[2])->val; 1498 uint64_t fv = arg_info(op->args[3])->val; 1499 1500 if (tv == -1 && fv == 0) { 1501 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 1502 } 1503 if (tv == 0 && fv == -1) { 1504 if (TCG_TARGET_HAS_not_vec) { 1505 op->opc = INDEX_op_not_vec; 1506 return fold_not(ctx, op); 1507 } else { 1508 op->opc = INDEX_op_xor_vec; 1509 op->args[2] = arg_new_constant(ctx, -1); 1510 return fold_xor(ctx, op); 1511 } 1512 } 1513 } 1514 if (arg_is_const(op->args[2])) { 1515 uint64_t tv = arg_info(op->args[2])->val; 1516 if (tv == -1) { 1517 op->opc = INDEX_op_or_vec; 1518 op->args[2] = op->args[3]; 1519 return fold_or(ctx, op); 1520 } 1521 if (tv == 0 && TCG_TARGET_HAS_andc_vec) { 1522 op->opc = INDEX_op_andc_vec; 1523 op->args[2] = op->args[1]; 1524 op->args[1] = op->args[3]; 1525 return fold_andc(ctx, op); 1526 } 1527 } 1528 if (arg_is_const(op->args[3])) { 1529 uint64_t fv = arg_info(op->args[3])->val; 1530 if (fv == 0) { 1531 op->opc = INDEX_op_and_vec; 1532 return fold_and(ctx, op); 1533 } 1534 if (fv == -1 && TCG_TARGET_HAS_orc_vec) { 1535 op->opc = INDEX_op_orc_vec; 1536 op->args[2] = op->args[1]; 1537 op->args[1] = op->args[3]; 1538 return fold_orc(ctx, op); 1539 } 1540 } 1541 return finish_folding(ctx, op); 1542 } 1543 1544 static bool fold_brcond(OptContext *ctx, TCGOp *op) 1545 { 1546 int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0], 1547 &op->args[1], &op->args[2]); 1548 if (i == 0) { 1549 tcg_op_remove(ctx->tcg, op); 1550 return true; 1551 } 1552 if (i > 0) { 1553 op->opc = INDEX_op_br; 1554 op->args[0] = op->args[3]; 1555 finish_ebb(ctx); 1556 } else { 1557 finish_bb(ctx); 1558 } 1559 return true; 1560 } 1561 1562 static bool fold_brcond2(OptContext *ctx, TCGOp *op) 1563 { 1564 TCGCond cond; 1565 TCGArg label; 1566 int i, inv = 0; 1567 1568 i = do_constant_folding_cond2(ctx, op, &op->args[0]); 1569 cond = op->args[4]; 1570 label = op->args[5]; 1571 if (i >= 0) { 1572 goto do_brcond_const; 1573 } 1574 1575 switch (cond) { 1576 case TCG_COND_LT: 1577 case TCG_COND_GE: 1578 /* 1579 * Simplify LT/GE comparisons vs zero to a single compare 1580 * vs the high word of the input. 1581 */ 1582 if (arg_is_const_val(op->args[2], 0) && 1583 arg_is_const_val(op->args[3], 0)) { 1584 goto do_brcond_high; 1585 } 1586 break; 1587 1588 case TCG_COND_NE: 1589 inv = 1; 1590 QEMU_FALLTHROUGH; 1591 case TCG_COND_EQ: 1592 /* 1593 * Simplify EQ/NE comparisons where one of the pairs 1594 * can be simplified. 
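         * If either pair is known unequal, the double-word comparison is
         * decided outright; if a pair is known equal, only the other pair
         * still needs to be tested.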
1595 */ 1596 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0], 1597 op->args[2], cond); 1598 switch (i ^ inv) { 1599 case 0: 1600 goto do_brcond_const; 1601 case 1: 1602 goto do_brcond_high; 1603 } 1604 1605 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1], 1606 op->args[3], cond); 1607 switch (i ^ inv) { 1608 case 0: 1609 goto do_brcond_const; 1610 case 1: 1611 goto do_brcond_low; 1612 } 1613 break; 1614 1615 case TCG_COND_TSTEQ: 1616 case TCG_COND_TSTNE: 1617 if (arg_is_const_val(op->args[2], 0)) { 1618 goto do_brcond_high; 1619 } 1620 if (arg_is_const_val(op->args[3], 0)) { 1621 goto do_brcond_low; 1622 } 1623 break; 1624 1625 default: 1626 break; 1627 1628 do_brcond_low: 1629 op->opc = INDEX_op_brcond; 1630 op->args[1] = op->args[2]; 1631 op->args[2] = cond; 1632 op->args[3] = label; 1633 return fold_brcond(ctx, op); 1634 1635 do_brcond_high: 1636 op->opc = INDEX_op_brcond; 1637 op->args[0] = op->args[1]; 1638 op->args[1] = op->args[3]; 1639 op->args[2] = cond; 1640 op->args[3] = label; 1641 return fold_brcond(ctx, op); 1642 1643 do_brcond_const: 1644 if (i == 0) { 1645 tcg_op_remove(ctx->tcg, op); 1646 return true; 1647 } 1648 op->opc = INDEX_op_br; 1649 op->args[0] = label; 1650 finish_ebb(ctx); 1651 return true; 1652 } 1653 1654 finish_bb(ctx); 1655 return true; 1656 } 1657 1658 static bool fold_bswap(OptContext *ctx, TCGOp *op) 1659 { 1660 uint64_t z_mask, s_mask, sign; 1661 TempOptInfo *t1 = arg_info(op->args[1]); 1662 1663 if (ti_is_const(t1)) { 1664 return tcg_opt_gen_movi(ctx, op, op->args[0], 1665 do_constant_folding(op->opc, ctx->type, 1666 ti_const_val(t1), 1667 op->args[2])); 1668 } 1669 1670 z_mask = t1->z_mask; 1671 switch (op->opc) { 1672 case INDEX_op_bswap16: 1673 z_mask = bswap16(z_mask); 1674 sign = INT16_MIN; 1675 break; 1676 case INDEX_op_bswap32: 1677 z_mask = bswap32(z_mask); 1678 sign = INT32_MIN; 1679 break; 1680 case INDEX_op_bswap64: 1681 z_mask = bswap64(z_mask); 1682 sign = INT64_MIN; 1683 break; 1684 default: 1685 g_assert_not_reached(); 1686 } 1687 1688 s_mask = 0; 1689 switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) { 1690 case TCG_BSWAP_OZ: 1691 break; 1692 case TCG_BSWAP_OS: 1693 /* If the sign bit may be 1, force all the bits above to 1. */ 1694 if (z_mask & sign) { 1695 z_mask |= sign; 1696 } 1697 /* The value and therefore s_mask is explicitly sign-extended. */ 1698 s_mask = sign; 1699 break; 1700 default: 1701 /* The high bits are undefined: force all bits above the sign to 1. */ 1702 z_mask |= sign << 1; 1703 break; 1704 } 1705 1706 return fold_masks_zs(ctx, op, z_mask, s_mask); 1707 } 1708 1709 static bool fold_call(OptContext *ctx, TCGOp *op) 1710 { 1711 TCGContext *s = ctx->tcg; 1712 int nb_oargs = TCGOP_CALLO(op); 1713 int nb_iargs = TCGOP_CALLI(op); 1714 int flags, i; 1715 1716 init_arguments(ctx, op, nb_oargs + nb_iargs); 1717 copy_propagate(ctx, op, nb_oargs, nb_iargs); 1718 1719 /* If the function reads or writes globals, reset temp data. */ 1720 flags = tcg_call_flags(op); 1721 if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) { 1722 int nb_globals = s->nb_globals; 1723 1724 for (i = 0; i < nb_globals; i++) { 1725 if (test_bit(i, ctx->temps_used.l)) { 1726 reset_ts(ctx, &ctx->tcg->temps[i]); 1727 } 1728 } 1729 } 1730 1731 /* If the function has side effects, reset mem data. */ 1732 if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) { 1733 remove_mem_copy_all(ctx); 1734 } 1735 1736 /* Reset temp data for outputs. 
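     * (Whatever constant or copy information was known about these temps
     *  is clobbered by the call.)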
*/ 1737 for (i = 0; i < nb_oargs; i++) { 1738 reset_temp(ctx, op->args[i]); 1739 } 1740 1741 /* Stop optimizing MB across calls. */ 1742 ctx->prev_mb = NULL; 1743 return true; 1744 } 1745 1746 static bool fold_cmp_vec(OptContext *ctx, TCGOp *op) 1747 { 1748 /* Canonicalize the comparison to put immediate second. */ 1749 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) { 1750 op->args[3] = tcg_swap_cond(op->args[3]); 1751 } 1752 return finish_folding(ctx, op); 1753 } 1754 1755 static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op) 1756 { 1757 /* If true and false values are the same, eliminate the cmp. */ 1758 if (args_are_copies(op->args[3], op->args[4])) { 1759 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]); 1760 } 1761 1762 /* Canonicalize the comparison to put immediate second. */ 1763 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) { 1764 op->args[5] = tcg_swap_cond(op->args[5]); 1765 } 1766 /* 1767 * Canonicalize the "false" input reg to match the destination, 1768 * so that the tcg backend can implement "move if true". 1769 */ 1770 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) { 1771 op->args[5] = tcg_invert_cond(op->args[5]); 1772 } 1773 return finish_folding(ctx, op); 1774 } 1775 1776 static bool fold_count_zeros(OptContext *ctx, TCGOp *op) 1777 { 1778 uint64_t z_mask, s_mask; 1779 TempOptInfo *t1 = arg_info(op->args[1]); 1780 TempOptInfo *t2 = arg_info(op->args[2]); 1781 1782 if (ti_is_const(t1)) { 1783 uint64_t t = ti_const_val(t1); 1784 1785 if (t != 0) { 1786 t = do_constant_folding(op->opc, ctx->type, t, 0); 1787 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1788 } 1789 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]); 1790 } 1791 1792 switch (ctx->type) { 1793 case TCG_TYPE_I32: 1794 z_mask = 31; 1795 break; 1796 case TCG_TYPE_I64: 1797 z_mask = 63; 1798 break; 1799 default: 1800 g_assert_not_reached(); 1801 } 1802 s_mask = ~z_mask; 1803 z_mask |= t2->z_mask; 1804 s_mask &= t2->s_mask; 1805 1806 return fold_masks_zs(ctx, op, z_mask, s_mask); 1807 } 1808 1809 static bool fold_ctpop(OptContext *ctx, TCGOp *op) 1810 { 1811 uint64_t z_mask; 1812 1813 if (fold_const1(ctx, op)) { 1814 return true; 1815 } 1816 1817 switch (ctx->type) { 1818 case TCG_TYPE_I32: 1819 z_mask = 32 | 31; 1820 break; 1821 case TCG_TYPE_I64: 1822 z_mask = 64 | 63; 1823 break; 1824 default: 1825 g_assert_not_reached(); 1826 } 1827 return fold_masks_z(ctx, op, z_mask); 1828 } 1829 1830 static bool fold_deposit(OptContext *ctx, TCGOp *op) 1831 { 1832 TempOptInfo *t1 = arg_info(op->args[1]); 1833 TempOptInfo *t2 = arg_info(op->args[2]); 1834 int ofs = op->args[3]; 1835 int len = op->args[4]; 1836 int width = 8 * tcg_type_size(ctx->type); 1837 uint64_t z_mask, s_mask; 1838 1839 if (ti_is_const(t1) && ti_is_const(t2)) { 1840 return tcg_opt_gen_movi(ctx, op, op->args[0], 1841 deposit64(ti_const_val(t1), ofs, len, 1842 ti_const_val(t2))); 1843 } 1844 1845 /* Inserting a value into zero at offset 0. */ 1846 if (ti_is_const_val(t1, 0) && ofs == 0) { 1847 uint64_t mask = MAKE_64BIT_MASK(0, len); 1848 1849 op->opc = INDEX_op_and; 1850 op->args[1] = op->args[2]; 1851 op->args[2] = arg_new_constant(ctx, mask); 1852 return fold_and(ctx, op); 1853 } 1854 1855 /* Inserting zero into a value. 
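     * e.g.  deposit r,x,0,8,8  ->  and r,x,0xffffffffffff00ff.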
*/ 1856 if (ti_is_const_val(t2, 0)) { 1857 uint64_t mask = deposit64(-1, ofs, len, 0); 1858 1859 op->opc = INDEX_op_and; 1860 op->args[2] = arg_new_constant(ctx, mask); 1861 return fold_and(ctx, op); 1862 } 1863 1864 /* The s_mask from the top portion of the deposit is still valid. */ 1865 if (ofs + len == width) { 1866 s_mask = t2->s_mask << ofs; 1867 } else { 1868 s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len); 1869 } 1870 1871 z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask); 1872 return fold_masks_zs(ctx, op, z_mask, s_mask); 1873 } 1874 1875 static bool fold_divide(OptContext *ctx, TCGOp *op) 1876 { 1877 if (fold_const2(ctx, op) || 1878 fold_xi_to_x(ctx, op, 1)) { 1879 return true; 1880 } 1881 return finish_folding(ctx, op); 1882 } 1883 1884 static bool fold_dup(OptContext *ctx, TCGOp *op) 1885 { 1886 if (arg_is_const(op->args[1])) { 1887 uint64_t t = arg_info(op->args[1])->val; 1888 t = dup_const(TCGOP_VECE(op), t); 1889 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1890 } 1891 return finish_folding(ctx, op); 1892 } 1893 1894 static bool fold_dup2(OptContext *ctx, TCGOp *op) 1895 { 1896 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1897 uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32, 1898 arg_info(op->args[2])->val); 1899 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1900 } 1901 1902 if (args_are_copies(op->args[1], op->args[2])) { 1903 op->opc = INDEX_op_dup_vec; 1904 TCGOP_VECE(op) = MO_32; 1905 } 1906 return finish_folding(ctx, op); 1907 } 1908 1909 static bool fold_eqv(OptContext *ctx, TCGOp *op) 1910 { 1911 uint64_t s_mask; 1912 TempOptInfo *t1, *t2; 1913 1914 if (fold_const2_commutative(ctx, op) || 1915 fold_xi_to_x(ctx, op, -1) || 1916 fold_xi_to_not(ctx, op, 0)) { 1917 return true; 1918 } 1919 1920 t2 = arg_info(op->args[2]); 1921 if (ti_is_const(t2)) { 1922 /* Fold eqv r,x,i to xor r,x,~i. 
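         * (eqv is ~(x ^ i), and ~(x ^ i) == x ^ ~i, so the inversion is
         *  folded into the constant.)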
*/ 1923 switch (ctx->type) { 1924 case TCG_TYPE_I32: 1925 case TCG_TYPE_I64: 1926 op->opc = INDEX_op_xor; 1927 break; 1928 case TCG_TYPE_V64: 1929 case TCG_TYPE_V128: 1930 case TCG_TYPE_V256: 1931 op->opc = INDEX_op_xor_vec; 1932 break; 1933 default: 1934 g_assert_not_reached(); 1935 } 1936 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); 1937 return fold_xor(ctx, op); 1938 } 1939 1940 t1 = arg_info(op->args[1]); 1941 s_mask = t1->s_mask & t2->s_mask; 1942 return fold_masks_s(ctx, op, s_mask); 1943 } 1944 1945 static bool fold_extract(OptContext *ctx, TCGOp *op) 1946 { 1947 uint64_t z_mask_old, z_mask; 1948 TempOptInfo *t1 = arg_info(op->args[1]); 1949 int pos = op->args[2]; 1950 int len = op->args[3]; 1951 1952 if (ti_is_const(t1)) { 1953 return tcg_opt_gen_movi(ctx, op, op->args[0], 1954 extract64(ti_const_val(t1), pos, len)); 1955 } 1956 1957 z_mask_old = t1->z_mask; 1958 z_mask = extract64(z_mask_old, pos, len); 1959 if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) { 1960 return true; 1961 } 1962 1963 return fold_masks_z(ctx, op, z_mask); 1964 } 1965 1966 static bool fold_extract2(OptContext *ctx, TCGOp *op) 1967 { 1968 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1969 uint64_t v1 = arg_info(op->args[1])->val; 1970 uint64_t v2 = arg_info(op->args[2])->val; 1971 int shr = op->args[3]; 1972 1973 if (ctx->type == TCG_TYPE_I32) { 1974 v1 = (uint32_t)v1 >> shr; 1975 v2 = (uint64_t)((int32_t)v2 << (32 - shr)); 1976 } else { 1977 v1 >>= shr; 1978 v2 <<= 64 - shr; 1979 } 1980 return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2); 1981 } 1982 return finish_folding(ctx, op); 1983 } 1984 1985 static bool fold_exts(OptContext *ctx, TCGOp *op) 1986 { 1987 uint64_t s_mask, z_mask; 1988 TempOptInfo *t1; 1989 1990 if (fold_const1(ctx, op)) { 1991 return true; 1992 } 1993 1994 t1 = arg_info(op->args[1]); 1995 z_mask = t1->z_mask; 1996 s_mask = t1->s_mask; 1997 1998 switch (op->opc) { 1999 case INDEX_op_ext_i32_i64: 2000 s_mask |= INT32_MIN; 2001 z_mask = (int32_t)z_mask; 2002 break; 2003 default: 2004 g_assert_not_reached(); 2005 } 2006 return fold_masks_zs(ctx, op, z_mask, s_mask); 2007 } 2008 2009 static bool fold_extu(OptContext *ctx, TCGOp *op) 2010 { 2011 uint64_t z_mask; 2012 2013 if (fold_const1(ctx, op)) { 2014 return true; 2015 } 2016 2017 z_mask = arg_info(op->args[1])->z_mask; 2018 switch (op->opc) { 2019 case INDEX_op_extrl_i64_i32: 2020 case INDEX_op_extu_i32_i64: 2021 z_mask = (uint32_t)z_mask; 2022 break; 2023 case INDEX_op_extrh_i64_i32: 2024 z_mask >>= 32; 2025 break; 2026 default: 2027 g_assert_not_reached(); 2028 } 2029 return fold_masks_z(ctx, op, z_mask); 2030 } 2031 2032 static bool fold_mb(OptContext *ctx, TCGOp *op) 2033 { 2034 /* Eliminate duplicate and redundant fence instructions. */ 2035 if (ctx->prev_mb) { 2036 /* 2037 * Merge two barriers of the same type into one, 2038 * or a weaker barrier into a stronger one, 2039 * or two weaker barriers into a stronger one. 2040 * mb X; mb Y => mb X|Y 2041 * mb; strl => mb; st 2042 * ldaq; mb => ld; mb 2043 * ldaq; strl => ld; mb; st 2044 * Other combinations are also merged into a strong 2045 * barrier. This is stricter than specified but for 2046 * the purposes of TCG is better than not optimizing. 
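         * Merging is done by ORing the barrier arguments together below.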
2047 */ 2048 ctx->prev_mb->args[0] |= op->args[0]; 2049 tcg_op_remove(ctx->tcg, op); 2050 } else { 2051 ctx->prev_mb = op; 2052 } 2053 return true; 2054 } 2055 2056 static bool fold_mov(OptContext *ctx, TCGOp *op) 2057 { 2058 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 2059 } 2060 2061 static bool fold_movcond(OptContext *ctx, TCGOp *op) 2062 { 2063 uint64_t z_mask, s_mask; 2064 TempOptInfo *tt, *ft; 2065 int i; 2066 2067 /* If true and false values are the same, eliminate the cmp. */ 2068 if (args_are_copies(op->args[3], op->args[4])) { 2069 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]); 2070 } 2071 2072 /* 2073 * Canonicalize the "false" input reg to match the destination reg so 2074 * that the tcg backend can implement a "move if true" operation. 2075 */ 2076 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) { 2077 op->args[5] = tcg_invert_cond(op->args[5]); 2078 } 2079 2080 i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1], 2081 &op->args[2], &op->args[5]); 2082 if (i >= 0) { 2083 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]); 2084 } 2085 2086 tt = arg_info(op->args[3]); 2087 ft = arg_info(op->args[4]); 2088 z_mask = tt->z_mask | ft->z_mask; 2089 s_mask = tt->s_mask & ft->s_mask; 2090 2091 if (ti_is_const(tt) && ti_is_const(ft)) { 2092 uint64_t tv = ti_const_val(tt); 2093 uint64_t fv = ti_const_val(ft); 2094 TCGCond cond = op->args[5]; 2095 2096 if (tv == 1 && fv == 0) { 2097 op->opc = INDEX_op_setcond; 2098 op->args[3] = cond; 2099 } else if (fv == 1 && tv == 0) { 2100 op->opc = INDEX_op_setcond; 2101 op->args[3] = tcg_invert_cond(cond); 2102 } else if (tv == -1 && fv == 0) { 2103 op->opc = INDEX_op_negsetcond; 2104 op->args[3] = cond; 2105 } else if (fv == -1 && tv == 0) { 2106 op->opc = INDEX_op_negsetcond; 2107 op->args[3] = tcg_invert_cond(cond); 2108 } 2109 } 2110 2111 return fold_masks_zs(ctx, op, z_mask, s_mask); 2112 } 2113 2114 static bool fold_mul(OptContext *ctx, TCGOp *op) 2115 { 2116 if (fold_const2(ctx, op) || 2117 fold_xi_to_i(ctx, op, 0) || 2118 fold_xi_to_x(ctx, op, 1)) { 2119 return true; 2120 } 2121 return finish_folding(ctx, op); 2122 } 2123 2124 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op) 2125 { 2126 if (fold_const2_commutative(ctx, op) || 2127 fold_xi_to_i(ctx, op, 0)) { 2128 return true; 2129 } 2130 return finish_folding(ctx, op); 2131 } 2132 2133 static bool fold_multiply2(OptContext *ctx, TCGOp *op) 2134 { 2135 swap_commutative(op->args[0], &op->args[2], &op->args[3]); 2136 2137 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { 2138 uint64_t a = arg_info(op->args[2])->val; 2139 uint64_t b = arg_info(op->args[3])->val; 2140 uint64_t h, l; 2141 TCGArg rl, rh; 2142 TCGOp *op2; 2143 2144 switch (op->opc) { 2145 case INDEX_op_mulu2: 2146 if (ctx->type == TCG_TYPE_I32) { 2147 l = (uint64_t)(uint32_t)a * (uint32_t)b; 2148 h = (int32_t)(l >> 32); 2149 l = (int32_t)l; 2150 } else { 2151 mulu64(&l, &h, a, b); 2152 } 2153 break; 2154 case INDEX_op_muls2: 2155 if (ctx->type == TCG_TYPE_I32) { 2156 l = (int64_t)(int32_t)a * (int32_t)b; 2157 h = l >> 32; 2158 l = (int32_t)l; 2159 } else { 2160 muls64(&l, &h, a, b); 2161 } 2162 break; 2163 default: 2164 g_assert_not_reached(); 2165 } 2166 2167 rl = op->args[0]; 2168 rh = op->args[1]; 2169 2170 /* The proper opcode is supplied by tcg_opt_gen_mov. 
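         * The placeholder opcode 0 used here is overwritten once
         * tcg_opt_gen_movi turns op2 into a mov of the high half.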
*/ 2171 op2 = opt_insert_before(ctx, op, 0, 2); 2172 2173 tcg_opt_gen_movi(ctx, op, rl, l); 2174 tcg_opt_gen_movi(ctx, op2, rh, h); 2175 return true; 2176 } 2177 return finish_folding(ctx, op); 2178 } 2179 2180 static bool fold_nand(OptContext *ctx, TCGOp *op) 2181 { 2182 uint64_t s_mask; 2183 2184 if (fold_const2_commutative(ctx, op) || 2185 fold_xi_to_not(ctx, op, -1)) { 2186 return true; 2187 } 2188 2189 s_mask = arg_info(op->args[1])->s_mask 2190 & arg_info(op->args[2])->s_mask; 2191 return fold_masks_s(ctx, op, s_mask); 2192 } 2193 2194 static bool fold_neg_no_const(OptContext *ctx, TCGOp *op) 2195 { 2196 /* Set to 1 all bits to the left of the rightmost. */ 2197 uint64_t z_mask = arg_info(op->args[1])->z_mask; 2198 z_mask = -(z_mask & -z_mask); 2199 2200 return fold_masks_z(ctx, op, z_mask); 2201 } 2202 2203 static bool fold_neg(OptContext *ctx, TCGOp *op) 2204 { 2205 return fold_const1(ctx, op) || fold_neg_no_const(ctx, op); 2206 } 2207 2208 static bool fold_nor(OptContext *ctx, TCGOp *op) 2209 { 2210 uint64_t s_mask; 2211 2212 if (fold_const2_commutative(ctx, op) || 2213 fold_xi_to_not(ctx, op, 0)) { 2214 return true; 2215 } 2216 2217 s_mask = arg_info(op->args[1])->s_mask 2218 & arg_info(op->args[2])->s_mask; 2219 return fold_masks_s(ctx, op, s_mask); 2220 } 2221 2222 static bool fold_not(OptContext *ctx, TCGOp *op) 2223 { 2224 if (fold_const1(ctx, op)) { 2225 return true; 2226 } 2227 return fold_masks_s(ctx, op, arg_info(op->args[1])->s_mask); 2228 } 2229 2230 static bool fold_or(OptContext *ctx, TCGOp *op) 2231 { 2232 uint64_t z_mask, s_mask; 2233 TempOptInfo *t1, *t2; 2234 2235 if (fold_const2_commutative(ctx, op) || 2236 fold_xi_to_x(ctx, op, 0) || 2237 fold_xx_to_x(ctx, op)) { 2238 return true; 2239 } 2240 2241 t1 = arg_info(op->args[1]); 2242 t2 = arg_info(op->args[2]); 2243 z_mask = t1->z_mask | t2->z_mask; 2244 s_mask = t1->s_mask & t2->s_mask; 2245 return fold_masks_zs(ctx, op, z_mask, s_mask); 2246 } 2247 2248 static bool fold_orc(OptContext *ctx, TCGOp *op) 2249 { 2250 uint64_t s_mask; 2251 TempOptInfo *t1, *t2; 2252 2253 if (fold_const2(ctx, op) || 2254 fold_xx_to_i(ctx, op, -1) || 2255 fold_xi_to_x(ctx, op, -1) || 2256 fold_ix_to_not(ctx, op, 0)) { 2257 return true; 2258 } 2259 2260 t2 = arg_info(op->args[2]); 2261 if (ti_is_const(t2)) { 2262 /* Fold orc r,x,i to or r,x,~i. */ 2263 switch (ctx->type) { 2264 case TCG_TYPE_I32: 2265 case TCG_TYPE_I64: 2266 op->opc = INDEX_op_or; 2267 break; 2268 case TCG_TYPE_V64: 2269 case TCG_TYPE_V128: 2270 case TCG_TYPE_V256: 2271 op->opc = INDEX_op_or_vec; 2272 break; 2273 default: 2274 g_assert_not_reached(); 2275 } 2276 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); 2277 return fold_or(ctx, op); 2278 } 2279 2280 t1 = arg_info(op->args[1]); 2281 s_mask = t1->s_mask & t2->s_mask; 2282 return fold_masks_s(ctx, op, s_mask); 2283 } 2284 2285 static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op) 2286 { 2287 const TCGOpDef *def = &tcg_op_defs[op->opc]; 2288 MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs]; 2289 MemOp mop = get_memop(oi); 2290 int width = 8 * memop_size(mop); 2291 uint64_t z_mask = -1, s_mask = 0; 2292 2293 if (width < 64) { 2294 if (mop & MO_SIGN) { 2295 s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1)); 2296 } else { 2297 z_mask = MAKE_64BIT_MASK(0, width); 2298 } 2299 } 2300 2301 /* Opcodes that touch guest memory stop the mb optimization. 
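     * (A barrier may not be merged with one on the other side of a guest
     *  access, since that would reorder the access relative to them.)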
*/ 2302 ctx->prev_mb = NULL; 2303 2304 return fold_masks_zs(ctx, op, z_mask, s_mask); 2305 } 2306 2307 static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op) 2308 { 2309 /* Opcodes that touch guest memory stop the mb optimization. */ 2310 ctx->prev_mb = NULL; 2311 return finish_folding(ctx, op); 2312 } 2313 2314 static bool fold_qemu_st(OptContext *ctx, TCGOp *op) 2315 { 2316 /* Opcodes that touch guest memory stop the mb optimization. */ 2317 ctx->prev_mb = NULL; 2318 return true; 2319 } 2320 2321 static bool fold_remainder(OptContext *ctx, TCGOp *op) 2322 { 2323 if (fold_const2(ctx, op) || 2324 fold_xx_to_i(ctx, op, 0)) { 2325 return true; 2326 } 2327 return finish_folding(ctx, op); 2328 } 2329 2330 /* Return 1 if finished, -1 if simplified, 0 if unchanged. */ 2331 static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) 2332 { 2333 uint64_t a_zmask, b_val; 2334 TCGCond cond; 2335 2336 if (!arg_is_const(op->args[2])) { 2337 return false; 2338 } 2339 2340 a_zmask = arg_info(op->args[1])->z_mask; 2341 b_val = arg_info(op->args[2])->val; 2342 cond = op->args[3]; 2343 2344 if (ctx->type == TCG_TYPE_I32) { 2345 a_zmask = (uint32_t)a_zmask; 2346 b_val = (uint32_t)b_val; 2347 } 2348 2349 /* 2350 * A with only low bits set vs B with high bits set means that A < B. 2351 */ 2352 if (a_zmask < b_val) { 2353 bool inv = false; 2354 2355 switch (cond) { 2356 case TCG_COND_NE: 2357 case TCG_COND_LEU: 2358 case TCG_COND_LTU: 2359 inv = true; 2360 /* fall through */ 2361 case TCG_COND_GTU: 2362 case TCG_COND_GEU: 2363 case TCG_COND_EQ: 2364 return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv); 2365 default: 2366 break; 2367 } 2368 } 2369 2370 /* 2371 * A with only lsb set is already boolean. 2372 */ 2373 if (a_zmask <= 1) { 2374 bool convert = false; 2375 bool inv = false; 2376 2377 switch (cond) { 2378 case TCG_COND_EQ: 2379 inv = true; 2380 /* fall through */ 2381 case TCG_COND_NE: 2382 convert = (b_val == 0); 2383 break; 2384 case TCG_COND_LTU: 2385 case TCG_COND_TSTEQ: 2386 inv = true; 2387 /* fall through */ 2388 case TCG_COND_GEU: 2389 case TCG_COND_TSTNE: 2390 convert = (b_val == 1); 2391 break; 2392 default: 2393 break; 2394 } 2395 if (convert) { 2396 if (!inv && !neg) { 2397 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 2398 } 2399 2400 if (!inv) { 2401 op->opc = INDEX_op_neg; 2402 } else if (neg) { 2403 op->opc = INDEX_op_add; 2404 op->args[2] = arg_new_constant(ctx, -1); 2405 } else { 2406 op->opc = INDEX_op_xor; 2407 op->args[2] = arg_new_constant(ctx, 1); 2408 } 2409 return -1; 2410 } 2411 } 2412 return 0; 2413 } 2414 2415 static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) 2416 { 2417 TCGCond cond = op->args[3]; 2418 TCGArg ret, src1, src2; 2419 TCGOp *op2; 2420 uint64_t val; 2421 int sh; 2422 bool inv; 2423 2424 if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) { 2425 return; 2426 } 2427 2428 src2 = op->args[2]; 2429 val = arg_info(src2)->val; 2430 if (!is_power_of_2(val)) { 2431 return; 2432 } 2433 sh = ctz64(val); 2434 2435 ret = op->args[0]; 2436 src1 = op->args[1]; 2437 inv = cond == TCG_COND_TSTEQ; 2438 2439 if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) { 2440 op->opc = INDEX_op_sextract; 2441 op->args[1] = src1; 2442 op->args[2] = sh; 2443 op->args[3] = 1; 2444 return; 2445 } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) { 2446 op->opc = INDEX_op_extract; 2447 op->args[1] = src1; 2448 op->args[2] = sh; 2449 op->args[3] = 1; 2450 } else { 2451 if (sh) { 2452 op2 = opt_insert_before(ctx, 
op, INDEX_op_shr, 3); 2453 op2->args[0] = ret; 2454 op2->args[1] = src1; 2455 op2->args[2] = arg_new_constant(ctx, sh); 2456 src1 = ret; 2457 } 2458 op->opc = INDEX_op_and; 2459 op->args[1] = src1; 2460 op->args[2] = arg_new_constant(ctx, 1); 2461 } 2462 2463 if (neg && inv) { 2464 op2 = opt_insert_after(ctx, op, INDEX_op_add, 3); 2465 op2->args[0] = ret; 2466 op2->args[1] = ret; 2467 op2->args[2] = arg_new_constant(ctx, -1); 2468 } else if (inv) { 2469 op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3); 2470 op2->args[0] = ret; 2471 op2->args[1] = ret; 2472 op2->args[2] = arg_new_constant(ctx, 1); 2473 } else if (neg) { 2474 op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2); 2475 op2->args[0] = ret; 2476 op2->args[1] = ret; 2477 } 2478 } 2479 2480 static bool fold_setcond(OptContext *ctx, TCGOp *op) 2481 { 2482 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1], 2483 &op->args[2], &op->args[3]); 2484 if (i >= 0) { 2485 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 2486 } 2487 2488 i = fold_setcond_zmask(ctx, op, false); 2489 if (i > 0) { 2490 return true; 2491 } 2492 if (i == 0) { 2493 fold_setcond_tst_pow2(ctx, op, false); 2494 } 2495 2496 return fold_masks_z(ctx, op, 1); 2497 } 2498 2499 static bool fold_negsetcond(OptContext *ctx, TCGOp *op) 2500 { 2501 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1], 2502 &op->args[2], &op->args[3]); 2503 if (i >= 0) { 2504 return tcg_opt_gen_movi(ctx, op, op->args[0], -i); 2505 } 2506 2507 i = fold_setcond_zmask(ctx, op, true); 2508 if (i > 0) { 2509 return true; 2510 } 2511 if (i == 0) { 2512 fold_setcond_tst_pow2(ctx, op, true); 2513 } 2514 2515 /* Value is {0,-1} so all bits are repetitions of the sign. */ 2516 return fold_masks_s(ctx, op, -1); 2517 } 2518 2519 static bool fold_setcond2(OptContext *ctx, TCGOp *op) 2520 { 2521 TCGCond cond; 2522 int i, inv = 0; 2523 2524 i = do_constant_folding_cond2(ctx, op, &op->args[1]); 2525 cond = op->args[5]; 2526 if (i >= 0) { 2527 goto do_setcond_const; 2528 } 2529 2530 switch (cond) { 2531 case TCG_COND_LT: 2532 case TCG_COND_GE: 2533 /* 2534 * Simplify LT/GE comparisons vs zero to a single compare 2535 * vs the high word of the input. 2536 */ 2537 if (arg_is_const_val(op->args[3], 0) && 2538 arg_is_const_val(op->args[4], 0)) { 2539 goto do_setcond_high; 2540 } 2541 break; 2542 2543 case TCG_COND_NE: 2544 inv = 1; 2545 QEMU_FALLTHROUGH; 2546 case TCG_COND_EQ: 2547 /* 2548 * Simplify EQ/NE comparisons where one of the pairs 2549 * can be simplified. 
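 *
 * Rough worked example: for a double-word EQ of (al,ah) against (bl,bh),
 * if al and bl are constants that compare unequal, the whole result is
 * the constant 0 (do_setcond_const); if they compare equal, only the high
 * words matter and the op collapses to a single setcond on ah,bh
 * (do_setcond_high).  The symmetric check on ah,bh can likewise collapse
 * to a compare of the low words, and NE behaves the same way with the
 * constant result inverted.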
2550 */ 2551 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1], 2552 op->args[3], cond); 2553 switch (i ^ inv) { 2554 case 0: 2555 goto do_setcond_const; 2556 case 1: 2557 goto do_setcond_high; 2558 } 2559 2560 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2], 2561 op->args[4], cond); 2562 switch (i ^ inv) { 2563 case 0: 2564 goto do_setcond_const; 2565 case 1: 2566 goto do_setcond_low; 2567 } 2568 break; 2569 2570 case TCG_COND_TSTEQ: 2571 case TCG_COND_TSTNE: 2572 if (arg_is_const_val(op->args[3], 0)) { 2573 goto do_setcond_high; 2574 } 2575 if (arg_is_const_val(op->args[4], 0)) { 2576 goto do_setcond_low; 2577 } 2578 break; 2579 2580 default: 2581 break; 2582 2583 do_setcond_low: 2584 op->args[2] = op->args[3]; 2585 op->args[3] = cond; 2586 op->opc = INDEX_op_setcond; 2587 return fold_setcond(ctx, op); 2588 2589 do_setcond_high: 2590 op->args[1] = op->args[2]; 2591 op->args[2] = op->args[4]; 2592 op->args[3] = cond; 2593 op->opc = INDEX_op_setcond; 2594 return fold_setcond(ctx, op); 2595 } 2596 2597 return fold_masks_z(ctx, op, 1); 2598 2599 do_setcond_const: 2600 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 2601 } 2602 2603 static bool fold_sextract(OptContext *ctx, TCGOp *op) 2604 { 2605 uint64_t z_mask, s_mask, s_mask_old; 2606 TempOptInfo *t1 = arg_info(op->args[1]); 2607 int pos = op->args[2]; 2608 int len = op->args[3]; 2609 2610 if (ti_is_const(t1)) { 2611 return tcg_opt_gen_movi(ctx, op, op->args[0], 2612 sextract64(ti_const_val(t1), pos, len)); 2613 } 2614 2615 s_mask_old = t1->s_mask; 2616 s_mask = s_mask_old >> pos; 2617 s_mask |= -1ull << (len - 1); 2618 2619 if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) { 2620 return true; 2621 } 2622 2623 z_mask = sextract64(t1->z_mask, pos, len); 2624 return fold_masks_zs(ctx, op, z_mask, s_mask); 2625 } 2626 2627 static bool fold_shift(OptContext *ctx, TCGOp *op) 2628 { 2629 uint64_t s_mask, z_mask; 2630 TempOptInfo *t1, *t2; 2631 2632 if (fold_const2(ctx, op) || 2633 fold_ix_to_i(ctx, op, 0) || 2634 fold_xi_to_x(ctx, op, 0)) { 2635 return true; 2636 } 2637 2638 t1 = arg_info(op->args[1]); 2639 t2 = arg_info(op->args[2]); 2640 s_mask = t1->s_mask; 2641 z_mask = t1->z_mask; 2642 2643 if (ti_is_const(t2)) { 2644 int sh = ti_const_val(t2); 2645 2646 z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh); 2647 s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh); 2648 2649 return fold_masks_zs(ctx, op, z_mask, s_mask); 2650 } 2651 2652 switch (op->opc) { 2653 case INDEX_op_sar: 2654 /* 2655 * Arithmetic right shift will not reduce the number of 2656 * input sign repetitions. 2657 */ 2658 return fold_masks_s(ctx, op, s_mask); 2659 case INDEX_op_shr: 2660 /* 2661 * If the sign bit is known zero, then logical right shift 2662 * will not reduce the number of input sign repetitions. 
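 *
 * Rough illustration: if the operand is known to fit in 8 bits
 * (z_mask = 0xff), then bits 63..8 are known-zero and already count as
 * sign repetitions, since the sign bit itself is known zero.  A logical
 * right shift keeps those high bits zero, so the sign-repetition mask
 * derived from the input stays valid for the result.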
2663 */ 2664 if (~z_mask & -s_mask) { 2665 return fold_masks_s(ctx, op, s_mask); 2666 } 2667 break; 2668 default: 2669 break; 2670 } 2671 2672 return finish_folding(ctx, op); 2673 } 2674 2675 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op) 2676 { 2677 TCGOpcode neg_op; 2678 bool have_neg; 2679 2680 if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) { 2681 return false; 2682 } 2683 2684 switch (ctx->type) { 2685 case TCG_TYPE_I32: 2686 case TCG_TYPE_I64: 2687 neg_op = INDEX_op_neg; 2688 have_neg = true; 2689 break; 2690 case TCG_TYPE_V64: 2691 case TCG_TYPE_V128: 2692 case TCG_TYPE_V256: 2693 neg_op = INDEX_op_neg_vec; 2694 have_neg = (TCG_TARGET_HAS_neg_vec && 2695 tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0); 2696 break; 2697 default: 2698 g_assert_not_reached(); 2699 } 2700 if (have_neg) { 2701 op->opc = neg_op; 2702 op->args[1] = op->args[2]; 2703 return fold_neg_no_const(ctx, op); 2704 } 2705 return false; 2706 } 2707 2708 /* We cannot as yet do_constant_folding with vectors. */ 2709 static bool fold_sub_vec(OptContext *ctx, TCGOp *op) 2710 { 2711 if (fold_xx_to_i(ctx, op, 0) || 2712 fold_xi_to_x(ctx, op, 0) || 2713 fold_sub_to_neg(ctx, op)) { 2714 return true; 2715 } 2716 return finish_folding(ctx, op); 2717 } 2718 2719 static bool fold_sub(OptContext *ctx, TCGOp *op) 2720 { 2721 if (fold_const2(ctx, op) || 2722 fold_xx_to_i(ctx, op, 0) || 2723 fold_xi_to_x(ctx, op, 0) || 2724 fold_sub_to_neg(ctx, op)) { 2725 return true; 2726 } 2727 2728 /* Fold sub r,x,i to add r,x,-i */ 2729 if (arg_is_const(op->args[2])) { 2730 uint64_t val = arg_info(op->args[2])->val; 2731 2732 op->opc = INDEX_op_add; 2733 op->args[2] = arg_new_constant(ctx, -val); 2734 } 2735 return finish_folding(ctx, op); 2736 } 2737 2738 static void squash_prev_borrowout(OptContext *ctx, TCGOp *op) 2739 { 2740 TempOptInfo *t2; 2741 2742 op = QTAILQ_PREV(op, link); 2743 switch (op->opc) { 2744 case INDEX_op_subbo: 2745 op->opc = INDEX_op_sub; 2746 fold_sub(ctx, op); 2747 break; 2748 case INDEX_op_subbio: 2749 op->opc = INDEX_op_subbi; 2750 break; 2751 case INDEX_op_subb1o: 2752 t2 = arg_info(op->args[2]); 2753 if (ti_is_const(t2)) { 2754 op->opc = INDEX_op_add; 2755 op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1)); 2756 /* Perform other constant folding, if needed. */ 2757 fold_add(ctx, op); 2758 } else { 2759 TCGArg ret = op->args[0]; 2760 op->opc = INDEX_op_sub; 2761 op = opt_insert_after(ctx, op, INDEX_op_add, 3); 2762 op->args[0] = ret; 2763 op->args[1] = ret; 2764 op->args[2] = arg_new_constant(ctx, -1); 2765 } 2766 break; 2767 default: 2768 g_assert_not_reached(); 2769 } 2770 } 2771 2772 static bool fold_subbi(OptContext *ctx, TCGOp *op) 2773 { 2774 TempOptInfo *t2; 2775 int borrow_in = ctx->carry_state; 2776 2777 if (borrow_in < 0) { 2778 return finish_folding(ctx, op); 2779 } 2780 ctx->carry_state = -1; 2781 2782 squash_prev_borrowout(ctx, op); 2783 if (borrow_in == 0) { 2784 op->opc = INDEX_op_sub; 2785 return fold_sub(ctx, op); 2786 } 2787 2788 /* 2789 * Propagate the known carry-in into any constant, then negate to 2790 * transform from sub to add. If there is no constant, emit a 2791 * separate add -1. 
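 *
 * Worked example (illustration only): with a known borrow-in of 1 the
 * operation is r = x - y - 1.  If y is the constant 5, this is rewritten
 * as r = x + (-6), i.e. the constant becomes -(y + 1) and the opcode
 * becomes add.  With a non-constant y, the subtract is kept and a
 * following add of -1 applies the borrow.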
2792 */ 2793 t2 = arg_info(op->args[2]); 2794 if (ti_is_const(t2)) { 2795 op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1)); 2796 } else { 2797 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3); 2798 2799 op2->args[0] = op->args[0]; 2800 op2->args[1] = op->args[1]; 2801 op2->args[2] = op->args[2]; 2802 fold_sub(ctx, op2); 2803 2804 op->args[1] = op->args[0]; 2805 op->args[2] = arg_new_constant(ctx, -1); 2806 } 2807 op->opc = INDEX_op_add; 2808 return fold_add(ctx, op); 2809 } 2810 2811 static bool fold_subbio(OptContext *ctx, TCGOp *op) 2812 { 2813 TempOptInfo *t1, *t2; 2814 int borrow_out = -1; 2815 2816 if (ctx->carry_state < 0) { 2817 return finish_folding(ctx, op); 2818 } 2819 2820 squash_prev_borrowout(ctx, op); 2821 if (ctx->carry_state == 0) { 2822 goto do_subbo; 2823 } 2824 2825 t1 = arg_info(op->args[1]); 2826 t2 = arg_info(op->args[2]); 2827 2828 /* Propagate the known borrow-in into a constant, if possible. */ 2829 if (ti_is_const(t2)) { 2830 uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX; 2831 uint64_t v = ti_const_val(t2) & max; 2832 2833 if (v < max) { 2834 op->args[2] = arg_new_constant(ctx, v + 1); 2835 goto do_subbo; 2836 } 2837 /* subtracting max + 1 produces known borrow out. */ 2838 borrow_out = 1; 2839 } 2840 if (ti_is_const(t1)) { 2841 uint64_t v = ti_const_val(t1); 2842 if (v != 0) { 2843 op->args[2] = arg_new_constant(ctx, v - 1); 2844 goto do_subbo; 2845 } 2846 } 2847 2848 /* Adjust the opcode to remember the known carry-in. */ 2849 op->opc = INDEX_op_subb1o; 2850 ctx->carry_state = borrow_out; 2851 return finish_folding(ctx, op); 2852 2853 do_subbo: 2854 op->opc = INDEX_op_subbo; 2855 return fold_subbo(ctx, op); 2856 } 2857 2858 static bool fold_subbo(OptContext *ctx, TCGOp *op) 2859 { 2860 TempOptInfo *t1 = arg_info(op->args[1]); 2861 TempOptInfo *t2 = arg_info(op->args[2]); 2862 int borrow_out = -1; 2863 2864 if (ti_is_const(t2)) { 2865 uint64_t v2 = ti_const_val(t2); 2866 if (v2 == 0) { 2867 borrow_out = 0; 2868 } else if (ti_is_const(t1)) { 2869 uint64_t v1 = ti_const_val(t1); 2870 borrow_out = v1 < v2; 2871 } 2872 } 2873 ctx->carry_state = borrow_out; 2874 return finish_folding(ctx, op); 2875 } 2876 2877 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op) 2878 { 2879 uint64_t z_mask = -1, s_mask = 0; 2880 2881 /* We can't do any folding with a load, but we can record bits. 
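 *
 * For instance (illustrating the cases below): ld8u can only produce bits
 * [7:0], so z_mask = 0xff; ld16s produces a value whose bits [63:15] all
 * equal bit 15, which the s_mask encoding records as INT16_MIN
 * (bits 63..15 set).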
*/ 2882 switch (op->opc) { 2883 CASE_OP_32_64(ld8s): 2884 s_mask = INT8_MIN; 2885 break; 2886 CASE_OP_32_64(ld8u): 2887 z_mask = MAKE_64BIT_MASK(0, 8); 2888 break; 2889 CASE_OP_32_64(ld16s): 2890 s_mask = INT16_MIN; 2891 break; 2892 CASE_OP_32_64(ld16u): 2893 z_mask = MAKE_64BIT_MASK(0, 16); 2894 break; 2895 case INDEX_op_ld32s_i64: 2896 s_mask = INT32_MIN; 2897 break; 2898 case INDEX_op_ld32u_i64: 2899 z_mask = MAKE_64BIT_MASK(0, 32); 2900 break; 2901 default: 2902 g_assert_not_reached(); 2903 } 2904 return fold_masks_zs(ctx, op, z_mask, s_mask); 2905 } 2906 2907 static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op) 2908 { 2909 TCGTemp *dst, *src; 2910 intptr_t ofs; 2911 TCGType type; 2912 2913 if (op->args[1] != tcgv_ptr_arg(tcg_env)) { 2914 return finish_folding(ctx, op); 2915 } 2916 2917 type = ctx->type; 2918 ofs = op->args[2]; 2919 dst = arg_temp(op->args[0]); 2920 src = find_mem_copy_for(ctx, type, ofs); 2921 if (src && src->base_type == type) { 2922 return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src)); 2923 } 2924 2925 reset_ts(ctx, dst); 2926 record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1); 2927 return true; 2928 } 2929 2930 static bool fold_tcg_st(OptContext *ctx, TCGOp *op) 2931 { 2932 intptr_t ofs = op->args[2]; 2933 intptr_t lm1; 2934 2935 if (op->args[1] != tcgv_ptr_arg(tcg_env)) { 2936 remove_mem_copy_all(ctx); 2937 return true; 2938 } 2939 2940 switch (op->opc) { 2941 CASE_OP_32_64(st8): 2942 lm1 = 0; 2943 break; 2944 CASE_OP_32_64(st16): 2945 lm1 = 1; 2946 break; 2947 case INDEX_op_st32_i64: 2948 case INDEX_op_st_i32: 2949 lm1 = 3; 2950 break; 2951 case INDEX_op_st_i64: 2952 lm1 = 7; 2953 break; 2954 case INDEX_op_st_vec: 2955 lm1 = tcg_type_size(ctx->type) - 1; 2956 break; 2957 default: 2958 g_assert_not_reached(); 2959 } 2960 remove_mem_copy_in(ctx, ofs, ofs + lm1); 2961 return true; 2962 } 2963 2964 static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op) 2965 { 2966 TCGTemp *src; 2967 intptr_t ofs, last; 2968 TCGType type; 2969 2970 if (op->args[1] != tcgv_ptr_arg(tcg_env)) { 2971 return fold_tcg_st(ctx, op); 2972 } 2973 2974 src = arg_temp(op->args[0]); 2975 ofs = op->args[2]; 2976 type = ctx->type; 2977 2978 /* 2979 * Eliminate duplicate stores of a constant. 2980 * This happens frequently when the target ISA zero-extends. 2981 */ 2982 if (ts_is_const(src)) { 2983 TCGTemp *prev = find_mem_copy_for(ctx, type, ofs); 2984 if (src == prev) { 2985 tcg_op_remove(ctx->tcg, op); 2986 return true; 2987 } 2988 } 2989 2990 last = ofs + tcg_type_size(type) - 1; 2991 remove_mem_copy_in(ctx, ofs, last); 2992 record_mem_copy(ctx, type, src, ofs, last); 2993 return true; 2994 } 2995 2996 static bool fold_xor(OptContext *ctx, TCGOp *op) 2997 { 2998 uint64_t z_mask, s_mask; 2999 TempOptInfo *t1, *t2; 3000 3001 if (fold_const2_commutative(ctx, op) || 3002 fold_xx_to_i(ctx, op, 0) || 3003 fold_xi_to_x(ctx, op, 0) || 3004 fold_xi_to_not(ctx, op, -1)) { 3005 return true; 3006 } 3007 3008 t1 = arg_info(op->args[1]); 3009 t2 = arg_info(op->args[2]); 3010 z_mask = t1->z_mask | t2->z_mask; 3011 s_mask = t1->s_mask & t2->s_mask; 3012 return fold_masks_zs(ctx, op, z_mask, s_mask); 3013 } 3014 3015 /* Propagate constants and copies, fold constant expressions. */ 3016 void tcg_optimize(TCGContext *s) 3017 { 3018 int nb_temps, i; 3019 TCGOp *op, *op_next; 3020 OptContext ctx = { .tcg = s }; 3021 3022 QSIMPLEQ_INIT(&ctx.mem_free); 3023 3024 /* Array VALS has an element for each temp. 3025 If this temp holds a constant then its value is kept in VALS' element. 
3026 If this temp is a copy of other ones then the other copies are 3027 available through the doubly linked circular list. */ 3028 3029 nb_temps = s->nb_temps; 3030 for (i = 0; i < nb_temps; ++i) { 3031 s->temps[i].state_ptr = NULL; 3032 } 3033 3034 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3035 TCGOpcode opc = op->opc; 3036 const TCGOpDef *def; 3037 bool done = false; 3038 3039 /* Calls are special. */ 3040 if (opc == INDEX_op_call) { 3041 fold_call(&ctx, op); 3042 continue; 3043 } 3044 3045 def = &tcg_op_defs[opc]; 3046 init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs); 3047 copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs); 3048 3049 /* Pre-compute the type of the operation. */ 3050 ctx.type = TCGOP_TYPE(op); 3051 3052 /* 3053 * Process each opcode. 3054 * Sorted alphabetically by opcode as much as possible. 3055 */ 3056 switch (opc) { 3057 case INDEX_op_add: 3058 done = fold_add(&ctx, op); 3059 break; 3060 case INDEX_op_add_vec: 3061 done = fold_add_vec(&ctx, op); 3062 break; 3063 case INDEX_op_addci: 3064 done = fold_addci(&ctx, op); 3065 break; 3066 case INDEX_op_addcio: 3067 done = fold_addcio(&ctx, op); 3068 break; 3069 case INDEX_op_addco: 3070 done = fold_addco(&ctx, op); 3071 break; 3072 case INDEX_op_and: 3073 case INDEX_op_and_vec: 3074 done = fold_and(&ctx, op); 3075 break; 3076 case INDEX_op_andc: 3077 case INDEX_op_andc_vec: 3078 done = fold_andc(&ctx, op); 3079 break; 3080 case INDEX_op_brcond: 3081 done = fold_brcond(&ctx, op); 3082 break; 3083 case INDEX_op_brcond2_i32: 3084 done = fold_brcond2(&ctx, op); 3085 break; 3086 case INDEX_op_bswap16: 3087 case INDEX_op_bswap32: 3088 case INDEX_op_bswap64: 3089 done = fold_bswap(&ctx, op); 3090 break; 3091 case INDEX_op_clz: 3092 case INDEX_op_ctz: 3093 done = fold_count_zeros(&ctx, op); 3094 break; 3095 case INDEX_op_ctpop: 3096 done = fold_ctpop(&ctx, op); 3097 break; 3098 case INDEX_op_deposit: 3099 done = fold_deposit(&ctx, op); 3100 break; 3101 case INDEX_op_divs: 3102 case INDEX_op_divu: 3103 done = fold_divide(&ctx, op); 3104 break; 3105 case INDEX_op_dup_vec: 3106 done = fold_dup(&ctx, op); 3107 break; 3108 case INDEX_op_dup2_vec: 3109 done = fold_dup2(&ctx, op); 3110 break; 3111 case INDEX_op_eqv: 3112 case INDEX_op_eqv_vec: 3113 done = fold_eqv(&ctx, op); 3114 break; 3115 case INDEX_op_extract: 3116 done = fold_extract(&ctx, op); 3117 break; 3118 case INDEX_op_extract2: 3119 done = fold_extract2(&ctx, op); 3120 break; 3121 case INDEX_op_ext_i32_i64: 3122 done = fold_exts(&ctx, op); 3123 break; 3124 case INDEX_op_extu_i32_i64: 3125 case INDEX_op_extrl_i64_i32: 3126 case INDEX_op_extrh_i64_i32: 3127 done = fold_extu(&ctx, op); 3128 break; 3129 CASE_OP_32_64(ld8s): 3130 CASE_OP_32_64(ld8u): 3131 CASE_OP_32_64(ld16s): 3132 CASE_OP_32_64(ld16u): 3133 case INDEX_op_ld32s_i64: 3134 case INDEX_op_ld32u_i64: 3135 done = fold_tcg_ld(&ctx, op); 3136 break; 3137 case INDEX_op_ld_i32: 3138 case INDEX_op_ld_i64: 3139 case INDEX_op_ld_vec: 3140 done = fold_tcg_ld_memcopy(&ctx, op); 3141 break; 3142 CASE_OP_32_64(st8): 3143 CASE_OP_32_64(st16): 3144 case INDEX_op_st32_i64: 3145 done = fold_tcg_st(&ctx, op); 3146 break; 3147 case INDEX_op_st_i32: 3148 case INDEX_op_st_i64: 3149 case INDEX_op_st_vec: 3150 done = fold_tcg_st_memcopy(&ctx, op); 3151 break; 3152 case INDEX_op_mb: 3153 done = fold_mb(&ctx, op); 3154 break; 3155 case INDEX_op_mov: 3156 case INDEX_op_mov_vec: 3157 done = fold_mov(&ctx, op); 3158 break; 3159 case INDEX_op_movcond: 3160 done = fold_movcond(&ctx, op); 3161 break; 3162 case INDEX_op_mul: 3163 
done = fold_mul(&ctx, op); 3164 break; 3165 case INDEX_op_mulsh: 3166 case INDEX_op_muluh: 3167 done = fold_mul_highpart(&ctx, op); 3168 break; 3169 case INDEX_op_muls2: 3170 case INDEX_op_mulu2: 3171 done = fold_multiply2(&ctx, op); 3172 break; 3173 case INDEX_op_nand: 3174 case INDEX_op_nand_vec: 3175 done = fold_nand(&ctx, op); 3176 break; 3177 case INDEX_op_neg: 3178 done = fold_neg(&ctx, op); 3179 break; 3180 case INDEX_op_nor: 3181 case INDEX_op_nor_vec: 3182 done = fold_nor(&ctx, op); 3183 break; 3184 case INDEX_op_not: 3185 case INDEX_op_not_vec: 3186 done = fold_not(&ctx, op); 3187 break; 3188 case INDEX_op_or: 3189 case INDEX_op_or_vec: 3190 done = fold_or(&ctx, op); 3191 break; 3192 case INDEX_op_orc: 3193 case INDEX_op_orc_vec: 3194 done = fold_orc(&ctx, op); 3195 break; 3196 case INDEX_op_qemu_ld_i32: 3197 done = fold_qemu_ld_1reg(&ctx, op); 3198 break; 3199 case INDEX_op_qemu_ld_i64: 3200 if (TCG_TARGET_REG_BITS == 64) { 3201 done = fold_qemu_ld_1reg(&ctx, op); 3202 break; 3203 } 3204 QEMU_FALLTHROUGH; 3205 case INDEX_op_qemu_ld_i128: 3206 done = fold_qemu_ld_2reg(&ctx, op); 3207 break; 3208 case INDEX_op_qemu_st8_i32: 3209 case INDEX_op_qemu_st_i32: 3210 case INDEX_op_qemu_st_i64: 3211 case INDEX_op_qemu_st_i128: 3212 done = fold_qemu_st(&ctx, op); 3213 break; 3214 case INDEX_op_rems: 3215 case INDEX_op_remu: 3216 done = fold_remainder(&ctx, op); 3217 break; 3218 case INDEX_op_rotl: 3219 case INDEX_op_rotr: 3220 case INDEX_op_sar: 3221 case INDEX_op_shl: 3222 case INDEX_op_shr: 3223 done = fold_shift(&ctx, op); 3224 break; 3225 case INDEX_op_setcond: 3226 done = fold_setcond(&ctx, op); 3227 break; 3228 case INDEX_op_negsetcond: 3229 done = fold_negsetcond(&ctx, op); 3230 break; 3231 case INDEX_op_setcond2_i32: 3232 done = fold_setcond2(&ctx, op); 3233 break; 3234 case INDEX_op_cmp_vec: 3235 done = fold_cmp_vec(&ctx, op); 3236 break; 3237 case INDEX_op_cmpsel_vec: 3238 done = fold_cmpsel_vec(&ctx, op); 3239 break; 3240 case INDEX_op_bitsel_vec: 3241 done = fold_bitsel_vec(&ctx, op); 3242 break; 3243 case INDEX_op_sextract: 3244 done = fold_sextract(&ctx, op); 3245 break; 3246 case INDEX_op_sub: 3247 done = fold_sub(&ctx, op); 3248 break; 3249 case INDEX_op_subbi: 3250 done = fold_subbi(&ctx, op); 3251 break; 3252 case INDEX_op_subbio: 3253 done = fold_subbio(&ctx, op); 3254 break; 3255 case INDEX_op_subbo: 3256 done = fold_subbo(&ctx, op); 3257 break; 3258 case INDEX_op_sub_vec: 3259 done = fold_sub_vec(&ctx, op); 3260 break; 3261 case INDEX_op_xor: 3262 case INDEX_op_xor_vec: 3263 done = fold_xor(&ctx, op); 3264 break; 3265 case INDEX_op_set_label: 3266 case INDEX_op_br: 3267 case INDEX_op_exit_tb: 3268 case INDEX_op_goto_tb: 3269 case INDEX_op_goto_ptr: 3270 finish_ebb(&ctx); 3271 done = true; 3272 break; 3273 default: 3274 done = finish_folding(&ctx, op); 3275 break; 3276 } 3277 tcg_debug_assert(done); 3278 } 3279 } 3280
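
/*
 * A hedged, end-to-end illustration of the pass above.  The fragment below
 * is example pseudo-TCG only (mnemonics simplified, not produced by any
 * particular frontend):
 *
 *     mov   t1, $0x4        -- t1 is now a known constant
 *     add   t2, t1, $0x8    -- folded to a constant: t2 = $0xc
 *     mov   t3, t2          -- copy propagation: later uses of t3 read t2
 *     and   t4, t3, $0xff   -- t4 gains z_mask = 0xff for later folding
 *
 * Each op is dispatched by the switch in tcg_optimize() to a fold_* helper,
 * which either replaces the op outright (tcg_opt_gen_mov/movi), rewrites it
 * in place, or simply records z_mask/s_mask via the fold_masks_* helpers
 * for use by later ops in the same extended basic block.
 */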