1 /* 2 * Optimizations for Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2010 Samsung Electronics. 5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include "qemu/osdep.h" 27 #include "qemu/int128.h" 28 #include "qemu/interval-tree.h" 29 #include "tcg/tcg-op-common.h" 30 #include "tcg-internal.h" 31 #include "tcg-has.h" 32 33 34 typedef struct MemCopyInfo { 35 IntervalTreeNode itree; 36 QSIMPLEQ_ENTRY (MemCopyInfo) next; 37 TCGTemp *ts; 38 TCGType type; 39 } MemCopyInfo; 40 41 typedef struct TempOptInfo { 42 TCGTemp *prev_copy; 43 TCGTemp *next_copy; 44 QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy; 45 uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */ 46 uint64_t o_mask; /* mask bit is 1 if and only if value bit is 1 */ 47 uint64_t s_mask; /* mask bit is 1 if value bit matches msb */ 48 } TempOptInfo; 49 50 typedef struct OptContext { 51 TCGContext *tcg; 52 TCGOp *prev_mb; 53 TCGTempSet temps_used; 54 55 IntervalTreeRoot mem_copy; 56 QSIMPLEQ_HEAD(, MemCopyInfo) mem_free; 57 58 /* In flight values from optimization. */ 59 TCGType type; 60 int carry_state; /* -1 = non-constant, {0,1} = constant carry-in */ 61 } OptContext; 62 63 static inline TempOptInfo *ts_info(TCGTemp *ts) 64 { 65 return ts->state_ptr; 66 } 67 68 static inline TempOptInfo *arg_info(TCGArg arg) 69 { 70 return ts_info(arg_temp(arg)); 71 } 72 73 static inline bool ti_is_const(TempOptInfo *ti) 74 { 75 /* If all bits that are not known zeros are known ones, it's constant. */ 76 return ti->z_mask == ti->o_mask; 77 } 78 79 static inline uint64_t ti_const_val(TempOptInfo *ti) 80 { 81 /* If constant, both z_mask and o_mask contain the value. 
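   For example, a temp known to be exactly 0x2a has z_mask == o_mask == 0x2a;
   any bit position where the two masks differ is a bit whose value is unknown.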
*/ 82 return ti->z_mask; 83 } 84 85 static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val) 86 { 87 return ti_is_const(ti) && ti_const_val(ti) == val; 88 } 89 90 static inline bool ts_is_const(TCGTemp *ts) 91 { 92 return ti_is_const(ts_info(ts)); 93 } 94 95 static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val) 96 { 97 return ti_is_const_val(ts_info(ts), val); 98 } 99 100 static inline bool arg_is_const(TCGArg arg) 101 { 102 return ts_is_const(arg_temp(arg)); 103 } 104 105 static inline uint64_t arg_const_val(TCGArg arg) 106 { 107 return ti_const_val(arg_info(arg)); 108 } 109 110 static inline bool arg_is_const_val(TCGArg arg, uint64_t val) 111 { 112 return ts_is_const_val(arg_temp(arg), val); 113 } 114 115 static inline bool ts_is_copy(TCGTemp *ts) 116 { 117 return ts_info(ts)->next_copy != ts; 118 } 119 120 static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b) 121 { 122 return a->kind < b->kind ? b : a; 123 } 124 125 /* Initialize and activate a temporary. */ 126 static void init_ts_info(OptContext *ctx, TCGTemp *ts) 127 { 128 size_t idx = temp_idx(ts); 129 TempOptInfo *ti; 130 131 if (test_bit(idx, ctx->temps_used.l)) { 132 return; 133 } 134 set_bit(idx, ctx->temps_used.l); 135 136 ti = ts->state_ptr; 137 if (ti == NULL) { 138 ti = tcg_malloc(sizeof(TempOptInfo)); 139 ts->state_ptr = ti; 140 } 141 142 ti->next_copy = ts; 143 ti->prev_copy = ts; 144 QSIMPLEQ_INIT(&ti->mem_copy); 145 if (ts->kind == TEMP_CONST) { 146 ti->z_mask = ts->val; 147 ti->o_mask = ts->val; 148 ti->s_mask = INT64_MIN >> clrsb64(ts->val); 149 } else { 150 ti->z_mask = -1; 151 ti->o_mask = 0; 152 ti->s_mask = 0; 153 } 154 } 155 156 static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l) 157 { 158 IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l); 159 return r ? container_of(r, MemCopyInfo, itree) : NULL; 160 } 161 162 static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l) 163 { 164 IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l); 165 return r ? container_of(r, MemCopyInfo, itree) : NULL; 166 } 167 168 static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc) 169 { 170 TCGTemp *ts = mc->ts; 171 TempOptInfo *ti = ts_info(ts); 172 173 interval_tree_remove(&mc->itree, &ctx->mem_copy); 174 QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next); 175 QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next); 176 } 177 178 static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l) 179 { 180 while (true) { 181 MemCopyInfo *mc = mem_copy_first(ctx, s, l); 182 if (!mc) { 183 break; 184 } 185 remove_mem_copy(ctx, mc); 186 } 187 } 188 189 static void remove_mem_copy_all(OptContext *ctx) 190 { 191 remove_mem_copy_in(ctx, 0, -1); 192 tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy)); 193 } 194 195 static TCGTemp *find_better_copy(TCGTemp *ts) 196 { 197 TCGTemp *i, *ret; 198 199 /* If this is already readonly, we can't do better. 
*/ 200 if (temp_readonly(ts)) { 201 return ts; 202 } 203 204 ret = ts; 205 for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) { 206 ret = cmp_better_copy(ret, i); 207 } 208 return ret; 209 } 210 211 static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts) 212 { 213 TempOptInfo *si = ts_info(src_ts); 214 TempOptInfo *di = ts_info(dst_ts); 215 MemCopyInfo *mc; 216 217 QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) { 218 tcg_debug_assert(mc->ts == src_ts); 219 mc->ts = dst_ts; 220 } 221 QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy); 222 } 223 224 /* Reset TEMP's state, possibly removing the temp for the list of copies. */ 225 static void reset_ts(OptContext *ctx, TCGTemp *ts) 226 { 227 TempOptInfo *ti = ts_info(ts); 228 TCGTemp *pts = ti->prev_copy; 229 TCGTemp *nts = ti->next_copy; 230 TempOptInfo *pi = ts_info(pts); 231 TempOptInfo *ni = ts_info(nts); 232 233 ni->prev_copy = ti->prev_copy; 234 pi->next_copy = ti->next_copy; 235 ti->next_copy = ts; 236 ti->prev_copy = ts; 237 ti->z_mask = -1; 238 ti->o_mask = 0; 239 ti->s_mask = 0; 240 241 if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) { 242 if (ts == nts) { 243 /* Last temp copy being removed, the mem copies die. */ 244 MemCopyInfo *mc; 245 QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) { 246 interval_tree_remove(&mc->itree, &ctx->mem_copy); 247 } 248 QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy); 249 } else { 250 move_mem_copies(find_better_copy(nts), ts); 251 } 252 } 253 } 254 255 static void reset_temp(OptContext *ctx, TCGArg arg) 256 { 257 reset_ts(ctx, arg_temp(arg)); 258 } 259 260 static void record_mem_copy(OptContext *ctx, TCGType type, 261 TCGTemp *ts, intptr_t start, intptr_t last) 262 { 263 MemCopyInfo *mc; 264 TempOptInfo *ti; 265 266 mc = QSIMPLEQ_FIRST(&ctx->mem_free); 267 if (mc) { 268 QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next); 269 } else { 270 mc = tcg_malloc(sizeof(*mc)); 271 } 272 273 memset(mc, 0, sizeof(*mc)); 274 mc->itree.start = start; 275 mc->itree.last = last; 276 mc->type = type; 277 interval_tree_insert(&mc->itree, &ctx->mem_copy); 278 279 ts = find_better_copy(ts); 280 ti = ts_info(ts); 281 mc->ts = ts; 282 QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next); 283 } 284 285 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2) 286 { 287 TCGTemp *i; 288 289 if (ts1 == ts2) { 290 return true; 291 } 292 293 if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) { 294 return false; 295 } 296 297 for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) { 298 if (i == ts2) { 299 return true; 300 } 301 } 302 303 return false; 304 } 305 306 static bool args_are_copies(TCGArg arg1, TCGArg arg2) 307 { 308 return ts_are_copies(arg_temp(arg1), arg_temp(arg2)); 309 } 310 311 static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s) 312 { 313 MemCopyInfo *mc; 314 315 for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) { 316 if (mc->itree.start == s && mc->type == type) { 317 return find_better_copy(mc->ts); 318 } 319 } 320 return NULL; 321 } 322 323 static TCGArg arg_new_constant(OptContext *ctx, uint64_t val) 324 { 325 TCGType type = ctx->type; 326 TCGTemp *ts; 327 328 if (type == TCG_TYPE_I32) { 329 val = (int32_t)val; 330 } 331 332 ts = tcg_constant_internal(type, val); 333 init_ts_info(ctx, ts); 334 335 return temp_arg(ts); 336 } 337 338 static TCGArg arg_new_temp(OptContext *ctx) 339 { 340 TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB); 341 init_ts_info(ctx, ts); 342 return temp_arg(ts); 343 } 344 345 static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op, 346 TCGOpcode 
opc, unsigned narg) 347 { 348 return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg); 349 } 350 351 static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op, 352 TCGOpcode opc, unsigned narg) 353 { 354 return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg); 355 } 356 357 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) 358 { 359 TCGTemp *dst_ts = arg_temp(dst); 360 TCGTemp *src_ts = arg_temp(src); 361 TempOptInfo *di; 362 TempOptInfo *si; 363 TCGOpcode new_op; 364 365 if (ts_are_copies(dst_ts, src_ts)) { 366 tcg_op_remove(ctx->tcg, op); 367 return true; 368 } 369 370 reset_ts(ctx, dst_ts); 371 di = ts_info(dst_ts); 372 si = ts_info(src_ts); 373 374 switch (ctx->type) { 375 case TCG_TYPE_I32: 376 case TCG_TYPE_I64: 377 new_op = INDEX_op_mov; 378 break; 379 case TCG_TYPE_V64: 380 case TCG_TYPE_V128: 381 case TCG_TYPE_V256: 382 /* TCGOP_TYPE and TCGOP_VECE remain unchanged. */ 383 new_op = INDEX_op_mov_vec; 384 break; 385 default: 386 g_assert_not_reached(); 387 } 388 op->opc = new_op; 389 op->args[0] = dst; 390 op->args[1] = src; 391 392 di->z_mask = si->z_mask; 393 di->o_mask = si->o_mask; 394 di->s_mask = si->s_mask; 395 396 if (src_ts->type == dst_ts->type) { 397 TempOptInfo *ni = ts_info(si->next_copy); 398 399 di->next_copy = si->next_copy; 400 di->prev_copy = src_ts; 401 ni->prev_copy = dst_ts; 402 si->next_copy = dst_ts; 403 404 if (!QSIMPLEQ_EMPTY(&si->mem_copy) 405 && cmp_better_copy(src_ts, dst_ts) == dst_ts) { 406 move_mem_copies(dst_ts, src_ts); 407 } 408 } else if (dst_ts->type == TCG_TYPE_I32) { 409 di->z_mask = (int32_t)di->z_mask; 410 di->o_mask = (int32_t)di->o_mask; 411 di->s_mask |= INT32_MIN; 412 } else { 413 di->z_mask |= MAKE_64BIT_MASK(32, 32); 414 di->o_mask = (uint32_t)di->o_mask; 415 di->s_mask = INT64_MIN; 416 } 417 return true; 418 } 419 420 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op, 421 TCGArg dst, uint64_t val) 422 { 423 /* Convert movi to mov with constant temp. 
*/ 424 return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val)); 425 } 426 427 static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, 428 uint64_t x, uint64_t y) 429 { 430 uint64_t l64, h64; 431 432 switch (op) { 433 case INDEX_op_add: 434 return x + y; 435 436 case INDEX_op_sub: 437 return x - y; 438 439 case INDEX_op_mul: 440 return x * y; 441 442 case INDEX_op_and: 443 case INDEX_op_and_vec: 444 return x & y; 445 446 case INDEX_op_or: 447 case INDEX_op_or_vec: 448 return x | y; 449 450 case INDEX_op_xor: 451 case INDEX_op_xor_vec: 452 return x ^ y; 453 454 case INDEX_op_shl: 455 if (type == TCG_TYPE_I32) { 456 return (uint32_t)x << (y & 31); 457 } 458 return (uint64_t)x << (y & 63); 459 460 case INDEX_op_shr: 461 if (type == TCG_TYPE_I32) { 462 return (uint32_t)x >> (y & 31); 463 } 464 return (uint64_t)x >> (y & 63); 465 466 case INDEX_op_sar: 467 if (type == TCG_TYPE_I32) { 468 return (int32_t)x >> (y & 31); 469 } 470 return (int64_t)x >> (y & 63); 471 472 case INDEX_op_rotr: 473 if (type == TCG_TYPE_I32) { 474 return ror32(x, y & 31); 475 } 476 return ror64(x, y & 63); 477 478 case INDEX_op_rotl: 479 if (type == TCG_TYPE_I32) { 480 return rol32(x, y & 31); 481 } 482 return rol64(x, y & 63); 483 484 case INDEX_op_not: 485 case INDEX_op_not_vec: 486 return ~x; 487 488 case INDEX_op_neg: 489 return -x; 490 491 case INDEX_op_andc: 492 case INDEX_op_andc_vec: 493 return x & ~y; 494 495 case INDEX_op_orc: 496 case INDEX_op_orc_vec: 497 return x | ~y; 498 499 case INDEX_op_eqv: 500 case INDEX_op_eqv_vec: 501 return ~(x ^ y); 502 503 case INDEX_op_nand: 504 case INDEX_op_nand_vec: 505 return ~(x & y); 506 507 case INDEX_op_nor: 508 case INDEX_op_nor_vec: 509 return ~(x | y); 510 511 case INDEX_op_clz: 512 if (type == TCG_TYPE_I32) { 513 return (uint32_t)x ? clz32(x) : y; 514 } 515 return x ? clz64(x) : y; 516 517 case INDEX_op_ctz: 518 if (type == TCG_TYPE_I32) { 519 return (uint32_t)x ? ctz32(x) : y; 520 } 521 return x ? ctz64(x) : y; 522 523 case INDEX_op_ctpop: 524 return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x); 525 526 case INDEX_op_bswap16: 527 x = bswap16(x); 528 return y & TCG_BSWAP_OS ? (int16_t)x : x; 529 530 case INDEX_op_bswap32: 531 x = bswap32(x); 532 return y & TCG_BSWAP_OS ? (int32_t)x : x; 533 534 case INDEX_op_bswap64: 535 return bswap64(x); 536 537 case INDEX_op_ext_i32_i64: 538 return (int32_t)x; 539 540 case INDEX_op_extu_i32_i64: 541 case INDEX_op_extrl_i64_i32: 542 return (uint32_t)x; 543 544 case INDEX_op_extrh_i64_i32: 545 return (uint64_t)x >> 32; 546 547 case INDEX_op_muluh: 548 if (type == TCG_TYPE_I32) { 549 return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; 550 } 551 mulu64(&l64, &h64, x, y); 552 return h64; 553 554 case INDEX_op_mulsh: 555 if (type == TCG_TYPE_I32) { 556 return ((int64_t)(int32_t)x * (int32_t)y) >> 32; 557 } 558 muls64(&l64, &h64, x, y); 559 return h64; 560 561 case INDEX_op_divs: 562 /* Avoid crashing on divide by zero, otherwise undefined. */ 563 if (type == TCG_TYPE_I32) { 564 return (int32_t)x / ((int32_t)y ? : 1); 565 } 566 return (int64_t)x / ((int64_t)y ? : 1); 567 568 case INDEX_op_divu: 569 if (type == TCG_TYPE_I32) { 570 return (uint32_t)x / ((uint32_t)y ? : 1); 571 } 572 return (uint64_t)x / ((uint64_t)y ? : 1); 573 574 case INDEX_op_rems: 575 if (type == TCG_TYPE_I32) { 576 return (int32_t)x % ((int32_t)y ? : 1); 577 } 578 return (int64_t)x % ((int64_t)y ? : 1); 579 580 case INDEX_op_remu: 581 if (type == TCG_TYPE_I32) { 582 return (uint32_t)x % ((uint32_t)y ? 
: 1); 583 } 584 return (uint64_t)x % ((uint64_t)y ? : 1); 585 586 default: 587 g_assert_not_reached(); 588 } 589 } 590 591 static uint64_t do_constant_folding(TCGOpcode op, TCGType type, 592 uint64_t x, uint64_t y) 593 { 594 uint64_t res = do_constant_folding_2(op, type, x, y); 595 if (type == TCG_TYPE_I32) { 596 res = (int32_t)res; 597 } 598 return res; 599 } 600 601 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) 602 { 603 switch (c) { 604 case TCG_COND_EQ: 605 return x == y; 606 case TCG_COND_NE: 607 return x != y; 608 case TCG_COND_LT: 609 return (int32_t)x < (int32_t)y; 610 case TCG_COND_GE: 611 return (int32_t)x >= (int32_t)y; 612 case TCG_COND_LE: 613 return (int32_t)x <= (int32_t)y; 614 case TCG_COND_GT: 615 return (int32_t)x > (int32_t)y; 616 case TCG_COND_LTU: 617 return x < y; 618 case TCG_COND_GEU: 619 return x >= y; 620 case TCG_COND_LEU: 621 return x <= y; 622 case TCG_COND_GTU: 623 return x > y; 624 case TCG_COND_TSTEQ: 625 return (x & y) == 0; 626 case TCG_COND_TSTNE: 627 return (x & y) != 0; 628 case TCG_COND_ALWAYS: 629 case TCG_COND_NEVER: 630 break; 631 } 632 g_assert_not_reached(); 633 } 634 635 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) 636 { 637 switch (c) { 638 case TCG_COND_EQ: 639 return x == y; 640 case TCG_COND_NE: 641 return x != y; 642 case TCG_COND_LT: 643 return (int64_t)x < (int64_t)y; 644 case TCG_COND_GE: 645 return (int64_t)x >= (int64_t)y; 646 case TCG_COND_LE: 647 return (int64_t)x <= (int64_t)y; 648 case TCG_COND_GT: 649 return (int64_t)x > (int64_t)y; 650 case TCG_COND_LTU: 651 return x < y; 652 case TCG_COND_GEU: 653 return x >= y; 654 case TCG_COND_LEU: 655 return x <= y; 656 case TCG_COND_GTU: 657 return x > y; 658 case TCG_COND_TSTEQ: 659 return (x & y) == 0; 660 case TCG_COND_TSTNE: 661 return (x & y) != 0; 662 case TCG_COND_ALWAYS: 663 case TCG_COND_NEVER: 664 break; 665 } 666 g_assert_not_reached(); 667 } 668 669 static int do_constant_folding_cond_eq(TCGCond c) 670 { 671 switch (c) { 672 case TCG_COND_GT: 673 case TCG_COND_LTU: 674 case TCG_COND_LT: 675 case TCG_COND_GTU: 676 case TCG_COND_NE: 677 return 0; 678 case TCG_COND_GE: 679 case TCG_COND_GEU: 680 case TCG_COND_LE: 681 case TCG_COND_LEU: 682 case TCG_COND_EQ: 683 return 1; 684 case TCG_COND_TSTEQ: 685 case TCG_COND_TSTNE: 686 return -1; 687 case TCG_COND_ALWAYS: 688 case TCG_COND_NEVER: 689 break; 690 } 691 g_assert_not_reached(); 692 } 693 694 /* 695 * Return -1 if the condition can't be simplified, 696 * and the result of the condition (0 or 1) if it can. 697 */ 698 static int do_constant_folding_cond(TCGType type, TCGArg x, 699 TCGArg y, TCGCond c) 700 { 701 if (arg_is_const(x) && arg_is_const(y)) { 702 uint64_t xv = arg_const_val(x); 703 uint64_t yv = arg_const_val(y); 704 705 switch (type) { 706 case TCG_TYPE_I32: 707 return do_constant_folding_cond_32(xv, yv, c); 708 case TCG_TYPE_I64: 709 return do_constant_folding_cond_64(xv, yv, c); 710 default: 711 /* Only scalar comparisons are optimizable */ 712 return -1; 713 } 714 } else if (args_are_copies(x, y)) { 715 return do_constant_folding_cond_eq(c); 716 } else if (arg_is_const_val(y, 0)) { 717 switch (c) { 718 case TCG_COND_LTU: 719 case TCG_COND_TSTNE: 720 return 0; 721 case TCG_COND_GEU: 722 case TCG_COND_TSTEQ: 723 return 1; 724 default: 725 return -1; 726 } 727 } 728 return -1; 729 } 730 731 /** 732 * swap_commutative: 733 * @dest: TCGArg of the destination argument, or NO_DEST. 
734 * @p1: first paired argument 735 * @p2: second paired argument 736 * 737 * If *@p1 is a constant and *@p2 is not, swap. 738 * If *@p2 matches @dest, swap. 739 * Return true if a swap was performed. 740 */ 741 742 #define NO_DEST temp_arg(NULL) 743 744 static int pref_commutative(TempOptInfo *ti) 745 { 746 /* Slight preference for non-zero constants second. */ 747 return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 3 : 2; 748 } 749 750 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) 751 { 752 TCGArg a1 = *p1, a2 = *p2; 753 int sum = 0; 754 sum += pref_commutative(arg_info(a1)); 755 sum -= pref_commutative(arg_info(a2)); 756 757 /* Prefer the constant in second argument, and then the form 758 op a, a, b, which is better handled on non-RISC hosts. */ 759 if (sum > 0 || (sum == 0 && dest == a2)) { 760 *p1 = a2; 761 *p2 = a1; 762 return true; 763 } 764 return false; 765 } 766 767 static bool swap_commutative2(TCGArg *p1, TCGArg *p2) 768 { 769 int sum = 0; 770 sum += pref_commutative(arg_info(p1[0])); 771 sum += pref_commutative(arg_info(p1[1])); 772 sum -= pref_commutative(arg_info(p2[0])); 773 sum -= pref_commutative(arg_info(p2[1])); 774 if (sum > 0) { 775 TCGArg t; 776 t = p1[0], p1[0] = p2[0], p2[0] = t; 777 t = p1[1], p1[1] = p2[1], p2[1] = t; 778 return true; 779 } 780 return false; 781 } 782 783 /* 784 * Return -1 if the condition can't be simplified, 785 * and the result of the condition (0 or 1) if it can. 786 */ 787 static bool fold_and(OptContext *ctx, TCGOp *op); 788 static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest, 789 TCGArg *p1, TCGArg *p2, TCGArg *pcond) 790 { 791 TCGCond cond; 792 TempOptInfo *i1; 793 bool swap; 794 int r; 795 796 swap = swap_commutative(dest, p1, p2); 797 cond = *pcond; 798 if (swap) { 799 *pcond = cond = tcg_swap_cond(cond); 800 } 801 802 r = do_constant_folding_cond(ctx->type, *p1, *p2, cond); 803 if (r >= 0) { 804 return r; 805 } 806 if (!is_tst_cond(cond)) { 807 return -1; 808 } 809 810 i1 = arg_info(*p1); 811 812 /* 813 * TSTNE x,x -> NE x,0 814 * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x 815 */ 816 if (args_are_copies(*p1, *p2) || 817 (arg_is_const(*p2) && (i1->z_mask & ~arg_const_val(*p2)) == 0)) { 818 *p2 = arg_new_constant(ctx, 0); 819 *pcond = tcg_tst_eqne_cond(cond); 820 return -1; 821 } 822 823 /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */ 824 if (arg_is_const(*p2) && (arg_const_val(*p2) & ~i1->s_mask) == 0) { 825 *p2 = arg_new_constant(ctx, 0); 826 *pcond = tcg_tst_ltge_cond(cond); 827 return -1; 828 } 829 830 /* Expand to AND with a temporary if no backend support. 
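   That is, when the backend lacks TCG_TARGET_HAS_tst, cond(TSTNE, x, y) is
   rewritten as tmp = and(x, y) followed by cond(NE, tmp, 0).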
 */
    if (!TCG_TARGET_HAS_tst) {
        TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
        TCGArg tmp = arg_new_temp(ctx);

        op2->args[0] = tmp;
        op2->args[1] = *p1;
        op2->args[2] = *p2;
        fold_and(ctx, op2);

        *p1 = tmp;
        *p2 = arg_new_constant(ctx, 0);
        *pcond = tcg_tst_eqne_cond(cond);
    }
    return -1;
}

static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
{
    TCGArg al, ah, bl, bh;
    TCGCond c;
    bool swap;
    int r;

    swap = swap_commutative2(args, args + 2);
    c = args[4];
    if (swap) {
        args[4] = c = tcg_swap_cond(c);
    }

    al = args[0];
    ah = args[1];
    bl = args[2];
    bh = args[3];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_const_val(bl);
        tcg_target_ulong bhv = arg_const_val(bh);
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_const_val(al);
            tcg_target_ulong ahv = arg_const_val(ah);
            uint64_t a = deposit64(alv, 32, 32, ahv);

            r = do_constant_folding_cond_64(a, b, c);
            if (r >= 0) {
                return r;
            }
        }

        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
            case TCG_COND_TSTNE:
                return 0;
            case TCG_COND_GEU:
            case TCG_COND_TSTEQ:
                return 1;
            default:
                break;
            }
        }

        /* TSTNE x,-1 -> NE x,0 */
        if (b == -1 && is_tst_cond(c)) {
            args[3] = args[2] = arg_new_constant(ctx, 0);
            args[4] = tcg_tst_eqne_cond(c);
            return -1;
        }

        /* TSTNE x,sign -> LT x,0 */
        if (b == INT64_MIN && is_tst_cond(c)) {
            /* bl must be 0, so copy that to bh */
            args[3] = bl;
            args[4] = tcg_tst_ltge_cond(c);
            return -1;
        }
    }

    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        r = do_constant_folding_cond_eq(c);
        if (r >= 0) {
            return r;
        }

        /* TSTNE x,x -> NE x,0 */
        if (is_tst_cond(c)) {
            args[3] = args[2] = arg_new_constant(ctx, 0);
            args[4] = tcg_tst_eqne_cond(c);
            return -1;
        }
    }

    /* Expand to AND with a temporary if no backend support. */
    if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) {
        TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3);
        TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
        TCGArg t1 = arg_new_temp(ctx);
        TCGArg t2 = arg_new_temp(ctx);

        op1->args[0] = t1;
        op1->args[1] = al;
        op1->args[2] = bl;
        fold_and(ctx, op1);

        op2->args[0] = t2;
        op2->args[1] = ah;
        op2->args[2] = bh;
        fold_and(ctx, op2);

        args[0] = t1;
        args[1] = t2;
        args[3] = args[2] = arg_new_constant(ctx, 0);
        args[4] = tcg_tst_eqne_cond(c);
    }
    return -1;
}

static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
{
    for (int i = 0; i < nb_args; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        init_ts_info(ctx, ts);
    }
}

static void copy_propagate(OptContext *ctx, TCGOp *op,
                           int nb_oargs, int nb_iargs)
{
    for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (ts_is_copy(ts)) {
            op->args[i] = temp_arg(find_better_copy(ts));
        }
    }
}

static void finish_bb(OptContext *ctx)
{
    /* We only optimize memory barriers across basic blocks. */
    ctx->prev_mb = NULL;
}

static void finish_ebb(OptContext *ctx)
{
    finish_bb(ctx);
    /* We only optimize across extended basic blocks.
*/ 978 memset(&ctx->temps_used, 0, sizeof(ctx->temps_used)); 979 remove_mem_copy_all(ctx); 980 } 981 982 static bool finish_folding(OptContext *ctx, TCGOp *op) 983 { 984 const TCGOpDef *def = &tcg_op_defs[op->opc]; 985 int i, nb_oargs; 986 987 nb_oargs = def->nb_oargs; 988 for (i = 0; i < nb_oargs; i++) { 989 TCGTemp *ts = arg_temp(op->args[i]); 990 reset_ts(ctx, ts); 991 } 992 return true; 993 } 994 995 /* 996 * The fold_* functions return true when processing is complete, 997 * usually by folding the operation to a constant or to a copy, 998 * and calling tcg_opt_gen_{mov,movi}. They may do other things, 999 * like collect information about the value produced, for use in 1000 * optimizing a subsequent operation. 1001 * 1002 * These first fold_* functions are all helpers, used by other 1003 * folders for more specific operations. 1004 */ 1005 1006 static bool fold_const1(OptContext *ctx, TCGOp *op) 1007 { 1008 if (arg_is_const(op->args[1])) { 1009 uint64_t t = arg_const_val(op->args[1]); 1010 1011 t = do_constant_folding(op->opc, ctx->type, t, 0); 1012 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1013 } 1014 return false; 1015 } 1016 1017 static bool fold_const2(OptContext *ctx, TCGOp *op) 1018 { 1019 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1020 uint64_t t1 = arg_const_val(op->args[1]); 1021 uint64_t t2 = arg_const_val(op->args[2]); 1022 1023 t1 = do_constant_folding(op->opc, ctx->type, t1, t2); 1024 return tcg_opt_gen_movi(ctx, op, op->args[0], t1); 1025 } 1026 return false; 1027 } 1028 1029 static bool fold_commutative(OptContext *ctx, TCGOp *op) 1030 { 1031 swap_commutative(op->args[0], &op->args[1], &op->args[2]); 1032 return false; 1033 } 1034 1035 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op) 1036 { 1037 swap_commutative(op->args[0], &op->args[1], &op->args[2]); 1038 return fold_const2(ctx, op); 1039 } 1040 1041 /* 1042 * Record "zero" and "sign" masks for the single output of @op. 1043 * See TempOptInfo definition of z_mask and s_mask. 1044 * If z_mask allows, fold the output to constant zero. 1045 * The passed s_mask may be augmented by z_mask. 1046 */ 1047 static bool fold_masks_zosa_int(OptContext *ctx, TCGOp *op, 1048 uint64_t z_mask, uint64_t o_mask, 1049 int64_t s_mask, uint64_t a_mask) 1050 { 1051 const TCGOpDef *def = &tcg_op_defs[op->opc]; 1052 TCGTemp *ts; 1053 TempOptInfo *ti; 1054 int rep; 1055 1056 /* Only single-output opcodes are supported here. */ 1057 tcg_debug_assert(def->nb_oargs == 1); 1058 1059 /* 1060 * 32-bit ops generate 32-bit results, which for the purpose of 1061 * simplifying tcg are sign-extended. Certainly that's how we 1062 * represent our constants elsewhere. Note that the bits will 1063 * be reset properly for a 64-bit value when encountering the 1064 * type changing opcodes. 1065 */ 1066 if (ctx->type == TCG_TYPE_I32) { 1067 z_mask = (int32_t)z_mask; 1068 o_mask = (int32_t)o_mask; 1069 s_mask |= INT32_MIN; 1070 a_mask = (uint32_t)a_mask; 1071 } 1072 1073 /* Bits that are known 1 and bits that are known 0 must not overlap. */ 1074 tcg_debug_assert((o_mask & ~z_mask) == 0); 1075 1076 /* All bits that are not known zero are known one is a constant. */ 1077 if (z_mask == o_mask) { 1078 return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask); 1079 } 1080 1081 /* If no bits are affected, the operation devolves to a copy. 
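   An a_mask of 0 means the result is known to equal the first input, e.g. an
   AND whose other operand is known to have a 1 in every bit position where
   the first input might be nonzero.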
 */
    if (a_mask == 0) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }

    ts = arg_temp(op->args[0]);
    reset_ts(ctx, ts);

    ti = ts_info(ts);
    ti->z_mask = z_mask;
    ti->o_mask = o_mask;

    /* Canonicalize s_mask and incorporate data from z_mask. */
    rep = clz64(~s_mask);
    rep = MAX(rep, clz64(z_mask));
    rep = MAX(rep, clz64(~o_mask));
    rep = MAX(rep - 1, 0);
    ti->s_mask = INT64_MIN >> rep;

    return false;
}

static bool fold_masks_zosa(OptContext *ctx, TCGOp *op, uint64_t z_mask,
                            uint64_t o_mask, int64_t s_mask, uint64_t a_mask)
{
    fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask);
    return true;
}

static bool fold_masks_zos(OptContext *ctx, TCGOp *op,
                           uint64_t z_mask, uint64_t o_mask, uint64_t s_mask)
{
    return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, -1);
}

static bool fold_masks_zo(OptContext *ctx, TCGOp *op,
                          uint64_t z_mask, uint64_t o_mask)
{
    return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, -1);
}

static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
                          uint64_t z_mask, uint64_t s_mask)
{
    return fold_masks_zosa(ctx, op, z_mask, 0, s_mask, -1);
}

static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
{
    return fold_masks_zosa(ctx, op, z_mask, 0, 0, -1);
}

static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
{
    return fold_masks_zosa(ctx, op, -1, 0, s_mask, -1);
}

/*
 * Convert @op to NOT, if NOT is supported by the host.
 * Return true if the conversion is successful, which will still
 * indicate that the processing is complete.
 */
static bool fold_not(OptContext *ctx, TCGOp *op);
static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
{
    TCGOpcode not_op;
    bool have_not;

    switch (ctx->type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        not_op = INDEX_op_not;
        have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0);
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        not_op = INDEX_op_not_vec;
        have_not = TCG_TARGET_HAS_not_vec;
        break;
    default:
        g_assert_not_reached();
    }
    if (have_not) {
        op->opc = not_op;
        op->args[1] = op->args[idx];
        return fold_not(ctx, op);
    }
    return false;
}

/* If the binary operation has first argument @i, fold to @i. */
static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[1], i)) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has first argument @i, fold to NOT. */
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[1], i)) {
        return fold_to_not(ctx, op, 2);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to @i. */
static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[2], i)) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to identity.
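   E.g. x + 0, x | 0 and x * 1 all fold to a copy of x.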
*/ 1199 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i) 1200 { 1201 if (arg_is_const_val(op->args[2], i)) { 1202 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 1203 } 1204 return false; 1205 } 1206 1207 /* If the binary operation has second argument @i, fold to NOT. */ 1208 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i) 1209 { 1210 if (arg_is_const_val(op->args[2], i)) { 1211 return fold_to_not(ctx, op, 1); 1212 } 1213 return false; 1214 } 1215 1216 /* If the binary operation has both arguments equal, fold to @i. */ 1217 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i) 1218 { 1219 if (args_are_copies(op->args[1], op->args[2])) { 1220 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 1221 } 1222 return false; 1223 } 1224 1225 /* If the binary operation has both arguments equal, fold to identity. */ 1226 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op) 1227 { 1228 if (args_are_copies(op->args[1], op->args[2])) { 1229 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 1230 } 1231 return false; 1232 } 1233 1234 /* 1235 * These outermost fold_<op> functions are sorted alphabetically. 1236 * 1237 * The ordering of the transformations should be: 1238 * 1) those that produce a constant 1239 * 2) those that produce a copy 1240 * 3) those that produce information about the result value. 1241 */ 1242 1243 static bool fold_addco(OptContext *ctx, TCGOp *op); 1244 static bool fold_or(OptContext *ctx, TCGOp *op); 1245 static bool fold_orc(OptContext *ctx, TCGOp *op); 1246 static bool fold_subbo(OptContext *ctx, TCGOp *op); 1247 static bool fold_xor(OptContext *ctx, TCGOp *op); 1248 1249 static bool fold_add(OptContext *ctx, TCGOp *op) 1250 { 1251 if (fold_const2_commutative(ctx, op) || 1252 fold_xi_to_x(ctx, op, 0)) { 1253 return true; 1254 } 1255 return finish_folding(ctx, op); 1256 } 1257 1258 /* We cannot as yet do_constant_folding with vectors. */ 1259 static bool fold_add_vec(OptContext *ctx, TCGOp *op) 1260 { 1261 if (fold_commutative(ctx, op) || 1262 fold_xi_to_x(ctx, op, 0)) { 1263 return true; 1264 } 1265 return finish_folding(ctx, op); 1266 } 1267 1268 static void squash_prev_carryout(OptContext *ctx, TCGOp *op) 1269 { 1270 TempOptInfo *t2; 1271 1272 op = QTAILQ_PREV(op, link); 1273 switch (op->opc) { 1274 case INDEX_op_addco: 1275 op->opc = INDEX_op_add; 1276 fold_add(ctx, op); 1277 break; 1278 case INDEX_op_addcio: 1279 op->opc = INDEX_op_addci; 1280 break; 1281 case INDEX_op_addc1o: 1282 op->opc = INDEX_op_add; 1283 t2 = arg_info(op->args[2]); 1284 if (ti_is_const(t2)) { 1285 op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1); 1286 /* Perform other constant folding, if needed. */ 1287 fold_add(ctx, op); 1288 } else { 1289 TCGArg ret = op->args[0]; 1290 op = opt_insert_after(ctx, op, INDEX_op_add, 3); 1291 op->args[0] = ret; 1292 op->args[1] = ret; 1293 op->args[2] = arg_new_constant(ctx, 1); 1294 } 1295 break; 1296 default: 1297 g_assert_not_reached(); 1298 } 1299 } 1300 1301 static bool fold_addci(OptContext *ctx, TCGOp *op) 1302 { 1303 fold_commutative(ctx, op); 1304 1305 if (ctx->carry_state < 0) { 1306 return finish_folding(ctx, op); 1307 } 1308 1309 squash_prev_carryout(ctx, op); 1310 op->opc = INDEX_op_add; 1311 1312 if (ctx->carry_state > 0) { 1313 TempOptInfo *t2 = arg_info(op->args[2]); 1314 1315 /* 1316 * Propagate the known carry-in into a constant, if possible. 1317 * Otherwise emit a second add +1. 
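     * E.g. with a constant second operand of 5 and a carry-in known to be 1,
     * the addition becomes add dest, x, 6; with a non-constant operand it
     * becomes two adds, the second adding the constant 1.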
1318 */ 1319 if (ti_is_const(t2)) { 1320 op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1); 1321 } else { 1322 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3); 1323 1324 op2->args[0] = op->args[0]; 1325 op2->args[1] = op->args[1]; 1326 op2->args[2] = op->args[2]; 1327 fold_add(ctx, op2); 1328 1329 op->args[1] = op->args[0]; 1330 op->args[2] = arg_new_constant(ctx, 1); 1331 } 1332 } 1333 1334 ctx->carry_state = -1; 1335 return fold_add(ctx, op); 1336 } 1337 1338 static bool fold_addcio(OptContext *ctx, TCGOp *op) 1339 { 1340 TempOptInfo *t1, *t2; 1341 int carry_out = -1; 1342 uint64_t sum, max; 1343 1344 fold_commutative(ctx, op); 1345 t1 = arg_info(op->args[1]); 1346 t2 = arg_info(op->args[2]); 1347 1348 /* 1349 * The z_mask value is >= the maximum value that can be represented 1350 * with the known zero bits. So adding the z_mask values will not 1351 * overflow if and only if the true values cannot overflow. 1352 */ 1353 if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) && 1354 !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) { 1355 carry_out = 0; 1356 } 1357 1358 if (ctx->carry_state < 0) { 1359 ctx->carry_state = carry_out; 1360 return finish_folding(ctx, op); 1361 } 1362 1363 squash_prev_carryout(ctx, op); 1364 if (ctx->carry_state == 0) { 1365 goto do_addco; 1366 } 1367 1368 /* Propagate the known carry-in into a constant, if possible. */ 1369 max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX; 1370 if (ti_is_const(t2)) { 1371 uint64_t v = ti_const_val(t2) & max; 1372 if (v < max) { 1373 op->args[2] = arg_new_constant(ctx, v + 1); 1374 goto do_addco; 1375 } 1376 /* max + known carry in produces known carry out. */ 1377 carry_out = 1; 1378 } 1379 if (ti_is_const(t1)) { 1380 uint64_t v = ti_const_val(t1) & max; 1381 if (v < max) { 1382 op->args[1] = arg_new_constant(ctx, v + 1); 1383 goto do_addco; 1384 } 1385 carry_out = 1; 1386 } 1387 1388 /* Adjust the opcode to remember the known carry-in. */ 1389 op->opc = INDEX_op_addc1o; 1390 ctx->carry_state = carry_out; 1391 return finish_folding(ctx, op); 1392 1393 do_addco: 1394 op->opc = INDEX_op_addco; 1395 return fold_addco(ctx, op); 1396 } 1397 1398 static bool fold_addco(OptContext *ctx, TCGOp *op) 1399 { 1400 TempOptInfo *t1, *t2; 1401 int carry_out = -1; 1402 uint64_t ign; 1403 1404 fold_commutative(ctx, op); 1405 t1 = arg_info(op->args[1]); 1406 t2 = arg_info(op->args[2]); 1407 1408 if (ti_is_const(t2)) { 1409 uint64_t v2 = ti_const_val(t2); 1410 1411 if (ti_is_const(t1)) { 1412 uint64_t v1 = ti_const_val(t1); 1413 /* Given sign-extension of z_mask for I32, we need not truncate. */ 1414 carry_out = uadd64_overflow(v1, v2, &ign); 1415 } else if (v2 == 0) { 1416 carry_out = 0; 1417 } 1418 } else { 1419 /* 1420 * The z_mask value is >= the maximum value that can be represented 1421 * with the known zero bits. So adding the z_mask values will not 1422 * overflow if and only if the true values cannot overflow. 
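     * (An unsigned value can never exceed its own z_mask, since z_mask has
     * a 1 in every bit position where the value may have a 1.)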
1423 */ 1424 if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) { 1425 carry_out = 0; 1426 } 1427 } 1428 ctx->carry_state = carry_out; 1429 return finish_folding(ctx, op); 1430 } 1431 1432 static bool fold_and(OptContext *ctx, TCGOp *op) 1433 { 1434 uint64_t z_mask, o_mask, s_mask, a_mask; 1435 TempOptInfo *t1, *t2; 1436 1437 if (fold_const2_commutative(ctx, op)) { 1438 return true; 1439 } 1440 1441 t1 = arg_info(op->args[1]); 1442 t2 = arg_info(op->args[2]); 1443 1444 z_mask = t1->z_mask & t2->z_mask; 1445 o_mask = t1->o_mask & t2->o_mask; 1446 1447 /* 1448 * Sign repetitions are perforce all identical, whether they are 1 or 0. 1449 * Bitwise operations preserve the relative quantity of the repetitions. 1450 */ 1451 s_mask = t1->s_mask & t2->s_mask; 1452 1453 /* Affected bits are those not known zero, masked by those known one. */ 1454 a_mask = t1->z_mask & ~t2->o_mask; 1455 1456 if (!fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask)) { 1457 if (ti_is_const(t2)) { 1458 /* 1459 * Canonicalize on extract, if valid. This aids x86 with its 1460 * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode 1461 * which does not require matching operands. Other backends can 1462 * trivially expand the extract to AND during code generation. 1463 */ 1464 uint64_t val = ti_const_val(t2); 1465 if (!(val & (val + 1))) { 1466 unsigned len = ctz64(~val); 1467 if (TCG_TARGET_extract_valid(ctx->type, 0, len)) { 1468 op->opc = INDEX_op_extract; 1469 op->args[2] = 0; 1470 op->args[3] = len; 1471 } 1472 } 1473 } else { 1474 fold_xx_to_x(ctx, op); 1475 } 1476 } 1477 return true; 1478 } 1479 1480 static bool fold_andc(OptContext *ctx, TCGOp *op) 1481 { 1482 uint64_t z_mask, o_mask, s_mask, a_mask; 1483 TempOptInfo *t1, *t2; 1484 1485 if (fold_const2(ctx, op)) { 1486 return true; 1487 } 1488 1489 t1 = arg_info(op->args[1]); 1490 t2 = arg_info(op->args[2]); 1491 1492 if (ti_is_const(t2)) { 1493 /* Fold andc r,x,i to and r,x,~i. */ 1494 switch (ctx->type) { 1495 case TCG_TYPE_I32: 1496 case TCG_TYPE_I64: 1497 op->opc = INDEX_op_and; 1498 break; 1499 case TCG_TYPE_V64: 1500 case TCG_TYPE_V128: 1501 case TCG_TYPE_V256: 1502 op->opc = INDEX_op_and_vec; 1503 break; 1504 default: 1505 g_assert_not_reached(); 1506 } 1507 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); 1508 return fold_and(ctx, op); 1509 } 1510 if (fold_xx_to_i(ctx, op, 0) || 1511 fold_ix_to_not(ctx, op, -1)) { 1512 return true; 1513 } 1514 1515 z_mask = t1->z_mask & ~t2->o_mask; 1516 o_mask = t1->o_mask & ~t2->z_mask; 1517 s_mask = t1->s_mask & t2->s_mask; 1518 1519 /* Affected bits are those not known zero, masked by those known zero. */ 1520 a_mask = t1->z_mask & t2->z_mask; 1521 1522 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); 1523 } 1524 1525 static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op) 1526 { 1527 /* If true and false values are the same, eliminate the cmp. 
 */
    if (args_are_copies(op->args[2], op->args[3])) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
    }

    if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
        uint64_t tv = arg_const_val(op->args[2]);
        uint64_t fv = arg_const_val(op->args[3]);

        if (tv == -1 && fv == 0) {
            return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
        }
        if (tv == 0 && fv == -1) {
            if (TCG_TARGET_HAS_not_vec) {
                op->opc = INDEX_op_not_vec;
                return fold_not(ctx, op);
            } else {
                op->opc = INDEX_op_xor_vec;
                op->args[2] = arg_new_constant(ctx, -1);
                return fold_xor(ctx, op);
            }
        }
    }
    if (arg_is_const(op->args[2])) {
        uint64_t tv = arg_const_val(op->args[2]);
        if (tv == -1) {
            op->opc = INDEX_op_or_vec;
            op->args[2] = op->args[3];
            return fold_or(ctx, op);
        }
        if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
            op->opc = INDEX_op_andc_vec;
            op->args[2] = op->args[1];
            op->args[1] = op->args[3];
            return fold_andc(ctx, op);
        }
    }
    if (arg_is_const(op->args[3])) {
        uint64_t fv = arg_const_val(op->args[3]);
        if (fv == 0) {
            op->opc = INDEX_op_and_vec;
            return fold_and(ctx, op);
        }
        if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
            TCGArg sel = op->args[1];

            /* The result is true_val | ~sel, i.e. orc(true_val, sel). */
            op->opc = INDEX_op_orc_vec;
            op->args[1] = op->args[2];
            op->args[2] = sel;
            return fold_orc(ctx, op);
        }
    }
    return finish_folding(ctx, op);
}

static bool fold_brcond(OptContext *ctx, TCGOp *op)
{
    int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
                                      &op->args[1], &op->args[2]);
    if (i == 0) {
        tcg_op_remove(ctx->tcg, op);
        return true;
    }
    if (i > 0) {
        op->opc = INDEX_op_br;
        op->args[0] = op->args[3];
        finish_ebb(ctx);
    } else {
        finish_bb(ctx);
    }
    return true;
}

static bool fold_brcond2(OptContext *ctx, TCGOp *op)
{
    TCGCond cond;
    TCGArg label;
    int i, inv = 0;

    i = do_constant_folding_cond2(ctx, op, &op->args[0]);
    cond = op->args[4];
    label = op->args[5];
    if (i >= 0) {
        goto do_brcond_const;
    }

    switch (cond) {
    case TCG_COND_LT:
    case TCG_COND_GE:
        /*
         * Simplify LT/GE comparisons vs zero to a single compare
         * vs the high word of the input.
         */
        if (arg_is_const_val(op->args[2], 0) &&
            arg_is_const_val(op->args[3], 0)) {
            goto do_brcond_high;
        }
        break;

    case TCG_COND_NE:
        inv = 1;
        QEMU_FALLTHROUGH;
    case TCG_COND_EQ:
        /*
         * Simplify EQ/NE comparisons where one of the pairs
         * can be simplified.
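         * E.g. if either the low or the high halves are known unequal, a
         * double-word EQ is known false; if one pair is known equal, only
         * the other pair still needs to be compared.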
1631 */ 1632 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0], 1633 op->args[2], cond); 1634 switch (i ^ inv) { 1635 case 0: 1636 goto do_brcond_const; 1637 case 1: 1638 goto do_brcond_high; 1639 } 1640 1641 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1], 1642 op->args[3], cond); 1643 switch (i ^ inv) { 1644 case 0: 1645 goto do_brcond_const; 1646 case 1: 1647 goto do_brcond_low; 1648 } 1649 break; 1650 1651 case TCG_COND_TSTEQ: 1652 case TCG_COND_TSTNE: 1653 if (arg_is_const_val(op->args[2], 0)) { 1654 goto do_brcond_high; 1655 } 1656 if (arg_is_const_val(op->args[3], 0)) { 1657 goto do_brcond_low; 1658 } 1659 break; 1660 1661 default: 1662 break; 1663 1664 do_brcond_low: 1665 op->opc = INDEX_op_brcond; 1666 op->args[1] = op->args[2]; 1667 op->args[2] = cond; 1668 op->args[3] = label; 1669 return fold_brcond(ctx, op); 1670 1671 do_brcond_high: 1672 op->opc = INDEX_op_brcond; 1673 op->args[0] = op->args[1]; 1674 op->args[1] = op->args[3]; 1675 op->args[2] = cond; 1676 op->args[3] = label; 1677 return fold_brcond(ctx, op); 1678 1679 do_brcond_const: 1680 if (i == 0) { 1681 tcg_op_remove(ctx->tcg, op); 1682 return true; 1683 } 1684 op->opc = INDEX_op_br; 1685 op->args[0] = label; 1686 finish_ebb(ctx); 1687 return true; 1688 } 1689 1690 finish_bb(ctx); 1691 return true; 1692 } 1693 1694 static bool fold_bswap(OptContext *ctx, TCGOp *op) 1695 { 1696 uint64_t z_mask, o_mask, s_mask; 1697 TempOptInfo *t1 = arg_info(op->args[1]); 1698 int flags = op->args[2]; 1699 1700 if (ti_is_const(t1)) { 1701 return tcg_opt_gen_movi(ctx, op, op->args[0], 1702 do_constant_folding(op->opc, ctx->type, 1703 ti_const_val(t1), flags)); 1704 } 1705 1706 z_mask = t1->z_mask; 1707 o_mask = t1->o_mask; 1708 s_mask = 0; 1709 1710 switch (op->opc) { 1711 case INDEX_op_bswap16: 1712 z_mask = bswap16(z_mask); 1713 o_mask = bswap16(o_mask); 1714 if (flags & TCG_BSWAP_OS) { 1715 z_mask = (int16_t)z_mask; 1716 o_mask = (int16_t)o_mask; 1717 s_mask = INT16_MIN; 1718 } else if (!(flags & TCG_BSWAP_OZ)) { 1719 z_mask |= MAKE_64BIT_MASK(16, 48); 1720 } 1721 break; 1722 case INDEX_op_bswap32: 1723 z_mask = bswap32(z_mask); 1724 o_mask = bswap32(o_mask); 1725 if (flags & TCG_BSWAP_OS) { 1726 z_mask = (int32_t)z_mask; 1727 o_mask = (int32_t)o_mask; 1728 s_mask = INT32_MIN; 1729 } else if (!(flags & TCG_BSWAP_OZ)) { 1730 z_mask |= MAKE_64BIT_MASK(32, 32); 1731 } 1732 break; 1733 case INDEX_op_bswap64: 1734 z_mask = bswap64(z_mask); 1735 o_mask = bswap64(o_mask); 1736 break; 1737 default: 1738 g_assert_not_reached(); 1739 } 1740 1741 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); 1742 } 1743 1744 static bool fold_call(OptContext *ctx, TCGOp *op) 1745 { 1746 TCGContext *s = ctx->tcg; 1747 int nb_oargs = TCGOP_CALLO(op); 1748 int nb_iargs = TCGOP_CALLI(op); 1749 int flags, i; 1750 1751 init_arguments(ctx, op, nb_oargs + nb_iargs); 1752 copy_propagate(ctx, op, nb_oargs, nb_iargs); 1753 1754 /* If the function reads or writes globals, reset temp data. */ 1755 flags = tcg_call_flags(op); 1756 if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) { 1757 int nb_globals = s->nb_globals; 1758 1759 for (i = 0; i < nb_globals; i++) { 1760 if (test_bit(i, ctx->temps_used.l)) { 1761 reset_ts(ctx, &ctx->tcg->temps[i]); 1762 } 1763 } 1764 } 1765 1766 /* If the function has side effects, reset mem data. */ 1767 if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) { 1768 remove_mem_copy_all(ctx); 1769 } 1770 1771 /* Reset temp data for outputs. 
*/ 1772 for (i = 0; i < nb_oargs; i++) { 1773 reset_temp(ctx, op->args[i]); 1774 } 1775 1776 /* Stop optimizing MB across calls. */ 1777 ctx->prev_mb = NULL; 1778 return true; 1779 } 1780 1781 static bool fold_cmp_vec(OptContext *ctx, TCGOp *op) 1782 { 1783 /* Canonicalize the comparison to put immediate second. */ 1784 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) { 1785 op->args[3] = tcg_swap_cond(op->args[3]); 1786 } 1787 return finish_folding(ctx, op); 1788 } 1789 1790 static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op) 1791 { 1792 /* If true and false values are the same, eliminate the cmp. */ 1793 if (args_are_copies(op->args[3], op->args[4])) { 1794 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]); 1795 } 1796 1797 /* Canonicalize the comparison to put immediate second. */ 1798 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) { 1799 op->args[5] = tcg_swap_cond(op->args[5]); 1800 } 1801 /* 1802 * Canonicalize the "false" input reg to match the destination, 1803 * so that the tcg backend can implement "move if true". 1804 */ 1805 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) { 1806 op->args[5] = tcg_invert_cond(op->args[5]); 1807 } 1808 return finish_folding(ctx, op); 1809 } 1810 1811 static bool fold_count_zeros(OptContext *ctx, TCGOp *op) 1812 { 1813 uint64_t z_mask, s_mask; 1814 TempOptInfo *t1 = arg_info(op->args[1]); 1815 TempOptInfo *t2 = arg_info(op->args[2]); 1816 1817 if (ti_is_const(t1)) { 1818 uint64_t t = ti_const_val(t1); 1819 1820 if (t != 0) { 1821 t = do_constant_folding(op->opc, ctx->type, t, 0); 1822 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1823 } 1824 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]); 1825 } 1826 1827 switch (ctx->type) { 1828 case TCG_TYPE_I32: 1829 z_mask = 31; 1830 break; 1831 case TCG_TYPE_I64: 1832 z_mask = 63; 1833 break; 1834 default: 1835 g_assert_not_reached(); 1836 } 1837 s_mask = ~z_mask; 1838 z_mask |= t2->z_mask; 1839 s_mask &= t2->s_mask; 1840 1841 return fold_masks_zs(ctx, op, z_mask, s_mask); 1842 } 1843 1844 static bool fold_ctpop(OptContext *ctx, TCGOp *op) 1845 { 1846 uint64_t z_mask; 1847 1848 if (fold_const1(ctx, op)) { 1849 return true; 1850 } 1851 1852 switch (ctx->type) { 1853 case TCG_TYPE_I32: 1854 z_mask = 32 | 31; 1855 break; 1856 case TCG_TYPE_I64: 1857 z_mask = 64 | 63; 1858 break; 1859 default: 1860 g_assert_not_reached(); 1861 } 1862 return fold_masks_z(ctx, op, z_mask); 1863 } 1864 1865 static bool fold_deposit(OptContext *ctx, TCGOp *op) 1866 { 1867 TempOptInfo *t1 = arg_info(op->args[1]); 1868 TempOptInfo *t2 = arg_info(op->args[2]); 1869 int ofs = op->args[3]; 1870 int len = op->args[4]; 1871 int width = 8 * tcg_type_size(ctx->type); 1872 uint64_t z_mask, o_mask, s_mask; 1873 1874 if (ti_is_const(t1) && ti_is_const(t2)) { 1875 return tcg_opt_gen_movi(ctx, op, op->args[0], 1876 deposit64(ti_const_val(t1), ofs, len, 1877 ti_const_val(t2))); 1878 } 1879 1880 /* Inserting a value into zero at offset 0. */ 1881 if (ti_is_const_val(t1, 0) && ofs == 0) { 1882 uint64_t mask = MAKE_64BIT_MASK(0, len); 1883 1884 op->opc = INDEX_op_and; 1885 op->args[1] = op->args[2]; 1886 op->args[2] = arg_new_constant(ctx, mask); 1887 return fold_and(ctx, op); 1888 } 1889 1890 /* Inserting zero into a value. 
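   This reduces to an AND with a mask that clears just the deposited field.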
*/ 1891 if (ti_is_const_val(t2, 0)) { 1892 uint64_t mask = deposit64(-1, ofs, len, 0); 1893 1894 op->opc = INDEX_op_and; 1895 op->args[2] = arg_new_constant(ctx, mask); 1896 return fold_and(ctx, op); 1897 } 1898 1899 /* The s_mask from the top portion of the deposit is still valid. */ 1900 if (ofs + len == width) { 1901 s_mask = t2->s_mask << ofs; 1902 } else { 1903 s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len); 1904 } 1905 1906 z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask); 1907 o_mask = deposit64(t1->o_mask, ofs, len, t2->o_mask); 1908 1909 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); 1910 } 1911 1912 static bool fold_divide(OptContext *ctx, TCGOp *op) 1913 { 1914 if (fold_const2(ctx, op) || 1915 fold_xi_to_x(ctx, op, 1)) { 1916 return true; 1917 } 1918 return finish_folding(ctx, op); 1919 } 1920 1921 static bool fold_dup(OptContext *ctx, TCGOp *op) 1922 { 1923 if (arg_is_const(op->args[1])) { 1924 uint64_t t = arg_const_val(op->args[1]); 1925 t = dup_const(TCGOP_VECE(op), t); 1926 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1927 } 1928 return finish_folding(ctx, op); 1929 } 1930 1931 static bool fold_dup2(OptContext *ctx, TCGOp *op) 1932 { 1933 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1934 uint64_t t = deposit64(arg_const_val(op->args[1]), 32, 32, 1935 arg_const_val(op->args[2])); 1936 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1937 } 1938 1939 if (args_are_copies(op->args[1], op->args[2])) { 1940 op->opc = INDEX_op_dup_vec; 1941 TCGOP_VECE(op) = MO_32; 1942 } 1943 return finish_folding(ctx, op); 1944 } 1945 1946 static bool fold_eqv(OptContext *ctx, TCGOp *op) 1947 { 1948 uint64_t z_mask, o_mask, s_mask; 1949 TempOptInfo *t1, *t2; 1950 1951 if (fold_const2_commutative(ctx, op)) { 1952 return true; 1953 } 1954 1955 t2 = arg_info(op->args[2]); 1956 if (ti_is_const(t2)) { 1957 /* Fold eqv r,x,i to xor r,x,~i. */ 1958 switch (ctx->type) { 1959 case TCG_TYPE_I32: 1960 case TCG_TYPE_I64: 1961 op->opc = INDEX_op_xor; 1962 break; 1963 case TCG_TYPE_V64: 1964 case TCG_TYPE_V128: 1965 case TCG_TYPE_V256: 1966 op->opc = INDEX_op_xor_vec; 1967 break; 1968 default: 1969 g_assert_not_reached(); 1970 } 1971 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); 1972 return fold_xor(ctx, op); 1973 } 1974 1975 t1 = arg_info(op->args[1]); 1976 1977 z_mask = (t1->z_mask | ~t2->o_mask) & (t2->z_mask | ~t1->o_mask); 1978 o_mask = ~(t1->z_mask | t2->z_mask) | (t1->o_mask & t2->o_mask); 1979 s_mask = t1->s_mask & t2->s_mask; 1980 1981 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); 1982 } 1983 1984 static bool fold_extract(OptContext *ctx, TCGOp *op) 1985 { 1986 uint64_t z_mask, o_mask, a_mask; 1987 TempOptInfo *t1 = arg_info(op->args[1]); 1988 int pos = op->args[2]; 1989 int len = op->args[3]; 1990 1991 if (ti_is_const(t1)) { 1992 return tcg_opt_gen_movi(ctx, op, op->args[0], 1993 extract64(ti_const_val(t1), pos, len)); 1994 } 1995 1996 z_mask = extract64(t1->z_mask, pos, len); 1997 o_mask = extract64(t1->o_mask, pos, len); 1998 a_mask = pos ? 
-1 : t1->z_mask ^ z_mask; 1999 2000 return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, a_mask); 2001 } 2002 2003 static bool fold_extract2(OptContext *ctx, TCGOp *op) 2004 { 2005 TempOptInfo *t1 = arg_info(op->args[1]); 2006 TempOptInfo *t2 = arg_info(op->args[2]); 2007 uint64_t z1 = t1->z_mask; 2008 uint64_t z2 = t2->z_mask; 2009 uint64_t o1 = t1->o_mask; 2010 uint64_t o2 = t2->o_mask; 2011 int shr = op->args[3]; 2012 2013 if (ctx->type == TCG_TYPE_I32) { 2014 z1 = (uint32_t)z1 >> shr; 2015 o1 = (uint32_t)o1 >> shr; 2016 z2 = (uint64_t)((int32_t)z2 << (32 - shr)); 2017 o2 = (uint64_t)((int32_t)o2 << (32 - shr)); 2018 } else { 2019 z1 >>= shr; 2020 o1 >>= shr; 2021 z2 <<= 64 - shr; 2022 o2 <<= 64 - shr; 2023 } 2024 2025 return fold_masks_zo(ctx, op, z1 | z2, o1 | o2); 2026 } 2027 2028 static bool fold_exts(OptContext *ctx, TCGOp *op) 2029 { 2030 uint64_t z_mask, o_mask, s_mask; 2031 TempOptInfo *t1; 2032 2033 if (fold_const1(ctx, op)) { 2034 return true; 2035 } 2036 2037 t1 = arg_info(op->args[1]); 2038 z_mask = t1->z_mask; 2039 o_mask = t1->o_mask; 2040 s_mask = t1->s_mask; 2041 2042 switch (op->opc) { 2043 case INDEX_op_ext_i32_i64: 2044 s_mask |= INT32_MIN; 2045 z_mask = (int32_t)z_mask; 2046 o_mask = (int32_t)o_mask; 2047 break; 2048 default: 2049 g_assert_not_reached(); 2050 } 2051 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); 2052 } 2053 2054 static bool fold_extu(OptContext *ctx, TCGOp *op) 2055 { 2056 uint64_t z_mask, o_mask; 2057 TempOptInfo *t1; 2058 2059 if (fold_const1(ctx, op)) { 2060 return true; 2061 } 2062 2063 t1 = arg_info(op->args[1]); 2064 z_mask = t1->z_mask; 2065 o_mask = t1->o_mask; 2066 2067 switch (op->opc) { 2068 case INDEX_op_extrl_i64_i32: 2069 case INDEX_op_extu_i32_i64: 2070 z_mask = (uint32_t)z_mask; 2071 o_mask = (uint32_t)o_mask; 2072 break; 2073 case INDEX_op_extrh_i64_i32: 2074 z_mask >>= 32; 2075 o_mask >>= 32; 2076 break; 2077 default: 2078 g_assert_not_reached(); 2079 } 2080 return fold_masks_zo(ctx, op, z_mask, o_mask); 2081 } 2082 2083 static bool fold_mb(OptContext *ctx, TCGOp *op) 2084 { 2085 /* Eliminate duplicate and redundant fence instructions. */ 2086 if (ctx->prev_mb) { 2087 /* 2088 * Merge two barriers of the same type into one, 2089 * or a weaker barrier into a stronger one, 2090 * or two weaker barriers into a stronger one. 2091 * mb X; mb Y => mb X|Y 2092 * mb; strl => mb; st 2093 * ldaq; mb => ld; mb 2094 * ldaq; strl => ld; mb; st 2095 * Other combinations are also merged into a strong 2096 * barrier. This is stricter than specified but for 2097 * the purposes of TCG is better than not optimizing. 2098 */ 2099 ctx->prev_mb->args[0] |= op->args[0]; 2100 tcg_op_remove(ctx->tcg, op); 2101 } else { 2102 ctx->prev_mb = op; 2103 } 2104 return true; 2105 } 2106 2107 static bool fold_mov(OptContext *ctx, TCGOp *op) 2108 { 2109 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 2110 } 2111 2112 static bool fold_movcond(OptContext *ctx, TCGOp *op) 2113 { 2114 uint64_t z_mask, o_mask, s_mask; 2115 TempOptInfo *tt, *ft; 2116 int i; 2117 2118 /* If true and false values are the same, eliminate the cmp. */ 2119 if (args_are_copies(op->args[3], op->args[4])) { 2120 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]); 2121 } 2122 2123 /* 2124 * Canonicalize the "false" input reg to match the destination reg so 2125 * that the tcg backend can implement a "move if true" operation. 
2126 */ 2127 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) { 2128 op->args[5] = tcg_invert_cond(op->args[5]); 2129 } 2130 2131 i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1], 2132 &op->args[2], &op->args[5]); 2133 if (i >= 0) { 2134 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]); 2135 } 2136 2137 tt = arg_info(op->args[3]); 2138 ft = arg_info(op->args[4]); 2139 z_mask = tt->z_mask | ft->z_mask; 2140 o_mask = tt->o_mask & ft->o_mask; 2141 s_mask = tt->s_mask & ft->s_mask; 2142 2143 if (ti_is_const(tt) && ti_is_const(ft)) { 2144 uint64_t tv = ti_const_val(tt); 2145 uint64_t fv = ti_const_val(ft); 2146 TCGCond cond = op->args[5]; 2147 2148 if (tv == 1 && fv == 0) { 2149 op->opc = INDEX_op_setcond; 2150 op->args[3] = cond; 2151 } else if (fv == 1 && tv == 0) { 2152 op->opc = INDEX_op_setcond; 2153 op->args[3] = tcg_invert_cond(cond); 2154 } else if (tv == -1 && fv == 0) { 2155 op->opc = INDEX_op_negsetcond; 2156 op->args[3] = cond; 2157 } else if (fv == -1 && tv == 0) { 2158 op->opc = INDEX_op_negsetcond; 2159 op->args[3] = tcg_invert_cond(cond); 2160 } 2161 } 2162 2163 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); 2164 } 2165 2166 static bool fold_mul(OptContext *ctx, TCGOp *op) 2167 { 2168 if (fold_const2(ctx, op) || 2169 fold_xi_to_i(ctx, op, 0) || 2170 fold_xi_to_x(ctx, op, 1)) { 2171 return true; 2172 } 2173 return finish_folding(ctx, op); 2174 } 2175 2176 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op) 2177 { 2178 if (fold_const2_commutative(ctx, op) || 2179 fold_xi_to_i(ctx, op, 0)) { 2180 return true; 2181 } 2182 return finish_folding(ctx, op); 2183 } 2184 2185 static bool fold_multiply2(OptContext *ctx, TCGOp *op) 2186 { 2187 swap_commutative(op->args[0], &op->args[2], &op->args[3]); 2188 2189 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { 2190 uint64_t a = arg_const_val(op->args[2]); 2191 uint64_t b = arg_const_val(op->args[3]); 2192 uint64_t h, l; 2193 TCGArg rl, rh; 2194 TCGOp *op2; 2195 2196 switch (op->opc) { 2197 case INDEX_op_mulu2: 2198 if (ctx->type == TCG_TYPE_I32) { 2199 l = (uint64_t)(uint32_t)a * (uint32_t)b; 2200 h = (int32_t)(l >> 32); 2201 l = (int32_t)l; 2202 } else { 2203 mulu64(&l, &h, a, b); 2204 } 2205 break; 2206 case INDEX_op_muls2: 2207 if (ctx->type == TCG_TYPE_I32) { 2208 l = (int64_t)(int32_t)a * (int32_t)b; 2209 h = l >> 32; 2210 l = (int32_t)l; 2211 } else { 2212 muls64(&l, &h, a, b); 2213 } 2214 break; 2215 default: 2216 g_assert_not_reached(); 2217 } 2218 2219 rl = op->args[0]; 2220 rh = op->args[1]; 2221 2222 /* The proper opcode is supplied by tcg_opt_gen_mov. */ 2223 op2 = opt_insert_before(ctx, op, 0, 2); 2224 2225 tcg_opt_gen_movi(ctx, op, rl, l); 2226 tcg_opt_gen_movi(ctx, op2, rh, h); 2227 return true; 2228 } 2229 return finish_folding(ctx, op); 2230 } 2231 2232 static bool fold_nand(OptContext *ctx, TCGOp *op) 2233 { 2234 uint64_t z_mask, o_mask, s_mask; 2235 TempOptInfo *t1, *t2; 2236 2237 if (fold_const2_commutative(ctx, op) || 2238 fold_xi_to_not(ctx, op, -1)) { 2239 return true; 2240 } 2241 2242 t1 = arg_info(op->args[1]); 2243 t2 = arg_info(op->args[2]); 2244 2245 z_mask = ~(t1->o_mask & t2->o_mask); 2246 o_mask = ~(t1->z_mask & t2->z_mask); 2247 s_mask = t1->s_mask & t2->s_mask; 2248 2249 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); 2250 } 2251 2252 static bool fold_neg_no_const(OptContext *ctx, TCGOp *op) 2253 { 2254 /* Set to 1 all bits to the left of the rightmost. 
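   E.g. an input z_mask of 0x4 yields -(0x4) = ~(uint64_t)3: bits 0 and 1 of
   the negation stay clear, while any higher bit may be set.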
*/ 2255 uint64_t z_mask = arg_info(op->args[1])->z_mask; 2256 z_mask = -(z_mask & -z_mask); 2257 2258 return fold_masks_z(ctx, op, z_mask); 2259 } 2260 2261 static bool fold_neg(OptContext *ctx, TCGOp *op) 2262 { 2263 return fold_const1(ctx, op) || fold_neg_no_const(ctx, op); 2264 } 2265 2266 static bool fold_nor(OptContext *ctx, TCGOp *op) 2267 { 2268 uint64_t z_mask, o_mask, s_mask; 2269 TempOptInfo *t1, *t2; 2270 2271 if (fold_const2_commutative(ctx, op) || 2272 fold_xi_to_not(ctx, op, 0)) { 2273 return true; 2274 } 2275 2276 t1 = arg_info(op->args[1]); 2277 t2 = arg_info(op->args[2]); 2278 2279 z_mask = ~(t1->o_mask | t2->o_mask); 2280 o_mask = ~(t1->z_mask | t2->z_mask); 2281 s_mask = t1->s_mask & t2->s_mask; 2282 2283 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); 2284 } 2285 2286 static bool fold_not(OptContext *ctx, TCGOp *op) 2287 { 2288 TempOptInfo *t1; 2289 2290 if (fold_const1(ctx, op)) { 2291 return true; 2292 } 2293 2294 t1 = arg_info(op->args[1]); 2295 return fold_masks_zos(ctx, op, ~t1->o_mask, ~t1->z_mask, t1->s_mask); 2296 } 2297 2298 static bool fold_or(OptContext *ctx, TCGOp *op) 2299 { 2300 uint64_t z_mask, o_mask, s_mask, a_mask; 2301 TempOptInfo *t1, *t2; 2302 2303 if (fold_const2_commutative(ctx, op) || 2304 fold_xi_to_x(ctx, op, 0) || 2305 fold_xx_to_x(ctx, op)) { 2306 return true; 2307 } 2308 2309 t1 = arg_info(op->args[1]); 2310 t2 = arg_info(op->args[2]); 2311 2312 z_mask = t1->z_mask | t2->z_mask; 2313 o_mask = t1->o_mask | t2->o_mask; 2314 s_mask = t1->s_mask & t2->s_mask; 2315 2316 /* Affected bits are those not known one, masked by those known zero. */ 2317 a_mask = ~t1->o_mask & t2->z_mask; 2318 2319 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); 2320 } 2321 2322 static bool fold_orc(OptContext *ctx, TCGOp *op) 2323 { 2324 uint64_t z_mask, o_mask, s_mask, a_mask; 2325 TempOptInfo *t1, *t2; 2326 2327 if (fold_const2(ctx, op)) { 2328 return true; 2329 } 2330 2331 t2 = arg_info(op->args[2]); 2332 if (ti_is_const(t2)) { 2333 /* Fold orc r,x,i to or r,x,~i. */ 2334 switch (ctx->type) { 2335 case TCG_TYPE_I32: 2336 case TCG_TYPE_I64: 2337 op->opc = INDEX_op_or; 2338 break; 2339 case TCG_TYPE_V64: 2340 case TCG_TYPE_V128: 2341 case TCG_TYPE_V256: 2342 op->opc = INDEX_op_or_vec; 2343 break; 2344 default: 2345 g_assert_not_reached(); 2346 } 2347 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); 2348 return fold_or(ctx, op); 2349 } 2350 if (fold_xx_to_i(ctx, op, -1) || 2351 fold_ix_to_not(ctx, op, 0)) { 2352 return true; 2353 } 2354 t1 = arg_info(op->args[1]); 2355 2356 z_mask = t1->z_mask | ~t2->o_mask; 2357 o_mask = t1->o_mask | ~t2->z_mask; 2358 s_mask = t1->s_mask & t2->s_mask; 2359 2360 /* Affected bits are those not known one, masked by those known one. */ 2361 a_mask = ~t1->o_mask & t2->o_mask; 2362 2363 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); 2364 } 2365 2366 static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op) 2367 { 2368 const TCGOpDef *def = &tcg_op_defs[op->opc]; 2369 MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs]; 2370 MemOp mop = get_memop(oi); 2371 int width = 8 * memop_size(mop); 2372 uint64_t z_mask = -1, s_mask = 0; 2373 2374 if (width < 64) { 2375 if (mop & MO_SIGN) { 2376 s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1)); 2377 } else { 2378 z_mask = MAKE_64BIT_MASK(0, width); 2379 } 2380 } 2381 2382 /* Opcodes that touch guest memory stop the mb optimization. 
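 * A barrier before this access and a barrier after it order different
 * accesses, so the pending mb must not be merged with a later one across
 * the load.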
*/ 2383 ctx->prev_mb = NULL; 2384 2385 return fold_masks_zs(ctx, op, z_mask, s_mask); 2386 } 2387 2388 static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op) 2389 { 2390 /* Opcodes that touch guest memory stop the mb optimization. */ 2391 ctx->prev_mb = NULL; 2392 return finish_folding(ctx, op); 2393 } 2394 2395 static bool fold_qemu_st(OptContext *ctx, TCGOp *op) 2396 { 2397 /* Opcodes that touch guest memory stop the mb optimization. */ 2398 ctx->prev_mb = NULL; 2399 return true; 2400 } 2401 2402 static bool fold_remainder(OptContext *ctx, TCGOp *op) 2403 { 2404 if (fold_const2(ctx, op) || 2405 fold_xx_to_i(ctx, op, 0)) { 2406 return true; 2407 } 2408 return finish_folding(ctx, op); 2409 } 2410 2411 /* Return 1 if finished, -1 if simplified, 0 if unchanged. */ 2412 static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) 2413 { 2414 uint64_t a_zmask, b_val; 2415 TCGCond cond; 2416 2417 if (!arg_is_const(op->args[2])) { 2418 return false; 2419 } 2420 2421 a_zmask = arg_info(op->args[1])->z_mask; 2422 b_val = arg_const_val(op->args[2]); 2423 cond = op->args[3]; 2424 2425 if (ctx->type == TCG_TYPE_I32) { 2426 a_zmask = (uint32_t)a_zmask; 2427 b_val = (uint32_t)b_val; 2428 } 2429 2430 /* 2431 * A with only low bits set vs B with high bits set means that A < B. 2432 */ 2433 if (a_zmask < b_val) { 2434 bool inv = false; 2435 2436 switch (cond) { 2437 case TCG_COND_NE: 2438 case TCG_COND_LEU: 2439 case TCG_COND_LTU: 2440 inv = true; 2441 /* fall through */ 2442 case TCG_COND_GTU: 2443 case TCG_COND_GEU: 2444 case TCG_COND_EQ: 2445 return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv); 2446 default: 2447 break; 2448 } 2449 } 2450 2451 /* 2452 * A with only lsb set is already boolean. 2453 */ 2454 if (a_zmask <= 1) { 2455 bool convert = false; 2456 bool inv = false; 2457 2458 switch (cond) { 2459 case TCG_COND_EQ: 2460 inv = true; 2461 /* fall through */ 2462 case TCG_COND_NE: 2463 convert = (b_val == 0); 2464 break; 2465 case TCG_COND_LTU: 2466 case TCG_COND_TSTEQ: 2467 inv = true; 2468 /* fall through */ 2469 case TCG_COND_GEU: 2470 case TCG_COND_TSTNE: 2471 convert = (b_val == 1); 2472 break; 2473 default: 2474 break; 2475 } 2476 if (convert) { 2477 if (!inv && !neg) { 2478 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 2479 } 2480 2481 if (!inv) { 2482 op->opc = INDEX_op_neg; 2483 } else if (neg) { 2484 op->opc = INDEX_op_add; 2485 op->args[2] = arg_new_constant(ctx, -1); 2486 } else { 2487 op->opc = INDEX_op_xor; 2488 op->args[2] = arg_new_constant(ctx, 1); 2489 } 2490 return -1; 2491 } 2492 } 2493 return 0; 2494 } 2495 2496 static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) 2497 { 2498 TCGCond cond = op->args[3]; 2499 TCGArg ret, src1, src2; 2500 TCGOp *op2; 2501 uint64_t val; 2502 int sh; 2503 bool inv; 2504 2505 if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) { 2506 return; 2507 } 2508 2509 src2 = op->args[2]; 2510 val = arg_const_val(src2); 2511 if (!is_power_of_2(val)) { 2512 return; 2513 } 2514 sh = ctz64(val); 2515 2516 ret = op->args[0]; 2517 src1 = op->args[1]; 2518 inv = cond == TCG_COND_TSTEQ; 2519 2520 if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) { 2521 op->opc = INDEX_op_sextract; 2522 op->args[1] = src1; 2523 op->args[2] = sh; 2524 op->args[3] = 1; 2525 return; 2526 } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) { 2527 op->opc = INDEX_op_extract; 2528 op->args[1] = src1; 2529 op->args[2] = sh; 2530 op->args[3] = 1; 2531 } else { 2532 if (sh) { 2533 op2 = opt_insert_before(ctx, 
op, INDEX_op_shr, 3); 2534 op2->args[0] = ret; 2535 op2->args[1] = src1; 2536 op2->args[2] = arg_new_constant(ctx, sh); 2537 src1 = ret; 2538 } 2539 op->opc = INDEX_op_and; 2540 op->args[1] = src1; 2541 op->args[2] = arg_new_constant(ctx, 1); 2542 } 2543 2544 if (neg && inv) { 2545 op2 = opt_insert_after(ctx, op, INDEX_op_add, 3); 2546 op2->args[0] = ret; 2547 op2->args[1] = ret; 2548 op2->args[2] = arg_new_constant(ctx, -1); 2549 } else if (inv) { 2550 op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3); 2551 op2->args[0] = ret; 2552 op2->args[1] = ret; 2553 op2->args[2] = arg_new_constant(ctx, 1); 2554 } else if (neg) { 2555 op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2); 2556 op2->args[0] = ret; 2557 op2->args[1] = ret; 2558 } 2559 } 2560 2561 static bool fold_setcond(OptContext *ctx, TCGOp *op) 2562 { 2563 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1], 2564 &op->args[2], &op->args[3]); 2565 if (i >= 0) { 2566 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 2567 } 2568 2569 i = fold_setcond_zmask(ctx, op, false); 2570 if (i > 0) { 2571 return true; 2572 } 2573 if (i == 0) { 2574 fold_setcond_tst_pow2(ctx, op, false); 2575 } 2576 2577 return fold_masks_z(ctx, op, 1); 2578 } 2579 2580 static bool fold_negsetcond(OptContext *ctx, TCGOp *op) 2581 { 2582 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1], 2583 &op->args[2], &op->args[3]); 2584 if (i >= 0) { 2585 return tcg_opt_gen_movi(ctx, op, op->args[0], -i); 2586 } 2587 2588 i = fold_setcond_zmask(ctx, op, true); 2589 if (i > 0) { 2590 return true; 2591 } 2592 if (i == 0) { 2593 fold_setcond_tst_pow2(ctx, op, true); 2594 } 2595 2596 /* Value is {0,-1} so all bits are repetitions of the sign. */ 2597 return fold_masks_s(ctx, op, -1); 2598 } 2599 2600 static bool fold_setcond2(OptContext *ctx, TCGOp *op) 2601 { 2602 TCGCond cond; 2603 int i, inv = 0; 2604 2605 i = do_constant_folding_cond2(ctx, op, &op->args[1]); 2606 cond = op->args[5]; 2607 if (i >= 0) { 2608 goto do_setcond_const; 2609 } 2610 2611 switch (cond) { 2612 case TCG_COND_LT: 2613 case TCG_COND_GE: 2614 /* 2615 * Simplify LT/GE comparisons vs zero to a single compare 2616 * vs the high word of the input. 2617 */ 2618 if (arg_is_const_val(op->args[3], 0) && 2619 arg_is_const_val(op->args[4], 0)) { 2620 goto do_setcond_high; 2621 } 2622 break; 2623 2624 case TCG_COND_NE: 2625 inv = 1; 2626 QEMU_FALLTHROUGH; 2627 case TCG_COND_EQ: 2628 /* 2629 * Simplify EQ/NE comparisons where one of the pairs 2630 * can be simplified. 
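 * A double-word EQ is true only if both the low and the high pairs
 * compare equal, so if either pair is known unequal the result is a
 * constant, and if one pair is known equal only the other pair still
 * needs to be tested (with the sense inverted for NE).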
2631 */ 2632 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1], 2633 op->args[3], cond); 2634 switch (i ^ inv) { 2635 case 0: 2636 goto do_setcond_const; 2637 case 1: 2638 goto do_setcond_high; 2639 } 2640 2641 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2], 2642 op->args[4], cond); 2643 switch (i ^ inv) { 2644 case 0: 2645 goto do_setcond_const; 2646 case 1: 2647 goto do_setcond_low; 2648 } 2649 break; 2650 2651 case TCG_COND_TSTEQ: 2652 case TCG_COND_TSTNE: 2653 if (arg_is_const_val(op->args[3], 0)) { 2654 goto do_setcond_high; 2655 } 2656 if (arg_is_const_val(op->args[4], 0)) { 2657 goto do_setcond_low; 2658 } 2659 break; 2660 2661 default: 2662 break; 2663 2664 do_setcond_low: 2665 op->args[2] = op->args[3]; 2666 op->args[3] = cond; 2667 op->opc = INDEX_op_setcond; 2668 return fold_setcond(ctx, op); 2669 2670 do_setcond_high: 2671 op->args[1] = op->args[2]; 2672 op->args[2] = op->args[4]; 2673 op->args[3] = cond; 2674 op->opc = INDEX_op_setcond; 2675 return fold_setcond(ctx, op); 2676 } 2677 2678 return fold_masks_z(ctx, op, 1); 2679 2680 do_setcond_const: 2681 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 2682 } 2683 2684 static bool fold_sextract(OptContext *ctx, TCGOp *op) 2685 { 2686 uint64_t z_mask, o_mask, s_mask, a_mask; 2687 TempOptInfo *t1 = arg_info(op->args[1]); 2688 int pos = op->args[2]; 2689 int len = op->args[3]; 2690 2691 if (ti_is_const(t1)) { 2692 return tcg_opt_gen_movi(ctx, op, op->args[0], 2693 sextract64(ti_const_val(t1), pos, len)); 2694 } 2695 2696 s_mask = t1->s_mask >> pos; 2697 s_mask |= -1ull << (len - 1); 2698 a_mask = pos ? -1 : s_mask & ~t1->s_mask; 2699 2700 z_mask = sextract64(t1->z_mask, pos, len); 2701 o_mask = sextract64(t1->o_mask, pos, len); 2702 2703 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); 2704 } 2705 2706 static bool fold_shift(OptContext *ctx, TCGOp *op) 2707 { 2708 uint64_t s_mask, z_mask, o_mask; 2709 TempOptInfo *t1, *t2; 2710 2711 if (fold_const2(ctx, op) || 2712 fold_ix_to_i(ctx, op, 0) || 2713 fold_xi_to_x(ctx, op, 0)) { 2714 return true; 2715 } 2716 2717 t1 = arg_info(op->args[1]); 2718 t2 = arg_info(op->args[2]); 2719 s_mask = t1->s_mask; 2720 z_mask = t1->z_mask; 2721 o_mask = t1->o_mask; 2722 2723 if (ti_is_const(t2)) { 2724 int sh = ti_const_val(t2); 2725 2726 z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh); 2727 o_mask = do_constant_folding(op->opc, ctx->type, o_mask, sh); 2728 s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh); 2729 2730 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); 2731 } 2732 2733 switch (op->opc) { 2734 case INDEX_op_sar: 2735 /* 2736 * Arithmetic right shift will not reduce the number of 2737 * input sign repetitions. 2738 */ 2739 return fold_masks_s(ctx, op, s_mask); 2740 case INDEX_op_shr: 2741 /* 2742 * If the sign bit is known zero, then logical right shift 2743 * will not reduce the number of input sign repetitions. 
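 * Here -s_mask isolates the lowest bit of the sign-repetition range;
 * if that bit is known zero then every repeated bit, including the
 * sign bit itself, is zero and shr behaves exactly like sar.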
2744 */ 2745 if (~z_mask & -s_mask) { 2746 return fold_masks_s(ctx, op, s_mask); 2747 } 2748 break; 2749 default: 2750 break; 2751 } 2752 2753 return finish_folding(ctx, op); 2754 } 2755 2756 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op) 2757 { 2758 TCGOpcode neg_op; 2759 bool have_neg; 2760 2761 if (!arg_is_const_val(op->args[1], 0)) { 2762 return false; 2763 } 2764 2765 switch (ctx->type) { 2766 case TCG_TYPE_I32: 2767 case TCG_TYPE_I64: 2768 neg_op = INDEX_op_neg; 2769 have_neg = true; 2770 break; 2771 case TCG_TYPE_V64: 2772 case TCG_TYPE_V128: 2773 case TCG_TYPE_V256: 2774 neg_op = INDEX_op_neg_vec; 2775 have_neg = (TCG_TARGET_HAS_neg_vec && 2776 tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0); 2777 break; 2778 default: 2779 g_assert_not_reached(); 2780 } 2781 if (have_neg) { 2782 op->opc = neg_op; 2783 op->args[1] = op->args[2]; 2784 return fold_neg_no_const(ctx, op); 2785 } 2786 return false; 2787 } 2788 2789 /* We cannot as yet do_constant_folding with vectors. */ 2790 static bool fold_sub_vec(OptContext *ctx, TCGOp *op) 2791 { 2792 if (fold_xx_to_i(ctx, op, 0) || 2793 fold_xi_to_x(ctx, op, 0) || 2794 fold_sub_to_neg(ctx, op)) { 2795 return true; 2796 } 2797 return finish_folding(ctx, op); 2798 } 2799 2800 static bool fold_sub(OptContext *ctx, TCGOp *op) 2801 { 2802 if (fold_const2(ctx, op) || 2803 fold_xx_to_i(ctx, op, 0) || 2804 fold_xi_to_x(ctx, op, 0) || 2805 fold_sub_to_neg(ctx, op)) { 2806 return true; 2807 } 2808 2809 /* Fold sub r,x,i to add r,x,-i */ 2810 if (arg_is_const(op->args[2])) { 2811 uint64_t val = arg_const_val(op->args[2]); 2812 2813 op->opc = INDEX_op_add; 2814 op->args[2] = arg_new_constant(ctx, -val); 2815 } 2816 return finish_folding(ctx, op); 2817 } 2818 2819 static void squash_prev_borrowout(OptContext *ctx, TCGOp *op) 2820 { 2821 TempOptInfo *t2; 2822 2823 op = QTAILQ_PREV(op, link); 2824 switch (op->opc) { 2825 case INDEX_op_subbo: 2826 op->opc = INDEX_op_sub; 2827 fold_sub(ctx, op); 2828 break; 2829 case INDEX_op_subbio: 2830 op->opc = INDEX_op_subbi; 2831 break; 2832 case INDEX_op_subb1o: 2833 t2 = arg_info(op->args[2]); 2834 if (ti_is_const(t2)) { 2835 op->opc = INDEX_op_add; 2836 op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1)); 2837 /* Perform other constant folding, if needed. */ 2838 fold_add(ctx, op); 2839 } else { 2840 TCGArg ret = op->args[0]; 2841 op->opc = INDEX_op_sub; 2842 op = opt_insert_after(ctx, op, INDEX_op_add, 3); 2843 op->args[0] = ret; 2844 op->args[1] = ret; 2845 op->args[2] = arg_new_constant(ctx, -1); 2846 } 2847 break; 2848 default: 2849 g_assert_not_reached(); 2850 } 2851 } 2852 2853 static bool fold_subbi(OptContext *ctx, TCGOp *op) 2854 { 2855 TempOptInfo *t2; 2856 int borrow_in = ctx->carry_state; 2857 2858 if (borrow_in < 0) { 2859 return finish_folding(ctx, op); 2860 } 2861 ctx->carry_state = -1; 2862 2863 squash_prev_borrowout(ctx, op); 2864 if (borrow_in == 0) { 2865 op->opc = INDEX_op_sub; 2866 return fold_sub(ctx, op); 2867 } 2868 2869 /* 2870 * Propagate the known carry-in into any constant, then negate to 2871 * transform from sub to add. If there is no constant, emit a 2872 * separate add -1. 
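 * E.g. with a known borrow-in of 1, r = a - C - 1, which is rewritten
 * here as r = a + -(C + 1).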
2873 */ 2874 t2 = arg_info(op->args[2]); 2875 if (ti_is_const(t2)) { 2876 op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1)); 2877 } else { 2878 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3); 2879 2880 op2->args[0] = op->args[0]; 2881 op2->args[1] = op->args[1]; 2882 op2->args[2] = op->args[2]; 2883 fold_sub(ctx, op2); 2884 2885 op->args[1] = op->args[0]; 2886 op->args[2] = arg_new_constant(ctx, -1); 2887 } 2888 op->opc = INDEX_op_add; 2889 return fold_add(ctx, op); 2890 } 2891 2892 static bool fold_subbio(OptContext *ctx, TCGOp *op) 2893 { 2894 TempOptInfo *t1, *t2; 2895 int borrow_out = -1; 2896 2897 if (ctx->carry_state < 0) { 2898 return finish_folding(ctx, op); 2899 } 2900 2901 squash_prev_borrowout(ctx, op); 2902 if (ctx->carry_state == 0) { 2903 goto do_subbo; 2904 } 2905 2906 t1 = arg_info(op->args[1]); 2907 t2 = arg_info(op->args[2]); 2908 2909 /* Propagate the known borrow-in into a constant, if possible. */ 2910 if (ti_is_const(t2)) { 2911 uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX; 2912 uint64_t v = ti_const_val(t2) & max; 2913 2914 if (v < max) { 2915 op->args[2] = arg_new_constant(ctx, v + 1); 2916 goto do_subbo; 2917 } 2918 /* subtracting max + 1 produces known borrow out. */ 2919 borrow_out = 1; 2920 } 2921 if (ti_is_const(t1)) { 2922 uint64_t v = ti_const_val(t1); 2923 if (v != 0) { 2924 op->args[2] = arg_new_constant(ctx, v - 1); 2925 goto do_subbo; 2926 } 2927 } 2928 2929 /* Adjust the opcode to remember the known carry-in. */ 2930 op->opc = INDEX_op_subb1o; 2931 ctx->carry_state = borrow_out; 2932 return finish_folding(ctx, op); 2933 2934 do_subbo: 2935 op->opc = INDEX_op_subbo; 2936 return fold_subbo(ctx, op); 2937 } 2938 2939 static bool fold_subbo(OptContext *ctx, TCGOp *op) 2940 { 2941 TempOptInfo *t1 = arg_info(op->args[1]); 2942 TempOptInfo *t2 = arg_info(op->args[2]); 2943 int borrow_out = -1; 2944 2945 if (ti_is_const(t2)) { 2946 uint64_t v2 = ti_const_val(t2); 2947 if (v2 == 0) { 2948 borrow_out = 0; 2949 } else if (ti_is_const(t1)) { 2950 uint64_t v1 = ti_const_val(t1); 2951 borrow_out = v1 < v2; 2952 } 2953 } 2954 ctx->carry_state = borrow_out; 2955 return finish_folding(ctx, op); 2956 } 2957 2958 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op) 2959 { 2960 uint64_t z_mask = -1, s_mask = 0; 2961 2962 /* We can't do any folding with a load, but we can record bits. 
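 * E.g. ld8u leaves only bits 0..7 possibly set, while ld8s sign-extends
 * so that bits 7..63 all repeat the sign bit.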
*/ 2963 switch (op->opc) { 2964 case INDEX_op_ld8s: 2965 s_mask = INT8_MIN; 2966 break; 2967 case INDEX_op_ld8u: 2968 z_mask = MAKE_64BIT_MASK(0, 8); 2969 break; 2970 case INDEX_op_ld16s: 2971 s_mask = INT16_MIN; 2972 break; 2973 case INDEX_op_ld16u: 2974 z_mask = MAKE_64BIT_MASK(0, 16); 2975 break; 2976 case INDEX_op_ld32s: 2977 s_mask = INT32_MIN; 2978 break; 2979 case INDEX_op_ld32u: 2980 z_mask = MAKE_64BIT_MASK(0, 32); 2981 break; 2982 default: 2983 g_assert_not_reached(); 2984 } 2985 return fold_masks_zs(ctx, op, z_mask, s_mask); 2986 } 2987 2988 static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op) 2989 { 2990 TCGTemp *dst, *src; 2991 intptr_t ofs; 2992 TCGType type; 2993 2994 if (op->args[1] != tcgv_ptr_arg(tcg_env)) { 2995 return finish_folding(ctx, op); 2996 } 2997 2998 type = ctx->type; 2999 ofs = op->args[2]; 3000 dst = arg_temp(op->args[0]); 3001 src = find_mem_copy_for(ctx, type, ofs); 3002 if (src && src->base_type == type) { 3003 return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src)); 3004 } 3005 3006 reset_ts(ctx, dst); 3007 record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1); 3008 return true; 3009 } 3010 3011 static bool fold_tcg_st(OptContext *ctx, TCGOp *op) 3012 { 3013 intptr_t ofs = op->args[2]; 3014 intptr_t lm1; 3015 3016 if (op->args[1] != tcgv_ptr_arg(tcg_env)) { 3017 remove_mem_copy_all(ctx); 3018 return true; 3019 } 3020 3021 switch (op->opc) { 3022 case INDEX_op_st8: 3023 lm1 = 0; 3024 break; 3025 case INDEX_op_st16: 3026 lm1 = 1; 3027 break; 3028 case INDEX_op_st32: 3029 lm1 = 3; 3030 break; 3031 case INDEX_op_st: 3032 case INDEX_op_st_vec: 3033 lm1 = tcg_type_size(ctx->type) - 1; 3034 break; 3035 default: 3036 g_assert_not_reached(); 3037 } 3038 remove_mem_copy_in(ctx, ofs, ofs + lm1); 3039 return true; 3040 } 3041 3042 static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op) 3043 { 3044 TCGTemp *src; 3045 intptr_t ofs, last; 3046 TCGType type; 3047 3048 if (op->args[1] != tcgv_ptr_arg(tcg_env)) { 3049 return fold_tcg_st(ctx, op); 3050 } 3051 3052 src = arg_temp(op->args[0]); 3053 ofs = op->args[2]; 3054 type = ctx->type; 3055 3056 /* 3057 * Eliminate duplicate stores of a constant. 3058 * This happens frequently when the target ISA zero-extends. 3059 */ 3060 if (ts_is_const(src)) { 3061 TCGTemp *prev = find_mem_copy_for(ctx, type, ofs); 3062 if (src == prev) { 3063 tcg_op_remove(ctx->tcg, op); 3064 return true; 3065 } 3066 } 3067 3068 last = ofs + tcg_type_size(type) - 1; 3069 remove_mem_copy_in(ctx, ofs, last); 3070 record_mem_copy(ctx, type, src, ofs, last); 3071 return true; 3072 } 3073 3074 static bool fold_xor(OptContext *ctx, TCGOp *op) 3075 { 3076 uint64_t z_mask, o_mask, s_mask; 3077 TempOptInfo *t1, *t2; 3078 3079 if (fold_const2_commutative(ctx, op) || 3080 fold_xx_to_i(ctx, op, 0) || 3081 fold_xi_to_x(ctx, op, 0) || 3082 fold_xi_to_not(ctx, op, -1)) { 3083 return true; 3084 } 3085 3086 t1 = arg_info(op->args[1]); 3087 t2 = arg_info(op->args[2]); 3088 3089 z_mask = (t1->z_mask | t2->z_mask) & ~(t1->o_mask & t2->o_mask); 3090 o_mask = (t1->o_mask & ~t2->z_mask) | (t2->o_mask & ~t1->z_mask); 3091 s_mask = t1->s_mask & t2->s_mask; 3092 3093 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); 3094 } 3095 3096 /* Propagate constants and copies, fold constant expressions. */ 3097 void tcg_optimize(TCGContext *s) 3098 { 3099 int nb_temps, i; 3100 TCGOp *op, *op_next; 3101 OptContext ctx = { .tcg = s }; 3102 3103 QSIMPLEQ_INIT(&ctx.mem_free); 3104 3105 /* Array VALS has an element for each temp. 
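       (In the current code the per-temp state is the TempOptInfo reached
       through ts->state_ptr, which the loop below resets to NULL.)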
3106 If this temp holds a constant then its value is kept in VALS' element. 3107 If this temp is a copy of other ones then the other copies are 3108 available through the doubly linked circular list. */ 3109 3110 nb_temps = s->nb_temps; 3111 for (i = 0; i < nb_temps; ++i) { 3112 s->temps[i].state_ptr = NULL; 3113 } 3114 3115 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3116 TCGOpcode opc = op->opc; 3117 const TCGOpDef *def; 3118 bool done = false; 3119 3120 /* Calls are special. */ 3121 if (opc == INDEX_op_call) { 3122 fold_call(&ctx, op); 3123 continue; 3124 } 3125 3126 def = &tcg_op_defs[opc]; 3127 init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs); 3128 copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs); 3129 3130 /* Pre-compute the type of the operation. */ 3131 ctx.type = TCGOP_TYPE(op); 3132 3133 /* 3134 * Process each opcode. 3135 * Sorted alphabetically by opcode as much as possible. 3136 */ 3137 switch (opc) { 3138 case INDEX_op_add: 3139 done = fold_add(&ctx, op); 3140 break; 3141 case INDEX_op_add_vec: 3142 done = fold_add_vec(&ctx, op); 3143 break; 3144 case INDEX_op_addci: 3145 done = fold_addci(&ctx, op); 3146 break; 3147 case INDEX_op_addcio: 3148 done = fold_addcio(&ctx, op); 3149 break; 3150 case INDEX_op_addco: 3151 done = fold_addco(&ctx, op); 3152 break; 3153 case INDEX_op_and: 3154 case INDEX_op_and_vec: 3155 done = fold_and(&ctx, op); 3156 break; 3157 case INDEX_op_andc: 3158 case INDEX_op_andc_vec: 3159 done = fold_andc(&ctx, op); 3160 break; 3161 case INDEX_op_brcond: 3162 done = fold_brcond(&ctx, op); 3163 break; 3164 case INDEX_op_brcond2_i32: 3165 done = fold_brcond2(&ctx, op); 3166 break; 3167 case INDEX_op_bswap16: 3168 case INDEX_op_bswap32: 3169 case INDEX_op_bswap64: 3170 done = fold_bswap(&ctx, op); 3171 break; 3172 case INDEX_op_clz: 3173 case INDEX_op_ctz: 3174 done = fold_count_zeros(&ctx, op); 3175 break; 3176 case INDEX_op_ctpop: 3177 done = fold_ctpop(&ctx, op); 3178 break; 3179 case INDEX_op_deposit: 3180 done = fold_deposit(&ctx, op); 3181 break; 3182 case INDEX_op_divs: 3183 case INDEX_op_divu: 3184 done = fold_divide(&ctx, op); 3185 break; 3186 case INDEX_op_dup_vec: 3187 done = fold_dup(&ctx, op); 3188 break; 3189 case INDEX_op_dup2_vec: 3190 done = fold_dup2(&ctx, op); 3191 break; 3192 case INDEX_op_eqv: 3193 case INDEX_op_eqv_vec: 3194 done = fold_eqv(&ctx, op); 3195 break; 3196 case INDEX_op_extract: 3197 done = fold_extract(&ctx, op); 3198 break; 3199 case INDEX_op_extract2: 3200 done = fold_extract2(&ctx, op); 3201 break; 3202 case INDEX_op_ext_i32_i64: 3203 done = fold_exts(&ctx, op); 3204 break; 3205 case INDEX_op_extu_i32_i64: 3206 case INDEX_op_extrl_i64_i32: 3207 case INDEX_op_extrh_i64_i32: 3208 done = fold_extu(&ctx, op); 3209 break; 3210 case INDEX_op_ld8s: 3211 case INDEX_op_ld8u: 3212 case INDEX_op_ld16s: 3213 case INDEX_op_ld16u: 3214 case INDEX_op_ld32s: 3215 case INDEX_op_ld32u: 3216 done = fold_tcg_ld(&ctx, op); 3217 break; 3218 case INDEX_op_ld: 3219 case INDEX_op_ld_vec: 3220 done = fold_tcg_ld_memcopy(&ctx, op); 3221 break; 3222 case INDEX_op_st8: 3223 case INDEX_op_st16: 3224 case INDEX_op_st32: 3225 done = fold_tcg_st(&ctx, op); 3226 break; 3227 case INDEX_op_st: 3228 case INDEX_op_st_vec: 3229 done = fold_tcg_st_memcopy(&ctx, op); 3230 break; 3231 case INDEX_op_mb: 3232 done = fold_mb(&ctx, op); 3233 break; 3234 case INDEX_op_mov: 3235 case INDEX_op_mov_vec: 3236 done = fold_mov(&ctx, op); 3237 break; 3238 case INDEX_op_movcond: 3239 done = fold_movcond(&ctx, op); 3240 break; 3241 case INDEX_op_mul: 3242 
done = fold_mul(&ctx, op); 3243 break; 3244 case INDEX_op_mulsh: 3245 case INDEX_op_muluh: 3246 done = fold_mul_highpart(&ctx, op); 3247 break; 3248 case INDEX_op_muls2: 3249 case INDEX_op_mulu2: 3250 done = fold_multiply2(&ctx, op); 3251 break; 3252 case INDEX_op_nand: 3253 case INDEX_op_nand_vec: 3254 done = fold_nand(&ctx, op); 3255 break; 3256 case INDEX_op_neg: 3257 done = fold_neg(&ctx, op); 3258 break; 3259 case INDEX_op_nor: 3260 case INDEX_op_nor_vec: 3261 done = fold_nor(&ctx, op); 3262 break; 3263 case INDEX_op_not: 3264 case INDEX_op_not_vec: 3265 done = fold_not(&ctx, op); 3266 break; 3267 case INDEX_op_or: 3268 case INDEX_op_or_vec: 3269 done = fold_or(&ctx, op); 3270 break; 3271 case INDEX_op_orc: 3272 case INDEX_op_orc_vec: 3273 done = fold_orc(&ctx, op); 3274 break; 3275 case INDEX_op_qemu_ld: 3276 done = fold_qemu_ld_1reg(&ctx, op); 3277 break; 3278 case INDEX_op_qemu_ld2: 3279 done = fold_qemu_ld_2reg(&ctx, op); 3280 break; 3281 case INDEX_op_qemu_st: 3282 case INDEX_op_qemu_st2: 3283 done = fold_qemu_st(&ctx, op); 3284 break; 3285 case INDEX_op_rems: 3286 case INDEX_op_remu: 3287 done = fold_remainder(&ctx, op); 3288 break; 3289 case INDEX_op_rotl: 3290 case INDEX_op_rotr: 3291 case INDEX_op_sar: 3292 case INDEX_op_shl: 3293 case INDEX_op_shr: 3294 done = fold_shift(&ctx, op); 3295 break; 3296 case INDEX_op_setcond: 3297 done = fold_setcond(&ctx, op); 3298 break; 3299 case INDEX_op_negsetcond: 3300 done = fold_negsetcond(&ctx, op); 3301 break; 3302 case INDEX_op_setcond2_i32: 3303 done = fold_setcond2(&ctx, op); 3304 break; 3305 case INDEX_op_cmp_vec: 3306 done = fold_cmp_vec(&ctx, op); 3307 break; 3308 case INDEX_op_cmpsel_vec: 3309 done = fold_cmpsel_vec(&ctx, op); 3310 break; 3311 case INDEX_op_bitsel_vec: 3312 done = fold_bitsel_vec(&ctx, op); 3313 break; 3314 case INDEX_op_sextract: 3315 done = fold_sextract(&ctx, op); 3316 break; 3317 case INDEX_op_sub: 3318 done = fold_sub(&ctx, op); 3319 break; 3320 case INDEX_op_subbi: 3321 done = fold_subbi(&ctx, op); 3322 break; 3323 case INDEX_op_subbio: 3324 done = fold_subbio(&ctx, op); 3325 break; 3326 case INDEX_op_subbo: 3327 done = fold_subbo(&ctx, op); 3328 break; 3329 case INDEX_op_sub_vec: 3330 done = fold_sub_vec(&ctx, op); 3331 break; 3332 case INDEX_op_xor: 3333 case INDEX_op_xor_vec: 3334 done = fold_xor(&ctx, op); 3335 break; 3336 case INDEX_op_set_label: 3337 case INDEX_op_br: 3338 case INDEX_op_exit_tb: 3339 case INDEX_op_goto_tb: 3340 case INDEX_op_goto_ptr: 3341 finish_ebb(&ctx); 3342 done = true; 3343 break; 3344 default: 3345 done = finish_folding(&ctx, op); 3346 break; 3347 } 3348 tcg_debug_assert(done); 3349 } 3350 } 3351