1 /* 2 * Optimizations for Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2010 Samsung Electronics. 5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include "qemu/osdep.h" 27 #include "qemu/int128.h" 28 #include "qemu/interval-tree.h" 29 #include "tcg/tcg-op-common.h" 30 #include "tcg-internal.h" 31 #include "tcg-has.h" 32 33 34 typedef struct MemCopyInfo { 35 IntervalTreeNode itree; 36 QSIMPLEQ_ENTRY (MemCopyInfo) next; 37 TCGTemp *ts; 38 TCGType type; 39 } MemCopyInfo; 40 41 typedef struct TempOptInfo { 42 TCGTemp *prev_copy; 43 TCGTemp *next_copy; 44 QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy; 45 uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */ 46 uint64_t o_mask; /* mask bit is 1 if and only if value bit is 1 */ 47 uint64_t s_mask; /* mask bit is 1 if value bit matches msb */ 48 } TempOptInfo; 49 50 typedef struct OptContext { 51 TCGContext *tcg; 52 TCGOp *prev_mb; 53 TCGTempSet temps_used; 54 55 IntervalTreeRoot mem_copy; 56 QSIMPLEQ_HEAD(, MemCopyInfo) mem_free; 57 58 /* In flight values from optimization. */ 59 TCGType type; 60 int carry_state; /* -1 = non-constant, {0,1} = constant carry-in */ 61 } OptContext; 62 63 static inline TempOptInfo *ts_info(TCGTemp *ts) 64 { 65 return ts->state_ptr; 66 } 67 68 static inline TempOptInfo *arg_info(TCGArg arg) 69 { 70 return ts_info(arg_temp(arg)); 71 } 72 73 static inline bool ti_is_const(TempOptInfo *ti) 74 { 75 /* If all bits that are not known zeros are known ones, it's constant. */ 76 return ti->z_mask == ti->o_mask; 77 } 78 79 static inline uint64_t ti_const_val(TempOptInfo *ti) 80 { 81 /* If constant, both z_mask and o_mask contain the value. 
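 * When z_mask == o_mask, every bit is either known-zero or known-one,
 * so either mask can be returned as the value; e.g. the constant 5 has
 * z_mask == o_mask == 0x5.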
*/ 82 return ti->z_mask; 83 } 84 85 static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val) 86 { 87 return ti_is_const(ti) && ti_const_val(ti) == val; 88 } 89 90 static inline bool ts_is_const(TCGTemp *ts) 91 { 92 return ti_is_const(ts_info(ts)); 93 } 94 95 static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val) 96 { 97 return ti_is_const_val(ts_info(ts), val); 98 } 99 100 static inline bool arg_is_const(TCGArg arg) 101 { 102 return ts_is_const(arg_temp(arg)); 103 } 104 105 static inline uint64_t arg_const_val(TCGArg arg) 106 { 107 return ti_const_val(arg_info(arg)); 108 } 109 110 static inline bool arg_is_const_val(TCGArg arg, uint64_t val) 111 { 112 return ts_is_const_val(arg_temp(arg), val); 113 } 114 115 static inline bool ts_is_copy(TCGTemp *ts) 116 { 117 return ts_info(ts)->next_copy != ts; 118 } 119 120 static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b) 121 { 122 return a->kind < b->kind ? b : a; 123 } 124 125 /* Initialize and activate a temporary. */ 126 static void init_ts_info(OptContext *ctx, TCGTemp *ts) 127 { 128 size_t idx = temp_idx(ts); 129 TempOptInfo *ti; 130 131 if (test_bit(idx, ctx->temps_used.l)) { 132 return; 133 } 134 set_bit(idx, ctx->temps_used.l); 135 136 ti = ts->state_ptr; 137 if (ti == NULL) { 138 ti = tcg_malloc(sizeof(TempOptInfo)); 139 ts->state_ptr = ti; 140 } 141 142 ti->next_copy = ts; 143 ti->prev_copy = ts; 144 QSIMPLEQ_INIT(&ti->mem_copy); 145 if (ts->kind == TEMP_CONST) { 146 ti->z_mask = ts->val; 147 ti->o_mask = ts->val; 148 ti->s_mask = INT64_MIN >> clrsb64(ts->val); 149 } else { 150 ti->z_mask = -1; 151 ti->o_mask = 0; 152 ti->s_mask = 0; 153 } 154 } 155 156 static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l) 157 { 158 IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l); 159 return r ? container_of(r, MemCopyInfo, itree) : NULL; 160 } 161 162 static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l) 163 { 164 IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l); 165 return r ? container_of(r, MemCopyInfo, itree) : NULL; 166 } 167 168 static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc) 169 { 170 TCGTemp *ts = mc->ts; 171 TempOptInfo *ti = ts_info(ts); 172 173 interval_tree_remove(&mc->itree, &ctx->mem_copy); 174 QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next); 175 QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next); 176 } 177 178 static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l) 179 { 180 while (true) { 181 MemCopyInfo *mc = mem_copy_first(ctx, s, l); 182 if (!mc) { 183 break; 184 } 185 remove_mem_copy(ctx, mc); 186 } 187 } 188 189 static void remove_mem_copy_all(OptContext *ctx) 190 { 191 remove_mem_copy_in(ctx, 0, -1); 192 tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy)); 193 } 194 195 static TCGTemp *find_better_copy(TCGTemp *ts) 196 { 197 TCGTemp *i, *ret; 198 199 /* If this is already readonly, we can't do better. 
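 * Read-only temps (fixed registers and constants) have the largest
 * TCGTempKind values, and cmp_better_copy prefers the larger kind,
 * so no other copy in the list can rank above this one.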
*/ 200 if (temp_readonly(ts)) { 201 return ts; 202 } 203 204 ret = ts; 205 for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) { 206 ret = cmp_better_copy(ret, i); 207 } 208 return ret; 209 } 210 211 static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts) 212 { 213 TempOptInfo *si = ts_info(src_ts); 214 TempOptInfo *di = ts_info(dst_ts); 215 MemCopyInfo *mc; 216 217 QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) { 218 tcg_debug_assert(mc->ts == src_ts); 219 mc->ts = dst_ts; 220 } 221 QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy); 222 } 223 224 /* Reset TEMP's state, possibly removing the temp for the list of copies. */ 225 static void reset_ts(OptContext *ctx, TCGTemp *ts) 226 { 227 TempOptInfo *ti = ts_info(ts); 228 TCGTemp *pts = ti->prev_copy; 229 TCGTemp *nts = ti->next_copy; 230 TempOptInfo *pi = ts_info(pts); 231 TempOptInfo *ni = ts_info(nts); 232 233 ni->prev_copy = ti->prev_copy; 234 pi->next_copy = ti->next_copy; 235 ti->next_copy = ts; 236 ti->prev_copy = ts; 237 ti->z_mask = -1; 238 ti->o_mask = 0; 239 ti->s_mask = 0; 240 241 if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) { 242 if (ts == nts) { 243 /* Last temp copy being removed, the mem copies die. */ 244 MemCopyInfo *mc; 245 QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) { 246 interval_tree_remove(&mc->itree, &ctx->mem_copy); 247 } 248 QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy); 249 } else { 250 move_mem_copies(find_better_copy(nts), ts); 251 } 252 } 253 } 254 255 static void reset_temp(OptContext *ctx, TCGArg arg) 256 { 257 reset_ts(ctx, arg_temp(arg)); 258 } 259 260 static void record_mem_copy(OptContext *ctx, TCGType type, 261 TCGTemp *ts, intptr_t start, intptr_t last) 262 { 263 MemCopyInfo *mc; 264 TempOptInfo *ti; 265 266 mc = QSIMPLEQ_FIRST(&ctx->mem_free); 267 if (mc) { 268 QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next); 269 } else { 270 mc = tcg_malloc(sizeof(*mc)); 271 } 272 273 memset(mc, 0, sizeof(*mc)); 274 mc->itree.start = start; 275 mc->itree.last = last; 276 mc->type = type; 277 interval_tree_insert(&mc->itree, &ctx->mem_copy); 278 279 ts = find_better_copy(ts); 280 ti = ts_info(ts); 281 mc->ts = ts; 282 QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next); 283 } 284 285 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2) 286 { 287 TCGTemp *i; 288 289 if (ts1 == ts2) { 290 return true; 291 } 292 293 if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) { 294 return false; 295 } 296 297 for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) { 298 if (i == ts2) { 299 return true; 300 } 301 } 302 303 return false; 304 } 305 306 static bool args_are_copies(TCGArg arg1, TCGArg arg2) 307 { 308 return ts_are_copies(arg_temp(arg1), arg_temp(arg2)); 309 } 310 311 static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s) 312 { 313 MemCopyInfo *mc; 314 315 for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) { 316 if (mc->itree.start == s && mc->type == type) { 317 return find_better_copy(mc->ts); 318 } 319 } 320 return NULL; 321 } 322 323 static TCGArg arg_new_constant(OptContext *ctx, uint64_t val) 324 { 325 TCGType type = ctx->type; 326 TCGTemp *ts; 327 328 if (type == TCG_TYPE_I32) { 329 val = (int32_t)val; 330 } 331 332 ts = tcg_constant_internal(type, val); 333 init_ts_info(ctx, ts); 334 335 return temp_arg(ts); 336 } 337 338 static TCGArg arg_new_temp(OptContext *ctx) 339 { 340 TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB); 341 init_ts_info(ctx, ts); 342 return temp_arg(ts); 343 } 344 345 static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op, 346 TCGOpcode 
opc, unsigned narg) 347 { 348 return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg); 349 } 350 351 static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op, 352 TCGOpcode opc, unsigned narg) 353 { 354 return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg); 355 } 356 357 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) 358 { 359 TCGTemp *dst_ts = arg_temp(dst); 360 TCGTemp *src_ts = arg_temp(src); 361 TempOptInfo *di; 362 TempOptInfo *si; 363 TCGOpcode new_op; 364 365 if (ts_are_copies(dst_ts, src_ts)) { 366 tcg_op_remove(ctx->tcg, op); 367 return true; 368 } 369 370 reset_ts(ctx, dst_ts); 371 di = ts_info(dst_ts); 372 si = ts_info(src_ts); 373 374 switch (ctx->type) { 375 case TCG_TYPE_I32: 376 case TCG_TYPE_I64: 377 new_op = INDEX_op_mov; 378 break; 379 case TCG_TYPE_V64: 380 case TCG_TYPE_V128: 381 case TCG_TYPE_V256: 382 /* TCGOP_TYPE and TCGOP_VECE remain unchanged. */ 383 new_op = INDEX_op_mov_vec; 384 break; 385 default: 386 g_assert_not_reached(); 387 } 388 op->opc = new_op; 389 op->args[0] = dst; 390 op->args[1] = src; 391 392 di->z_mask = si->z_mask; 393 di->o_mask = si->o_mask; 394 di->s_mask = si->s_mask; 395 396 if (src_ts->type == dst_ts->type) { 397 TempOptInfo *ni = ts_info(si->next_copy); 398 399 di->next_copy = si->next_copy; 400 di->prev_copy = src_ts; 401 ni->prev_copy = dst_ts; 402 si->next_copy = dst_ts; 403 404 if (!QSIMPLEQ_EMPTY(&si->mem_copy) 405 && cmp_better_copy(src_ts, dst_ts) == dst_ts) { 406 move_mem_copies(dst_ts, src_ts); 407 } 408 } else if (dst_ts->type == TCG_TYPE_I32) { 409 di->z_mask = (int32_t)di->z_mask; 410 di->o_mask = (int32_t)di->o_mask; 411 di->s_mask |= INT32_MIN; 412 } else { 413 di->z_mask |= MAKE_64BIT_MASK(32, 32); 414 di->o_mask = (uint32_t)di->o_mask; 415 di->s_mask = INT64_MIN; 416 } 417 return true; 418 } 419 420 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op, 421 TCGArg dst, uint64_t val) 422 { 423 /* Convert movi to mov with constant temp. 
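 * The constant temp comes from arg_new_constant, so the ordinary mov
 * path records the value and later uses see it as a copy of that constant.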
*/ 424 return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val)); 425 } 426 427 static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, 428 uint64_t x, uint64_t y) 429 { 430 uint64_t l64, h64; 431 432 switch (op) { 433 case INDEX_op_add: 434 return x + y; 435 436 case INDEX_op_sub: 437 return x - y; 438 439 case INDEX_op_mul: 440 return x * y; 441 442 case INDEX_op_and: 443 case INDEX_op_and_vec: 444 return x & y; 445 446 case INDEX_op_or: 447 case INDEX_op_or_vec: 448 return x | y; 449 450 case INDEX_op_xor: 451 case INDEX_op_xor_vec: 452 return x ^ y; 453 454 case INDEX_op_shl: 455 if (type == TCG_TYPE_I32) { 456 return (uint32_t)x << (y & 31); 457 } 458 return (uint64_t)x << (y & 63); 459 460 case INDEX_op_shr: 461 if (type == TCG_TYPE_I32) { 462 return (uint32_t)x >> (y & 31); 463 } 464 return (uint64_t)x >> (y & 63); 465 466 case INDEX_op_sar: 467 if (type == TCG_TYPE_I32) { 468 return (int32_t)x >> (y & 31); 469 } 470 return (int64_t)x >> (y & 63); 471 472 case INDEX_op_rotr: 473 if (type == TCG_TYPE_I32) { 474 return ror32(x, y & 31); 475 } 476 return ror64(x, y & 63); 477 478 case INDEX_op_rotl: 479 if (type == TCG_TYPE_I32) { 480 return rol32(x, y & 31); 481 } 482 return rol64(x, y & 63); 483 484 case INDEX_op_not: 485 case INDEX_op_not_vec: 486 return ~x; 487 488 case INDEX_op_neg: 489 return -x; 490 491 case INDEX_op_andc: 492 case INDEX_op_andc_vec: 493 return x & ~y; 494 495 case INDEX_op_orc: 496 case INDEX_op_orc_vec: 497 return x | ~y; 498 499 case INDEX_op_eqv: 500 case INDEX_op_eqv_vec: 501 return ~(x ^ y); 502 503 case INDEX_op_nand: 504 case INDEX_op_nand_vec: 505 return ~(x & y); 506 507 case INDEX_op_nor: 508 case INDEX_op_nor_vec: 509 return ~(x | y); 510 511 case INDEX_op_clz: 512 if (type == TCG_TYPE_I32) { 513 return (uint32_t)x ? clz32(x) : y; 514 } 515 return x ? clz64(x) : y; 516 517 case INDEX_op_ctz: 518 if (type == TCG_TYPE_I32) { 519 return (uint32_t)x ? ctz32(x) : y; 520 } 521 return x ? ctz64(x) : y; 522 523 case INDEX_op_ctpop: 524 return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x); 525 526 case INDEX_op_bswap16: 527 x = bswap16(x); 528 return y & TCG_BSWAP_OS ? (int16_t)x : x; 529 530 case INDEX_op_bswap32: 531 x = bswap32(x); 532 return y & TCG_BSWAP_OS ? (int32_t)x : x; 533 534 case INDEX_op_bswap64: 535 return bswap64(x); 536 537 case INDEX_op_ext_i32_i64: 538 return (int32_t)x; 539 540 case INDEX_op_extu_i32_i64: 541 case INDEX_op_extrl_i64_i32: 542 return (uint32_t)x; 543 544 case INDEX_op_extrh_i64_i32: 545 return (uint64_t)x >> 32; 546 547 case INDEX_op_muluh: 548 if (type == TCG_TYPE_I32) { 549 return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; 550 } 551 mulu64(&l64, &h64, x, y); 552 return h64; 553 554 case INDEX_op_mulsh: 555 if (type == TCG_TYPE_I32) { 556 return ((int64_t)(int32_t)x * (int32_t)y) >> 32; 557 } 558 muls64(&l64, &h64, x, y); 559 return h64; 560 561 case INDEX_op_divs: 562 /* Avoid crashing on divide by zero, otherwise undefined. */ 563 if (type == TCG_TYPE_I32) { 564 return (int32_t)x / ((int32_t)y ? : 1); 565 } 566 return (int64_t)x / ((int64_t)y ? : 1); 567 568 case INDEX_op_divu: 569 if (type == TCG_TYPE_I32) { 570 return (uint32_t)x / ((uint32_t)y ? : 1); 571 } 572 return (uint64_t)x / ((uint64_t)y ? : 1); 573 574 case INDEX_op_rems: 575 if (type == TCG_TYPE_I32) { 576 return (int32_t)x % ((int32_t)y ? : 1); 577 } 578 return (int64_t)x % ((int64_t)y ? : 1); 579 580 case INDEX_op_remu: 581 if (type == TCG_TYPE_I32) { 582 return (uint32_t)x % ((uint32_t)y ? 
: 1); 583 } 584 return (uint64_t)x % ((uint64_t)y ? : 1); 585 586 default: 587 g_assert_not_reached(); 588 } 589 } 590 591 static uint64_t do_constant_folding(TCGOpcode op, TCGType type, 592 uint64_t x, uint64_t y) 593 { 594 uint64_t res = do_constant_folding_2(op, type, x, y); 595 if (type == TCG_TYPE_I32) { 596 res = (int32_t)res; 597 } 598 return res; 599 } 600 601 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) 602 { 603 switch (c) { 604 case TCG_COND_EQ: 605 return x == y; 606 case TCG_COND_NE: 607 return x != y; 608 case TCG_COND_LT: 609 return (int32_t)x < (int32_t)y; 610 case TCG_COND_GE: 611 return (int32_t)x >= (int32_t)y; 612 case TCG_COND_LE: 613 return (int32_t)x <= (int32_t)y; 614 case TCG_COND_GT: 615 return (int32_t)x > (int32_t)y; 616 case TCG_COND_LTU: 617 return x < y; 618 case TCG_COND_GEU: 619 return x >= y; 620 case TCG_COND_LEU: 621 return x <= y; 622 case TCG_COND_GTU: 623 return x > y; 624 case TCG_COND_TSTEQ: 625 return (x & y) == 0; 626 case TCG_COND_TSTNE: 627 return (x & y) != 0; 628 case TCG_COND_ALWAYS: 629 case TCG_COND_NEVER: 630 break; 631 } 632 g_assert_not_reached(); 633 } 634 635 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) 636 { 637 switch (c) { 638 case TCG_COND_EQ: 639 return x == y; 640 case TCG_COND_NE: 641 return x != y; 642 case TCG_COND_LT: 643 return (int64_t)x < (int64_t)y; 644 case TCG_COND_GE: 645 return (int64_t)x >= (int64_t)y; 646 case TCG_COND_LE: 647 return (int64_t)x <= (int64_t)y; 648 case TCG_COND_GT: 649 return (int64_t)x > (int64_t)y; 650 case TCG_COND_LTU: 651 return x < y; 652 case TCG_COND_GEU: 653 return x >= y; 654 case TCG_COND_LEU: 655 return x <= y; 656 case TCG_COND_GTU: 657 return x > y; 658 case TCG_COND_TSTEQ: 659 return (x & y) == 0; 660 case TCG_COND_TSTNE: 661 return (x & y) != 0; 662 case TCG_COND_ALWAYS: 663 case TCG_COND_NEVER: 664 break; 665 } 666 g_assert_not_reached(); 667 } 668 669 static int do_constant_folding_cond_eq(TCGCond c) 670 { 671 switch (c) { 672 case TCG_COND_GT: 673 case TCG_COND_LTU: 674 case TCG_COND_LT: 675 case TCG_COND_GTU: 676 case TCG_COND_NE: 677 return 0; 678 case TCG_COND_GE: 679 case TCG_COND_GEU: 680 case TCG_COND_LE: 681 case TCG_COND_LEU: 682 case TCG_COND_EQ: 683 return 1; 684 case TCG_COND_TSTEQ: 685 case TCG_COND_TSTNE: 686 return -1; 687 case TCG_COND_ALWAYS: 688 case TCG_COND_NEVER: 689 break; 690 } 691 g_assert_not_reached(); 692 } 693 694 /* 695 * Return -1 if the condition can't be simplified, 696 * and the result of the condition (0 or 1) if it can. 697 */ 698 static int do_constant_folding_cond(TCGType type, TCGArg x, 699 TCGArg y, TCGCond c) 700 { 701 if (arg_is_const(x) && arg_is_const(y)) { 702 uint64_t xv = arg_const_val(x); 703 uint64_t yv = arg_const_val(y); 704 705 switch (type) { 706 case TCG_TYPE_I32: 707 return do_constant_folding_cond_32(xv, yv, c); 708 case TCG_TYPE_I64: 709 return do_constant_folding_cond_64(xv, yv, c); 710 default: 711 /* Only scalar comparisons are optimizable */ 712 return -1; 713 } 714 } else if (args_are_copies(x, y)) { 715 return do_constant_folding_cond_eq(c); 716 } else if (arg_is_const_val(y, 0)) { 717 switch (c) { 718 case TCG_COND_LTU: 719 case TCG_COND_TSTNE: 720 return 0; 721 case TCG_COND_GEU: 722 case TCG_COND_TSTEQ: 723 return 1; 724 default: 725 return -1; 726 } 727 } 728 return -1; 729 } 730 731 /** 732 * swap_commutative: 733 * @dest: TCGArg of the destination argument, or NO_DEST. 
734 * @p1: first paired argument 735 * @p2: second paired argument 736 * 737 * If *@p1 is a constant and *@p2 is not, swap. 738 * If *@p2 matches @dest, swap. 739 * Return true if a swap was performed. 740 */ 741 742 #define NO_DEST temp_arg(NULL) 743 744 static int pref_commutative(TempOptInfo *ti) 745 { 746 /* Slight preference for non-zero constants second. */ 747 return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 3 : 2; 748 } 749 750 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) 751 { 752 TCGArg a1 = *p1, a2 = *p2; 753 int sum = 0; 754 sum += pref_commutative(arg_info(a1)); 755 sum -= pref_commutative(arg_info(a2)); 756 757 /* Prefer the constant in second argument, and then the form 758 op a, a, b, which is better handled on non-RISC hosts. */ 759 if (sum > 0 || (sum == 0 && dest == a2)) { 760 *p1 = a2; 761 *p2 = a1; 762 return true; 763 } 764 return false; 765 } 766 767 static bool swap_commutative2(TCGArg *p1, TCGArg *p2) 768 { 769 int sum = 0; 770 sum += pref_commutative(arg_info(p1[0])); 771 sum += pref_commutative(arg_info(p1[1])); 772 sum -= pref_commutative(arg_info(p2[0])); 773 sum -= pref_commutative(arg_info(p2[1])); 774 if (sum > 0) { 775 TCGArg t; 776 t = p1[0], p1[0] = p2[0], p2[0] = t; 777 t = p1[1], p1[1] = p2[1], p2[1] = t; 778 return true; 779 } 780 return false; 781 } 782 783 /* 784 * Return -1 if the condition can't be simplified, 785 * and the result of the condition (0 or 1) if it can. 786 */ 787 static bool fold_and(OptContext *ctx, TCGOp *op); 788 static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest, 789 TCGArg *p1, TCGArg *p2, TCGArg *pcond) 790 { 791 TCGCond cond; 792 TempOptInfo *i1; 793 bool swap; 794 int r; 795 796 swap = swap_commutative(dest, p1, p2); 797 cond = *pcond; 798 if (swap) { 799 *pcond = cond = tcg_swap_cond(cond); 800 } 801 802 r = do_constant_folding_cond(ctx->type, *p1, *p2, cond); 803 if (r >= 0) { 804 return r; 805 } 806 if (!is_tst_cond(cond)) { 807 return -1; 808 } 809 810 i1 = arg_info(*p1); 811 812 /* 813 * TSTNE x,x -> NE x,0 814 * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x 815 */ 816 if (args_are_copies(*p1, *p2) || 817 (arg_is_const(*p2) && (i1->z_mask & ~arg_const_val(*p2)) == 0)) { 818 *p2 = arg_new_constant(ctx, 0); 819 *pcond = tcg_tst_eqne_cond(cond); 820 return -1; 821 } 822 823 /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */ 824 if (arg_is_const(*p2) && (arg_const_val(*p2) & ~i1->s_mask) == 0) { 825 *p2 = arg_new_constant(ctx, 0); 826 *pcond = tcg_tst_ltge_cond(cond); 827 return -1; 828 } 829 830 /* Expand to AND with a temporary if no backend support. 
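 * i.e. TSTNE x,y becomes tmp = and x,y; NE tmp,0 (and TSTEQ becomes EQ).
 * fold_and is run on the inserted op so the new AND can itself be
 * simplified or constant folded.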
     */
    if (!TCG_TARGET_HAS_tst) {
        TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
        TCGArg tmp = arg_new_temp(ctx);

        op2->args[0] = tmp;
        op2->args[1] = *p1;
        op2->args[2] = *p2;
        fold_and(ctx, op2);

        *p1 = tmp;
        *p2 = arg_new_constant(ctx, 0);
        *pcond = tcg_tst_eqne_cond(cond);
    }
    return -1;
}

static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
{
    TCGArg al, ah, bl, bh;
    TCGCond c;
    bool swap;
    int r;

    swap = swap_commutative2(args, args + 2);
    c = args[4];
    if (swap) {
        args[4] = c = tcg_swap_cond(c);
    }

    al = args[0];
    ah = args[1];
    bl = args[2];
    bh = args[3];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_const_val(bl);
        tcg_target_ulong bhv = arg_const_val(bh);
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_const_val(al);
            tcg_target_ulong ahv = arg_const_val(ah);
            uint64_t a = deposit64(alv, 32, 32, ahv);

            r = do_constant_folding_cond_64(a, b, c);
            if (r >= 0) {
                return r;
            }
        }

        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
            case TCG_COND_TSTNE:
                return 0;
            case TCG_COND_GEU:
            case TCG_COND_TSTEQ:
                return 1;
            default:
                break;
            }
        }

        /* TSTNE x,-1 -> NE x,0 */
        if (b == -1 && is_tst_cond(c)) {
            args[3] = args[2] = arg_new_constant(ctx, 0);
            args[4] = tcg_tst_eqne_cond(c);
            return -1;
        }

        /* TSTNE x,sign -> LT x,0 */
        if (b == INT64_MIN && is_tst_cond(c)) {
            /* bl must be 0, so copy that to bh */
            args[3] = bl;
            args[4] = tcg_tst_ltge_cond(c);
            return -1;
        }
    }

    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        r = do_constant_folding_cond_eq(c);
        if (r >= 0) {
            return r;
        }

        /* TSTNE x,x -> NE x,0 */
        if (is_tst_cond(c)) {
            args[3] = args[2] = arg_new_constant(ctx, 0);
            args[4] = tcg_tst_eqne_cond(c);
            return -1;
        }
    }

    /* Expand to AND with a temporary if no backend support. */
    if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) {
        TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3);
        TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
        TCGArg t1 = arg_new_temp(ctx);
        TCGArg t2 = arg_new_temp(ctx);

        op1->args[0] = t1;
        op1->args[1] = al;
        op1->args[2] = bl;
        fold_and(ctx, op1);

        op2->args[0] = t2;
        op2->args[1] = ah;
        op2->args[2] = bh;
        fold_and(ctx, op2);

        args[0] = t1;
        args[1] = t2;
        args[3] = args[2] = arg_new_constant(ctx, 0);
        args[4] = tcg_tst_eqne_cond(c);
    }
    return -1;
}

static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
{
    for (int i = 0; i < nb_args; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        init_ts_info(ctx, ts);
    }
}

static void copy_propagate(OptContext *ctx, TCGOp *op,
                           int nb_oargs, int nb_iargs)
{
    for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (ts_is_copy(ts)) {
            op->args[i] = temp_arg(find_better_copy(ts));
        }
    }
}

static void finish_bb(OptContext *ctx)
{
    /* Memory barriers are only optimized within a basic block. */
    ctx->prev_mb = NULL;
}

static void finish_ebb(OptContext *ctx)
{
    finish_bb(ctx);
    /* We only optimize across extended basic blocks.
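     * Once another entry point can reach this code, the constant and
     * copy facts recorded so far may no longer hold, so both the
     * per-temp info and the memory-copy tracking are discarded here.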
*/ 978 memset(&ctx->temps_used, 0, sizeof(ctx->temps_used)); 979 remove_mem_copy_all(ctx); 980 } 981 982 static bool finish_folding(OptContext *ctx, TCGOp *op) 983 { 984 const TCGOpDef *def = &tcg_op_defs[op->opc]; 985 int i, nb_oargs; 986 987 nb_oargs = def->nb_oargs; 988 for (i = 0; i < nb_oargs; i++) { 989 TCGTemp *ts = arg_temp(op->args[i]); 990 reset_ts(ctx, ts); 991 } 992 return true; 993 } 994 995 /* 996 * The fold_* functions return true when processing is complete, 997 * usually by folding the operation to a constant or to a copy, 998 * and calling tcg_opt_gen_{mov,movi}. They may do other things, 999 * like collect information about the value produced, for use in 1000 * optimizing a subsequent operation. 1001 * 1002 * These first fold_* functions are all helpers, used by other 1003 * folders for more specific operations. 1004 */ 1005 1006 static bool fold_const1(OptContext *ctx, TCGOp *op) 1007 { 1008 if (arg_is_const(op->args[1])) { 1009 uint64_t t = arg_const_val(op->args[1]); 1010 1011 t = do_constant_folding(op->opc, ctx->type, t, 0); 1012 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1013 } 1014 return false; 1015 } 1016 1017 static bool fold_const2(OptContext *ctx, TCGOp *op) 1018 { 1019 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1020 uint64_t t1 = arg_const_val(op->args[1]); 1021 uint64_t t2 = arg_const_val(op->args[2]); 1022 1023 t1 = do_constant_folding(op->opc, ctx->type, t1, t2); 1024 return tcg_opt_gen_movi(ctx, op, op->args[0], t1); 1025 } 1026 return false; 1027 } 1028 1029 static bool fold_commutative(OptContext *ctx, TCGOp *op) 1030 { 1031 swap_commutative(op->args[0], &op->args[1], &op->args[2]); 1032 return false; 1033 } 1034 1035 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op) 1036 { 1037 swap_commutative(op->args[0], &op->args[1], &op->args[2]); 1038 return fold_const2(ctx, op); 1039 } 1040 1041 /* 1042 * Record "zero" and "sign" masks for the single output of @op. 1043 * See TempOptInfo definition of z_mask and s_mask. 1044 * If z_mask allows, fold the output to constant zero. 1045 * The passed s_mask may be augmented by z_mask. 1046 */ 1047 static bool fold_masks_zosa_int(OptContext *ctx, TCGOp *op, 1048 uint64_t z_mask, uint64_t o_mask, 1049 int64_t s_mask, uint64_t a_mask) 1050 { 1051 const TCGOpDef *def = &tcg_op_defs[op->opc]; 1052 TCGTemp *ts; 1053 TempOptInfo *ti; 1054 int rep; 1055 1056 /* Only single-output opcodes are supported here. */ 1057 tcg_debug_assert(def->nb_oargs == 1); 1058 1059 /* 1060 * 32-bit ops generate 32-bit results, which for the purpose of 1061 * simplifying tcg are sign-extended. Certainly that's how we 1062 * represent our constants elsewhere. Note that the bits will 1063 * be reset properly for a 64-bit value when encountering the 1064 * type changing opcodes. 1065 */ 1066 if (ctx->type == TCG_TYPE_I32) { 1067 z_mask = (int32_t)z_mask; 1068 o_mask = (int32_t)o_mask; 1069 s_mask |= INT32_MIN; 1070 a_mask = (uint32_t)a_mask; 1071 } 1072 1073 /* Bits that are known 1 and bits that are known 0 must not overlap. */ 1074 tcg_debug_assert((o_mask & ~z_mask) == 0); 1075 1076 /* All bits that are not known zero are known one is a constant. */ 1077 if (z_mask == o_mask) { 1078 return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask); 1079 } 1080 1081 /* If no bits are affected, the operation devolves to a copy. 
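 * For example, and r,x,m where m has 1s in every bit that may be nonzero
 * in x gives a_mask == 0 in fold_and, and r becomes a plain copy of x.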
     */
    if (a_mask == 0) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }

    ts = arg_temp(op->args[0]);
    reset_ts(ctx, ts);

    ti = ts_info(ts);
    ti->z_mask = z_mask;
    ti->o_mask = o_mask;

    /* Canonicalize s_mask and incorporate data from z_mask. */
    rep = clz64(~s_mask);
    rep = MAX(rep, clz64(z_mask));
    rep = MAX(rep, clz64(~o_mask));
    rep = MAX(rep - 1, 0);
    ti->s_mask = INT64_MIN >> rep;

    return false;
}

static bool fold_masks_zosa(OptContext *ctx, TCGOp *op, uint64_t z_mask,
                            uint64_t o_mask, int64_t s_mask, uint64_t a_mask)
{
    fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask);
    return true;
}

static bool fold_masks_zos(OptContext *ctx, TCGOp *op,
                           uint64_t z_mask, uint64_t o_mask, uint64_t s_mask)
{
    return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, -1);
}

static bool fold_masks_zo(OptContext *ctx, TCGOp *op,
                          uint64_t z_mask, uint64_t o_mask)
{
    return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, -1);
}

static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
                          uint64_t z_mask, uint64_t s_mask)
{
    return fold_masks_zosa(ctx, op, z_mask, 0, s_mask, -1);
}

static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
{
    return fold_masks_zosa(ctx, op, z_mask, 0, 0, -1);
}

static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
{
    return fold_masks_zosa(ctx, op, -1, 0, s_mask, -1);
}

/*
 * Convert @op to NOT, if NOT is supported by the host.
 * Return true if the conversion is successful, which will still
 * indicate that the processing is complete.
 */
static bool fold_not(OptContext *ctx, TCGOp *op);
static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
{
    TCGOpcode not_op;
    bool have_not;

    switch (ctx->type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        not_op = INDEX_op_not;
        have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0);
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        not_op = INDEX_op_not_vec;
        have_not = TCG_TARGET_HAS_not_vec;
        break;
    default:
        g_assert_not_reached();
    }
    if (have_not) {
        op->opc = not_op;
        op->args[1] = op->args[idx];
        return fold_not(ctx, op);
    }
    return false;
}

/* If the binary operation has first argument @i, fold to @i. */
static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[1], i)) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has first argument @i, fold to NOT. */
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[1], i)) {
        return fold_to_not(ctx, op, 2);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to @i. */
static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[2], i)) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to identity.
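 * e.g. add r,x,0 or mul r,x,1 become mov r,x.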
*/ 1199 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i) 1200 { 1201 if (arg_is_const_val(op->args[2], i)) { 1202 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 1203 } 1204 return false; 1205 } 1206 1207 /* If the binary operation has second argument @i, fold to NOT. */ 1208 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i) 1209 { 1210 if (arg_is_const_val(op->args[2], i)) { 1211 return fold_to_not(ctx, op, 1); 1212 } 1213 return false; 1214 } 1215 1216 /* If the binary operation has both arguments equal, fold to @i. */ 1217 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i) 1218 { 1219 if (args_are_copies(op->args[1], op->args[2])) { 1220 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 1221 } 1222 return false; 1223 } 1224 1225 /* If the binary operation has both arguments equal, fold to identity. */ 1226 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op) 1227 { 1228 if (args_are_copies(op->args[1], op->args[2])) { 1229 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 1230 } 1231 return false; 1232 } 1233 1234 /* 1235 * These outermost fold_<op> functions are sorted alphabetically. 1236 * 1237 * The ordering of the transformations should be: 1238 * 1) those that produce a constant 1239 * 2) those that produce a copy 1240 * 3) those that produce information about the result value. 1241 */ 1242 1243 static bool fold_addco(OptContext *ctx, TCGOp *op); 1244 static bool fold_or(OptContext *ctx, TCGOp *op); 1245 static bool fold_orc(OptContext *ctx, TCGOp *op); 1246 static bool fold_subbo(OptContext *ctx, TCGOp *op); 1247 static bool fold_xor(OptContext *ctx, TCGOp *op); 1248 1249 static bool fold_add(OptContext *ctx, TCGOp *op) 1250 { 1251 if (fold_const2_commutative(ctx, op) || 1252 fold_xi_to_x(ctx, op, 0)) { 1253 return true; 1254 } 1255 return finish_folding(ctx, op); 1256 } 1257 1258 /* We cannot as yet do_constant_folding with vectors. */ 1259 static bool fold_add_vec(OptContext *ctx, TCGOp *op) 1260 { 1261 if (fold_commutative(ctx, op) || 1262 fold_xi_to_x(ctx, op, 0)) { 1263 return true; 1264 } 1265 return finish_folding(ctx, op); 1266 } 1267 1268 static void squash_prev_carryout(OptContext *ctx, TCGOp *op) 1269 { 1270 TempOptInfo *t2; 1271 1272 op = QTAILQ_PREV(op, link); 1273 switch (op->opc) { 1274 case INDEX_op_addco: 1275 op->opc = INDEX_op_add; 1276 fold_add(ctx, op); 1277 break; 1278 case INDEX_op_addcio: 1279 op->opc = INDEX_op_addci; 1280 break; 1281 case INDEX_op_addc1o: 1282 op->opc = INDEX_op_add; 1283 t2 = arg_info(op->args[2]); 1284 if (ti_is_const(t2)) { 1285 op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1); 1286 /* Perform other constant folding, if needed. */ 1287 fold_add(ctx, op); 1288 } else { 1289 TCGArg ret = op->args[0]; 1290 op = opt_insert_after(ctx, op, INDEX_op_add, 3); 1291 op->args[0] = ret; 1292 op->args[1] = ret; 1293 op->args[2] = arg_new_constant(ctx, 1); 1294 } 1295 break; 1296 default: 1297 g_assert_not_reached(); 1298 } 1299 } 1300 1301 static bool fold_addci(OptContext *ctx, TCGOp *op) 1302 { 1303 fold_commutative(ctx, op); 1304 1305 if (ctx->carry_state < 0) { 1306 return finish_folding(ctx, op); 1307 } 1308 1309 squash_prev_carryout(ctx, op); 1310 op->opc = INDEX_op_add; 1311 1312 if (ctx->carry_state > 0) { 1313 TempOptInfo *t2 = arg_info(op->args[2]); 1314 1315 /* 1316 * Propagate the known carry-in into a constant, if possible. 1317 * Otherwise emit a second add +1. 
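 * i.e. with a known carry-in of 1, addci d,a,b becomes add d,a,b+1
 * when b is constant, otherwise add d,a,b followed by add d,d,1.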
1318 */ 1319 if (ti_is_const(t2)) { 1320 op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1); 1321 } else { 1322 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3); 1323 1324 op2->args[0] = op->args[0]; 1325 op2->args[1] = op->args[1]; 1326 op2->args[2] = op->args[2]; 1327 fold_add(ctx, op2); 1328 1329 op->args[1] = op->args[0]; 1330 op->args[2] = arg_new_constant(ctx, 1); 1331 } 1332 } 1333 1334 ctx->carry_state = -1; 1335 return fold_add(ctx, op); 1336 } 1337 1338 static bool fold_addcio(OptContext *ctx, TCGOp *op) 1339 { 1340 TempOptInfo *t1, *t2; 1341 int carry_out = -1; 1342 uint64_t sum, max; 1343 1344 fold_commutative(ctx, op); 1345 t1 = arg_info(op->args[1]); 1346 t2 = arg_info(op->args[2]); 1347 1348 /* 1349 * The z_mask value is >= the maximum value that can be represented 1350 * with the known zero bits. So adding the z_mask values will not 1351 * overflow if and only if the true values cannot overflow. 1352 */ 1353 if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) && 1354 !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) { 1355 carry_out = 0; 1356 } 1357 1358 if (ctx->carry_state < 0) { 1359 ctx->carry_state = carry_out; 1360 return finish_folding(ctx, op); 1361 } 1362 1363 squash_prev_carryout(ctx, op); 1364 if (ctx->carry_state == 0) { 1365 goto do_addco; 1366 } 1367 1368 /* Propagate the known carry-in into a constant, if possible. */ 1369 max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX; 1370 if (ti_is_const(t2)) { 1371 uint64_t v = ti_const_val(t2) & max; 1372 if (v < max) { 1373 op->args[2] = arg_new_constant(ctx, v + 1); 1374 goto do_addco; 1375 } 1376 /* max + known carry in produces known carry out. */ 1377 carry_out = 1; 1378 } 1379 if (ti_is_const(t1)) { 1380 uint64_t v = ti_const_val(t1) & max; 1381 if (v < max) { 1382 op->args[1] = arg_new_constant(ctx, v + 1); 1383 goto do_addco; 1384 } 1385 carry_out = 1; 1386 } 1387 1388 /* Adjust the opcode to remember the known carry-in. */ 1389 op->opc = INDEX_op_addc1o; 1390 ctx->carry_state = carry_out; 1391 return finish_folding(ctx, op); 1392 1393 do_addco: 1394 op->opc = INDEX_op_addco; 1395 return fold_addco(ctx, op); 1396 } 1397 1398 static bool fold_addco(OptContext *ctx, TCGOp *op) 1399 { 1400 TempOptInfo *t1, *t2; 1401 int carry_out = -1; 1402 uint64_t ign; 1403 1404 fold_commutative(ctx, op); 1405 t1 = arg_info(op->args[1]); 1406 t2 = arg_info(op->args[2]); 1407 1408 if (ti_is_const(t2)) { 1409 uint64_t v2 = ti_const_val(t2); 1410 1411 if (ti_is_const(t1)) { 1412 uint64_t v1 = ti_const_val(t1); 1413 /* Given sign-extension of z_mask for I32, we need not truncate. */ 1414 carry_out = uadd64_overflow(v1, v2, &ign); 1415 } else if (v2 == 0) { 1416 carry_out = 0; 1417 } 1418 } else { 1419 /* 1420 * The z_mask value is >= the maximum value that can be represented 1421 * with the known zero bits. So adding the z_mask values will not 1422 * overflow if and only if the true values cannot overflow. 
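 * (Each value satisfies v & ~z_mask == 0, hence v <= z_mask as an
 * unsigned number, so a carry-free sum of the z_masks bounds the
 * real sum.)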
1423 */ 1424 if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) { 1425 carry_out = 0; 1426 } 1427 } 1428 ctx->carry_state = carry_out; 1429 return finish_folding(ctx, op); 1430 } 1431 1432 static bool fold_and(OptContext *ctx, TCGOp *op) 1433 { 1434 uint64_t z_mask, o_mask, s_mask, a_mask; 1435 TempOptInfo *t1, *t2; 1436 1437 if (fold_const2_commutative(ctx, op)) { 1438 return true; 1439 } 1440 1441 t1 = arg_info(op->args[1]); 1442 t2 = arg_info(op->args[2]); 1443 1444 z_mask = t1->z_mask & t2->z_mask; 1445 o_mask = t1->o_mask & t2->o_mask; 1446 1447 /* 1448 * Sign repetitions are perforce all identical, whether they are 1 or 0. 1449 * Bitwise operations preserve the relative quantity of the repetitions. 1450 */ 1451 s_mask = t1->s_mask & t2->s_mask; 1452 1453 /* Affected bits are those not known zero, masked by those known one. */ 1454 a_mask = t1->z_mask & ~t2->o_mask; 1455 1456 if (!fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask)) { 1457 if (op->opc == INDEX_op_and && ti_is_const(t2)) { 1458 /* 1459 * Canonicalize on extract, if valid. This aids x86 with its 1460 * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode 1461 * which does not require matching operands. Other backends can 1462 * trivially expand the extract to AND during code generation. 1463 */ 1464 uint64_t val = ti_const_val(t2); 1465 if (!(val & (val + 1))) { 1466 unsigned len = ctz64(~val); 1467 if (TCG_TARGET_extract_valid(ctx->type, 0, len)) { 1468 op->opc = INDEX_op_extract; 1469 op->args[2] = 0; 1470 op->args[3] = len; 1471 } 1472 } 1473 } else { 1474 fold_xx_to_x(ctx, op); 1475 } 1476 } 1477 return true; 1478 } 1479 1480 static bool fold_andc(OptContext *ctx, TCGOp *op) 1481 { 1482 uint64_t z_mask, o_mask, s_mask, a_mask; 1483 TempOptInfo *t1, *t2; 1484 1485 if (fold_const2(ctx, op)) { 1486 return true; 1487 } 1488 1489 t1 = arg_info(op->args[1]); 1490 t2 = arg_info(op->args[2]); 1491 1492 if (ti_is_const(t2)) { 1493 /* Fold andc r,x,i to and r,x,~i. */ 1494 switch (ctx->type) { 1495 case TCG_TYPE_I32: 1496 case TCG_TYPE_I64: 1497 op->opc = INDEX_op_and; 1498 break; 1499 case TCG_TYPE_V64: 1500 case TCG_TYPE_V128: 1501 case TCG_TYPE_V256: 1502 op->opc = INDEX_op_and_vec; 1503 break; 1504 default: 1505 g_assert_not_reached(); 1506 } 1507 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); 1508 return fold_and(ctx, op); 1509 } 1510 if (fold_xx_to_i(ctx, op, 0) || 1511 fold_ix_to_not(ctx, op, -1)) { 1512 return true; 1513 } 1514 1515 z_mask = t1->z_mask & ~t2->o_mask; 1516 o_mask = t1->o_mask & ~t2->z_mask; 1517 s_mask = t1->s_mask & t2->s_mask; 1518 1519 /* Affected bits are those not known zero, masked by those known zero. */ 1520 a_mask = t1->z_mask & t2->z_mask; 1521 1522 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); 1523 } 1524 1525 static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op) 1526 { 1527 /* If true and false values are the same, eliminate the cmp. 
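 * bitsel computes (sel & tv) | (~sel & fv), so with tv == fv the
 * selector no longer matters and the result is just that value.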
*/ 1528 if (args_are_copies(op->args[2], op->args[3])) { 1529 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]); 1530 } 1531 1532 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { 1533 uint64_t tv = arg_const_val(op->args[2]); 1534 uint64_t fv = arg_const_val(op->args[3]); 1535 1536 if (tv == -1 && fv == 0) { 1537 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 1538 } 1539 if (tv == 0 && fv == -1) { 1540 if (TCG_TARGET_HAS_not_vec) { 1541 op->opc = INDEX_op_not_vec; 1542 return fold_not(ctx, op); 1543 } else { 1544 op->opc = INDEX_op_xor_vec; 1545 op->args[2] = arg_new_constant(ctx, -1); 1546 return fold_xor(ctx, op); 1547 } 1548 } 1549 } 1550 if (arg_is_const(op->args[2])) { 1551 uint64_t tv = arg_const_val(op->args[2]); 1552 if (tv == -1) { 1553 op->opc = INDEX_op_or_vec; 1554 op->args[2] = op->args[3]; 1555 return fold_or(ctx, op); 1556 } 1557 if (tv == 0 && TCG_TARGET_HAS_andc_vec) { 1558 op->opc = INDEX_op_andc_vec; 1559 op->args[2] = op->args[1]; 1560 op->args[1] = op->args[3]; 1561 return fold_andc(ctx, op); 1562 } 1563 } 1564 if (arg_is_const(op->args[3])) { 1565 uint64_t fv = arg_const_val(op->args[3]); 1566 if (fv == 0) { 1567 op->opc = INDEX_op_and_vec; 1568 return fold_and(ctx, op); 1569 } 1570 if (fv == -1 && TCG_TARGET_HAS_orc_vec) { 1571 TCGArg ta = op->args[2]; 1572 op->opc = INDEX_op_orc_vec; 1573 op->args[2] = op->args[1]; 1574 op->args[1] = ta; 1575 return fold_orc(ctx, op); 1576 } 1577 } 1578 return finish_folding(ctx, op); 1579 } 1580 1581 static bool fold_brcond(OptContext *ctx, TCGOp *op) 1582 { 1583 int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0], 1584 &op->args[1], &op->args[2]); 1585 if (i == 0) { 1586 tcg_op_remove(ctx->tcg, op); 1587 return true; 1588 } 1589 if (i > 0) { 1590 op->opc = INDEX_op_br; 1591 op->args[0] = op->args[3]; 1592 finish_ebb(ctx); 1593 } else { 1594 finish_bb(ctx); 1595 } 1596 return true; 1597 } 1598 1599 static bool fold_brcond2(OptContext *ctx, TCGOp *op) 1600 { 1601 TCGCond cond; 1602 TCGArg label; 1603 int i, inv = 0; 1604 1605 i = do_constant_folding_cond2(ctx, op, &op->args[0]); 1606 cond = op->args[4]; 1607 label = op->args[5]; 1608 if (i >= 0) { 1609 goto do_brcond_const; 1610 } 1611 1612 switch (cond) { 1613 case TCG_COND_LT: 1614 case TCG_COND_GE: 1615 /* 1616 * Simplify LT/GE comparisons vs zero to a single compare 1617 * vs the high word of the input. 1618 */ 1619 if (arg_is_const_val(op->args[2], 0) && 1620 arg_is_const_val(op->args[3], 0)) { 1621 goto do_brcond_high; 1622 } 1623 break; 1624 1625 case TCG_COND_NE: 1626 inv = 1; 1627 QEMU_FALLTHROUGH; 1628 case TCG_COND_EQ: 1629 /* 1630 * Simplify EQ/NE comparisons where one of the pairs 1631 * can be simplified. 
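     * If either half is known unequal, the whole EQ is false (and NE
     * is true), so the branch folds to a constant; if a half is known
     * equal, only the other half still needs to be compared.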
1632 */ 1633 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0], 1634 op->args[2], cond); 1635 switch (i ^ inv) { 1636 case 0: 1637 goto do_brcond_const; 1638 case 1: 1639 goto do_brcond_high; 1640 } 1641 1642 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1], 1643 op->args[3], cond); 1644 switch (i ^ inv) { 1645 case 0: 1646 goto do_brcond_const; 1647 case 1: 1648 goto do_brcond_low; 1649 } 1650 break; 1651 1652 case TCG_COND_TSTEQ: 1653 case TCG_COND_TSTNE: 1654 if (arg_is_const_val(op->args[2], 0)) { 1655 goto do_brcond_high; 1656 } 1657 if (arg_is_const_val(op->args[3], 0)) { 1658 goto do_brcond_low; 1659 } 1660 break; 1661 1662 default: 1663 break; 1664 1665 do_brcond_low: 1666 op->opc = INDEX_op_brcond; 1667 op->args[1] = op->args[2]; 1668 op->args[2] = cond; 1669 op->args[3] = label; 1670 return fold_brcond(ctx, op); 1671 1672 do_brcond_high: 1673 op->opc = INDEX_op_brcond; 1674 op->args[0] = op->args[1]; 1675 op->args[1] = op->args[3]; 1676 op->args[2] = cond; 1677 op->args[3] = label; 1678 return fold_brcond(ctx, op); 1679 1680 do_brcond_const: 1681 if (i == 0) { 1682 tcg_op_remove(ctx->tcg, op); 1683 return true; 1684 } 1685 op->opc = INDEX_op_br; 1686 op->args[0] = label; 1687 finish_ebb(ctx); 1688 return true; 1689 } 1690 1691 finish_bb(ctx); 1692 return true; 1693 } 1694 1695 static bool fold_bswap(OptContext *ctx, TCGOp *op) 1696 { 1697 uint64_t z_mask, o_mask, s_mask; 1698 TempOptInfo *t1 = arg_info(op->args[1]); 1699 int flags = op->args[2]; 1700 1701 if (ti_is_const(t1)) { 1702 return tcg_opt_gen_movi(ctx, op, op->args[0], 1703 do_constant_folding(op->opc, ctx->type, 1704 ti_const_val(t1), flags)); 1705 } 1706 1707 z_mask = t1->z_mask; 1708 o_mask = t1->o_mask; 1709 s_mask = 0; 1710 1711 switch (op->opc) { 1712 case INDEX_op_bswap16: 1713 z_mask = bswap16(z_mask); 1714 o_mask = bswap16(o_mask); 1715 if (flags & TCG_BSWAP_OS) { 1716 z_mask = (int16_t)z_mask; 1717 o_mask = (int16_t)o_mask; 1718 s_mask = INT16_MIN; 1719 } else if (!(flags & TCG_BSWAP_OZ)) { 1720 z_mask |= MAKE_64BIT_MASK(16, 48); 1721 } 1722 break; 1723 case INDEX_op_bswap32: 1724 z_mask = bswap32(z_mask); 1725 o_mask = bswap32(o_mask); 1726 if (flags & TCG_BSWAP_OS) { 1727 z_mask = (int32_t)z_mask; 1728 o_mask = (int32_t)o_mask; 1729 s_mask = INT32_MIN; 1730 } else if (!(flags & TCG_BSWAP_OZ)) { 1731 z_mask |= MAKE_64BIT_MASK(32, 32); 1732 } 1733 break; 1734 case INDEX_op_bswap64: 1735 z_mask = bswap64(z_mask); 1736 o_mask = bswap64(o_mask); 1737 break; 1738 default: 1739 g_assert_not_reached(); 1740 } 1741 1742 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); 1743 } 1744 1745 static bool fold_call(OptContext *ctx, TCGOp *op) 1746 { 1747 TCGContext *s = ctx->tcg; 1748 int nb_oargs = TCGOP_CALLO(op); 1749 int nb_iargs = TCGOP_CALLI(op); 1750 int flags, i; 1751 1752 init_arguments(ctx, op, nb_oargs + nb_iargs); 1753 copy_propagate(ctx, op, nb_oargs, nb_iargs); 1754 1755 /* If the function reads or writes globals, reset temp data. */ 1756 flags = tcg_call_flags(op); 1757 if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) { 1758 int nb_globals = s->nb_globals; 1759 1760 for (i = 0; i < nb_globals; i++) { 1761 if (test_bit(i, ctx->temps_used.l)) { 1762 reset_ts(ctx, &ctx->tcg->temps[i]); 1763 } 1764 } 1765 } 1766 1767 /* If the function has side effects, reset mem data. */ 1768 if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) { 1769 remove_mem_copy_all(ctx); 1770 } 1771 1772 /* Reset temp data for outputs. 
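 * The call produces new values for its outputs, so any constant or
 * copy information previously recorded for them is now stale.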
*/ 1773 for (i = 0; i < nb_oargs; i++) { 1774 reset_temp(ctx, op->args[i]); 1775 } 1776 1777 /* Stop optimizing MB across calls. */ 1778 ctx->prev_mb = NULL; 1779 return true; 1780 } 1781 1782 static bool fold_cmp_vec(OptContext *ctx, TCGOp *op) 1783 { 1784 /* Canonicalize the comparison to put immediate second. */ 1785 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) { 1786 op->args[3] = tcg_swap_cond(op->args[3]); 1787 } 1788 return finish_folding(ctx, op); 1789 } 1790 1791 static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op) 1792 { 1793 /* If true and false values are the same, eliminate the cmp. */ 1794 if (args_are_copies(op->args[3], op->args[4])) { 1795 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]); 1796 } 1797 1798 /* Canonicalize the comparison to put immediate second. */ 1799 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) { 1800 op->args[5] = tcg_swap_cond(op->args[5]); 1801 } 1802 /* 1803 * Canonicalize the "false" input reg to match the destination, 1804 * so that the tcg backend can implement "move if true". 1805 */ 1806 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) { 1807 op->args[5] = tcg_invert_cond(op->args[5]); 1808 } 1809 return finish_folding(ctx, op); 1810 } 1811 1812 static bool fold_count_zeros(OptContext *ctx, TCGOp *op) 1813 { 1814 uint64_t z_mask, s_mask; 1815 TempOptInfo *t1 = arg_info(op->args[1]); 1816 TempOptInfo *t2 = arg_info(op->args[2]); 1817 1818 if (ti_is_const(t1)) { 1819 uint64_t t = ti_const_val(t1); 1820 1821 if (t != 0) { 1822 t = do_constant_folding(op->opc, ctx->type, t, 0); 1823 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1824 } 1825 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]); 1826 } 1827 1828 switch (ctx->type) { 1829 case TCG_TYPE_I32: 1830 z_mask = 31; 1831 break; 1832 case TCG_TYPE_I64: 1833 z_mask = 63; 1834 break; 1835 default: 1836 g_assert_not_reached(); 1837 } 1838 s_mask = ~z_mask; 1839 z_mask |= t2->z_mask; 1840 s_mask &= t2->s_mask; 1841 1842 return fold_masks_zs(ctx, op, z_mask, s_mask); 1843 } 1844 1845 static bool fold_ctpop(OptContext *ctx, TCGOp *op) 1846 { 1847 uint64_t z_mask; 1848 1849 if (fold_const1(ctx, op)) { 1850 return true; 1851 } 1852 1853 switch (ctx->type) { 1854 case TCG_TYPE_I32: 1855 z_mask = 32 | 31; 1856 break; 1857 case TCG_TYPE_I64: 1858 z_mask = 64 | 63; 1859 break; 1860 default: 1861 g_assert_not_reached(); 1862 } 1863 return fold_masks_z(ctx, op, z_mask); 1864 } 1865 1866 static bool fold_deposit(OptContext *ctx, TCGOp *op) 1867 { 1868 TempOptInfo *t1 = arg_info(op->args[1]); 1869 TempOptInfo *t2 = arg_info(op->args[2]); 1870 int ofs = op->args[3]; 1871 int len = op->args[4]; 1872 int width = 8 * tcg_type_size(ctx->type); 1873 uint64_t z_mask, o_mask, s_mask; 1874 1875 if (ti_is_const(t1) && ti_is_const(t2)) { 1876 return tcg_opt_gen_movi(ctx, op, op->args[0], 1877 deposit64(ti_const_val(t1), ofs, len, 1878 ti_const_val(t2))); 1879 } 1880 1881 /* Inserting a value into zero at offset 0. */ 1882 if (ti_is_const_val(t1, 0) && ofs == 0) { 1883 uint64_t mask = MAKE_64BIT_MASK(0, len); 1884 1885 op->opc = INDEX_op_and; 1886 op->args[1] = op->args[2]; 1887 op->args[2] = arg_new_constant(ctx, mask); 1888 return fold_and(ctx, op); 1889 } 1890 1891 /* Inserting zero into a value. 
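 * This is an AND with a mask that clears just the deposited field,
 * e.g. inserting 0 at ofs 8, len 8 becomes and r,x,0xffffffffffff00ff.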
*/ 1892 if (ti_is_const_val(t2, 0)) { 1893 uint64_t mask = deposit64(-1, ofs, len, 0); 1894 1895 op->opc = INDEX_op_and; 1896 op->args[2] = arg_new_constant(ctx, mask); 1897 return fold_and(ctx, op); 1898 } 1899 1900 /* The s_mask from the top portion of the deposit is still valid. */ 1901 if (ofs + len == width) { 1902 s_mask = t2->s_mask << ofs; 1903 } else { 1904 s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len); 1905 } 1906 1907 z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask); 1908 o_mask = deposit64(t1->o_mask, ofs, len, t2->o_mask); 1909 1910 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); 1911 } 1912 1913 static bool fold_divide(OptContext *ctx, TCGOp *op) 1914 { 1915 if (fold_const2(ctx, op) || 1916 fold_xi_to_x(ctx, op, 1)) { 1917 return true; 1918 } 1919 return finish_folding(ctx, op); 1920 } 1921 1922 static bool fold_dup(OptContext *ctx, TCGOp *op) 1923 { 1924 if (arg_is_const(op->args[1])) { 1925 uint64_t t = arg_const_val(op->args[1]); 1926 t = dup_const(TCGOP_VECE(op), t); 1927 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1928 } 1929 return finish_folding(ctx, op); 1930 } 1931 1932 static bool fold_dup2(OptContext *ctx, TCGOp *op) 1933 { 1934 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1935 uint64_t t = deposit64(arg_const_val(op->args[1]), 32, 32, 1936 arg_const_val(op->args[2])); 1937 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1938 } 1939 1940 if (args_are_copies(op->args[1], op->args[2])) { 1941 op->opc = INDEX_op_dup_vec; 1942 TCGOP_VECE(op) = MO_32; 1943 } 1944 return finish_folding(ctx, op); 1945 } 1946 1947 static bool fold_eqv(OptContext *ctx, TCGOp *op) 1948 { 1949 uint64_t z_mask, o_mask, s_mask; 1950 TempOptInfo *t1, *t2; 1951 1952 if (fold_const2_commutative(ctx, op)) { 1953 return true; 1954 } 1955 1956 t2 = arg_info(op->args[2]); 1957 if (ti_is_const(t2)) { 1958 /* Fold eqv r,x,i to xor r,x,~i. */ 1959 switch (ctx->type) { 1960 case TCG_TYPE_I32: 1961 case TCG_TYPE_I64: 1962 op->opc = INDEX_op_xor; 1963 break; 1964 case TCG_TYPE_V64: 1965 case TCG_TYPE_V128: 1966 case TCG_TYPE_V256: 1967 op->opc = INDEX_op_xor_vec; 1968 break; 1969 default: 1970 g_assert_not_reached(); 1971 } 1972 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); 1973 return fold_xor(ctx, op); 1974 } 1975 1976 t1 = arg_info(op->args[1]); 1977 1978 z_mask = (t1->z_mask | ~t2->o_mask) & (t2->z_mask | ~t1->o_mask); 1979 o_mask = ~(t1->z_mask | t2->z_mask) | (t1->o_mask & t2->o_mask); 1980 s_mask = t1->s_mask & t2->s_mask; 1981 1982 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); 1983 } 1984 1985 static bool fold_extract(OptContext *ctx, TCGOp *op) 1986 { 1987 uint64_t z_mask, o_mask, a_mask; 1988 TempOptInfo *t1 = arg_info(op->args[1]); 1989 int pos = op->args[2]; 1990 int len = op->args[3]; 1991 1992 if (ti_is_const(t1)) { 1993 return tcg_opt_gen_movi(ctx, op, op->args[0], 1994 extract64(ti_const_val(t1), pos, len)); 1995 } 1996 1997 z_mask = extract64(t1->z_mask, pos, len); 1998 o_mask = extract64(t1->o_mask, pos, len); 1999 a_mask = pos ? 
-1 : t1->z_mask ^ z_mask; 2000 2001 return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, a_mask); 2002 } 2003 2004 static bool fold_extract2(OptContext *ctx, TCGOp *op) 2005 { 2006 TempOptInfo *t1 = arg_info(op->args[1]); 2007 TempOptInfo *t2 = arg_info(op->args[2]); 2008 uint64_t z1 = t1->z_mask; 2009 uint64_t z2 = t2->z_mask; 2010 uint64_t o1 = t1->o_mask; 2011 uint64_t o2 = t2->o_mask; 2012 int shr = op->args[3]; 2013 2014 if (ctx->type == TCG_TYPE_I32) { 2015 z1 = (uint32_t)z1 >> shr; 2016 o1 = (uint32_t)o1 >> shr; 2017 z2 = (uint64_t)((int32_t)z2 << (32 - shr)); 2018 o2 = (uint64_t)((int32_t)o2 << (32 - shr)); 2019 } else { 2020 z1 >>= shr; 2021 o1 >>= shr; 2022 z2 <<= 64 - shr; 2023 o2 <<= 64 - shr; 2024 } 2025 2026 return fold_masks_zo(ctx, op, z1 | z2, o1 | o2); 2027 } 2028 2029 static bool fold_exts(OptContext *ctx, TCGOp *op) 2030 { 2031 uint64_t z_mask, o_mask, s_mask; 2032 TempOptInfo *t1; 2033 2034 if (fold_const1(ctx, op)) { 2035 return true; 2036 } 2037 2038 t1 = arg_info(op->args[1]); 2039 z_mask = t1->z_mask; 2040 o_mask = t1->o_mask; 2041 s_mask = t1->s_mask; 2042 2043 switch (op->opc) { 2044 case INDEX_op_ext_i32_i64: 2045 s_mask |= INT32_MIN; 2046 z_mask = (int32_t)z_mask; 2047 o_mask = (int32_t)o_mask; 2048 break; 2049 default: 2050 g_assert_not_reached(); 2051 } 2052 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); 2053 } 2054 2055 static bool fold_extu(OptContext *ctx, TCGOp *op) 2056 { 2057 uint64_t z_mask, o_mask; 2058 TempOptInfo *t1; 2059 2060 if (fold_const1(ctx, op)) { 2061 return true; 2062 } 2063 2064 t1 = arg_info(op->args[1]); 2065 z_mask = t1->z_mask; 2066 o_mask = t1->o_mask; 2067 2068 switch (op->opc) { 2069 case INDEX_op_extrl_i64_i32: 2070 case INDEX_op_extu_i32_i64: 2071 z_mask = (uint32_t)z_mask; 2072 o_mask = (uint32_t)o_mask; 2073 break; 2074 case INDEX_op_extrh_i64_i32: 2075 z_mask >>= 32; 2076 o_mask >>= 32; 2077 break; 2078 default: 2079 g_assert_not_reached(); 2080 } 2081 return fold_masks_zo(ctx, op, z_mask, o_mask); 2082 } 2083 2084 static bool fold_mb(OptContext *ctx, TCGOp *op) 2085 { 2086 /* Eliminate duplicate and redundant fence instructions. */ 2087 if (ctx->prev_mb) { 2088 /* 2089 * Merge two barriers of the same type into one, 2090 * or a weaker barrier into a stronger one, 2091 * or two weaker barriers into a stronger one. 2092 * mb X; mb Y => mb X|Y 2093 * mb; strl => mb; st 2094 * ldaq; mb => ld; mb 2095 * ldaq; strl => ld; mb; st 2096 * Other combinations are also merged into a strong 2097 * barrier. This is stricter than specified but for 2098 * the purposes of TCG is better than not optimizing. 2099 */ 2100 ctx->prev_mb->args[0] |= op->args[0]; 2101 tcg_op_remove(ctx->tcg, op); 2102 } else { 2103 ctx->prev_mb = op; 2104 } 2105 return true; 2106 } 2107 2108 static bool fold_mov(OptContext *ctx, TCGOp *op) 2109 { 2110 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 2111 } 2112 2113 static bool fold_movcond(OptContext *ctx, TCGOp *op) 2114 { 2115 uint64_t z_mask, o_mask, s_mask; 2116 TempOptInfo *tt, *ft; 2117 int i; 2118 2119 /* If true and false values are the same, eliminate the cmp. */ 2120 if (args_are_copies(op->args[3], op->args[4])) { 2121 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]); 2122 } 2123 2124 /* 2125 * Canonicalize the "false" input reg to match the destination reg so 2126 * that the tcg backend can implement a "move if true" operation. 
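     * When the destination already holds the false value, the backend
     * only has to conditionally overwrite it with the true value;
     * swapping the inputs requires inverting the condition, done below.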
2127 */ 2128 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) { 2129 op->args[5] = tcg_invert_cond(op->args[5]); 2130 } 2131 2132 i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1], 2133 &op->args[2], &op->args[5]); 2134 if (i >= 0) { 2135 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]); 2136 } 2137 2138 tt = arg_info(op->args[3]); 2139 ft = arg_info(op->args[4]); 2140 z_mask = tt->z_mask | ft->z_mask; 2141 o_mask = tt->o_mask & ft->o_mask; 2142 s_mask = tt->s_mask & ft->s_mask; 2143 2144 if (ti_is_const(tt) && ti_is_const(ft)) { 2145 uint64_t tv = ti_const_val(tt); 2146 uint64_t fv = ti_const_val(ft); 2147 TCGCond cond = op->args[5]; 2148 2149 if (tv == 1 && fv == 0) { 2150 op->opc = INDEX_op_setcond; 2151 op->args[3] = cond; 2152 } else if (fv == 1 && tv == 0) { 2153 op->opc = INDEX_op_setcond; 2154 op->args[3] = tcg_invert_cond(cond); 2155 } else if (tv == -1 && fv == 0) { 2156 op->opc = INDEX_op_negsetcond; 2157 op->args[3] = cond; 2158 } else if (fv == -1 && tv == 0) { 2159 op->opc = INDEX_op_negsetcond; 2160 op->args[3] = tcg_invert_cond(cond); 2161 } 2162 } 2163 2164 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); 2165 } 2166 2167 static bool fold_mul(OptContext *ctx, TCGOp *op) 2168 { 2169 if (fold_const2(ctx, op) || 2170 fold_xi_to_i(ctx, op, 0) || 2171 fold_xi_to_x(ctx, op, 1)) { 2172 return true; 2173 } 2174 return finish_folding(ctx, op); 2175 } 2176 2177 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op) 2178 { 2179 if (fold_const2_commutative(ctx, op) || 2180 fold_xi_to_i(ctx, op, 0)) { 2181 return true; 2182 } 2183 return finish_folding(ctx, op); 2184 } 2185 2186 static bool fold_multiply2(OptContext *ctx, TCGOp *op) 2187 { 2188 swap_commutative(op->args[0], &op->args[2], &op->args[3]); 2189 2190 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { 2191 uint64_t a = arg_const_val(op->args[2]); 2192 uint64_t b = arg_const_val(op->args[3]); 2193 uint64_t h, l; 2194 TCGArg rl, rh; 2195 TCGOp *op2; 2196 2197 switch (op->opc) { 2198 case INDEX_op_mulu2: 2199 if (ctx->type == TCG_TYPE_I32) { 2200 l = (uint64_t)(uint32_t)a * (uint32_t)b; 2201 h = (int32_t)(l >> 32); 2202 l = (int32_t)l; 2203 } else { 2204 mulu64(&l, &h, a, b); 2205 } 2206 break; 2207 case INDEX_op_muls2: 2208 if (ctx->type == TCG_TYPE_I32) { 2209 l = (int64_t)(int32_t)a * (int32_t)b; 2210 h = l >> 32; 2211 l = (int32_t)l; 2212 } else { 2213 muls64(&l, &h, a, b); 2214 } 2215 break; 2216 default: 2217 g_assert_not_reached(); 2218 } 2219 2220 rl = op->args[0]; 2221 rh = op->args[1]; 2222 2223 /* The proper opcode is supplied by tcg_opt_gen_mov. */ 2224 op2 = opt_insert_before(ctx, op, 0, 2); 2225 2226 tcg_opt_gen_movi(ctx, op, rl, l); 2227 tcg_opt_gen_movi(ctx, op2, rh, h); 2228 return true; 2229 } 2230 return finish_folding(ctx, op); 2231 } 2232 2233 static bool fold_nand(OptContext *ctx, TCGOp *op) 2234 { 2235 uint64_t z_mask, o_mask, s_mask; 2236 TempOptInfo *t1, *t2; 2237 2238 if (fold_const2_commutative(ctx, op) || 2239 fold_xi_to_not(ctx, op, -1)) { 2240 return true; 2241 } 2242 2243 t1 = arg_info(op->args[1]); 2244 t2 = arg_info(op->args[2]); 2245 2246 z_mask = ~(t1->o_mask & t2->o_mask); 2247 o_mask = ~(t1->z_mask & t2->z_mask); 2248 s_mask = t1->s_mask & t2->s_mask; 2249 2250 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); 2251 } 2252 2253 static bool fold_neg_no_const(OptContext *ctx, TCGOp *op) 2254 { 2255 /* Set to 1 all bits to the left of the rightmost. 
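 * If the lowest possibly-nonzero bit of x is bit k, then -x also has
 * bits 0..k-1 clear, so -(z_mask & -z_mask) covers every possible
 * result; e.g. z_mask 0x6 gives 0xfffffffffffffffe.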
*/ 2256 uint64_t z_mask = arg_info(op->args[1])->z_mask; 2257 z_mask = -(z_mask & -z_mask); 2258 2259 return fold_masks_z(ctx, op, z_mask); 2260 } 2261 2262 static bool fold_neg(OptContext *ctx, TCGOp *op) 2263 { 2264 return fold_const1(ctx, op) || fold_neg_no_const(ctx, op); 2265 } 2266 2267 static bool fold_nor(OptContext *ctx, TCGOp *op) 2268 { 2269 uint64_t z_mask, o_mask, s_mask; 2270 TempOptInfo *t1, *t2; 2271 2272 if (fold_const2_commutative(ctx, op) || 2273 fold_xi_to_not(ctx, op, 0)) { 2274 return true; 2275 } 2276 2277 t1 = arg_info(op->args[1]); 2278 t2 = arg_info(op->args[2]); 2279 2280 z_mask = ~(t1->o_mask | t2->o_mask); 2281 o_mask = ~(t1->z_mask | t2->z_mask); 2282 s_mask = t1->s_mask & t2->s_mask; 2283 2284 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); 2285 } 2286 2287 static bool fold_not(OptContext *ctx, TCGOp *op) 2288 { 2289 TempOptInfo *t1; 2290 2291 if (fold_const1(ctx, op)) { 2292 return true; 2293 } 2294 2295 t1 = arg_info(op->args[1]); 2296 return fold_masks_zos(ctx, op, ~t1->o_mask, ~t1->z_mask, t1->s_mask); 2297 } 2298 2299 static bool fold_or(OptContext *ctx, TCGOp *op) 2300 { 2301 uint64_t z_mask, o_mask, s_mask, a_mask; 2302 TempOptInfo *t1, *t2; 2303 2304 if (fold_const2_commutative(ctx, op) || 2305 fold_xi_to_x(ctx, op, 0) || 2306 fold_xx_to_x(ctx, op)) { 2307 return true; 2308 } 2309 2310 t1 = arg_info(op->args[1]); 2311 t2 = arg_info(op->args[2]); 2312 2313 z_mask = t1->z_mask | t2->z_mask; 2314 o_mask = t1->o_mask | t2->o_mask; 2315 s_mask = t1->s_mask & t2->s_mask; 2316 2317 /* Affected bits are those not known one, masked by those known zero. */ 2318 a_mask = ~t1->o_mask & t2->z_mask; 2319 2320 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); 2321 } 2322 2323 static bool fold_orc(OptContext *ctx, TCGOp *op) 2324 { 2325 uint64_t z_mask, o_mask, s_mask, a_mask; 2326 TempOptInfo *t1, *t2; 2327 2328 if (fold_const2(ctx, op)) { 2329 return true; 2330 } 2331 2332 t2 = arg_info(op->args[2]); 2333 if (ti_is_const(t2)) { 2334 /* Fold orc r,x,i to or r,x,~i. */ 2335 switch (ctx->type) { 2336 case TCG_TYPE_I32: 2337 case TCG_TYPE_I64: 2338 op->opc = INDEX_op_or; 2339 break; 2340 case TCG_TYPE_V64: 2341 case TCG_TYPE_V128: 2342 case TCG_TYPE_V256: 2343 op->opc = INDEX_op_or_vec; 2344 break; 2345 default: 2346 g_assert_not_reached(); 2347 } 2348 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); 2349 return fold_or(ctx, op); 2350 } 2351 if (fold_xx_to_i(ctx, op, -1) || 2352 fold_ix_to_not(ctx, op, 0)) { 2353 return true; 2354 } 2355 t1 = arg_info(op->args[1]); 2356 2357 z_mask = t1->z_mask | ~t2->o_mask; 2358 o_mask = t1->o_mask | ~t2->z_mask; 2359 s_mask = t1->s_mask & t2->s_mask; 2360 2361 /* Affected bits are those not known one, masked by those not known one. */ 2362 a_mask = ~t1->o_mask & ~t2->o_mask; 2363 2364 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); 2365 } 2366 2367 static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op) 2368 { 2369 const TCGOpDef *def = &tcg_op_defs[op->opc]; 2370 MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs]; 2371 MemOp mop = get_memop(oi); 2372 int width = 8 * memop_size(mop); 2373 uint64_t z_mask = -1, s_mask = 0; 2374 2375 if (width < 64) { 2376 if (mop & MO_SIGN) { 2377 s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1)); 2378 } else { 2379 z_mask = MAKE_64BIT_MASK(0, width); 2380 } 2381 } 2382 2383 /* Opcodes that touch guest memory stop the mb optimization.
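A pending barrier cannot be merged with one on the other side of a guest access, so any previously recorded barrier is dropped here.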
*/ 2384 ctx->prev_mb = NULL; 2385 2386 return fold_masks_zs(ctx, op, z_mask, s_mask); 2387 } 2388 2389 static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op) 2390 { 2391 /* Opcodes that touch guest memory stop the mb optimization. */ 2392 ctx->prev_mb = NULL; 2393 return finish_folding(ctx, op); 2394 } 2395 2396 static bool fold_qemu_st(OptContext *ctx, TCGOp *op) 2397 { 2398 /* Opcodes that touch guest memory stop the mb optimization. */ 2399 ctx->prev_mb = NULL; 2400 return true; 2401 } 2402 2403 static bool fold_remainder(OptContext *ctx, TCGOp *op) 2404 { 2405 if (fold_const2(ctx, op) || 2406 fold_xx_to_i(ctx, op, 0)) { 2407 return true; 2408 } 2409 return finish_folding(ctx, op); 2410 } 2411 2412 /* Return 1 if finished, -1 if simplified, 0 if unchanged. */ 2413 static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) 2414 { 2415 uint64_t a_zmask, b_val; 2416 TCGCond cond; 2417 2418 if (!arg_is_const(op->args[2])) { 2419 return false; 2420 } 2421 2422 a_zmask = arg_info(op->args[1])->z_mask; 2423 b_val = arg_const_val(op->args[2]); 2424 cond = op->args[3]; 2425 2426 if (ctx->type == TCG_TYPE_I32) { 2427 a_zmask = (uint32_t)a_zmask; 2428 b_val = (uint32_t)b_val; 2429 } 2430 2431 /* 2432 * A with only low bits set vs B with high bits set means that A < B. 2433 */ 2434 if (a_zmask < b_val) { 2435 bool inv = false; 2436 2437 switch (cond) { 2438 case TCG_COND_NE: 2439 case TCG_COND_LEU: 2440 case TCG_COND_LTU: 2441 inv = true; 2442 /* fall through */ 2443 case TCG_COND_GTU: 2444 case TCG_COND_GEU: 2445 case TCG_COND_EQ: 2446 return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv); 2447 default: 2448 break; 2449 } 2450 } 2451 2452 /* 2453 * A with only lsb set is already boolean. 2454 */ 2455 if (a_zmask <= 1) { 2456 bool convert = false; 2457 bool inv = false; 2458 2459 switch (cond) { 2460 case TCG_COND_EQ: 2461 inv = true; 2462 /* fall through */ 2463 case TCG_COND_NE: 2464 convert = (b_val == 0); 2465 break; 2466 case TCG_COND_LTU: 2467 case TCG_COND_TSTEQ: 2468 inv = true; 2469 /* fall through */ 2470 case TCG_COND_GEU: 2471 case TCG_COND_TSTNE: 2472 convert = (b_val == 1); 2473 break; 2474 default: 2475 break; 2476 } 2477 if (convert) { 2478 if (!inv && !neg) { 2479 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 2480 } 2481 2482 if (!inv) { 2483 op->opc = INDEX_op_neg; 2484 } else if (neg) { 2485 op->opc = INDEX_op_add; 2486 op->args[2] = arg_new_constant(ctx, -1); 2487 } else { 2488 op->opc = INDEX_op_xor; 2489 op->args[2] = arg_new_constant(ctx, 1); 2490 } 2491 return -1; 2492 } 2493 } 2494 return 0; 2495 } 2496 2497 static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) 2498 { 2499 TCGCond cond = op->args[3]; 2500 TCGArg ret, src1, src2; 2501 TCGOp *op2; 2502 uint64_t val; 2503 int sh; 2504 bool inv; 2505 2506 if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) { 2507 return; 2508 } 2509 2510 src2 = op->args[2]; 2511 val = arg_const_val(src2); 2512 if (!is_power_of_2(val)) { 2513 return; 2514 } 2515 sh = ctz64(val); 2516 2517 ret = op->args[0]; 2518 src1 = op->args[1]; 2519 inv = cond == TCG_COND_TSTEQ; 2520 2521 if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) { 2522 op->opc = INDEX_op_sextract; 2523 op->args[1] = src1; 2524 op->args[2] = sh; 2525 op->args[3] = 1; 2526 return; 2527 } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) { 2528 op->opc = INDEX_op_extract; 2529 op->args[1] = src1; 2530 op->args[2] = sh; 2531 op->args[3] = 1; 2532 } else { 2533 if (sh) { 2534 op2 = opt_insert_before(ctx, 
op, INDEX_op_shr, 3); 2535 op2->args[0] = ret; 2536 op2->args[1] = src1; 2537 op2->args[2] = arg_new_constant(ctx, sh); 2538 src1 = ret; 2539 } 2540 op->opc = INDEX_op_and; 2541 op->args[1] = src1; 2542 op->args[2] = arg_new_constant(ctx, 1); 2543 } 2544 2545 if (neg && inv) { 2546 op2 = opt_insert_after(ctx, op, INDEX_op_add, 3); 2547 op2->args[0] = ret; 2548 op2->args[1] = ret; 2549 op2->args[2] = arg_new_constant(ctx, -1); 2550 } else if (inv) { 2551 op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3); 2552 op2->args[0] = ret; 2553 op2->args[1] = ret; 2554 op2->args[2] = arg_new_constant(ctx, 1); 2555 } else if (neg) { 2556 op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2); 2557 op2->args[0] = ret; 2558 op2->args[1] = ret; 2559 } 2560 } 2561 2562 static bool fold_setcond(OptContext *ctx, TCGOp *op) 2563 { 2564 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1], 2565 &op->args[2], &op->args[3]); 2566 if (i >= 0) { 2567 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 2568 } 2569 2570 i = fold_setcond_zmask(ctx, op, false); 2571 if (i > 0) { 2572 return true; 2573 } 2574 if (i == 0) { 2575 fold_setcond_tst_pow2(ctx, op, false); 2576 } 2577 2578 return fold_masks_z(ctx, op, 1); 2579 } 2580 2581 static bool fold_negsetcond(OptContext *ctx, TCGOp *op) 2582 { 2583 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1], 2584 &op->args[2], &op->args[3]); 2585 if (i >= 0) { 2586 return tcg_opt_gen_movi(ctx, op, op->args[0], -i); 2587 } 2588 2589 i = fold_setcond_zmask(ctx, op, true); 2590 if (i > 0) { 2591 return true; 2592 } 2593 if (i == 0) { 2594 fold_setcond_tst_pow2(ctx, op, true); 2595 } 2596 2597 /* Value is {0,-1} so all bits are repetitions of the sign. */ 2598 return fold_masks_s(ctx, op, -1); 2599 } 2600 2601 static bool fold_setcond2(OptContext *ctx, TCGOp *op) 2602 { 2603 TCGCond cond; 2604 int i, inv = 0; 2605 2606 i = do_constant_folding_cond2(ctx, op, &op->args[1]); 2607 cond = op->args[5]; 2608 if (i >= 0) { 2609 goto do_setcond_const; 2610 } 2611 2612 switch (cond) { 2613 case TCG_COND_LT: 2614 case TCG_COND_GE: 2615 /* 2616 * Simplify LT/GE comparisons vs zero to a single compare 2617 * vs the high word of the input. 2618 */ 2619 if (arg_is_const_val(op->args[3], 0) && 2620 arg_is_const_val(op->args[4], 0)) { 2621 goto do_setcond_high; 2622 } 2623 break; 2624 2625 case TCG_COND_NE: 2626 inv = 1; 2627 QEMU_FALLTHROUGH; 2628 case TCG_COND_EQ: 2629 /* 2630 * Simplify EQ/NE comparisons where one of the pairs 2631 * can be simplified. 
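 * If one half is known unequal, the EQ/NE result is already decided;
 * if one half is known equal, the result depends only on the other half.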
2632 */ 2633 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1], 2634 op->args[3], cond); 2635 switch (i ^ inv) { 2636 case 0: 2637 goto do_setcond_const; 2638 case 1: 2639 goto do_setcond_high; 2640 } 2641 2642 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2], 2643 op->args[4], cond); 2644 switch (i ^ inv) { 2645 case 0: 2646 goto do_setcond_const; 2647 case 1: 2648 goto do_setcond_low; 2649 } 2650 break; 2651 2652 case TCG_COND_TSTEQ: 2653 case TCG_COND_TSTNE: 2654 if (arg_is_const_val(op->args[3], 0)) { 2655 goto do_setcond_high; 2656 } 2657 if (arg_is_const_val(op->args[4], 0)) { 2658 goto do_setcond_low; 2659 } 2660 break; 2661 2662 default: 2663 break; 2664 2665 do_setcond_low: 2666 op->args[2] = op->args[3]; 2667 op->args[3] = cond; 2668 op->opc = INDEX_op_setcond; 2669 return fold_setcond(ctx, op); 2670 2671 do_setcond_high: 2672 op->args[1] = op->args[2]; 2673 op->args[2] = op->args[4]; 2674 op->args[3] = cond; 2675 op->opc = INDEX_op_setcond; 2676 return fold_setcond(ctx, op); 2677 } 2678 2679 return fold_masks_z(ctx, op, 1); 2680 2681 do_setcond_const: 2682 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 2683 } 2684 2685 static bool fold_sextract(OptContext *ctx, TCGOp *op) 2686 { 2687 uint64_t z_mask, o_mask, s_mask, a_mask; 2688 TempOptInfo *t1 = arg_info(op->args[1]); 2689 int pos = op->args[2]; 2690 int len = op->args[3]; 2691 2692 if (ti_is_const(t1)) { 2693 return tcg_opt_gen_movi(ctx, op, op->args[0], 2694 sextract64(ti_const_val(t1), pos, len)); 2695 } 2696 2697 s_mask = t1->s_mask >> pos; 2698 s_mask |= -1ull << (len - 1); 2699 a_mask = pos ? -1 : s_mask & ~t1->s_mask; 2700 2701 z_mask = sextract64(t1->z_mask, pos, len); 2702 o_mask = sextract64(t1->o_mask, pos, len); 2703 2704 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); 2705 } 2706 2707 static bool fold_shift(OptContext *ctx, TCGOp *op) 2708 { 2709 uint64_t s_mask, z_mask, o_mask; 2710 TempOptInfo *t1, *t2; 2711 2712 if (fold_const2(ctx, op) || 2713 fold_ix_to_i(ctx, op, 0) || 2714 fold_xi_to_x(ctx, op, 0)) { 2715 return true; 2716 } 2717 2718 t1 = arg_info(op->args[1]); 2719 t2 = arg_info(op->args[2]); 2720 s_mask = t1->s_mask; 2721 z_mask = t1->z_mask; 2722 o_mask = t1->o_mask; 2723 2724 if (ti_is_const(t2)) { 2725 int sh = ti_const_val(t2); 2726 2727 z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh); 2728 o_mask = do_constant_folding(op->opc, ctx->type, o_mask, sh); 2729 s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh); 2730 2731 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); 2732 } 2733 2734 switch (op->opc) { 2735 case INDEX_op_sar: 2736 /* 2737 * Arithmetic right shift will not reduce the number of 2738 * input sign repetitions. 2739 */ 2740 return fold_masks_s(ctx, op, s_mask); 2741 case INDEX_op_shr: 2742 /* 2743 * If the sign bit is known zero, then logical right shift 2744 * will not reduce the number of input sign repetitions. 
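 * Here -s_mask isolates the least significant bit of the sign-replicated
 * run, so ~z_mask & -s_mask tests whether that bit, and therefore the
 * sign bit itself, is known zero.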
2745 */ 2746 if (~z_mask & -s_mask) { 2747 return fold_masks_s(ctx, op, s_mask); 2748 } 2749 break; 2750 default: 2751 break; 2752 } 2753 2754 return finish_folding(ctx, op); 2755 } 2756 2757 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op) 2758 { 2759 TCGOpcode neg_op; 2760 bool have_neg; 2761 2762 if (!arg_is_const_val(op->args[1], 0)) { 2763 return false; 2764 } 2765 2766 switch (ctx->type) { 2767 case TCG_TYPE_I32: 2768 case TCG_TYPE_I64: 2769 neg_op = INDEX_op_neg; 2770 have_neg = true; 2771 break; 2772 case TCG_TYPE_V64: 2773 case TCG_TYPE_V128: 2774 case TCG_TYPE_V256: 2775 neg_op = INDEX_op_neg_vec; 2776 have_neg = (TCG_TARGET_HAS_neg_vec && 2777 tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0); 2778 break; 2779 default: 2780 g_assert_not_reached(); 2781 } 2782 if (have_neg) { 2783 op->opc = neg_op; 2784 op->args[1] = op->args[2]; 2785 return fold_neg_no_const(ctx, op); 2786 } 2787 return false; 2788 } 2789 2790 /* We cannot as yet do_constant_folding with vectors. */ 2791 static bool fold_sub_vec(OptContext *ctx, TCGOp *op) 2792 { 2793 if (fold_xx_to_i(ctx, op, 0) || 2794 fold_xi_to_x(ctx, op, 0) || 2795 fold_sub_to_neg(ctx, op)) { 2796 return true; 2797 } 2798 return finish_folding(ctx, op); 2799 } 2800 2801 static bool fold_sub(OptContext *ctx, TCGOp *op) 2802 { 2803 if (fold_const2(ctx, op) || 2804 fold_xx_to_i(ctx, op, 0) || 2805 fold_xi_to_x(ctx, op, 0) || 2806 fold_sub_to_neg(ctx, op)) { 2807 return true; 2808 } 2809 2810 /* Fold sub r,x,i to add r,x,-i */ 2811 if (arg_is_const(op->args[2])) { 2812 uint64_t val = arg_const_val(op->args[2]); 2813 2814 op->opc = INDEX_op_add; 2815 op->args[2] = arg_new_constant(ctx, -val); 2816 } 2817 return finish_folding(ctx, op); 2818 } 2819 2820 static void squash_prev_borrowout(OptContext *ctx, TCGOp *op) 2821 { 2822 TempOptInfo *t2; 2823 2824 op = QTAILQ_PREV(op, link); 2825 switch (op->opc) { 2826 case INDEX_op_subbo: 2827 op->opc = INDEX_op_sub; 2828 fold_sub(ctx, op); 2829 break; 2830 case INDEX_op_subbio: 2831 op->opc = INDEX_op_subbi; 2832 break; 2833 case INDEX_op_subb1o: 2834 t2 = arg_info(op->args[2]); 2835 if (ti_is_const(t2)) { 2836 op->opc = INDEX_op_add; 2837 op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1)); 2838 /* Perform other constant folding, if needed. */ 2839 fold_add(ctx, op); 2840 } else { 2841 TCGArg ret = op->args[0]; 2842 op->opc = INDEX_op_sub; 2843 op = opt_insert_after(ctx, op, INDEX_op_add, 3); 2844 op->args[0] = ret; 2845 op->args[1] = ret; 2846 op->args[2] = arg_new_constant(ctx, -1); 2847 } 2848 break; 2849 default: 2850 g_assert_not_reached(); 2851 } 2852 } 2853 2854 static bool fold_subbi(OptContext *ctx, TCGOp *op) 2855 { 2856 TempOptInfo *t2; 2857 int borrow_in = ctx->carry_state; 2858 2859 if (borrow_in < 0) { 2860 return finish_folding(ctx, op); 2861 } 2862 ctx->carry_state = -1; 2863 2864 squash_prev_borrowout(ctx, op); 2865 if (borrow_in == 0) { 2866 op->opc = INDEX_op_sub; 2867 return fold_sub(ctx, op); 2868 } 2869 2870 /* 2871 * Propagate the known carry-in into any constant, then negate to 2872 * transform from sub to add. If there is no constant, emit a 2873 * separate add -1. 
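 * E.g. with a borrow-in of 1, subbi r,a,C becomes add r,a,-(C+1); with a
 * non-constant second operand, sub r,a,b is emitted first and the
 * trailing add folds in the -1.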
*/ 2874 2875 t2 = arg_info(op->args[2]); 2876 if (ti_is_const(t2)) { 2877 op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1)); 2878 } else { 2879 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3); 2880 2881 op2->args[0] = op->args[0]; 2882 op2->args[1] = op->args[1]; 2883 op2->args[2] = op->args[2]; 2884 fold_sub(ctx, op2); 2885 2886 op->args[1] = op->args[0]; 2887 op->args[2] = arg_new_constant(ctx, -1); 2888 } 2889 op->opc = INDEX_op_add; 2890 return fold_add(ctx, op); 2891 } 2892 2893 static bool fold_subbio(OptContext *ctx, TCGOp *op) 2894 { 2895 TempOptInfo *t1, *t2; 2896 int borrow_out = -1; 2897 2898 if (ctx->carry_state < 0) { 2899 return finish_folding(ctx, op); 2900 } 2901 2902 squash_prev_borrowout(ctx, op); 2903 if (ctx->carry_state == 0) { 2904 goto do_subbo; 2905 } 2906 2907 t1 = arg_info(op->args[1]); 2908 t2 = arg_info(op->args[2]); 2909 2910 /* Propagate the known borrow-in into a constant, if possible. */ 2911 if (ti_is_const(t2)) { 2912 uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX; 2913 uint64_t v = ti_const_val(t2) & max; 2914 2915 if (v < max) { 2916 op->args[2] = arg_new_constant(ctx, v + 1); 2917 goto do_subbo; 2918 } 2919 /* subtracting max + 1 produces known borrow out. */ 2920 borrow_out = 1; 2921 } 2922 if (ti_is_const(t1)) { 2923 uint64_t v = ti_const_val(t1); 2924 if (v != 0) { 2925 op->args[1] = arg_new_constant(ctx, v - 1); 2926 goto do_subbo; 2927 } 2928 } 2929 2930 /* Adjust the opcode to remember the known carry-in. */ 2931 op->opc = INDEX_op_subb1o; 2932 ctx->carry_state = borrow_out; 2933 return finish_folding(ctx, op); 2934 2935 do_subbo: 2936 op->opc = INDEX_op_subbo; 2937 return fold_subbo(ctx, op); 2938 } 2939 2940 static bool fold_subbo(OptContext *ctx, TCGOp *op) 2941 { 2942 TempOptInfo *t1 = arg_info(op->args[1]); 2943 TempOptInfo *t2 = arg_info(op->args[2]); 2944 int borrow_out = -1; 2945 2946 if (ti_is_const(t2)) { 2947 uint64_t v2 = ti_const_val(t2); 2948 if (v2 == 0) { 2949 borrow_out = 0; 2950 } else if (ti_is_const(t1)) { 2951 uint64_t v1 = ti_const_val(t1); 2952 borrow_out = v1 < v2; 2953 } 2954 } 2955 ctx->carry_state = borrow_out; 2956 return finish_folding(ctx, op); 2957 } 2958 2959 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op) 2960 { 2961 uint64_t z_mask = -1, s_mask = 0; 2962 2963 /* We can't do any folding with a load, but we can record bits.
*/ 2964 switch (op->opc) { 2965 case INDEX_op_ld8s: 2966 s_mask = INT8_MIN; 2967 break; 2968 case INDEX_op_ld8u: 2969 z_mask = MAKE_64BIT_MASK(0, 8); 2970 break; 2971 case INDEX_op_ld16s: 2972 s_mask = INT16_MIN; 2973 break; 2974 case INDEX_op_ld16u: 2975 z_mask = MAKE_64BIT_MASK(0, 16); 2976 break; 2977 case INDEX_op_ld32s: 2978 s_mask = INT32_MIN; 2979 break; 2980 case INDEX_op_ld32u: 2981 z_mask = MAKE_64BIT_MASK(0, 32); 2982 break; 2983 default: 2984 g_assert_not_reached(); 2985 } 2986 return fold_masks_zs(ctx, op, z_mask, s_mask); 2987 } 2988 2989 static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op) 2990 { 2991 TCGTemp *dst, *src; 2992 intptr_t ofs; 2993 TCGType type; 2994 2995 if (op->args[1] != tcgv_ptr_arg(tcg_env)) { 2996 return finish_folding(ctx, op); 2997 } 2998 2999 type = ctx->type; 3000 ofs = op->args[2]; 3001 dst = arg_temp(op->args[0]); 3002 src = find_mem_copy_for(ctx, type, ofs); 3003 if (src && src->base_type == type) { 3004 return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src)); 3005 } 3006 3007 reset_ts(ctx, dst); 3008 record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1); 3009 return true; 3010 } 3011 3012 static bool fold_tcg_st(OptContext *ctx, TCGOp *op) 3013 { 3014 intptr_t ofs = op->args[2]; 3015 intptr_t lm1; 3016 3017 if (op->args[1] != tcgv_ptr_arg(tcg_env)) { 3018 remove_mem_copy_all(ctx); 3019 return true; 3020 } 3021 3022 switch (op->opc) { 3023 case INDEX_op_st8: 3024 lm1 = 0; 3025 break; 3026 case INDEX_op_st16: 3027 lm1 = 1; 3028 break; 3029 case INDEX_op_st32: 3030 lm1 = 3; 3031 break; 3032 case INDEX_op_st: 3033 case INDEX_op_st_vec: 3034 lm1 = tcg_type_size(ctx->type) - 1; 3035 break; 3036 default: 3037 g_assert_not_reached(); 3038 } 3039 remove_mem_copy_in(ctx, ofs, ofs + lm1); 3040 return true; 3041 } 3042 3043 static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op) 3044 { 3045 TCGTemp *src; 3046 intptr_t ofs, last; 3047 TCGType type; 3048 3049 if (op->args[1] != tcgv_ptr_arg(tcg_env)) { 3050 return fold_tcg_st(ctx, op); 3051 } 3052 3053 src = arg_temp(op->args[0]); 3054 ofs = op->args[2]; 3055 type = ctx->type; 3056 3057 /* 3058 * Eliminate duplicate stores of a constant. 3059 * This happens frequently when the target ISA zero-extends. 3060 */ 3061 if (ts_is_const(src)) { 3062 TCGTemp *prev = find_mem_copy_for(ctx, type, ofs); 3063 if (src == prev) { 3064 tcg_op_remove(ctx->tcg, op); 3065 return true; 3066 } 3067 } 3068 3069 last = ofs + tcg_type_size(type) - 1; 3070 remove_mem_copy_in(ctx, ofs, last); 3071 record_mem_copy(ctx, type, src, ofs, last); 3072 return true; 3073 } 3074 3075 static bool fold_xor(OptContext *ctx, TCGOp *op) 3076 { 3077 uint64_t z_mask, o_mask, s_mask; 3078 TempOptInfo *t1, *t2; 3079 3080 if (fold_const2_commutative(ctx, op) || 3081 fold_xx_to_i(ctx, op, 0) || 3082 fold_xi_to_x(ctx, op, 0) || 3083 fold_xi_to_not(ctx, op, -1)) { 3084 return true; 3085 } 3086 3087 t1 = arg_info(op->args[1]); 3088 t2 = arg_info(op->args[2]); 3089 3090 z_mask = (t1->z_mask | t2->z_mask) & ~(t1->o_mask & t2->o_mask); 3091 o_mask = (t1->o_mask & ~t2->z_mask) | (t2->o_mask & ~t1->z_mask); 3092 s_mask = t1->s_mask & t2->s_mask; 3093 3094 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); 3095 } 3096 3097 /* Propagate constants and copies, fold constant expressions. */ 3098 void tcg_optimize(TCGContext *s) 3099 { 3100 int nb_temps, i; 3101 TCGOp *op, *op_next; 3102 OptContext ctx = { .tcg = s }; 3103 3104 QSIMPLEQ_INIT(&ctx.mem_free); 3105 3106 /* Array VALS has an element for each temp. 
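(This per-temp information now lives in TempOptInfo, reached via each temp's state_ptr.)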
3107 If this temp holds a constant then its value is kept in VALS' element. 3108 If this temp is a copy of other ones then the other copies are 3109 available through the doubly linked circular list. */ 3110 3111 nb_temps = s->nb_temps; 3112 for (i = 0; i < nb_temps; ++i) { 3113 s->temps[i].state_ptr = NULL; 3114 } 3115 3116 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3117 TCGOpcode opc = op->opc; 3118 const TCGOpDef *def; 3119 bool done = false; 3120 3121 /* Calls are special. */ 3122 if (opc == INDEX_op_call) { 3123 fold_call(&ctx, op); 3124 continue; 3125 } 3126 3127 def = &tcg_op_defs[opc]; 3128 init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs); 3129 copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs); 3130 3131 /* Pre-compute the type of the operation. */ 3132 ctx.type = TCGOP_TYPE(op); 3133 3134 /* 3135 * Process each opcode. 3136 * Sorted alphabetically by opcode as much as possible. 3137 */ 3138 switch (opc) { 3139 case INDEX_op_add: 3140 done = fold_add(&ctx, op); 3141 break; 3142 case INDEX_op_add_vec: 3143 done = fold_add_vec(&ctx, op); 3144 break; 3145 case INDEX_op_addci: 3146 done = fold_addci(&ctx, op); 3147 break; 3148 case INDEX_op_addcio: 3149 done = fold_addcio(&ctx, op); 3150 break; 3151 case INDEX_op_addco: 3152 done = fold_addco(&ctx, op); 3153 break; 3154 case INDEX_op_and: 3155 case INDEX_op_and_vec: 3156 done = fold_and(&ctx, op); 3157 break; 3158 case INDEX_op_andc: 3159 case INDEX_op_andc_vec: 3160 done = fold_andc(&ctx, op); 3161 break; 3162 case INDEX_op_brcond: 3163 done = fold_brcond(&ctx, op); 3164 break; 3165 case INDEX_op_brcond2_i32: 3166 done = fold_brcond2(&ctx, op); 3167 break; 3168 case INDEX_op_bswap16: 3169 case INDEX_op_bswap32: 3170 case INDEX_op_bswap64: 3171 done = fold_bswap(&ctx, op); 3172 break; 3173 case INDEX_op_clz: 3174 case INDEX_op_ctz: 3175 done = fold_count_zeros(&ctx, op); 3176 break; 3177 case INDEX_op_ctpop: 3178 done = fold_ctpop(&ctx, op); 3179 break; 3180 case INDEX_op_deposit: 3181 done = fold_deposit(&ctx, op); 3182 break; 3183 case INDEX_op_divs: 3184 case INDEX_op_divu: 3185 done = fold_divide(&ctx, op); 3186 break; 3187 case INDEX_op_dup_vec: 3188 done = fold_dup(&ctx, op); 3189 break; 3190 case INDEX_op_dup2_vec: 3191 done = fold_dup2(&ctx, op); 3192 break; 3193 case INDEX_op_eqv: 3194 case INDEX_op_eqv_vec: 3195 done = fold_eqv(&ctx, op); 3196 break; 3197 case INDEX_op_extract: 3198 done = fold_extract(&ctx, op); 3199 break; 3200 case INDEX_op_extract2: 3201 done = fold_extract2(&ctx, op); 3202 break; 3203 case INDEX_op_ext_i32_i64: 3204 done = fold_exts(&ctx, op); 3205 break; 3206 case INDEX_op_extu_i32_i64: 3207 case INDEX_op_extrl_i64_i32: 3208 case INDEX_op_extrh_i64_i32: 3209 done = fold_extu(&ctx, op); 3210 break; 3211 case INDEX_op_ld8s: 3212 case INDEX_op_ld8u: 3213 case INDEX_op_ld16s: 3214 case INDEX_op_ld16u: 3215 case INDEX_op_ld32s: 3216 case INDEX_op_ld32u: 3217 done = fold_tcg_ld(&ctx, op); 3218 break; 3219 case INDEX_op_ld: 3220 case INDEX_op_ld_vec: 3221 done = fold_tcg_ld_memcopy(&ctx, op); 3222 break; 3223 case INDEX_op_st8: 3224 case INDEX_op_st16: 3225 case INDEX_op_st32: 3226 done = fold_tcg_st(&ctx, op); 3227 break; 3228 case INDEX_op_st: 3229 case INDEX_op_st_vec: 3230 done = fold_tcg_st_memcopy(&ctx, op); 3231 break; 3232 case INDEX_op_mb: 3233 done = fold_mb(&ctx, op); 3234 break; 3235 case INDEX_op_mov: 3236 case INDEX_op_mov_vec: 3237 done = fold_mov(&ctx, op); 3238 break; 3239 case INDEX_op_movcond: 3240 done = fold_movcond(&ctx, op); 3241 break; 3242 case INDEX_op_mul: 3243 
done = fold_mul(&ctx, op); 3244 break; 3245 case INDEX_op_mulsh: 3246 case INDEX_op_muluh: 3247 done = fold_mul_highpart(&ctx, op); 3248 break; 3249 case INDEX_op_muls2: 3250 case INDEX_op_mulu2: 3251 done = fold_multiply2(&ctx, op); 3252 break; 3253 case INDEX_op_nand: 3254 case INDEX_op_nand_vec: 3255 done = fold_nand(&ctx, op); 3256 break; 3257 case INDEX_op_neg: 3258 done = fold_neg(&ctx, op); 3259 break; 3260 case INDEX_op_nor: 3261 case INDEX_op_nor_vec: 3262 done = fold_nor(&ctx, op); 3263 break; 3264 case INDEX_op_not: 3265 case INDEX_op_not_vec: 3266 done = fold_not(&ctx, op); 3267 break; 3268 case INDEX_op_or: 3269 case INDEX_op_or_vec: 3270 done = fold_or(&ctx, op); 3271 break; 3272 case INDEX_op_orc: 3273 case INDEX_op_orc_vec: 3274 done = fold_orc(&ctx, op); 3275 break; 3276 case INDEX_op_qemu_ld: 3277 done = fold_qemu_ld_1reg(&ctx, op); 3278 break; 3279 case INDEX_op_qemu_ld2: 3280 done = fold_qemu_ld_2reg(&ctx, op); 3281 break; 3282 case INDEX_op_qemu_st: 3283 case INDEX_op_qemu_st2: 3284 done = fold_qemu_st(&ctx, op); 3285 break; 3286 case INDEX_op_rems: 3287 case INDEX_op_remu: 3288 done = fold_remainder(&ctx, op); 3289 break; 3290 case INDEX_op_rotl: 3291 case INDEX_op_rotr: 3292 case INDEX_op_sar: 3293 case INDEX_op_shl: 3294 case INDEX_op_shr: 3295 done = fold_shift(&ctx, op); 3296 break; 3297 case INDEX_op_setcond: 3298 done = fold_setcond(&ctx, op); 3299 break; 3300 case INDEX_op_negsetcond: 3301 done = fold_negsetcond(&ctx, op); 3302 break; 3303 case INDEX_op_setcond2_i32: 3304 done = fold_setcond2(&ctx, op); 3305 break; 3306 case INDEX_op_cmp_vec: 3307 done = fold_cmp_vec(&ctx, op); 3308 break; 3309 case INDEX_op_cmpsel_vec: 3310 done = fold_cmpsel_vec(&ctx, op); 3311 break; 3312 case INDEX_op_bitsel_vec: 3313 done = fold_bitsel_vec(&ctx, op); 3314 break; 3315 case INDEX_op_sextract: 3316 done = fold_sextract(&ctx, op); 3317 break; 3318 case INDEX_op_sub: 3319 done = fold_sub(&ctx, op); 3320 break; 3321 case INDEX_op_subbi: 3322 done = fold_subbi(&ctx, op); 3323 break; 3324 case INDEX_op_subbio: 3325 done = fold_subbio(&ctx, op); 3326 break; 3327 case INDEX_op_subbo: 3328 done = fold_subbo(&ctx, op); 3329 break; 3330 case INDEX_op_sub_vec: 3331 done = fold_sub_vec(&ctx, op); 3332 break; 3333 case INDEX_op_xor: 3334 case INDEX_op_xor_vec: 3335 done = fold_xor(&ctx, op); 3336 break; 3337 case INDEX_op_set_label: 3338 case INDEX_op_br: 3339 case INDEX_op_exit_tb: 3340 case INDEX_op_goto_tb: 3341 case INDEX_op_goto_ptr: 3342 finish_ebb(&ctx); 3343 done = true; 3344 break; 3345 default: 3346 done = finish_folding(&ctx, op); 3347 break; 3348 } 3349 tcg_debug_assert(done); 3350 } 3351 } 3352