/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)

typedef struct TempOptInfo {
    bool is_const;
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    uint64_t val;
    uint64_t mask;
} TempOptInfo;

static inline TempOptInfo *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}

static inline TempOptInfo *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}

static inline bool ts_is_const(TCGTemp *ts)
{
    return ts_info(ts)->is_const;
}

static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}

static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}

/* Reset TEMP's state, possibly removing the temp from the list of copies.  */
static void reset_ts(TCGTemp *ts)
{
    TempOptInfo *ti = ts_info(ts);
    TempOptInfo *pi = ts_info(ti->prev_copy);
    TempOptInfo *ni = ts_info(ti->next_copy);

    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->is_const = false;
    ti->mask = -1;
}

static void reset_temp(TCGArg arg)
{
    reset_ts(arg_temp(arg));
}

/* Initialize and activate a temporary.  */
static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
{
    size_t idx = temp_idx(ts);
    TempOptInfo *ti;

    if (test_bit(idx, temps_used->l)) {
        return;
    }
    set_bit(idx, temps_used->l);

    ti = ts->state_ptr;
    if (ti == NULL) {
        ti = tcg_malloc(sizeof(TempOptInfo));
        ts->state_ptr = ti;
    }

    ti->next_copy = ts;
    ti->prev_copy = ts;
    if (ts->kind == TEMP_CONST) {
        ti->is_const = true;
        ti->val = ts->val;
        ti->mask = ts->val;
        if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
            /* High bits of a 32-bit quantity are garbage.  */
            ti->mask |= ~0xffffffffull;
        }
    } else {
        ti->is_const = false;
        ti->mask = -1;
    }
}
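
/*
 * For orientation (illustrative summary): each live temp carries a
 * TempOptInfo.  The "mask" field is the known-zero-bits mask -- a bit
 * that is clear in "mask" is known to be zero in the temp's value.
 * next_copy/prev_copy link temps known to hold the same value into a
 * circular doubly linked list; a temp that is not a copy of anything
 * simply links to itself, which is what ts_is_copy() tests.
 */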

static void init_arg_info(TCGTempSet *temps_used, TCGArg arg)
{
    init_ts_info(temps_used, arg_temp(arg));
}

static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
{
    TCGTemp *i, *g, *l;

    /* If this is already readonly, we can't do better. */
    if (temp_readonly(ts)) {
        return ts;
    }

    g = l = NULL;
    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
        if (temp_readonly(i)) {
            return i;
        } else if (i->kind > ts->kind) {
            if (i->kind == TEMP_GLOBAL) {
                g = i;
            } else if (i->kind == TEMP_LOCAL) {
                l = i;
            }
        }
    }

    /* If we didn't find a better representation, return the same temp. */
    return g ? g : l ? l : ts;
}

static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
{
    TCGTemp *i;

    if (ts1 == ts2) {
        return true;
    }

    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
        return false;
    }

    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
        if (i == ts2) {
            return true;
        }
    }

    return false;
}

static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}

static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, uint64_t val)
{
    const TCGOpDef *def;
    TCGOpcode new_op;
    uint64_t mask;
    TempOptInfo *di = arg_info(dst);

    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_dupi_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_movi_i64;
    } else {
        new_op = INDEX_op_movi_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = val;

    reset_temp(dst);
    di->is_const = true;
    di->val = val;
    mask = val;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;
}

static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    const TCGOpDef *def;
    TempOptInfo *di;
    TempOptInfo *si;
    uint64_t mask;
    TCGOpcode new_op;

    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(s, op);
        return;
    }

    reset_ts(dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);
    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_mov_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_mov_i64;
    } else {
        new_op = INDEX_op_mov_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = src;

    mask = si->mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;

    if (src_ts->type == dst_ts->type) {
        TempOptInfo *ni = ts_info(si->next_copy);

        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;
    }
}

static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    case INDEX_op_clz_i32:
        return (uint32_t)x ? clz32(x) : y;

    case INDEX_op_clz_i64:
        return x ? clz64(x) : y;

    case INDEX_op_ctz_i32:
        return (uint32_t)x ? ctz32(x) : y;

    case INDEX_op_ctz_i64:
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop_i32:
        return ctpop32(x);

    case INDEX_op_ctpop_i64:
        return ctpop64(x);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    CASE_OP_32_64(bswap16):
        return bswap16(x);

    CASE_OP_32_64(bswap32):
        return bswap32(x);

    case INDEX_op_bswap64_i64:
        return bswap64(x);

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined. */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}

static uint64_t do_constant_folding(TCGOpcode op, uint64_t x, uint64_t y)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    uint64_t res = do_constant_folding_2(op, x, y);
    if (!(def->flags & TCG_OPF_64BIT)) {
        res = (int32_t)res;
    }
    return res;
}
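
/*
 * Worked example (illustrative, not part of the original source): folding
 * INDEX_op_add_i32 with x = 0x80000000 and y = 0x80000000 yields
 * 0x100000000 in do_constant_folding_2; because the op is not
 * TCG_OPF_64BIT, do_constant_folding truncates and sign-extends the
 * result to (int32_t)0 == 0.  Likewise the shift helpers mask the count,
 * so INDEX_op_shl_i32 with y = 35 shifts by 35 & 31 = 3.
 */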

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    uint64_t xv = arg_info(x)->val;
    uint64_t yv = arg_info(y)->val;

    if (arg_is_const(x) && arg_is_const(y)) {
        const TCGOpDef *def = &tcg_op_defs[op];
        tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
        if (def->flags & TCG_OPF_64BIT) {
            return do_constant_folding_cond_64(xv, yv, c);
        } else {
            return do_constant_folding_cond_32(xv, yv, c);
        }
    } else if (args_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (arg_is_const(y) && yv == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    }
    return 2;
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_info(bl)->val;
        tcg_target_ulong bhv = arg_info(bh)->val;
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_info(al)->val;
            tcg_target_ulong ahv = arg_info(ah)->val;
            uint64_t a = deposit64(alv, 32, 32, ahv);
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += arg_is_const(a1);
    sum -= arg_is_const(a2);

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += arg_is_const(p1[0]);
    sum += arg_is_const(p1[1]);
    sum -= arg_is_const(p2[0]);
    sum -= arg_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}
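
/*
 * Illustrative example (not part of the original source): for a
 * commutative op such as "add_i32 t0, $5, t1", swap_commutative moves
 * the constant to the second operand, giving "add_i32 t0, t1, $5",
 * which is the form the later "op r, a, const" and constant-folding
 * rules look for.
 */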

/* Propagate constants and copies, fold constant expressions. */
void tcg_optimize(TCGContext *s)
{
    int nb_temps, nb_globals, i;
    TCGOp *op, *op_next, *prev_mb = NULL;
    TCGTempSet temps_used;

    /* Each temp has a TempOptInfo reachable through its state_ptr.
       If the temp holds a constant then its value is kept there.
       If the temp is a copy of other ones then those copies are
       available through the doubly linked circular list. */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;

    bitmap_zero(temps_used.l, nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        uint64_t mask, partmask, affected, tmp;
        int nb_oargs, nb_iargs;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        /* Count the arguments, and initialize the temps that are
           going to be used */
        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                TCGTemp *ts = arg_temp(op->args[i]);
                if (ts) {
                    init_ts_info(&temps_used, ts);
                }
            }
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                init_arg_info(&temps_used, op->args[i]);
            }
        }

        /* Do copy propagation */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            if (ts && ts_is_copy(ts)) {
                op->args[i] = temp_arg(find_better_copy(s, ts));
            }
        }

        /* For commutative operations make constant second argument */
        switch (opc) {
        CASE_OP_32_64_VEC(add):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(op->args[0], &op->args[1], &op->args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &op->args[0], &op->args[1])) {
                op->args[2] = tcg_swap_cond(op->args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
                op->args[3] = tcg_swap_cond(op->args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &op->args[1], &op->args[2])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation.  */
            if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
                op->args[5] = tcg_invert_cond(op->args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(op->args[0], &op->args[2], &op->args[4]);
            swap_commutative(op->args[1], &op->args[3], &op->args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(op->args[0], &op->args[2], &op->args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&op->args[0], &op->args[2])) {
                op->args[4] = tcg_swap_cond(op->args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&op->args[1], &op->args[3])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            break;
        default:
            break;
        }
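
        /*
         * Illustrative example (not in the original source): after the
         * canonicalization above, "brcond_i32 $0, t1, lt, label" becomes
         * "brcond_i32 t1, $0, gt, label" -- swapping the operands of a
         * comparison requires swapping the condition as well.
         */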

        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
           and "sub r, 0, a => neg r, a" case.  */
        switch (opc) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        CASE_OP_32_64_VEC(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (arg_is_const(op->args[2])) {
                    /* Proceed with possible constant folding. */
                    break;
                }
                if (opc == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else if (opc == INDEX_op_sub_i64) {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                } else if (TCG_TARGET_HAS_neg_vec) {
                    TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
                    unsigned vece = TCGOP_VECE(op);
                    neg_op = INDEX_op_neg_vec;
                    have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
                } else {
                    break;
                }
                if (!have_neg) {
                    break;
                }
                if (arg_is_const(op->args[1])
                    && arg_info(op->args[1])->val == 0) {
                    op->opc = neg_op;
                    reset_temp(op->args[0]);
                    op->args[1] = op->args[2];
                    continue;
                }
            }
            break;
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64(nand):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64_VEC(andc):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64_VEC(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_VECTOR) {
                    not_op = INDEX_op_not_vec;
                    have_not = TCG_TARGET_HAS_not_vec;
                } else if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                op->opc = not_op;
                reset_temp(op->args[0]);
                op->args[1] = op->args[i];
                continue;
            }
        default:
            break;
        }

        /* Simplify expression for "op r, a, const => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(add):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }
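
        /*
         * Illustrative example (not in the original source): with the
         * rules above, "add_i32 t0, t1, $0" or "shl_i32 t0, t1, $0"
         * degenerates to "mov_i32 t0, t1", and "and_i32 t0, t1, $-1"
         * likewise just copies t1 unchanged.
         */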

        /* Simplify using known-zero bits.  Currently only ops with a single
           output argument are supported. */
        mask = -1;
        affected = -1;
        switch (opc) {
        CASE_OP_32_64(ext8s):
            if ((arg_info(op->args[1])->mask & 0x80) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = arg_info(op->args[2])->mask;
            if (arg_is_const(op->args[2])) {
        and_const:
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            mask = arg_info(op->args[1])->mask & mask;
            break;

        case INDEX_op_ext_i32_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        case INDEX_op_extu_i32_i64:
            /* We do not compute affected as it is a size changing op. */
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros does not imply known-ones.  Therefore unless
               op->args[2] is constant, we can't infer anything from it.  */
            if (arg_is_const(op->args[2])) {
                mask = ~arg_info(op->args[2])->mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set. */
            mask = arg_info(op->args[1])->mask;
            break;

        case INDEX_op_sar_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_extrl_i64_i32:
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;
        case INDEX_op_extrh_i64_i32:
            mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
            break;

        CASE_OP_32_64(shl):
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
                mask = arg_info(op->args[1])->mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost one.  */
            mask = -(arg_info(op->args[1])->mask
                     & -arg_info(op->args[1])->mask);
            break;

        CASE_OP_32_64(deposit):
            mask = deposit64(arg_info(op->args[1])->mask,
                             op->args[3], op->args[4],
                             arg_info(op->args[2])->mask);
            break;

        CASE_OP_32_64(extract):
            mask = extract64(arg_info(op->args[1])->mask,
                             op->args[2], op->args[3]);
            if (op->args[2] == 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;
        CASE_OP_32_64(sextract):
            mask = sextract64(arg_info(op->args[1])->mask,
                              op->args[2], op->args[3]);
            if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
            break;

        case INDEX_op_clz_i32:
        case INDEX_op_ctz_i32:
            mask = arg_info(op->args[2])->mask | 31;
            break;

        case INDEX_op_clz_i64:
        case INDEX_op_ctz_i64:
            mask = arg_info(op->args[2])->mask | 63;
            break;

        case INDEX_op_ctpop_i32:
            mask = 32 | 31;
            break;
        case INDEX_op_ctpop_i64:
            mask = 64 | 63;
            break;

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
                MemOp mop = get_memop(oi);
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }
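
        /*
         * Illustrative example (not in the original source): if t1 was
         * produced by ld8u, arg_info(t1)->mask is 0xff.  A subsequent
         * "and_i32 t0, t1, $0xff" then has affected == 0 (no bit of t1
         * can change) and is replaced below by "mov_i32 t0, t1", while
         * "and_i32 t0, t1, $0xff00" has partmask == 0 and becomes
         * "movi_i32 t0, $0".
         */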

        /* 32-bit ops generate 32-bit results.  For the result-is-zero test
           below, we can ignore high bits, but for further optimizations we
           need to record that the high bits contain garbage.  */
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }

        if (partmask == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, op, op->args[0], 0);
            continue;
        }
        if (affected == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if (arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(and):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(xor):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted into arguments by the
           register allocator where needed and possible.  Also detect
           copies. */
        switch (opc) {
        CASE_OP_32_64_VEC(mov):
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            break;
        CASE_OP_32_64(movi):
        case INDEX_op_dupi_vec:
            tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
            break;

        case INDEX_op_dup_vec:
            if (arg_is_const(op->args[1])) {
                tmp = arg_info(op->args[1])->val;
                tmp = dup_const(TCGOP_VECE(op), tmp);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        case INDEX_op_dup2_vec:
            assert(TCG_TARGET_REG_BITS == 32);
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[1])->val;
                if (tmp == arg_info(op->args[2])->val) {
                    tcg_opt_gen_movi(s, op, op->args[0], tmp);
                    break;
                }
            } else if (args_are_copies(op->args[1], op->args[2])) {
                op->opc = INDEX_op_dup_vec;
                TCGOP_VECE(op) = MO_32;
                nb_iargs = 1;
            }
            goto do_default;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        CASE_OP_32_64(ctpop):
        CASE_OP_32_64(bswap16):
        CASE_OP_32_64(bswap32):
        case INDEX_op_bswap64_i64:
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
        case INDEX_op_ext_i32_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            if (arg_is_const(op->args[1])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
                                          arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(clz):
        CASE_OP_32_64(ctz):
            if (arg_is_const(op->args[1])) {
                TCGArg v = arg_info(op->args[1])->val;
                if (v != 0) {
                    tmp = do_constant_folding(opc, v, 0);
                    tcg_opt_gen_movi(s, op, op->args[0], tmp);
                } else {
                    tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = deposit64(arg_info(op->args[1])->val,
                                op->args[3], op->args[4],
                                arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract):
            if (arg_is_const(op->args[1])) {
                tmp = extract64(arg_info(op->args[1])->val,
                                op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(sextract):
            if (arg_is_const(op->args[1])) {
                tmp = sextract64(arg_info(op->args[1])->val,
                                 op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract2):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                uint64_t v1 = arg_info(op->args[1])->val;
                uint64_t v2 = arg_info(op->args[2])->val;
                int shr = op->args[3];

                if (opc == INDEX_op_extract2_i64) {
                    tmp = (v1 >> shr) | (v2 << (64 - shr));
                } else {
                    tmp = (int32_t)(((uint32_t)v1 >> shr) |
                                    ((uint32_t)v2 << (32 - shr)));
                }
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, op->args[0],
                                           op->args[1], op->args[2]);
            if (tmp != 2) {
                if (tmp) {
                    bitmap_zero(temps_used.l, nb_temps);
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[3];
                } else {
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[5]);
            if (tmp != 2) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
                break;
            }
            if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
                uint64_t tv = arg_info(op->args[3])->val;
                uint64_t fv = arg_info(op->args[4])->val;
                TCGCond cond = op->args[5];

                if (fv == 1 && tv == 0) {
                    cond = tcg_invert_cond(cond);
                } else if (!(tv == 1 && fv == 0)) {
                    goto do_default;
                }
                op->args[3] = cond;
                op->opc = opc = (opc == INDEX_op_movcond_i32
                                 ? INDEX_op_setcond_i32
                                 : INDEX_op_setcond_i64);
                nb_iargs = 2;
            }
            goto do_default;

        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
                && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
                uint32_t al = arg_info(op->args[2])->val;
                uint32_t ah = arg_info(op->args[3])->val;
                uint32_t bl = arg_info(op->args[4])->val;
                uint32_t bh = arg_info(op->args[5])->val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, op, rl, (int32_t)a);
                tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
                uint32_t a = arg_info(op->args[2])->val;
                uint32_t b = arg_info(op->args[3])->val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, op, rl, (int32_t)r);
                tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
                                            op->args[4]);
            if (tmp != 2) {
                if (tmp) {
            do_brcond_true:
                    bitmap_zero(temps_used.l, nb_temps);
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[5];
                } else {
            do_brcond_false:
                    tcg_op_remove(s, op);
                }
            } else if ((op->args[4] == TCG_COND_LT
                        || op->args[4] == TCG_COND_GE)
                       && arg_is_const(op->args[2])
                       && arg_info(op->args[2])->val == 0
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
            do_brcond_high:
                bitmap_zero(temps_used.l, nb_temps);
                op->opc = INDEX_op_brcond_i32;
                op->args[0] = op->args[1];
                op->args[1] = op->args[3];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_brcond_low:
                bitmap_zero(temps_used.l, nb_temps);
                op->opc = INDEX_op_brcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
                                            op->args[5]);
            if (tmp != 2) {
            do_setcond_const:
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
            } else if ((op->args[5] == TCG_COND_LT
                        || op->args[5] == TCG_COND_GE)
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0
                       && arg_is_const(op->args[4])
                       && arg_info(op->args[4])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
            do_setcond_high:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_setcond_low:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[2] = op->args[3];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;
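
        /*
         * Illustrative example (not in the original source): a signed
         * "setcond2_i32 t0, al, ah, $0, $0, lt" depends only on the sign
         * of the 64-bit value, so the code above rewrites it as
         * "setcond_i32 t0, ah, $0, lt" against the high word alone.
         */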

        case INDEX_op_call:
            if (!(op->args[nb_oargs + nb_iargs + 1]
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    if (test_bit(i, temps_used.l)) {
                        reset_ts(&s->temps[i]);
                    }
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute the operation result) so no propagation
               is done.  We trash everything if the operation is the end
               of a basic block, otherwise we only trash the output args.
               "mask" is the non-zero bits mask for the first output arg.  */
            if (def->flags & TCG_OPF_BB_END) {
                bitmap_zero(temps_used.l, nb_temps);
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(op->args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far). */
                    if (i == 0) {
                        arg_info(op->args[i])->mask = mask;
                    }
                }
            }
            break;
        }

        /* Eliminate duplicate and redundant fence instructions. */
        if (prev_mb) {
            switch (opc) {
            case INDEX_op_mb:
                /* Merge two barriers of the same type into one,
                 * or a weaker barrier into a stronger one,
                 * or two weaker barriers into a stronger one.
                 *   mb X; mb Y => mb X|Y
                 *   mb; strl => mb; st
                 *   ldaq; mb => ld; mb
                 *   ldaq; strl => ld; mb; st
                 * Other combinations are also merged into a strong
                 * barrier.  This is stricter than specified but for
                 * the purposes of TCG is better than not optimizing.
                 */
                prev_mb->args[0] |= op->args[0];
                tcg_op_remove(s, op);
                break;

            default:
                /* Opcodes that end the block stop the optimization. */
                if ((def->flags & TCG_OPF_BB_END) == 0) {
                    break;
                }
                /* fallthru */
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_call:
                /* Opcodes that touch guest memory stop the optimization. */
                prev_mb = NULL;
                break;
            }
        } else if (opc == INDEX_op_mb) {
            prev_mb = op;
        }
    }
}