/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg-internal.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)

typedef struct TempOptInfo {
    bool is_const;
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    uint64_t val;
    uint64_t mask;
} TempOptInfo;
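/*
 * Notes on TempOptInfo (inferred from how the fields are used below):
 * "val" is meaningful only while "is_const" is set.  "mask" tracks the
 * bits of the temp that may be nonzero, so a bit that is clear in
 * "mask" is known to be zero; -1 means nothing is known.  "prev_copy"
 * and "next_copy" link all temps known to hold the same value into a
 * circular doubly-linked list, with a temp that is on no list pointing
 * at itself.
 */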
static inline TempOptInfo *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}

static inline TempOptInfo *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}

static inline bool ts_is_const(TCGTemp *ts)
{
    return ts_info(ts)->is_const;
}

static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}

static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}

/* Reset TEMP's state, possibly removing the temp from the list of copies. */
static void reset_ts(TCGTemp *ts)
{
    TempOptInfo *ti = ts_info(ts);
    TempOptInfo *pi = ts_info(ti->prev_copy);
    TempOptInfo *ni = ts_info(ti->next_copy);

    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->is_const = false;
    ti->mask = -1;
}

static void reset_temp(TCGArg arg)
{
    reset_ts(arg_temp(arg));
}

/* Initialize and activate a temporary.  */
static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
{
    size_t idx = temp_idx(ts);
    TempOptInfo *ti;

    if (test_bit(idx, temps_used->l)) {
        return;
    }
    set_bit(idx, temps_used->l);

    ti = ts->state_ptr;
    if (ti == NULL) {
        ti = tcg_malloc(sizeof(TempOptInfo));
        ts->state_ptr = ti;
    }

    ti->next_copy = ts;
    ti->prev_copy = ts;
    if (ts->kind == TEMP_CONST) {
        ti->is_const = true;
        ti->val = ts->val;
        ti->mask = ts->val;
        if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
            /* High bits of a 32-bit quantity are garbage.  */
            ti->mask |= ~0xffffffffull;
        }
    } else {
        ti->is_const = false;
        ti->mask = -1;
    }
}

static void init_arg_info(TCGTempSet *temps_used, TCGArg arg)
{
    init_ts_info(temps_used, arg_temp(arg));
}
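/*
 * Worked example of the copy list: after "mov t1, t0; mov t2, t0" all
 * of t0, t1 and t2 sit on one circular list, so any of them may stand
 * in for the others.  find_better_copy() below picks a representative
 * of such a list, preferring a read-only temp, then a global, then a
 * local, on the assumption that the more stable copy is least likely
 * to be invalidated later.
 */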
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
{
    TCGTemp *i, *g, *l;

    /* If this is already readonly, we can't do better. */
    if (temp_readonly(ts)) {
        return ts;
    }

    g = l = NULL;
    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
        if (temp_readonly(i)) {
            return i;
        } else if (i->kind > ts->kind) {
            if (i->kind == TEMP_GLOBAL) {
                g = i;
            } else if (i->kind == TEMP_LOCAL) {
                l = i;
            }
        }
    }

    /* If we didn't find a better representation, return the same temp. */
    return g ? g : l ? l : ts;
}

static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
{
    TCGTemp *i;

    if (ts1 == ts2) {
        return true;
    }

    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
        return false;
    }

    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
        if (i == ts2) {
            return true;
        }
    }

    return false;
}

static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}

static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    const TCGOpDef *def;
    TempOptInfo *di;
    TempOptInfo *si;
    uint64_t mask;
    TCGOpcode new_op;

    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(s, op);
        return;
    }

    reset_ts(dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);
    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_mov_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_mov_i64;
    } else {
        new_op = INDEX_op_mov_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = src;

    mask = si->mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;

    if (src_ts->type == dst_ts->type) {
        TempOptInfo *ni = ts_info(si->next_copy);

        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;
    }
}

static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
                             TCGOp *op, TCGArg dst, uint64_t val)
{
    const TCGOpDef *def = &tcg_op_defs[op->opc];
    TCGType type;
    TCGTemp *tv;

    if (def->flags & TCG_OPF_VECTOR) {
        type = TCGOP_VECL(op) + TCG_TYPE_V64;
    } else if (def->flags & TCG_OPF_64BIT) {
        type = TCG_TYPE_I64;
    } else {
        type = TCG_TYPE_I32;
    }

    /* Convert movi to mov with constant temp. */
    tv = tcg_constant_internal(type, val);
    init_ts_info(temps_used, tv);
    tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
}

static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    case INDEX_op_clz_i32:
        return (uint32_t)x ? clz32(x) : y;

    case INDEX_op_clz_i64:
        return x ? clz64(x) : y;

    case INDEX_op_ctz_i32:
        return (uint32_t)x ? ctz32(x) : y;

    case INDEX_op_ctz_i64:
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop_i32:
        return ctpop32(x);

    case INDEX_op_ctpop_i64:
        return ctpop64(x);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    CASE_OP_32_64(bswap16):
        return bswap16(x);

    CASE_OP_32_64(bswap32):
        return bswap32(x);

    case INDEX_op_bswap64_i64:
        return bswap64(x);

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;
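    /*
     * Note: "v ? : 1" below is the GNU C conditional with an omitted
     * middle operand, equivalent to "v ? v : 1".  A zero divisor is
     * replaced by 1 so that folding a division whose guest-level
     * result is undefined anyway cannot trap at translation time.
     */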
    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined. */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}

static uint64_t do_constant_folding(TCGOpcode op, uint64_t x, uint64_t y)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    uint64_t res = do_constant_folding_2(op, x, y);
    if (!(def->flags & TCG_OPF_64BIT)) {
        res = (int32_t)res;
    }
    return res;
}
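/*
 * Worked example for the truncation above: folding add_i32 with
 * x = 0xffffffff and y = 1 computes 0x100000000, which the (int32_t)
 * cast reduces to 0.  Likewise sub_i32 of 0 - 1 yields
 * 0xffffffffffffffff: 32-bit results are kept sign-extended in the
 * 64-bit value.
 */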
static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can. */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    uint64_t xv = arg_info(x)->val;
    uint64_t yv = arg_info(y)->val;

    if (arg_is_const(x) && arg_is_const(y)) {
        const TCGOpDef *def = &tcg_op_defs[op];
        tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
        if (def->flags & TCG_OPF_64BIT) {
            return do_constant_folding_cond_64(xv, yv, c);
        } else {
            return do_constant_folding_cond_32(xv, yv, c);
        }
    } else if (args_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (arg_is_const(y) && yv == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    }
    return 2;
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can. */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_info(bl)->val;
        tcg_target_ulong bhv = arg_info(bh)->val;
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_info(al)->val;
            tcg_target_ulong ahv = arg_info(ah)->val;
            uint64_t a = deposit64(alv, 32, 32, ahv);
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += arg_is_const(a1);
    sum -= arg_is_const(a2);

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += arg_is_const(p1[0]);
    sum += arg_is_const(p1[1]);
    sum -= arg_is_const(p2[0]);
    sum -= arg_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}
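/*
 * Worked example for swap_commutative(): "add r, $5, t1" becomes
 * "add r, t1, $5" (constant moved to the second operand), and
 * "add r, t1, r" becomes "add r, r, t1" (the destination preferred as
 * the first source).  The caller must mirror the swap in any attached
 * condition, e.g. by applying tcg_swap_cond() for comparison ops.
 */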
/* Propagate constants and copies, fold constant expressions. */
void tcg_optimize(TCGContext *s)
{
    int nb_temps, nb_globals, i;
    TCGOp *op, *op_next, *prev_mb = NULL;
    TCGTempSet temps_used;

    /* Each temp has a TempOptInfo reachable through state_ptr.
       If the temp holds a constant, its value is kept there.
       If the temp is a copy of other ones, the other copies are
       available through its doubly linked circular list. */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;

    memset(&temps_used, 0, sizeof(temps_used));
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        uint64_t mask, partmask, affected, tmp;
        int nb_oargs, nb_iargs;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        /* Count the arguments, and initialize the temps that are
           going to be used. */
        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                TCGTemp *ts = arg_temp(op->args[i]);
                if (ts) {
                    init_ts_info(&temps_used, ts);
                }
            }
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                init_arg_info(&temps_used, op->args[i]);
            }
        }

        /* Do copy propagation */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            if (ts && ts_is_copy(ts)) {
                op->args[i] = temp_arg(find_better_copy(s, ts));
            }
        }

        /* For commutative operations make constant second argument */
        switch (opc) {
        CASE_OP_32_64_VEC(add):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(op->args[0], &op->args[1], &op->args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &op->args[0], &op->args[1])) {
                op->args[2] = tcg_swap_cond(op->args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
                op->args[3] = tcg_swap_cond(op->args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &op->args[1], &op->args[2])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation. */
            if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
                op->args[5] = tcg_invert_cond(op->args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(op->args[0], &op->args[2], &op->args[4]);
            swap_commutative(op->args[1], &op->args[3], &op->args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(op->args[0], &op->args[2], &op->args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&op->args[0], &op->args[2])) {
                op->args[4] = tcg_swap_cond(op->args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&op->args[1], &op->args[3])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            break;
        default:
            break;
        }
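        /*
         * Worked example of the canonicalization above: swapping the
         * operands of a comparison must mirror the condition, so
         * "brcond $0, t1, lt" becomes "brcond t1, $0, gt" (LT turns
         * into GT, not GE).  For movcond, exchanging the true/false
         * inputs instead inverts the condition via tcg_invert_cond().
         */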
        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
           and "sub r, 0, a => neg r, a" case. */
        switch (opc) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
                continue;
            }
            break;
        CASE_OP_32_64_VEC(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (arg_is_const(op->args[2])) {
                    /* Proceed with possible constant folding. */
                    break;
                }
                if (opc == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else if (opc == INDEX_op_sub_i64) {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                } else if (TCG_TARGET_HAS_neg_vec) {
                    TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
                    unsigned vece = TCGOP_VECE(op);
                    neg_op = INDEX_op_neg_vec;
                    have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
                } else {
                    break;
                }
                if (!have_neg) {
                    break;
                }
                if (arg_is_const(op->args[1])
                    && arg_info(op->args[1])->val == 0) {
                    op->opc = neg_op;
                    reset_temp(op->args[0]);
                    op->args[1] = op->args[2];
                    continue;
                }
            }
            break;
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64(nand):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64_VEC(andc):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64_VEC(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_VECTOR) {
                    not_op = INDEX_op_not_vec;
                    have_not = TCG_TARGET_HAS_not_vec;
                } else if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                op->opc = not_op;
                reset_temp(op->args[0]);
                op->args[1] = op->args[i];
                continue;
            }
        default:
            break;
        }
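        /*
         * Worked example for the try_not conversions above (assuming
         * the host provides the not opcode): "xor r, a, $-1" and
         * "nand r, a, $-1" both compute ~a and become "not r, a";
         * likewise "nor r, a, $0" becomes "not r, a", and
         * "andc r, $-1, b", "orc r, $0, b" or "eqv r, $0, b" become
         * "not r, b".
         */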
        /* Simplify expression for "op r, a, const => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(add):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify using known-zero bits.  Currently only ops with a single
           output argument are supported. */
        mask = -1;
        affected = -1;
        switch (opc) {
        CASE_OP_32_64(ext8s):
            if ((arg_info(op->args[1])->mask & 0x80) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = arg_info(op->args[2])->mask;
            if (arg_is_const(op->args[2])) {
        and_const:
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            mask = arg_info(op->args[1])->mask & mask;
            break;

        case INDEX_op_ext_i32_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        case INDEX_op_extu_i32_i64:
            /* We do not compute affected as it is a size-changing op. */
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros does not imply known-ones.  Therefore unless
               op->args[2] is constant, we can't infer anything from it. */
            if (arg_is_const(op->args[2])) {
                mask = ~arg_info(op->args[2])->mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set. */
            mask = arg_info(op->args[1])->mask;
            break;

        case INDEX_op_sar_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_extrl_i64_i32:
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;
        case INDEX_op_extrh_i64_i32:
            mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
            break;

        CASE_OP_32_64(shl):
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
                mask = arg_info(op->args[1])->mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost.  */
            mask = -(arg_info(op->args[1])->mask
                     & -arg_info(op->args[1])->mask);
            break;
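            /*
             * Worked example for the neg mask: if the input mask is
             * 0b0110, "mask & -mask" isolates the rightmost possibly-
             * set bit, 0b0010, and negating that gives ...11110: the
             * result may have any bit set from that position upward,
             * while the bits below the rightmost possibly-set input
             * bit remain known zero (and zero negates to zero).
             */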
        CASE_OP_32_64(deposit):
            mask = deposit64(arg_info(op->args[1])->mask,
                             op->args[3], op->args[4],
                             arg_info(op->args[2])->mask);
            break;

        CASE_OP_32_64(extract):
            mask = extract64(arg_info(op->args[1])->mask,
                             op->args[2], op->args[3]);
            if (op->args[2] == 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;
        CASE_OP_32_64(sextract):
            mask = sextract64(arg_info(op->args[1])->mask,
                              op->args[2], op->args[3]);
            if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
            break;

        case INDEX_op_clz_i32:
        case INDEX_op_ctz_i32:
            mask = arg_info(op->args[2])->mask | 31;
            break;

        case INDEX_op_clz_i64:
        case INDEX_op_ctz_i64:
            mask = arg_info(op->args[2])->mask | 63;
            break;

        case INDEX_op_ctpop_i32:
            mask = 32 | 31;
            break;
        case INDEX_op_ctpop_i64:
            mask = 64 | 63;
            break;

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
                MemOp mop = get_memop(oi);
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }

        /* 32-bit ops generate 32-bit results.  For the result-is-zero test
           below, we can ignore high bits, but for further optimizations we
           need to record that the high bits contain garbage.  */
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }

        if (partmask == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
            continue;
        }
        if (affected == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            continue;
        }
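        /*
         * Worked example for partmask/affected: given "and_i32 r, a, $0xff"
         * where a's known-nonzero mask is 0xff00, the result mask is 0, so
         * partmask == 0 and the op folds to "movi r, $0".  If a's mask were
         * 0x00ff instead, "affected" (the bits of a that the AND could
         * actually clear) would be 0, and the op folds to "mov r, a".
         */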
        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if (arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(and):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(xor):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted into arguments by the
           register allocator where needed and possible.  Also detect
           copies. */
        switch (opc) {
        CASE_OP_32_64_VEC(mov):
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            break;

        case INDEX_op_dup_vec:
            if (arg_is_const(op->args[1])) {
                tmp = arg_info(op->args[1])->val;
                tmp = dup_const(TCGOP_VECE(op), tmp);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        case INDEX_op_dup2_vec:
            assert(TCG_TARGET_REG_BITS == 32);
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0],
                                 deposit64(arg_info(op->args[1])->val, 32, 32,
                                           arg_info(op->args[2])->val));
                break;
            } else if (args_are_copies(op->args[1], op->args[2])) {
                op->opc = INDEX_op_dup_vec;
                TCGOP_VECE(op) = MO_32;
                nb_iargs = 1;
            }
            goto do_default;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        CASE_OP_32_64(ctpop):
        CASE_OP_32_64(bswap16):
        CASE_OP_32_64(bswap32):
        case INDEX_op_bswap64_i64:
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
        case INDEX_op_ext_i32_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            if (arg_is_const(op->args[1])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
                                          arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(clz):
        CASE_OP_32_64(ctz):
            if (arg_is_const(op->args[1])) {
                TCGArg v = arg_info(op->args[1])->val;
                if (v != 0) {
                    tmp = do_constant_folding(opc, v, 0);
                    tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                } else {
                    tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = deposit64(arg_info(op->args[1])->val,
                                op->args[3], op->args[4],
                                arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract):
            if (arg_is_const(op->args[1])) {
                tmp = extract64(arg_info(op->args[1])->val,
                                op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(sextract):
            if (arg_is_const(op->args[1])) {
                tmp = sextract64(arg_info(op->args[1])->val,
                                 op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;
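        /*
         * Worked examples of the bitfield helpers used above:
         * deposit64(0xffffffff, 8, 8, 0xab) replaces bits [8, 16) and
         * yields 0xffffabff; extract64(0xabcd, 4, 8) yields 0xbc; and
         * sextract64(0xabcd, 4, 8) sign-extends that field to
         * 0xffffffffffffffbc.
         */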
        CASE_OP_32_64(extract2):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                uint64_t v1 = arg_info(op->args[1])->val;
                uint64_t v2 = arg_info(op->args[2])->val;
                int shr = op->args[3];

                if (opc == INDEX_op_extract2_i64) {
                    tmp = (v1 >> shr) | (v2 << (64 - shr));
                } else {
                    tmp = (int32_t)(((uint32_t)v1 >> shr) |
                                    ((uint32_t)v2 << (32 - shr)));
                }
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, op->args[0],
                                           op->args[1], op->args[2]);
            if (tmp != 2) {
                if (tmp) {
                    memset(&temps_used, 0, sizeof(temps_used));
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[3];
                } else {
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[5]);
            if (tmp != 2) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
                break;
            }
            if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
                uint64_t tv = arg_info(op->args[3])->val;
                uint64_t fv = arg_info(op->args[4])->val;
                TCGCond cond = op->args[5];

                if (fv == 1 && tv == 0) {
                    cond = tcg_invert_cond(cond);
                } else if (!(tv == 1 && fv == 0)) {
                    goto do_default;
                }
                op->args[3] = cond;
                op->opc = opc = (opc == INDEX_op_movcond_i32
                                 ? INDEX_op_setcond_i32
                                 : INDEX_op_setcond_i64);
                nb_iargs = 2;
            }
            goto do_default;
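        /*
         * Worked example for the movcond rewrite above: with constant
         * inputs tv == 1 and fv == 0, "movcond r, a, b, $1, $0, lt" is
         * exactly the comparison result, so it becomes
         * "setcond r, a, b, lt"; with tv == 0 and fv == 1 the condition
         * is inverted instead, giving "setcond r, a, b, ge".
         */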
        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
                && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
                uint32_t al = arg_info(op->args[2])->val;
                uint32_t ah = arg_info(op->args[3])->val;
                uint32_t bl = arg_info(op->args[4])->val;
                uint32_t bh = arg_info(op->args[5])->val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)a);
                tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(a >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
                uint32_t a = arg_info(op->args[2])->val;
                uint32_t b = arg_info(op->args[3])->val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)r);
                tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(r >> 32));
                break;
            }
            goto do_default;
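        /*
         * Worked example for the double-word folding above: add2_i32
         * with constant inputs al = 0xffffffff, ah = 0, bl = 1, bh = 0
         * computes the 64-bit sum 0x100000000, so the low-part op
         * becomes "movi rl, $0" and the extra op inserted before it
         * becomes "movi rh, $1".
         */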
        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
                                            op->args[4]);
            if (tmp != 2) {
                if (tmp) {
        do_brcond_true:
                    memset(&temps_used, 0, sizeof(temps_used));
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[5];
                } else {
        do_brcond_false:
                    tcg_op_remove(s, op);
                }
            } else if ((op->args[4] == TCG_COND_LT
                        || op->args[4] == TCG_COND_GE)
                       && arg_is_const(op->args[2])
                       && arg_info(op->args[2])->val == 0
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
        do_brcond_high:
                memset(&temps_used, 0, sizeof(temps_used));
                op->opc = INDEX_op_brcond_i32;
                op->args[0] = op->args[1];
                op->args[1] = op->args[3];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
        do_brcond_low:
                memset(&temps_used, 0, sizeof(temps_used));
                op->opc = INDEX_op_brcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
                                            op->args[5]);
            if (tmp != 2) {
        do_setcond_const:
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
            } else if ((op->args[5] == TCG_COND_LT
                        || op->args[5] == TCG_COND_GE)
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0
                       && arg_is_const(op->args[4])
                       && arg_info(op->args[4])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
        do_setcond_high:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
        do_setcond_low:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[2] = op->args[3];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_call:
            if (!(tcg_call_flags(op)
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    if (test_bit(i, temps_used.l)) {
                        reset_ts(&s->temps[i]);
                    }
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute the operation result) so no propagation
               is done.  We trash everything if the operation is the end
               of a basic block, otherwise we only trash the output args.
               "mask" is the non-zero bits mask for the first output arg. */
            if (def->flags & TCG_OPF_BB_END) {
                memset(&temps_used, 0, sizeof(temps_used));
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(op->args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far). */
                    if (i == 0) {
                        arg_info(op->args[i])->mask = mask;
                    }
                }
            }
            break;
        }

        /* Eliminate duplicate and redundant fence instructions. */
        if (prev_mb) {
            switch (opc) {
            case INDEX_op_mb:
                /* Merge two barriers of the same type into one,
                 * or a weaker barrier into a stronger one,
                 * or two weaker barriers into a stronger one.
                 *   mb X; mb Y => mb X|Y
                 *   mb; strl => mb; st
                 *   ldaq; mb => ld; mb
                 *   ldaq; strl => ld; mb; st
                 * Other combinations are also merged into a strong
                 * barrier.  This is stricter than specified but for
                 * the purposes of TCG is better than not optimizing.
                 */
                prev_mb->args[0] |= op->args[0];
                tcg_op_remove(s, op);
                break;

            default:
                /* Opcodes that end the block stop the optimization.  */
                if ((def->flags & TCG_OPF_BB_END) == 0) {
                    break;
                }
                /* fallthru */
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_call:
                /* Opcodes that touch guest memory stop the optimization.  */
                prev_mb = NULL;
                break;
            }
        } else if (opc == INDEX_op_mb) {
            prev_mb = op;
        }
    }
}