/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "exec/cpu-common.h"
#include "tcg-op.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

struct tcg_temp_info {
    bool is_const;
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    tcg_target_ulong val;
    tcg_target_ulong mask;
};

static inline struct tcg_temp_info *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}

static inline struct tcg_temp_info *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}

static inline bool ts_is_const(TCGTemp *ts)
{
    return ts_info(ts)->is_const;
}

static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}

static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}
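/*
 * Copies are tracked through the next_copy/prev_copy fields, which thread
 * each temp into a doubly linked circular list; a temp that is not a copy
 * of anything points back at itself.  For example, after
 *     mov t1, t0
 *     mov t2, t0
 * the list is t0 -> t1 -> t2 -> t0, so resetting t1 merely unlinks it
 * while leaving t0 and t2 copies of each other.
 */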
/* Reset TEMP's state, possibly removing the temp from the list of copies. */
static void reset_ts(TCGTemp *ts)
{
    struct tcg_temp_info *ti = ts_info(ts);
    struct tcg_temp_info *pi = ts_info(ti->prev_copy);
    struct tcg_temp_info *ni = ts_info(ti->next_copy);

    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->is_const = false;
    ti->mask = -1;
}

static void reset_temp(TCGArg arg)
{
    reset_ts(arg_temp(arg));
}

/* Initialize and activate a temporary. */
static void init_ts_info(struct tcg_temp_info *infos,
                         TCGTempSet *temps_used, TCGTemp *ts)
{
    size_t idx = temp_idx(ts);
    if (!test_bit(idx, temps_used->l)) {
        struct tcg_temp_info *ti = &infos[idx];

        ts->state_ptr = ti;
        ti->next_copy = ts;
        ti->prev_copy = ts;
        ti->is_const = false;
        ti->mask = -1;
        set_bit(idx, temps_used->l);
    }
}

static void init_arg_info(struct tcg_temp_info *infos,
                          TCGTempSet *temps_used, TCGArg arg)
{
    init_ts_info(infos, temps_used, arg_temp(arg));
}

static int op_bits(TCGOpcode op)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    return def->flags & TCG_OPF_64BIT ? 64 : 32;
}

static TCGOpcode op_to_mov(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_mov_i32;
    case 64:
        return INDEX_op_mov_i64;
    default:
        fprintf(stderr, "op_to_mov: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGOpcode op_to_movi(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_movi_i32;
    case 64:
        return INDEX_op_movi_i64;
    default:
        fprintf(stderr, "op_to_movi: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
{
    TCGTemp *i;

    /* If this is already a global, we can't do better. */
    if (ts->temp_global) {
        return ts;
    }

    /* Search for a global first. */
    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
        if (i->temp_global) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local. */
    if (!ts->temp_local) {
        for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
            if (i->temp_local) {
                return i;
            }
        }
    }

    /* Failing to find a better representation, return the same temp. */
    return ts;
}

static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
{
    TCGTemp *i;

    if (ts1 == ts2) {
        return true;
    }

    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
        return false;
    }

    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
        if (i == ts2) {
            return true;
        }
    }

    return false;
}

static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}
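/*
 * Note on the "mask" field: it records which bits of a temp may be
 * nonzero; a clear bit means the corresponding value bit is known to be
 * zero.  E.g. after "movi t0, 6" the mask is 6, and after "ext8u t0, t1"
 * the mask is at most 0xff.  A mask of -1 means nothing is known.
 */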
static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
{
    TCGOpcode new_op = op_to_movi(op->opc);
    tcg_target_ulong mask;
    struct tcg_temp_info *di = arg_info(dst);

    op->opc = new_op;

    reset_temp(dst);
    di->is_const = true;
    di->val = val;
    mask = val;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
        /* High bits of the destination are now garbage. */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;

    op->args[0] = dst;
    op->args[1] = val;
}

static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    struct tcg_temp_info *di;
    struct tcg_temp_info *si;
    tcg_target_ulong mask;
    TCGOpcode new_op;

    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(s, op);
        return;
    }

    reset_ts(dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);
    new_op = op_to_mov(op->opc);

    op->opc = new_op;
    op->args[0] = dst;
    op->args[1] = src;

    mask = si->mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage. */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;

    if (src_ts->type == dst_ts->type) {
        struct tcg_temp_info *ni = ts_info(si->next_copy);

        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;
    }
}

static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    case INDEX_op_clz_i32:
        return (uint32_t)x ? clz32(x) : y;

    case INDEX_op_clz_i64:
        return x ? clz64(x) : y;

    case INDEX_op_ctz_i32:
        return (uint32_t)x ? ctz32(x) : y;

    case INDEX_op_ctz_i64:
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop_i32:
        return ctpop32(x);

    case INDEX_op_ctpop_i64:
        return ctpop64(x);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined. */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}
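/*
 * For a 32-bit opcode the generic computation above may leave junk in
 * the high half, so do_constant_folding sign-extends the result down to
 * 32 bits; e.g. add_i32 with x = y = 0x80000000 computes 0x100000000 on
 * a 64-bit host, which truncates to the correct 32-bit result of 0.
 */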
static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    TCGArg res = do_constant_folding_2(op, x, y);
    if (op_bits(op) == 32) {
        res = (int32_t)res;
    }
    return res;
}

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    tcg_target_ulong xv = arg_info(x)->val;
    tcg_target_ulong yv = arg_info(y)->val;
    if (arg_is_const(x) && arg_is_const(y)) {
        switch (op_bits(op)) {
        case 32:
            return do_constant_folding_cond_32(xv, yv, c);
        case 64:
            return do_constant_folding_cond_64(xv, yv, c);
        default:
            tcg_abort();
        }
    } else if (args_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (arg_is_const(y) && yv == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    }
    return 2;
}
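/*
 * The double-word variant below receives each 64-bit operand as a
 * (low, high) pair of 32-bit args and reassembles it with deposit64,
 * e.g. deposit64(0x89abcdef, 32, 32, 0x01234567) == 0x0123456789abcdef.
 */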
/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_info(bl)->val;
        tcg_target_ulong bhv = arg_info(bh)->val;
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_info(al)->val;
            tcg_target_ulong ahv = arg_info(ah)->val;
            uint64_t a = deposit64(alv, 32, 32, ahv);
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}
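/*
 * Canonicalization example: swap_commutative turns "add t0, $5, t1"
 * into "add t0, t1, $5" (constant last), and "add t0, t1, t0" into
 * "add t0, t0, t1" so that the output aliases the first input.
 */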
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += arg_is_const(a1);
    sum -= arg_is_const(a2);

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += arg_is_const(p1[0]);
    sum += arg_is_const(p1[1]);
    sum -= arg_is_const(p2[0]);
    sum -= arg_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

/* Propagate constants and copies, fold constant expressions. */
void tcg_optimize(TCGContext *s)
{
    int nb_temps, nb_globals;
    TCGOp *op, *op_next, *prev_mb = NULL;
    struct tcg_temp_info *infos;
    TCGTempSet temps_used;

    /* Array INFOS has an element for each temp.
       If a temp holds a constant then its value is kept in that element.
       If a temp is a copy of other temps then the other copies are
       available through the doubly linked circular list. */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    bitmap_zero(temps_used.l, nb_temps);
    infos = tcg_malloc(sizeof(struct tcg_temp_info) * nb_temps);

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        tcg_target_ulong mask, partmask, affected;
        int nb_oargs, nb_iargs, i;
        TCGArg tmp;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        /* Count the arguments, and initialize the temps that are
           going to be used */
        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                TCGTemp *ts = arg_temp(op->args[i]);
                if (ts) {
                    init_ts_info(infos, &temps_used, ts);
                }
            }
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                init_arg_info(infos, &temps_used, op->args[i]);
            }
        }

        /* Do copy propagation */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            if (ts && ts_is_copy(ts)) {
                op->args[i] = temp_arg(find_better_copy(s, ts));
            }
        }

        /* For commutative operations make constant second argument */
        switch (opc) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(and):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(op->args[0], &op->args[1], &op->args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &op->args[0], &op->args[1])) {
                op->args[2] = tcg_swap_cond(op->args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
                op->args[3] = tcg_swap_cond(op->args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &op->args[1], &op->args[2])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation. */
            if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
                op->args[5] = tcg_invert_cond(op->args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(op->args[0], &op->args[2], &op->args[4]);
            swap_commutative(op->args[1], &op->args[3], &op->args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(op->args[0], &op->args[2], &op->args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&op->args[0], &op->args[2])) {
                op->args[4] = tcg_swap_cond(op->args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&op->args[1], &op->args[3])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0"
           and the "sub r, 0, a => neg r, a" case. */
        switch (opc) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        CASE_OP_32_64(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (arg_is_const(op->args[2])) {
                    /* Proceed with possible constant folding. */
                    break;
                }
                if (opc == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                }
                if (!have_neg) {
                    break;
                }
                if (arg_is_const(op->args[1])
                    && arg_info(op->args[1])->val == 0) {
                    op->opc = neg_op;
                    reset_temp(op->args[0]);
                    op->args[1] = op->args[2];
                    continue;
                }
            }
            break;
        CASE_OP_32_64(xor):
        CASE_OP_32_64(nand):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(andc):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                op->opc = not_op;
                reset_temp(op->args[0]);
                op->args[1] = op->args[i];
                continue;
            }
        default:
            break;
        }

        /* Simplify expression for "op r, a, const => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(andc):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        CASE_OP_32_64(and):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }
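        /*
         * Known-zero propagation example: with t1 known to fit in 8 bits
         * (mask 0xff), "and t0, t1, $0xfff" cannot clear any bit that may
         * be set (affected == 0 below), so it is replaced by a mov; the
         * result mask is 0xff either way.
         */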
        /* Simplify using known-zero bits.  Currently only ops with a single
           output argument are supported. */
        mask = -1;
        affected = -1;
        switch (opc) {
        CASE_OP_32_64(ext8s):
            if ((arg_info(op->args[1])->mask & 0x80) != 0) {
                break;
            }
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
                break;
            }
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = arg_info(op->args[2])->mask;
            if (arg_is_const(op->args[2])) {
        and_const:
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            mask = arg_info(op->args[1])->mask & mask;
            break;

        case INDEX_op_ext_i32_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_extu_i32_i64:
            /* We do not compute affected as it is a size changing op. */
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros does not imply known-ones.  Therefore unless
               op->args[2] is constant, we can't infer anything from it. */
            if (arg_is_const(op->args[2])) {
                mask = ~arg_info(op->args[2])->mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set. */
            mask = arg_info(op->args[1])->mask;
            break;

        case INDEX_op_sar_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_extrl_i64_i32:
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;
        case INDEX_op_extrh_i64_i32:
            mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
            break;

        CASE_OP_32_64(shl):
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
                mask = arg_info(op->args[1])->mask << tmp;
            }
            break;
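        /*
         * Worked example for the neg case below: mask & -mask isolates the
         * lowest bit that may be set, and negating that sets every bit at
         * or above it.  With an input mask of 0b011000, the lowest
         * possibly-set bit is 0b001000, so the result mask is ...111000:
         * the negation may carry arbitrarily far left, but can never set
         * bits below the input's lowest possibly-set bit.
         */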
        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost. */
            mask = -(arg_info(op->args[1])->mask
                     & -arg_info(op->args[1])->mask);
            break;

        CASE_OP_32_64(deposit):
            mask = deposit64(arg_info(op->args[1])->mask,
                             op->args[3], op->args[4],
                             arg_info(op->args[2])->mask);
            break;

        CASE_OP_32_64(extract):
            mask = extract64(arg_info(op->args[1])->mask,
                             op->args[2], op->args[3]);
            if (op->args[2] == 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;
        CASE_OP_32_64(sextract):
            mask = sextract64(arg_info(op->args[1])->mask,
                              op->args[2], op->args[3]);
            if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
            break;

        case INDEX_op_clz_i32:
        case INDEX_op_ctz_i32:
            mask = arg_info(op->args[2])->mask | 31;
            break;

        case INDEX_op_clz_i64:
        case INDEX_op_ctz_i64:
            mask = arg_info(op->args[2])->mask | 63;
            break;

        case INDEX_op_ctpop_i32:
            mask = 32 | 31;
            break;
        case INDEX_op_ctpop_i64:
            mask = 64 | 63;
            break;

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;
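        /*
         * For unsigned guest loads the result mask follows from the access
         * size encoded in the memop: with (mop & MO_SIZE) == MO_16 we have
         * 8 << MO_SIZE == 16, so the expression below evaluates to
         * (2ULL << 15) - 1 == 0xffff.  Sign-extending loads may set any
         * bit, so they keep the default mask of -1.
         */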
        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
                TCGMemOp mop = get_memop(oi);
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }

        /* 32-bit ops generate 32-bit results.  For the result-is-zero test
           below, we can ignore high bits, but for further optimizations we
           need to record that the high bits contain garbage. */
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }

        if (partmask == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, op, op->args[0], 0);
            continue;
        }
        if (affected == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64(and):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if (arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64(andc):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(xor):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted into arguments by the
           register allocator where needed and possible.  Also detect
           copies. */
        switch (opc) {
        CASE_OP_32_64(mov):
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            break;
        CASE_OP_32_64(movi):
            tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
            break;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        CASE_OP_32_64(ctpop):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
        case INDEX_op_ext_i32_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            if (arg_is_const(op->args[1])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
                                          arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(clz):
        CASE_OP_32_64(ctz):
            if (arg_is_const(op->args[1])) {
                TCGArg v = arg_info(op->args[1])->val;
                if (v != 0) {
                    tmp = do_constant_folding(opc, v, 0);
                    tcg_opt_gen_movi(s, op, op->args[0], tmp);
                } else {
                    tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
                }
                break;
            }
            goto do_default;
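        /*
         * E.g. deposit t0, $0xffff0000, $0x1234 with pos 0 and len 16
         * folds to movi t0, $0xffff1234.
         */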
        CASE_OP_32_64(deposit):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = deposit64(arg_info(op->args[1])->val,
                                op->args[3], op->args[4],
                                arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract):
            if (arg_is_const(op->args[1])) {
                tmp = extract64(arg_info(op->args[1])->val,
                                op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(sextract):
            if (arg_is_const(op->args[1])) {
                tmp = sextract64(arg_info(op->args[1])->val,
                                 op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, op->args[0],
                                           op->args[1], op->args[2]);
            if (tmp != 2) {
                if (tmp) {
                    bitmap_zero(temps_used.l, nb_temps);
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[3];
                } else {
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[5]);
            if (tmp != 2) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
                break;
            }
            if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
                tcg_target_ulong tv = arg_info(op->args[3])->val;
                tcg_target_ulong fv = arg_info(op->args[4])->val;
                TCGCond cond = op->args[5];
                if (fv == 1 && tv == 0) {
                    cond = tcg_invert_cond(cond);
                } else if (!(tv == 1 && fv == 0)) {
                    goto do_default;
                }
                op->args[3] = cond;
                op->opc = opc = (opc == INDEX_op_movcond_i32
                                 ? INDEX_op_setcond_i32
                                 : INDEX_op_setcond_i64);
                nb_iargs = 2;
            }
            goto do_default;
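        /*
         * Double-word folding: when all four input halves are constants,
         * the 64-bit arithmetic is done at translation time and the op is
         * rewritten as two movi ops, one per 32-bit half, e.g. add2 with
         * a = 0x1_00000000 and b = 1 yields rl = 1, rh = 1.
         */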
        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
                && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
                uint32_t al = arg_info(op->args[2])->val;
                uint32_t ah = arg_info(op->args[3])->val;
                uint32_t bl = arg_info(op->args[4])->val;
                uint32_t bh = arg_info(op->args[5])->val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, op, rl, (int32_t)a);
                tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
                uint32_t a = arg_info(op->args[2])->val;
                uint32_t b = arg_info(op->args[3])->val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, op, rl, (int32_t)r);
                tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));
                break;
            }
            goto do_default;
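        /*
         * For the double-word compares below, signed LT/GE against zero
         * depend only on the sign bit, which lives in the high word; e.g.
         * "brcond2 al, ah, $0, $0, lt" is equivalent to "brcond ah, $0, lt".
         */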
        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
                                            op->args[4]);
            if (tmp != 2) {
                if (tmp) {
                do_brcond_true:
                    bitmap_zero(temps_used.l, nb_temps);
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[5];
                } else {
                do_brcond_false:
                    tcg_op_remove(s, op);
                }
            } else if ((op->args[4] == TCG_COND_LT
                        || op->args[4] == TCG_COND_GE)
                       && arg_is_const(op->args[2])
                       && arg_info(op->args[2])->val == 0
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
            do_brcond_high:
                bitmap_zero(temps_used.l, nb_temps);
                op->opc = INDEX_op_brcond_i32;
                op->args[0] = op->args[1];
                op->args[1] = op->args[3];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_brcond_low:
                bitmap_zero(temps_used.l, nb_temps);
                op->opc = INDEX_op_brcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
                                            op->args[5]);
            if (tmp != 2) {
            do_setcond_const:
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
            } else if ((op->args[5] == TCG_COND_LT
                        || op->args[5] == TCG_COND_GE)
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0
                       && arg_is_const(op->args[4])
                       && arg_info(op->args[4])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
            do_setcond_high:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_setcond_low:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[2] = op->args[3];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;
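        /*
         * A helper call may read or write any global, so all globals must
         * be forgotten across it unless the call is flagged as not
         * accessing globals; the outputs are reset below in any case.
         */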
        case INDEX_op_call:
            if (!(op->args[nb_oargs + nb_iargs + 1]
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    if (test_bit(i, temps_used.l)) {
                        reset_ts(&s->temps[i]);
                    }
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute the operation result) so no propagation
               is done.  We trash everything if the operation is the end
               of a basic block, otherwise we only trash the output args.
               "mask" is the non-zero bits mask for the first output arg. */
            if (def->flags & TCG_OPF_BB_END) {
                bitmap_zero(temps_used.l, nb_temps);
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(op->args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far). */
                    if (i == 0) {
                        arg_info(op->args[i])->mask = mask;
                    }
                }
            }
            break;
        }

        /* Eliminate duplicate and redundant fence instructions. */
        if (prev_mb) {
            switch (opc) {
            case INDEX_op_mb:
                /* Merge two barriers of the same type into one,
                 * or a weaker barrier into a stronger one,
                 * or two weaker barriers into a stronger one.
                 *   mb X; mb Y => mb X|Y
                 *   mb; strl => mb; st
                 *   ldaq; mb => ld; mb
                 *   ldaq; strl => ld; mb; st
                 * Other combinations are also merged into a strong
                 * barrier.  This is stricter than specified but for
                 * the purposes of TCG is better than not optimizing.
                 */
                prev_mb->args[0] |= op->args[0];
                tcg_op_remove(s, op);
                break;

            default:
                /* Opcodes that end the block stop the optimization. */
                if ((def->flags & TCG_OPF_BB_END) == 0) {
                    break;
                }
                /* fallthru */
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_call:
                /* Opcodes that touch guest memory stop the optimization. */
                prev_mb = NULL;
                break;
            }
        } else if (opc == INDEX_op_mb) {
            prev_mb = op;
        }
    }
}