/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "exec/cpu-common.h"
#include "tcg-op.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

struct tcg_temp_info {
    bool is_const;
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    tcg_target_ulong val;
    tcg_target_ulong mask;
};
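
/*
 * prev_copy/next_copy link temps known to hold the same value into a
 * circular doubly linked list; a temp whose next_copy points back at
 * itself is in no copy relation.  For example (illustrative sequence,
 * not from the original source), after
 *     mov_i32 t2, t1
 *     mov_i32 t3, t1
 * t1, t2 and t3 form one ring, so any of them can stand in for the
 * others.  "mask" records which bits may be nonzero: a clear bit is
 * known to be zero in the temp's value.
 */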

static inline struct tcg_temp_info *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}

static inline struct tcg_temp_info *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}

static inline bool ts_is_const(TCGTemp *ts)
{
    return ts_info(ts)->is_const;
}

static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}

static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}

/* Reset TEMP's state, possibly removing the temp from the list of copies. */
static void reset_ts(TCGTemp *ts)
{
    struct tcg_temp_info *ti = ts_info(ts);
    struct tcg_temp_info *pi = ts_info(ti->prev_copy);
    struct tcg_temp_info *ni = ts_info(ti->next_copy);

    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->is_const = false;
    ti->mask = -1;
}

static void reset_temp(TCGArg arg)
{
    reset_ts(arg_temp(arg));
}

/* Initialize and activate a temporary. */
static void init_ts_info(struct tcg_temp_info *infos,
                         TCGTempSet *temps_used, TCGTemp *ts)
{
    size_t idx = temp_idx(ts);
    if (!test_bit(idx, temps_used->l)) {
        struct tcg_temp_info *ti = &infos[idx];

        ts->state_ptr = ti;
        ti->next_copy = ts;
        ti->prev_copy = ts;
        ti->is_const = false;
        ti->mask = -1;
        set_bit(idx, temps_used->l);
    }
}

static void init_arg_info(struct tcg_temp_info *infos,
                          TCGTempSet *temps_used, TCGArg arg)
{
    init_ts_info(infos, temps_used, arg_temp(arg));
}

static int op_bits(TCGOpcode op)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    return def->flags & TCG_OPF_64BIT ? 64 : 32;
}

static TCGOpcode op_to_mov(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_mov_i32;
    case 64:
        return INDEX_op_mov_i64;
    default:
        fprintf(stderr, "op_to_mov: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGOpcode op_to_movi(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_movi_i32;
    case 64:
        return INDEX_op_movi_i64;
    default:
        fprintf(stderr, "op_to_movi: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
{
    TCGTemp *i;

    /* If this is already a global, we can't do better. */
    if (ts->temp_global) {
        return ts;
    }

    /* Search for a global first. */
    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
        if (i->temp_global) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local. */
    if (!ts->temp_local) {
        for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
            if (i->temp_local) {
                return i;
            }
        }
    }

    /* Failing to find a better representation, return the same temp. */
    return ts;
}
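
/*
 * find_better_copy prefers globals, then temp locals, then plain
 * temps: globals and temp locals keep their values across a
 * basic-block boundary, so naming one of them keeps the copy valid
 * for longer than a plain temp would be.
 */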

static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
{
    TCGTemp *i;

    if (ts1 == ts2) {
        return true;
    }

    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
        return false;
    }

    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
        if (i == ts2) {
            return true;
        }
    }

    return false;
}

static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}

static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
{
    TCGOpcode new_op = op_to_movi(op->opc);
    tcg_target_ulong mask;
    struct tcg_temp_info *di = arg_info(dst);

    op->opc = new_op;

    reset_temp(dst);
    di->is_const = true;
    di->val = val;
    mask = val;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
        /* High bits of the destination are now garbage. */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;

    op->args[0] = dst;
    op->args[1] = val;
}

static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    struct tcg_temp_info *di;
    struct tcg_temp_info *si;
    tcg_target_ulong mask;
    TCGOpcode new_op;

    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(s, op);
        return;
    }

    reset_ts(dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);
    new_op = op_to_mov(op->opc);

    op->opc = new_op;
    op->args[0] = dst;
    op->args[1] = src;

    mask = si->mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage. */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;

    if (src_ts->type == dst_ts->type) {
        struct tcg_temp_info *ni = ts_info(si->next_copy);

        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;
    }
}

static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);
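
    /*
     * The "& 31" / "& 63" masking of the shift and rotate counts above
     * keeps the host C shifts well defined; TCG leaves out-of-range
     * counts unspecified, so folding with the count reduced modulo the
     * operand width is a valid choice.
     */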

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    case INDEX_op_clz_i32:
        return (uint32_t)x ? clz32(x) : y;

    case INDEX_op_clz_i64:
        return x ? clz64(x) : y;

    case INDEX_op_ctz_i32:
        return (uint32_t)x ? ctz32(x) : y;

    case INDEX_op_ctz_i64:
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop_i32:
        return ctpop32(x);

    case INDEX_op_ctpop_i64:
        return ctpop64(x);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined. */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}

static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    TCGArg res = do_constant_folding_2(op, x, y);
    if (op_bits(op) == 32) {
        res = (int32_t)res;
    }
    return res;
}
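
/*
 * The (int32_t) cast above canonicalizes a 32-bit folding result by
 * sign-extending it into the full TCGArg, e.g. 0x7fffffff + 1 becomes
 * 0xffffffff80000000 rather than 0x0000000080000000 on a 64-bit host.
 */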

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can. */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    tcg_target_ulong xv = arg_info(x)->val;
    tcg_target_ulong yv = arg_info(y)->val;
    if (arg_is_const(x) && arg_is_const(y)) {
        switch (op_bits(op)) {
        case 32:
            return do_constant_folding_cond_32(xv, yv, c);
        case 64:
            return do_constant_folding_cond_64(xv, yv, c);
        default:
            tcg_abort();
        }
    } else if (args_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (arg_is_const(y) && yv == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    }
    return 2;
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can. */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_info(bl)->val;
        tcg_target_ulong bhv = arg_info(bh)->val;
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_info(al)->val;
            tcg_target_ulong ahv = arg_info(ah)->val;
            uint64_t a = deposit64(alv, 32, 32, ahv);
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += arg_is_const(a1);
    sum -= arg_is_const(a2);

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += arg_is_const(p1[0]);
    sum += arg_is_const(p1[1]);
    sum -= arg_is_const(p2[0]);
    sum -= arg_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}
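
/*
 * Example of the canonicalization performed by swap_commutative
 * (illustrative ops, not from the original source):
 *     add_i32 t0, $5, t1     =>  add_i32 t0, t1, $5
 *     add_i32 t0, t1, t0     =>  add_i32 t0, t0, t1
 * Constants move to the second operand, and when neither operand is
 * constant the form "op a, a, b" is preferred, which non-RISC
 * (two-address) hosts handle better.
 */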

/* Propagate constants and copies, fold constant expressions. */
void tcg_optimize(TCGContext *s)
{
    int oi, oi_next, nb_temps, nb_globals;
    TCGOp *prev_mb = NULL;
    struct tcg_temp_info *infos;
    TCGTempSet temps_used;

    /* Array INFOS has an element for each temp.
       If this temp holds a constant then its value is kept in the
       element's "val" field.
       If this temp is a copy of other ones then the other copies are
       available through the doubly linked circular list. */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    bitmap_zero(temps_used.l, nb_temps);
    infos = tcg_malloc(sizeof(struct tcg_temp_info) * nb_temps);

    for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
        tcg_target_ulong mask, partmask, affected;
        int nb_oargs, nb_iargs, i;
        TCGArg tmp;

        TCGOp * const op = &s->gen_op_buf[oi];
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        oi_next = op->next;

        /* Count the arguments, and initialize the temps that are
           going to be used. */
        if (opc == INDEX_op_call) {
            nb_oargs = op->callo;
            nb_iargs = op->calli;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                TCGTemp *ts = arg_temp(op->args[i]);
                if (ts) {
                    init_ts_info(infos, &temps_used, ts);
                }
            }
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                init_arg_info(infos, &temps_used, op->args[i]);
            }
        }

        /* Do copy propagation. */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            if (ts && ts_is_copy(ts)) {
                op->args[i] = temp_arg(find_better_copy(s, ts));
            }
        }

        /* For commutative operations make the constant the second
           argument. */
        switch (opc) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(and):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(op->args[0], &op->args[1], &op->args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &op->args[0], &op->args[1])) {
                op->args[2] = tcg_swap_cond(op->args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
                op->args[3] = tcg_swap_cond(op->args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &op->args[1], &op->args[2])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation. */
            if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
                op->args[5] = tcg_invert_cond(op->args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(op->args[0], &op->args[2], &op->args[4]);
            swap_commutative(op->args[1], &op->args[3], &op->args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(op->args[0], &op->args[2], &op->args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&op->args[0], &op->args[2])) {
                op->args[4] = tcg_swap_cond(op->args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&op->args[1], &op->args[3])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            break;
        default:
            break;
        }
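
        /*
         * When a comparison's operands are swapped, the condition must
         * be swapped with them, e.g. (illustrative, not from the source)
         *     brcond_i32 $5, t1, lt, L  =>  brcond_i32 t1, $5, gt, L
         * since 5 < t1 and t1 > 5 are the same test.  tcg_invert_cond,
         * used for the movcond false/true swap, instead yields the
         * opposite test (lt becomes ge).
         */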

        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0"
           and "sub r, 0, a => neg r, a" cases. */
        switch (opc) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        CASE_OP_32_64(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (arg_is_const(op->args[2])) {
                    /* Proceed with possible constant folding. */
                    break;
                }
                if (opc == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                }
                if (!have_neg) {
                    break;
                }
                if (arg_is_const(op->args[1])
                    && arg_info(op->args[1])->val == 0) {
                    op->opc = neg_op;
                    reset_temp(op->args[0]);
                    op->args[1] = op->args[2];
                    continue;
                }
            }
            break;
        CASE_OP_32_64(xor):
        CASE_OP_32_64(nand):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(andc):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                op->opc = not_op;
                reset_temp(op->args[0]);
                op->args[1] = op->args[i];
                continue;
            }
        default:
            break;
        }

        /* Simplify expression for "op r, a, const => mov r, a" cases. */
        switch (opc) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(andc):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        CASE_OP_32_64(and):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }
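
        /*
         * The next pass tracks known-zero bits in "mask".  As an
         * illustration (not from the original source), after
         *     and_i32 t0, t1, $0xff
         * t0's mask is 0xff; a following ext8u_i32 t2, t0 then has
         * affected == 0, because it cannot clear any bit that might
         * be set, and is replaced by a plain mov.
         */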

        /* Simplify using known-zero bits. Currently only ops with a single
           output argument are supported. */
        mask = -1;
        affected = -1;
        switch (opc) {
        CASE_OP_32_64(ext8s):
            if ((arg_info(op->args[1])->mask & 0x80) != 0) {
                break;
            }
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
                break;
            }
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = arg_info(op->args[2])->mask;
            if (arg_is_const(op->args[2])) {
        and_const:
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            mask = arg_info(op->args[1])->mask & mask;
            break;

        case INDEX_op_ext_i32_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_extu_i32_i64:
            /* We do not compute affected as it is a size changing op. */
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros does not imply known-ones. Therefore unless
               op->args[2] is constant, we can't infer anything from it. */
            if (arg_is_const(op->args[2])) {
                mask = ~arg_info(op->args[2])->mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set. */
            mask = arg_info(op->args[1])->mask;
            break;

        case INDEX_op_sar_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_extrl_i64_i32:
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;
        case INDEX_op_extrh_i64_i32:
            mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
            break;

        CASE_OP_32_64(shl):
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
                mask = arg_info(op->args[1])->mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost. */
            mask = -(arg_info(op->args[1])->mask
                     & -arg_info(op->args[1])->mask);
            break;

        CASE_OP_32_64(deposit):
            mask = deposit64(arg_info(op->args[1])->mask,
                             op->args[3], op->args[4],
                             arg_info(op->args[2])->mask);
            break;

        CASE_OP_32_64(extract):
            mask = extract64(arg_info(op->args[1])->mask,
                             op->args[2], op->args[3]);
            if (op->args[2] == 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;
        CASE_OP_32_64(sextract):
            mask = sextract64(arg_info(op->args[1])->mask,
                              op->args[2], op->args[3]);
            if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
            break;

        case INDEX_op_clz_i32:
        case INDEX_op_ctz_i32:
            mask = arg_info(op->args[2])->mask | 31;
            break;

        case INDEX_op_clz_i64:
        case INDEX_op_ctz_i64:
            mask = arg_info(op->args[2])->mask | 63;
            break;

        case INDEX_op_ctpop_i32:
            mask = 32 | 31;
            break;
        case INDEX_op_ctpop_i64:
            mask = 64 | 63;
            break;

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
                TCGMemOp mop = get_memop(oi);
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;
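
        /*
         * For an unsigned load the mask formula above expands to 0xff
         * for MO_8, 0xffff for MO_16, 0xffffffff for MO_32 and
         * all-ones for MO_64: 8 << size gives the width in bits, and
         * 2ULL << (width - 1) avoids an out-of-range shift when the
         * width is 64.
         */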

        default:
            break;
        }

        /* 32-bit ops generate 32-bit results. For the result-is-zero test
           below, we can ignore high bits, but for further optimizations we
           need to record that the high bits contain garbage. */
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }
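
        /*
         * Two ways the op can now be simplified: if partmask is zero,
         * every result bit is known to be zero, so the op is a movi 0;
         * if affected is zero, the op cannot change any bit of its
         * first input that could be set, so it is a plain copy.
         */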

        if (partmask == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, op, op->args[0], 0);
            continue;
        }
        if (affected == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases. */
        switch (opc) {
        CASE_OP_32_64(and):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if (arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases. */
        switch (opc) {
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases. */
        switch (opc) {
        CASE_OP_32_64(andc):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(xor):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding. Constants will be substituted for arguments by the
           register allocator where needed and possible. Also detect
           copies. */
        switch (opc) {
        CASE_OP_32_64(mov):
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            break;
        CASE_OP_32_64(movi):
            tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
            break;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        CASE_OP_32_64(ctpop):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
        case INDEX_op_ext_i32_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            if (arg_is_const(op->args[1])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
                                          arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(clz):
        CASE_OP_32_64(ctz):
            if (arg_is_const(op->args[1])) {
                TCGArg v = arg_info(op->args[1])->val;
                if (v != 0) {
                    tmp = do_constant_folding(opc, v, 0);
                    tcg_opt_gen_movi(s, op, op->args[0], tmp);
                } else {
                    tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = deposit64(arg_info(op->args[1])->val,
                                op->args[3], op->args[4],
                                arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract):
            if (arg_is_const(op->args[1])) {
                tmp = extract64(arg_info(op->args[1])->val,
                                op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(sextract):
            if (arg_is_const(op->args[1])) {
                tmp = sextract64(arg_info(op->args[1])->val,
                                 op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, op->args[0],
                                           op->args[1], op->args[2]);
            if (tmp != 2) {
                if (tmp) {
                    bitmap_zero(temps_used.l, nb_temps);
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[3];
                } else {
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[5]);
            if (tmp != 2) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
                break;
            }
            if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
                tcg_target_ulong tv = arg_info(op->args[3])->val;
                tcg_target_ulong fv = arg_info(op->args[4])->val;
                TCGCond cond = op->args[5];
                if (fv == 1 && tv == 0) {
                    cond = tcg_invert_cond(cond);
                } else if (!(tv == 1 && fv == 0)) {
                    goto do_default;
                }
                op->args[3] = cond;
                op->opc = opc = (opc == INDEX_op_movcond_i32
                                 ? INDEX_op_setcond_i32
                                 : INDEX_op_setcond_i64);
                nb_iargs = 2;
            }
            goto do_default;
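
        /*
         * The double-word cases below fold a 64-bit result computed
         * from constant 32-bit inputs into two movi_i32 ops, one for
         * the low and one for the high half; the extra op is inserted
         * before the current one to hold the second half.
         */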

        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
                && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
                uint32_t al = arg_info(op->args[2])->val;
                uint32_t ah = arg_info(op->args[3])->val;
                uint32_t bl = arg_info(op->args[4])->val;
                uint32_t bh = arg_info(op->args[5])->val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, op, rl, (int32_t)a);
                tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));

                /* We've done all we need to do with the movi. Skip it. */
                oi_next = op2->next;
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
                uint32_t a = arg_info(op->args[2])->val;
                uint32_t b = arg_info(op->args[3])->val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, op, rl, (int32_t)r);
                tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));

                /* We've done all we need to do with the movi. Skip it. */
                oi_next = op2->next;
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
                                            op->args[4]);
            if (tmp != 2) {
                if (tmp) {
        do_brcond_true:
                    bitmap_zero(temps_used.l, nb_temps);
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[5];
                } else {
        do_brcond_false:
                    tcg_op_remove(s, op);
                }
            } else if ((op->args[4] == TCG_COND_LT
                        || op->args[4] == TCG_COND_GE)
                       && arg_is_const(op->args[2])
                       && arg_info(op->args[2])->val == 0
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
        do_brcond_high:
                bitmap_zero(temps_used.l, nb_temps);
                op->opc = INDEX_op_brcond_i32;
                op->args[0] = op->args[1];
                op->args[1] = op->args[3];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
        do_brcond_low:
                bitmap_zero(temps_used.l, nb_temps);
                op->opc = INDEX_op_brcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;
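
        /*
         * The EQ/NE half-comparison logic above relies on a double-word
         * pair being equal only when both halves are equal: for EQ, a
         * half known unequal decides the branch, while a half known
         * equal leaves only the other half to test; NE is the dual
         * case, and setcond2 below repeats the pattern.
         */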

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
                                            op->args[5]);
            if (tmp != 2) {
        do_setcond_const:
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
            } else if ((op->args[5] == TCG_COND_LT
                        || op->args[5] == TCG_COND_GE)
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0
                       && arg_is_const(op->args[4])
                       && arg_info(op->args[4])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
        do_setcond_high:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
        do_setcond_low:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[2] = op->args[3];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_call:
            if (!(op->args[nb_oargs + nb_iargs + 1]
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    if (test_bit(i, temps_used.l)) {
                        reset_ts(&s->temps[i]);
                    }
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute the operation result) so no propagation
               is done. We trash everything if the operation is the end
               of a basic block, otherwise we only trash the output args.
               "mask" is the non-zero bits mask for the first output arg. */
            if (def->flags & TCG_OPF_BB_END) {
                bitmap_zero(temps_used.l, nb_temps);
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(op->args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far). */
                    if (i == 0) {
                        arg_info(op->args[i])->mask = mask;
                    }
                }
            }
            break;
        }

        /* Eliminate duplicate and redundant fence instructions. */
        if (prev_mb) {
            switch (opc) {
            case INDEX_op_mb:
                /* Merge two barriers of the same type into one,
                 * or a weaker barrier into a stronger one,
                 * or two weaker barriers into a stronger one.
                 *   mb X; mb Y => mb X|Y
                 *   mb; strl => mb; st
                 *   ldaq; mb => ld; mb
                 *   ldaq; strl => ld; mb; st
                 * Other combinations are also merged into a strong
                 * barrier. This is stricter than specified but for
                 * the purposes of TCG is better than not optimizing.
                 */
                prev_mb->args[0] |= op->args[0];
                tcg_op_remove(s, op);
                break;

            default:
                /* Opcodes that end the block stop the optimization. */
                if ((def->flags & TCG_OPF_BB_END) == 0) {
                    break;
                }
                /* fallthru */
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_call:
                /* Opcodes that touch guest memory stop the optimization. */
                prev_mb = NULL;
                break;
            }
        } else if (opc == INDEX_op_mb) {
            prev_mb = op;
        }
    }
}