/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "exec/cpu-common.h"
#include "tcg-op.h"

#define CASE_OP_32_64(x)                     \
        glue(glue(case INDEX_op_, x), _i32): \
        glue(glue(case INDEX_op_, x), _i64)

#define CASE_OP_32_64_VEC(x)                 \
        glue(glue(case INDEX_op_, x), _i32): \
        glue(glue(case INDEX_op_, x), _i64): \
        glue(glue(case INDEX_op_, x), _vec)

/* Per-temp state tracked by the optimizer: constant value, copy list,
   and known-zero bits.  */
struct tcg_temp_info {
    bool is_const;
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    tcg_target_ulong val;
    tcg_target_ulong mask;   /* bits that may be nonzero; known-zero bits are clear */
};

static inline struct tcg_temp_info *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}

static inline struct tcg_temp_info *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}

static inline bool ts_is_const(TCGTemp *ts)
{
    return ts_info(ts)->is_const;
}

static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}

static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}

/* Reset TEMP's state, possibly removing the temp from the list of copies.  */
static void reset_ts(TCGTemp *ts)
{
    struct tcg_temp_info *ti = ts_info(ts);
    struct tcg_temp_info *pi = ts_info(ti->prev_copy);
    struct tcg_temp_info *ni = ts_info(ti->next_copy);

    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->is_const = false;
    ti->mask = -1;
}

static void reset_temp(TCGArg arg)
{
    reset_ts(arg_temp(arg));
}

/* Initialize and activate a temporary.  */
static void init_ts_info(struct tcg_temp_info *infos,
                         TCGTempSet *temps_used, TCGTemp *ts)
{
    size_t idx = temp_idx(ts);
    if (!test_bit(idx, temps_used->l)) {
        struct tcg_temp_info *ti = &infos[idx];

        ts->state_ptr = ti;
        ti->next_copy = ts;
        ti->prev_copy = ts;
        ti->is_const = false;
        ti->mask = -1;
        set_bit(idx, temps_used->l);
    }
}

static void init_arg_info(struct tcg_temp_info *infos,
                          TCGTempSet *temps_used, TCGArg arg)
{
    init_ts_info(infos, temps_used, arg_temp(arg));
}

static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
{
    TCGTemp *i;

    /* If this is already a global, we can't do better.  */
    if (ts->temp_global) {
        return ts;
    }

    /* Search for a global first.  */
    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
        if (i->temp_global) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local.  */
    if (!ts->temp_local) {
        for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
            if (i->temp_local) {
                return i;
            }
        }
    }

    /* Failing to find a better representation, return the same temp.  */
    return ts;
}

static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
{
    TCGTemp *i;

    if (ts1 == ts2) {
        return true;
    }

    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
        return false;
    }

    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
        if (i == ts2) {
            return true;
        }
    }

    return false;
}

static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}

static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
{
    const TCGOpDef *def;
    TCGOpcode new_op;
    tcg_target_ulong mask;
    struct tcg_temp_info *di = arg_info(dst);

    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_dupi_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_movi_i64;
    } else {
        new_op = INDEX_op_movi_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = val;

    reset_temp(dst);
    di->is_const = true;
    di->val = val;
    mask = val;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;
}

static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    const TCGOpDef *def;
    struct tcg_temp_info *di;
    struct tcg_temp_info *si;
    tcg_target_ulong mask;
    TCGOpcode new_op;

    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(s, op);
        return;
    }

    reset_ts(dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);
    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_mov_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_mov_i64;
    } else {
        new_op = INDEX_op_mov_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = src;

    mask = si->mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;

    if (src_ts->type == dst_ts->type) {
        struct tcg_temp_info *ni = ts_info(si->next_copy);

        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;
    }
}

static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    case INDEX_op_clz_i32:
        return (uint32_t)x ? clz32(x) : y;

    case INDEX_op_clz_i64:
        return x ? clz64(x) : y;

    case INDEX_op_ctz_i32:
        return (uint32_t)x ? ctz32(x) : y;

    case INDEX_op_ctz_i64:
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop_i32:
        return ctpop32(x);

    case INDEX_op_ctpop_i64:
        return ctpop64(x);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    CASE_OP_32_64(bswap16):
        return bswap16(x);

    CASE_OP_32_64(bswap32):
        return bswap32(x);

    case INDEX_op_bswap64_i64:
        return bswap64(x);

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined.  */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}

static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    TCGArg res = do_constant_folding_2(op, x, y);
    if (!(def->flags & TCG_OPF_64BIT)) {
        res = (int32_t)res;
    }
    return res;
}

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    tcg_target_ulong xv = arg_info(x)->val;
    tcg_target_ulong yv = arg_info(y)->val;
    if (arg_is_const(x) && arg_is_const(y)) {
        const TCGOpDef *def = &tcg_op_defs[op];
        tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
        if (def->flags & TCG_OPF_64BIT) {
            return do_constant_folding_cond_64(xv, yv, c);
        } else {
            return do_constant_folding_cond_32(xv, yv, c);
        }
    } else if (args_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (arg_is_const(y) && yv == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    }
    return 2;
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_info(bl)->val;
        tcg_target_ulong bhv = arg_info(bh)->val;
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_info(al)->val;
            tcg_target_ulong ahv = arg_info(ah)->val;
            uint64_t a = deposit64(alv, 32, 32, ahv);
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += arg_is_const(a1);
    sum -= arg_is_const(a2);

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts.  */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += arg_is_const(p1[0]);
    sum += arg_is_const(p1[1]);
    sum -= arg_is_const(p2[0]);
    sum -= arg_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

/* Propagate constants and copies, fold constant expressions.  */
void tcg_optimize(TCGContext *s)
{
    int nb_temps, nb_globals;
    TCGOp *op, *op_next, *prev_mb = NULL;
    struct tcg_temp_info *infos;
    TCGTempSet temps_used;

    /* Array INFOS has an element for each temp.
       If a temp holds a constant then its value is kept in that element.
       If a temp is a copy of other temps then the other copies are
       available through the doubly linked circular list.  */
    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    bitmap_zero(temps_used.l, nb_temps);
    infos = tcg_malloc(sizeof(struct tcg_temp_info) * nb_temps);

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        tcg_target_ulong mask, partmask, affected;
        int nb_oargs, nb_iargs, i;
        TCGArg tmp;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        /* Count the arguments, and initialize the temps that are
           going to be used.  */
        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                TCGTemp *ts = arg_temp(op->args[i]);
                if (ts) {
                    init_ts_info(infos, &temps_used, ts);
                }
            }
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                init_arg_info(infos, &temps_used, op->args[i]);
            }
        }

        /* Do copy propagation.  */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            if (ts && ts_is_copy(ts)) {
                op->args[i] = temp_arg(find_better_copy(s, ts));
            }
        }

        /* For commutative operations make constant second argument.  */
        switch (opc) {
        CASE_OP_32_64_VEC(add):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(op->args[0], &op->args[1], &op->args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &op->args[0], &op->args[1])) {
                op->args[2] = tcg_swap_cond(op->args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
                op->args[3] = tcg_swap_cond(op->args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &op->args[1], &op->args[2])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation.  */
            if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
                op->args[5] = tcg_invert_cond(op->args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(op->args[0], &op->args[2], &op->args[4]);
            swap_commutative(op->args[1], &op->args[3], &op->args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(op->args[0], &op->args[2], &op->args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&op->args[0], &op->args[2])) {
                op->args[4] = tcg_swap_cond(op->args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&op->args[1], &op->args[3])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
           and "sub r, 0, a => neg r, a" case.  */
        switch (opc) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        CASE_OP_32_64_VEC(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (arg_is_const(op->args[2])) {
                    /* Proceed with possible constant folding.  */
                    break;
                }
                if (opc == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else if (opc == INDEX_op_sub_i64) {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                } else {
                    neg_op = INDEX_op_neg_vec;
                    have_neg = TCG_TARGET_HAS_neg_vec;
                }
                if (!have_neg) {
                    break;
                }
                if (arg_is_const(op->args[1])
                    && arg_info(op->args[1])->val == 0) {
                    op->opc = neg_op;
                    reset_temp(op->args[0]);
                    op->args[1] = op->args[2];
                    continue;
                }
            }
            break;
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64(nand):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64_VEC(andc):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64_VEC(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_VECTOR) {
                    not_op = INDEX_op_not_vec;
                    have_not = TCG_TARGET_HAS_not_vec;
                } else if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                op->opc = not_op;
                reset_temp(op->args[0]);
                op->args[1] = op->args[i];
                continue;
            }
        default:
            break;
        }

        /* Simplify expression for "op r, a, const => mov r, a" cases.  */
        switch (opc) {
        CASE_OP_32_64_VEC(add):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify using known-zero bits.  Currently only ops with a single
           output argument are supported.  */
        mask = -1;
        affected = -1;
        switch (opc) {
        CASE_OP_32_64(ext8s):
            if ((arg_info(op->args[1])->mask & 0x80) != 0) {
                break;
            }
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
                break;
            }
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = arg_info(op->args[2])->mask;
            if (arg_is_const(op->args[2])) {
        and_const:
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            mask = arg_info(op->args[1])->mask & mask;
            break;

        case INDEX_op_ext_i32_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_extu_i32_i64:
            /* We do not compute affected as it is a size changing op.  */
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros does not imply known-ones.  Therefore unless
               op->args[2] is constant, we can't infer anything from it.  */
            if (arg_is_const(op->args[2])) {
                mask = ~arg_info(op->args[2])->mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set.  */
            mask = arg_info(op->args[1])->mask;
            break;

        case INDEX_op_sar_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_extrl_i64_i32:
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;
        case INDEX_op_extrh_i64_i32:
            mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
            break;

        CASE_OP_32_64(shl):
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
                mask = arg_info(op->args[1])->mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost.  */
            mask = -(arg_info(op->args[1])->mask
                     & -arg_info(op->args[1])->mask);
            break;

        CASE_OP_32_64(deposit):
            mask = deposit64(arg_info(op->args[1])->mask,
                             op->args[3], op->args[4],
                             arg_info(op->args[2])->mask);
            break;

        CASE_OP_32_64(extract):
            mask = extract64(arg_info(op->args[1])->mask,
                             op->args[2], op->args[3]);
            if (op->args[2] == 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;
        CASE_OP_32_64(sextract):
            mask = sextract64(arg_info(op->args[1])->mask,
                              op->args[2], op->args[3]);
            if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
            break;

        case INDEX_op_clz_i32:
        case INDEX_op_ctz_i32:
            mask = arg_info(op->args[2])->mask | 31;
            break;

        case INDEX_op_clz_i64:
        case INDEX_op_ctz_i64:
            mask = arg_info(op->args[2])->mask | 63;
            break;

        case INDEX_op_ctpop_i32:
            mask = 32 | 31;
            break;
        case INDEX_op_ctpop_i64:
            mask = 64 | 63;
            break;

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
                TCGMemOp mop = get_memop(oi);
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }

        /* 32-bit ops generate 32-bit results.  For the result-is-zero test
           below, we can ignore high bits, but for further optimizations we
           need to record that the high bits contain garbage.  */
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }

        if (partmask == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, op, op->args[0], 0);
            continue;
        }
        if (affected == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases.  */
        switch (opc) {
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if (arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases.  */
        switch (opc) {
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(and):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases.  */
        switch (opc) {
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(xor):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted to arguments by the
           register allocator where needed and possible.  Also detect
           copies.  */
        switch (opc) {
        CASE_OP_32_64_VEC(mov):
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            break;
        CASE_OP_32_64(movi):
        case INDEX_op_dupi_vec:
            tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
            break;

        case INDEX_op_dup_vec:
            if (arg_is_const(op->args[1])) {
                tmp = arg_info(op->args[1])->val;
                tmp = dup_const(TCGOP_VECE(op), tmp);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        CASE_OP_32_64(ctpop):
        CASE_OP_32_64(bswap16):
        CASE_OP_32_64(bswap32):
        case INDEX_op_bswap64_i64:
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
        case INDEX_op_ext_i32_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            if (arg_is_const(op->args[1])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
                                          arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(clz):
        CASE_OP_32_64(ctz):
            if (arg_is_const(op->args[1])) {
                TCGArg v = arg_info(op->args[1])->val;
                if (v != 0) {
                    tmp = do_constant_folding(opc, v, 0);
                    tcg_opt_gen_movi(s, op, op->args[0], tmp);
                } else {
                    tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = deposit64(arg_info(op->args[1])->val,
                                op->args[3], op->args[4],
                                arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract):
            if (arg_is_const(op->args[1])) {
                tmp = extract64(arg_info(op->args[1])->val,
                                op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(sextract):
            if (arg_is_const(op->args[1])) {
                tmp = sextract64(arg_info(op->args[1])->val,
                                 op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract2):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                TCGArg v1 = arg_info(op->args[1])->val;
                TCGArg v2 = arg_info(op->args[2])->val;

                if (opc == INDEX_op_extract2_i64) {
                    tmp = (v1 >> op->args[3]) | (v2 << (64 - op->args[3]));
                } else {
                    tmp = (v1 >> op->args[3]) | (v2 << (32 - op->args[3]));
                    tmp = (int32_t)tmp;
                }
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, op->args[0],
                                           op->args[1], op->args[2]);
            if (tmp != 2) {
                if (tmp) {
                    bitmap_zero(temps_used.l, nb_temps);
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[3];
                } else {
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[5]);
            if (tmp != 2) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
                break;
            }
            if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
                tcg_target_ulong tv = arg_info(op->args[3])->val;
                tcg_target_ulong fv = arg_info(op->args[4])->val;
                TCGCond cond = op->args[5];
                if (fv == 1 && tv == 0) {
                    cond = tcg_invert_cond(cond);
                } else if (!(tv == 1 && fv == 0)) {
                    goto do_default;
                }
                op->args[3] = cond;
                op->opc = opc = (opc == INDEX_op_movcond_i32
                                 ? INDEX_op_setcond_i32
                                 : INDEX_op_setcond_i64);
                nb_iargs = 2;
            }
            goto do_default;

        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
                && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
                uint32_t al = arg_info(op->args[2])->val;
                uint32_t ah = arg_info(op->args[3])->val;
                uint32_t bl = arg_info(op->args[4])->val;
                uint32_t bh = arg_info(op->args[5])->val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, op, rl, (int32_t)a);
                tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
                uint32_t a = arg_info(op->args[2])->val;
                uint32_t b = arg_info(op->args[3])->val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, op, rl, (int32_t)r);
                tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
                                            op->args[4]);
            if (tmp != 2) {
                if (tmp) {
            do_brcond_true:
                    bitmap_zero(temps_used.l, nb_temps);
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[5];
                } else {
            do_brcond_false:
                    tcg_op_remove(s, op);
                }
            } else if ((op->args[4] == TCG_COND_LT
                        || op->args[4] == TCG_COND_GE)
                       && arg_is_const(op->args[2])
                       && arg_info(op->args[2])->val == 0
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
            do_brcond_high:
                bitmap_zero(temps_used.l, nb_temps);
                op->opc = INDEX_op_brcond_i32;
                op->args[0] = op->args[1];
                op->args[1] = op->args[3];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_brcond_low:
                bitmap_zero(temps_used.l, nb_temps);
                op->opc = INDEX_op_brcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
                                            op->args[5]);
            if (tmp != 2) {
            do_setcond_const:
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
            } else if ((op->args[5] == TCG_COND_LT
                        || op->args[5] == TCG_COND_GE)
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0
                       && arg_is_const(op->args[4])
                       && arg_info(op->args[4])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
            do_setcond_high:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_setcond_low:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[2] = op->args[3];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_call:
            if (!(op->args[nb_oargs + nb_iargs + 1]
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    if (test_bit(i, temps_used.l)) {
                        reset_ts(&s->temps[i]);
                    }
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute the operation result) so no propagation
               is done.  We trash everything if the operation is the end
               of a basic block, otherwise we only trash the output args.
               "mask" is the non-zero bits mask for the first output arg.  */
            if (def->flags & TCG_OPF_BB_END) {
                bitmap_zero(temps_used.l, nb_temps);
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(op->args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far).  */
                    if (i == 0) {
                        arg_info(op->args[i])->mask = mask;
                    }
                }
            }
            break;
        }

        /* Eliminate duplicate and redundant fence instructions.  */
        if (prev_mb) {
            switch (opc) {
            case INDEX_op_mb:
                /* Merge two barriers of the same type into one,
                 * or a weaker barrier into a stronger one,
                 * or two weaker barriers into a stronger one.
                 *   mb X; mb Y => mb X|Y
                 *   mb; strl => mb; st
                 *   ldaq; mb => ld; mb
                 *   ldaq; strl => ld; mb; st
                 * Other combinations are also merged into a strong
                 * barrier.  This is stricter than specified but for
                 * the purposes of TCG is better than not optimizing.
                 */
                prev_mb->args[0] |= op->args[0];
                tcg_op_remove(s, op);
                break;

            default:
                /* Opcodes that end the block stop the optimization.  */
                if ((def->flags & TCG_OPF_BB_END) == 0) {
                    break;
                }
                /* fallthru */
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_call:
                /* Opcodes that touch guest memory stop the optimization.  */
                prev_mb = NULL;
                break;
            }
        } else if (opc == INDEX_op_mb) {
            prev_mb = op;
        }
    }
}