/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "exec/cpu-common.h"
#include "tcg-op.h"

/* Expand to the pair (or triple) of case labels for the 32-bit, 64-bit
   (and vector) variants of an opcode, e.g. CASE_OP_32_64(add) yields
   "case INDEX_op_add_i32: case INDEX_op_add_i64".  */
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)

struct tcg_temp_info {
    bool is_const;
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    tcg_target_ulong val;
    tcg_target_ulong mask;
};

static inline struct tcg_temp_info *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}

static inline struct tcg_temp_info *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}

static inline bool ts_is_const(TCGTemp *ts)
{
    return ts_info(ts)->is_const;
}

static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}

static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}

/* Reset TEMP's state, possibly removing the temp from the list of copies.  */
static void reset_ts(TCGTemp *ts)
{
    struct tcg_temp_info *ti = ts_info(ts);
    struct tcg_temp_info *pi = ts_info(ti->prev_copy);
    struct tcg_temp_info *ni = ts_info(ti->next_copy);

    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->is_const = false;
    ti->mask = -1;
}

static void reset_temp(TCGArg arg)
{
    reset_ts(arg_temp(arg));
}

/* Initialize and activate a temporary.  */
static void init_ts_info(struct tcg_temp_info *infos,
                         TCGTempSet *temps_used, TCGTemp *ts)
{
    size_t idx = temp_idx(ts);
    if (!test_bit(idx, temps_used->l)) {
        struct tcg_temp_info *ti = &infos[idx];

        ts->state_ptr = ti;
        ti->next_copy = ts;
        ti->prev_copy = ts;
        ti->is_const = false;
        ti->mask = -1;
        set_bit(idx, temps_used->l);
    }
}

static void init_arg_info(struct tcg_temp_info *infos,
                          TCGTempSet *temps_used, TCGArg arg)
{
    init_ts_info(infos, temps_used, arg_temp(arg));
}

static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
{
    TCGTemp *i;

    /* If this is already a global, we can't do better.  */
    if (ts->temp_global) {
        return ts;
    }

    /* Search for a global first.  */
    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
        if (i->temp_global) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local.  */
    if (!ts->temp_local) {
        for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
            if (i->temp_local) {
                return i;
            }
        }
    }

    /* Failing to find a better representation, return the same temp.  */
    return ts;
}

static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
{
    TCGTemp *i;

    if (ts1 == ts2) {
        return true;
    }

    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
        return false;
    }

    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
        if (i == ts2) {
            return true;
        }
    }

    return false;
}

static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}

static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
{
    const TCGOpDef *def;
    TCGOpcode new_op;
    tcg_target_ulong mask;
    struct tcg_temp_info *di = arg_info(dst);

    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_dupi_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_movi_i64;
    } else {
        new_op = INDEX_op_movi_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = val;

    reset_temp(dst);
    di->is_const = true;
    di->val = val;
    mask = val;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;
}

static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    const TCGOpDef *def;
    struct tcg_temp_info *di;
    struct tcg_temp_info *si;
    tcg_target_ulong mask;
    TCGOpcode new_op;

    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(s, op);
        return;
    }

    reset_ts(dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);
    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_mov_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_mov_i64;
    } else {
        new_op = INDEX_op_mov_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = src;

    mask = si->mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;

    if (src_ts->type == dst_ts->type) {
        struct tcg_temp_info *ni = ts_info(si->next_copy);

        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;
    }
}

static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    case INDEX_op_clz_i32:
        return (uint32_t)x ? clz32(x) : y;

    case INDEX_op_clz_i64:
        return x ? clz64(x) : y;

    case INDEX_op_ctz_i32:
        return (uint32_t)x ? ctz32(x) : y;

    case INDEX_op_ctz_i64:
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop_i32:
        return ctpop32(x);

    case INDEX_op_ctpop_i64:
        return ctpop64(x);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined.  */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}

static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    TCGArg res = do_constant_folding_2(op, x, y);
    if (!(def->flags & TCG_OPF_64BIT)) {
        res = (int32_t)res;
    }
    return res;
}

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    tcg_target_ulong xv = arg_info(x)->val;
    tcg_target_ulong yv = arg_info(y)->val;
    if (arg_is_const(x) && arg_is_const(y)) {
        const TCGOpDef *def = &tcg_op_defs[op];
        tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
        if (def->flags & TCG_OPF_64BIT) {
            return do_constant_folding_cond_64(xv, yv, c);
        } else {
            return do_constant_folding_cond_32(xv, yv, c);
        }
    } else if (args_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (arg_is_const(y) && yv == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    }
    return 2;
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_info(bl)->val;
        tcg_target_ulong bhv = arg_info(bh)->val;
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_info(al)->val;
            tcg_target_ulong ahv = arg_info(ah)->val;
            uint64_t a = deposit64(alv, 32, 32, ahv);
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += arg_is_const(a1);
    sum -= arg_is_const(a2);

    /* Prefer the constant in the second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts.  */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += arg_is_const(p1[0]);
    sum += arg_is_const(p1[1]);
    sum -= arg_is_const(p2[0]);
    sum -= arg_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

/* Propagate constants and copies, fold constant expressions.  */
void tcg_optimize(TCGContext *s)
{
    int nb_temps, nb_globals;
    TCGOp *op, *op_next, *prev_mb = NULL;
    struct tcg_temp_info *infos;
    TCGTempSet temps_used;

    /* The INFOS array has an element for each temp.
       If a temp holds a constant then its value is kept in that element.
       If a temp is a copy of other temps then the other copies are
       available through the doubly linked circular list.  */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    bitmap_zero(temps_used.l, nb_temps);
    infos = tcg_malloc(sizeof(struct tcg_temp_info) * nb_temps);

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        tcg_target_ulong mask, partmask, affected;
        int nb_oargs, nb_iargs, i;
        TCGArg tmp;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        /* Count the arguments, and initialize the temps that are
           going to be used.  */
        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                TCGTemp *ts = arg_temp(op->args[i]);
                if (ts) {
                    init_ts_info(infos, &temps_used, ts);
                }
            }
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                init_arg_info(infos, &temps_used, op->args[i]);
            }
        }

        /* Do copy propagation.  */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            if (ts && ts_is_copy(ts)) {
                op->args[i] = temp_arg(find_better_copy(s, ts));
            }
        }

        /* For commutative operations, make the constant the second argument.  */
        switch (opc) {
        CASE_OP_32_64_VEC(add):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(op->args[0], &op->args[1], &op->args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &op->args[0], &op->args[1])) {
                op->args[2] = tcg_swap_cond(op->args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
                op->args[3] = tcg_swap_cond(op->args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &op->args[1], &op->args[2])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation.  */
            if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
                op->args[5] = tcg_invert_cond(op->args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(op->args[0], &op->args[2], &op->args[4]);
            swap_commutative(op->args[1], &op->args[3], &op->args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(op->args[0], &op->args[2], &op->args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&op->args[0], &op->args[2])) {
                op->args[4] = tcg_swap_cond(op->args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&op->args[1], &op->args[3])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
           and "sub r, 0, a => neg r, a" case.  */
        switch (opc) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        CASE_OP_32_64_VEC(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (arg_is_const(op->args[2])) {
                    /* Proceed with possible constant folding.  */
                    break;
                }
                if (opc == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else if (opc == INDEX_op_sub_i64) {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                } else {
                    neg_op = INDEX_op_neg_vec;
                    have_neg = TCG_TARGET_HAS_neg_vec;
                }
                if (!have_neg) {
                    break;
                }
                if (arg_is_const(op->args[1])
                    && arg_info(op->args[1])->val == 0) {
                    op->opc = neg_op;
                    reset_temp(op->args[0]);
                    op->args[1] = op->args[2];
                    continue;
                }
            }
            break;
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64(nand):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64_VEC(andc):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64_VEC(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_VECTOR) {
                    not_op = INDEX_op_not_vec;
                    have_not = TCG_TARGET_HAS_not_vec;
                } else if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                op->opc = not_op;
                reset_temp(op->args[0]);
                op->args[1] = op->args[i];
                continue;
            }
        default:
            break;
        }

        /* Simplify expression for "op r, a, const => mov r, a" cases.  */
        switch (opc) {
        CASE_OP_32_64_VEC(add):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify using known-zero bits.  Currently only ops with a single
           output argument are supported.  */
        mask = -1;
        affected = -1;
        switch (opc) {
        CASE_OP_32_64(ext8s):
            if ((arg_info(op->args[1])->mask & 0x80) != 0) {
                break;
            }
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
                break;
            }
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = arg_info(op->args[2])->mask;
            if (arg_is_const(op->args[2])) {
        and_const:
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            mask = arg_info(op->args[1])->mask & mask;
            break;

        case INDEX_op_ext_i32_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_extu_i32_i64:
            /* We do not compute affected as it is a size changing op.  */
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros does not imply known-ones.  Therefore unless
               op->args[2] is constant, we can't infer anything from it.  */
            if (arg_is_const(op->args[2])) {
                mask = ~arg_info(op->args[2])->mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set.  */
            mask = arg_info(op->args[1])->mask;
            break;

        case INDEX_op_sar_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_extrl_i64_i32:
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;
        case INDEX_op_extrh_i64_i32:
            mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
            break;

        CASE_OP_32_64(shl):
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
                mask = arg_info(op->args[1])->mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost.  */
            mask = -(arg_info(op->args[1])->mask
                     & -arg_info(op->args[1])->mask);
            break;

        CASE_OP_32_64(deposit):
            mask = deposit64(arg_info(op->args[1])->mask,
                             op->args[3], op->args[4],
                             arg_info(op->args[2])->mask);
            break;

        CASE_OP_32_64(extract):
            mask = extract64(arg_info(op->args[1])->mask,
                             op->args[2], op->args[3]);
            if (op->args[2] == 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;
        CASE_OP_32_64(sextract):
            mask = sextract64(arg_info(op->args[1])->mask,
                              op->args[2], op->args[3]);
            if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
            break;

        case INDEX_op_clz_i32:
        case INDEX_op_ctz_i32:
            mask = arg_info(op->args[2])->mask | 31;
            break;

        case INDEX_op_clz_i64:
        case INDEX_op_ctz_i64:
            mask = arg_info(op->args[2])->mask | 63;
            break;

        case INDEX_op_ctpop_i32:
            mask = 32 | 31;
            break;
        case INDEX_op_ctpop_i64:
            mask = 64 | 63;
            break;

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
                TCGMemOp mop = get_memop(oi);
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }

        /* 32-bit ops generate 32-bit results.  For the "result is zero"
           test below we can ignore the high bits, but for further
           optimizations we need to record that the high bits contain
           garbage.  */
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }

        if (partmask == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, op, op->args[0], 0);
            continue;
        }
        if (affected == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases.  */
        switch (opc) {
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if (arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases.  */
        switch (opc) {
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(and):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases.  */
        switch (opc) {
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(xor):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted to arguments by register
           allocator where needed and possible.  Also detect copies.  */
        switch (opc) {
        CASE_OP_32_64_VEC(mov):
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            break;
        CASE_OP_32_64(movi):
        case INDEX_op_dupi_vec:
            tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
            break;

        case INDEX_op_dup_vec:
            if (arg_is_const(op->args[1])) {
                tmp = arg_info(op->args[1])->val;
                tmp = dup_const(TCGOP_VECE(op), tmp);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        CASE_OP_32_64(ctpop):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
        case INDEX_op_ext_i32_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            if (arg_is_const(op->args[1])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
                                          arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(clz):
        CASE_OP_32_64(ctz):
            if (arg_is_const(op->args[1])) {
                TCGArg v = arg_info(op->args[1])->val;
                if (v != 0) {
                    tmp = do_constant_folding(opc, v, 0);
                    tcg_opt_gen_movi(s, op, op->args[0], tmp);
                } else {
                    tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = deposit64(arg_info(op->args[1])->val,
                                op->args[3], op->args[4],
                                arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract):
            if (arg_is_const(op->args[1])) {
                tmp = extract64(arg_info(op->args[1])->val,
                                op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(sextract):
            if (arg_is_const(op->args[1])) {
                tmp = sextract64(arg_info(op->args[1])->val,
                                 op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, op->args[0],
                                           op->args[1], op->args[2]);
            if (tmp != 2) {
                if (tmp) {
                    bitmap_zero(temps_used.l, nb_temps);
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[3];
                } else {
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[5]);
            if (tmp != 2) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
                break;
            }
            if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
                tcg_target_ulong tv = arg_info(op->args[3])->val;
                tcg_target_ulong fv = arg_info(op->args[4])->val;
                TCGCond cond = op->args[5];
                if (fv == 1 && tv == 0) {
                    cond = tcg_invert_cond(cond);
                } else if (!(tv == 1 && fv == 0)) {
                    goto do_default;
                }
                op->args[3] = cond;
                op->opc = opc = (opc == INDEX_op_movcond_i32
                                 ? INDEX_op_setcond_i32
                                 : INDEX_op_setcond_i64);
                nb_iargs = 2;
            }
            goto do_default;

        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
                && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
                uint32_t al = arg_info(op->args[2])->val;
                uint32_t ah = arg_info(op->args[3])->val;
                uint32_t bl = arg_info(op->args[4])->val;
                uint32_t bh = arg_info(op->args[5])->val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, op, rl, (int32_t)a);
                tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
                uint32_t a = arg_info(op->args[2])->val;
                uint32_t b = arg_info(op->args[3])->val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, op, rl, (int32_t)r);
                tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
                                            op->args[4]);
            if (tmp != 2) {
                if (tmp) {
            do_brcond_true:
                    bitmap_zero(temps_used.l, nb_temps);
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[5];
                } else {
            do_brcond_false:
                    tcg_op_remove(s, op);
                }
            } else if ((op->args[4] == TCG_COND_LT
                        || op->args[4] == TCG_COND_GE)
                       && arg_is_const(op->args[2])
                       && arg_info(op->args[2])->val == 0
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
            do_brcond_high:
                bitmap_zero(temps_used.l, nb_temps);
                op->opc = INDEX_op_brcond_i32;
                op->args[0] = op->args[1];
                op->args[1] = op->args[3];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_brcond_low:
                bitmap_zero(temps_used.l, nb_temps);
                op->opc = INDEX_op_brcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
                                            op->args[5]);
            if (tmp != 2) {
            do_setcond_const:
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
            } else if ((op->args[5] == TCG_COND_LT
                        || op->args[5] == TCG_COND_GE)
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0
                       && arg_is_const(op->args[4])
                       && arg_info(op->args[4])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
            do_setcond_high:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_setcond_low:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[2] = op->args[3];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_call:
            if (!(op->args[nb_oargs + nb_iargs + 1]
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    if (test_bit(i, temps_used.l)) {
                        reset_ts(&s->temps[i]);
                    }
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute the operation result) so no propagation
               is done.  We trash everything if the operation is the end
               of a basic block, otherwise we only trash the output args.
               "mask" is the non-zero bits mask for the first output arg.  */
            if (def->flags & TCG_OPF_BB_END) {
                bitmap_zero(temps_used.l, nb_temps);
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(op->args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far).  */
                    if (i == 0) {
                        arg_info(op->args[i])->mask = mask;
                    }
                }
            }
            break;
        }

        /* Eliminate duplicate and redundant fence instructions.  */
        if (prev_mb) {
            switch (opc) {
            case INDEX_op_mb:
                /* Merge two barriers of the same type into one,
                 * or a weaker barrier into a stronger one,
                 * or two weaker barriers into a stronger one.
                 *   mb X; mb Y => mb X|Y
                 *   mb; strl => mb; st
                 *   ldaq; mb => ld; mb
                 *   ldaq; strl => ld; mb; st
                 * Other combinations are also merged into a strong
                 * barrier.  This is stricter than specified but for
                 * the purposes of TCG is better than not optimizing.
                 */
                prev_mb->args[0] |= op->args[0];
                tcg_op_remove(s, op);
                break;

            default:
                /* Opcodes that end the block stop the optimization.  */
                if ((def->flags & TCG_OPF_BB_END) == 0) {
                    break;
                }
                /* fallthru */
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_call:
                /* Opcodes that touch guest memory stop the optimization.  */
                prev_mb = NULL;
                break;
            }
        } else if (opc == INDEX_op_mb) {
            prev_mb = op;
        }
    }
}