/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "exec/cpu-common.h"
#include "tcg-op.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)

struct tcg_temp_info {
    bool is_const;              /* true if the temp holds the constant 'val' */
    TCGTemp *prev_copy;         /* doubly linked circular list of temps known
                                   to hold the same value */
    TCGTemp *next_copy;
    tcg_target_ulong val;       /* constant value, valid when is_const */
    tcg_target_ulong mask;      /* bits that may be non-zero; a clear bit is
                                   known to be zero */
};

static inline struct tcg_temp_info *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}

static inline struct tcg_temp_info *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}

static inline bool ts_is_const(TCGTemp *ts)
{
    return ts_info(ts)->is_const;
}

static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}

static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}

/* Reset TEMP's state, possibly removing the temp from the list of copies.  */
static void reset_ts(TCGTemp *ts)
{
    struct tcg_temp_info *ti = ts_info(ts);
    struct tcg_temp_info *pi = ts_info(ti->prev_copy);
    struct tcg_temp_info *ni = ts_info(ti->next_copy);

    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->is_const = false;
    ti->mask = -1;
}

static void reset_temp(TCGArg arg)
{
    reset_ts(arg_temp(arg));
}

/* Initialize and activate a temporary.  */
static void init_ts_info(struct tcg_temp_info *infos,
                         TCGTempSet *temps_used, TCGTemp *ts)
{
    size_t idx = temp_idx(ts);
    if (!test_bit(idx, temps_used->l)) {
        struct tcg_temp_info *ti = &infos[idx];

        ts->state_ptr = ti;
        ti->next_copy = ts;
        ti->prev_copy = ts;
        ti->is_const = false;
        ti->mask = -1;
        set_bit(idx, temps_used->l);
    }
}

static void init_arg_info(struct tcg_temp_info *infos,
                          TCGTempSet *temps_used, TCGArg arg)
{
    init_ts_info(infos, temps_used, arg_temp(arg));
}

static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
{
    TCGTemp *i;

    /* If this is already a global, we can't do better.  */
    if (ts->temp_global) {
        return ts;
    }

    /* Search for a global first.  */
    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
        if (i->temp_global) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local.  */
    if (!ts->temp_local) {
        for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
            if (i->temp_local) {
                return i;
            }
        }
    }

    /* Failing to find a better representation, return the same temp.  */
    return ts;
}

static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
{
    TCGTemp *i;

    if (ts1 == ts2) {
        return true;
    }

    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
        return false;
    }

    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
        if (i == ts2) {
            return true;
        }
    }

    return false;
}

static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}

static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
{
    const TCGOpDef *def;
    TCGOpcode new_op;
    tcg_target_ulong mask;
    struct tcg_temp_info *di = arg_info(dst);

    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_dupi_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_movi_i64;
    } else {
        new_op = INDEX_op_movi_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = val;

    reset_temp(dst);
    di->is_const = true;
    di->val = val;
    mask = val;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;
}

static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    const TCGOpDef *def;
    struct tcg_temp_info *di;
    struct tcg_temp_info *si;
    tcg_target_ulong mask;
    TCGOpcode new_op;

    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(s, op);
        return;
    }

    reset_ts(dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);
    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_mov_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_mov_i64;
    } else {
        new_op = INDEX_op_mov_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = src;

    mask = si->mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;

    if (src_ts->type == dst_ts->type) {
        struct tcg_temp_info *ni = ts_info(si->next_copy);

        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;
    }
}

static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    case INDEX_op_clz_i32:
        return (uint32_t)x ? clz32(x) : y;

    case INDEX_op_clz_i64:
        return x ? clz64(x) : y;

    case INDEX_op_ctz_i32:
        return (uint32_t)x ? ctz32(x) : y;

    case INDEX_op_ctz_i64:
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop_i32:
        return ctpop32(x);

    case INDEX_op_ctpop_i64:
        return ctpop64(x);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    CASE_OP_32_64(bswap16):
        return bswap16(x);

    CASE_OP_32_64(bswap32):
        return bswap32(x);

    case INDEX_op_bswap64_i64:
        return bswap64(x);

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined.  */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}

static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    TCGArg res = do_constant_folding_2(op, x, y);
    if (!(def->flags & TCG_OPF_64BIT)) {
        res = (int32_t)res;
    }
    return res;
}

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    tcg_target_ulong xv = arg_info(x)->val;
    tcg_target_ulong yv = arg_info(y)->val;
    if (arg_is_const(x) && arg_is_const(y)) {
        const TCGOpDef *def = &tcg_op_defs[op];
        tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
        if (def->flags & TCG_OPF_64BIT) {
            return do_constant_folding_cond_64(xv, yv, c);
        } else {
            return do_constant_folding_cond_32(xv, yv, c);
        }
    } else if (args_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (arg_is_const(y) && yv == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    }
    return 2;
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_info(bl)->val;
        tcg_target_ulong bhv = arg_info(bh)->val;
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_info(al)->val;
            tcg_target_ulong ahv = arg_info(ah)->val;
            uint64_t a = deposit64(alv, 32, 32, ahv);
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += arg_is_const(a1);
    sum -= arg_is_const(a2);

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts.  */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += arg_is_const(p1[0]);
    sum += arg_is_const(p1[1]);
    sum -= arg_is_const(p2[0]);
    sum -= arg_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

/* Propagate constants and copies, fold constant expressions.  */
void tcg_optimize(TCGContext *s)
{
    int nb_temps, nb_globals;
    TCGOp *op, *op_next, *prev_mb = NULL;
    struct tcg_temp_info *infos;
    TCGTempSet temps_used;

    /* The INFOS array has an element for each temp.
       If a temp holds a constant, its value is kept in that element.
       If a temp is a copy of other temps, the other copies are
       available through the doubly linked circular list.  */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    bitmap_zero(temps_used.l, nb_temps);
    infos = tcg_malloc(sizeof(struct tcg_temp_info) * nb_temps);

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        tcg_target_ulong mask, partmask, affected;
        int nb_oargs, nb_iargs, i;
        TCGArg tmp;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        /* Count the arguments, and initialize the temps that are
           going to be used.  */
        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                TCGTemp *ts = arg_temp(op->args[i]);
                if (ts) {
                    init_ts_info(infos, &temps_used, ts);
                }
            }
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                init_arg_info(infos, &temps_used, op->args[i]);
            }
        }

        /* Do copy propagation */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            if (ts && ts_is_copy(ts)) {
                op->args[i] = temp_arg(find_better_copy(s, ts));
            }
        }

        /* For commutative operations make constant second argument */
        switch (opc) {
        CASE_OP_32_64_VEC(add):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(op->args[0], &op->args[1], &op->args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &op->args[0], &op->args[1])) {
                op->args[2] = tcg_swap_cond(op->args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
                op->args[3] = tcg_swap_cond(op->args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &op->args[1], &op->args[2])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation.  */
            if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
                op->args[5] = tcg_invert_cond(op->args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(op->args[0], &op->args[2], &op->args[4]);
            swap_commutative(op->args[1], &op->args[3], &op->args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(op->args[0], &op->args[2], &op->args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&op->args[0], &op->args[2])) {
                op->args[4] = tcg_swap_cond(op->args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&op->args[1], &op->args[3])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
           and the "sub r, 0, a => neg r, a" case.  */
        switch (opc) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        CASE_OP_32_64_VEC(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (arg_is_const(op->args[2])) {
                    /* Proceed with possible constant folding.  */
                    break;
                }
                if (opc == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else if (opc == INDEX_op_sub_i64) {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                } else if (TCG_TARGET_HAS_neg_vec) {
                    TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
                    unsigned vece = TCGOP_VECE(op);
                    neg_op = INDEX_op_neg_vec;
                    have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
                } else {
                    break;
                }
                if (!have_neg) {
                    break;
                }
                if (arg_is_const(op->args[1])
                    && arg_info(op->args[1])->val == 0) {
                    op->opc = neg_op;
                    reset_temp(op->args[0]);
                    op->args[1] = op->args[2];
                    continue;
                }
            }
            break;
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64(nand):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64_VEC(andc):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64_VEC(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_VECTOR) {
                    not_op = INDEX_op_not_vec;
                    have_not = TCG_TARGET_HAS_not_vec;
                } else if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                op->opc = not_op;
                reset_temp(op->args[0]);
                op->args[1] = op->args[i];
                continue;
            }
        default:
            break;
        }

        /* Simplify expression for "op r, a, const => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(add):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify using known-zero bits.  Currently only ops with a single
           output argument are supported.  */
        mask = -1;
        affected = -1;
        switch (opc) {
        CASE_OP_32_64(ext8s):
            if ((arg_info(op->args[1])->mask & 0x80) != 0) {
                break;
            }
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
                break;
            }
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = arg_info(op->args[2])->mask;
            if (arg_is_const(op->args[2])) {
        and_const:
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            mask = arg_info(op->args[1])->mask & mask;
            break;

        case INDEX_op_ext_i32_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_extu_i32_i64:
            /* We do not compute affected as it is a size changing op.  */
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros does not imply known-ones.  Therefore unless
               op->args[2] is constant, we can't infer anything from it.  */
            if (arg_is_const(op->args[2])) {
                mask = ~arg_info(op->args[2])->mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set.  */
            mask = arg_info(op->args[1])->mask;
            break;

        case INDEX_op_sar_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_extrl_i64_i32:
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;
        case INDEX_op_extrh_i64_i32:
            mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
            break;

        CASE_OP_32_64(shl):
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
                mask = arg_info(op->args[1])->mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost.  */
            mask = -(arg_info(op->args[1])->mask
                     & -arg_info(op->args[1])->mask);
            break;

        CASE_OP_32_64(deposit):
            mask = deposit64(arg_info(op->args[1])->mask,
                             op->args[3], op->args[4],
                             arg_info(op->args[2])->mask);
            break;

        CASE_OP_32_64(extract):
            mask = extract64(arg_info(op->args[1])->mask,
                             op->args[2], op->args[3]);
            if (op->args[2] == 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;
        CASE_OP_32_64(sextract):
            mask = sextract64(arg_info(op->args[1])->mask,
                              op->args[2], op->args[3]);
            if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
            break;

        case INDEX_op_clz_i32:
        case INDEX_op_ctz_i32:
            mask = arg_info(op->args[2])->mask | 31;
            break;

        case INDEX_op_clz_i64:
        case INDEX_op_ctz_i64:
            mask = arg_info(op->args[2])->mask | 63;
            break;

        case INDEX_op_ctpop_i32:
            mask = 32 | 31;
            break;
        case INDEX_op_ctpop_i64:
            mask = 64 | 63;
            break;

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
                TCGMemOp mop = get_memop(oi);
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }

        /* 32-bit ops generate 32-bit results.  For the result-is-zero test
           below, we can ignore high bits, but for further optimizations we
           need to record that the high bits contain garbage.  */
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }

        if (partmask == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, op, op->args[0], 0);
            continue;
        }
        if (affected == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if (arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(and):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(xor):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted to arguments by register
           allocator where needed and possible.  Also detect copies. */
        switch (opc) {
        CASE_OP_32_64_VEC(mov):
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            break;
        CASE_OP_32_64(movi):
        case INDEX_op_dupi_vec:
            tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
            break;

        case INDEX_op_dup_vec:
            if (arg_is_const(op->args[1])) {
                tmp = arg_info(op->args[1])->val;
                tmp = dup_const(TCGOP_VECE(op), tmp);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        CASE_OP_32_64(ctpop):
        CASE_OP_32_64(bswap16):
        CASE_OP_32_64(bswap32):
        case INDEX_op_bswap64_i64:
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
        case INDEX_op_ext_i32_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            if (arg_is_const(op->args[1])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
                                          arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(clz):
        CASE_OP_32_64(ctz):
            if (arg_is_const(op->args[1])) {
                TCGArg v = arg_info(op->args[1])->val;
                if (v != 0) {
                    tmp = do_constant_folding(opc, v, 0);
                    tcg_opt_gen_movi(s, op, op->args[0], tmp);
                } else {
                    tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = deposit64(arg_info(op->args[1])->val,
                                op->args[3], op->args[4],
                                arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract):
            if (arg_is_const(op->args[1])) {
                tmp = extract64(arg_info(op->args[1])->val,
                                op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(sextract):
            if (arg_is_const(op->args[1])) {
                tmp = sextract64(arg_info(op->args[1])->val,
                                 op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract2):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                TCGArg v1 = arg_info(op->args[1])->val;
                TCGArg v2 = arg_info(op->args[2])->val;

                if (opc == INDEX_op_extract2_i64) {
                    tmp = (v1 >> op->args[3]) | (v2 << (64 - op->args[3]));
                } else {
                    tmp = (v1 >> op->args[3]) | (v2 << (32 - op->args[3]));
                    tmp = (int32_t)tmp;
                }
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, op->args[0],
                                           op->args[1], op->args[2]);
            if (tmp != 2) {
                if (tmp) {
                    bitmap_zero(temps_used.l, nb_temps);
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[3];
                } else {
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[5]);
            if (tmp != 2) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
                break;
            }
            if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
                tcg_target_ulong tv = arg_info(op->args[3])->val;
                tcg_target_ulong fv = arg_info(op->args[4])->val;
                TCGCond cond = op->args[5];
                if (fv == 1 && tv == 0) {
                    cond = tcg_invert_cond(cond);
                } else if (!(tv == 1 && fv == 0)) {
                    goto do_default;
                }
                op->args[3] = cond;
                op->opc = opc = (opc == INDEX_op_movcond_i32
                                 ? INDEX_op_setcond_i32
                                 : INDEX_op_setcond_i64);
                nb_iargs = 2;
            }
            goto do_default;

        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
                && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
                uint32_t al = arg_info(op->args[2])->val;
                uint32_t ah = arg_info(op->args[3])->val;
                uint32_t bl = arg_info(op->args[4])->val;
                uint32_t bh = arg_info(op->args[5])->val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, op, rl, (int32_t)a);
                tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
                uint32_t a = arg_info(op->args[2])->val;
                uint32_t b = arg_info(op->args[3])->val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, op, rl, (int32_t)r);
                tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
                                            op->args[4]);
            if (tmp != 2) {
                if (tmp) {
        do_brcond_true:
                    bitmap_zero(temps_used.l, nb_temps);
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[5];
                } else {
        do_brcond_false:
                    tcg_op_remove(s, op);
                }
            } else if ((op->args[4] == TCG_COND_LT
                        || op->args[4] == TCG_COND_GE)
                       && arg_is_const(op->args[2])
                       && arg_info(op->args[2])->val == 0
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
        do_brcond_high:
                bitmap_zero(temps_used.l, nb_temps);
                op->opc = INDEX_op_brcond_i32;
                op->args[0] = op->args[1];
                op->args[1] = op->args[3];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
        do_brcond_low:
                bitmap_zero(temps_used.l, nb_temps);
                op->opc = INDEX_op_brcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
                                            op->args[5]);
            if (tmp != 2) {
        do_setcond_const:
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
            } else if ((op->args[5] == TCG_COND_LT
                        || op->args[5] == TCG_COND_GE)
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0
                       && arg_is_const(op->args[4])
                       && arg_info(op->args[4])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
        do_setcond_high:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
        do_setcond_low:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[2] = op->args[3];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_call:
            if (!(op->args[nb_oargs + nb_iargs + 1]
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    if (test_bit(i, temps_used.l)) {
                        reset_ts(&s->temps[i]);
                    }
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about operation (or were unable
               to compute the operation result) so no propagation is done.
               We trash everything if the operation is the end of a basic
               block, otherwise we only trash the output args.  "mask" is
               the non-zero bits mask for the first output arg.  */
            if (def->flags & TCG_OPF_BB_END) {
                bitmap_zero(temps_used.l, nb_temps);
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(op->args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far). */
                    if (i == 0) {
                        arg_info(op->args[i])->mask = mask;
                    }
                }
            }
            break;
        }

        /* Eliminate duplicate and redundant fence instructions.  */
        if (prev_mb) {
            switch (opc) {
            case INDEX_op_mb:
                /* Merge two barriers of the same type into one,
                 * or a weaker barrier into a stronger one,
                 * or two weaker barriers into a stronger one.
                 *   mb X; mb Y => mb X|Y
                 *   mb; strl => mb; st
                 *   ldaq; mb => ld; mb
                 *   ldaq; strl => ld; mb; st
                 * Other combinations are also merged into a strong
                 * barrier.  This is stricter than specified but for
                 * the purposes of TCG is better than not optimizing.
                 */
                prev_mb->args[0] |= op->args[0];
                tcg_op_remove(s, op);
                break;

            default:
                /* Opcodes that end the block stop the optimization.  */
                if ((def->flags & TCG_OPF_BB_END) == 0) {
                    break;
                }
                /* fallthru */
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_call:
                /* Opcodes that touch guest memory stop the optimization.  */
                prev_mb = NULL;
                break;
            }
        } else if (opc == INDEX_op_mb) {
            prev_mb = op;
        }
    }
}