/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)

struct tcg_temp_info {
    bool is_const;
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    tcg_target_ulong val;
    tcg_target_ulong mask;
};

static inline struct tcg_temp_info *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}

static inline struct tcg_temp_info *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}

static inline bool ts_is_const(TCGTemp *ts)
{
    return ts_info(ts)->is_const;
}

static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}

static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}

/* Reset TEMP's state, possibly removing the temp from the list of copies.  */
static void reset_ts(TCGTemp *ts)
{
    struct tcg_temp_info *ti = ts_info(ts);
    struct tcg_temp_info *pi = ts_info(ti->prev_copy);
    struct tcg_temp_info *ni = ts_info(ti->next_copy);

    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->is_const = false;
    ti->mask = -1;
}

static void reset_temp(TCGArg arg)
{
    reset_ts(arg_temp(arg));
}

/* Initialize and activate a temporary.  */
static void init_ts_info(struct tcg_temp_info *infos,
                         TCGTempSet *temps_used, TCGTemp *ts)
{
    size_t idx = temp_idx(ts);
    if (!test_bit(idx, temps_used->l)) {
        struct tcg_temp_info *ti = &infos[idx];

        ts->state_ptr = ti;
        ti->next_copy = ts;
        ti->prev_copy = ts;
        ti->is_const = false;
        ti->mask = -1;
        set_bit(idx, temps_used->l);
    }
}

static void init_arg_info(struct tcg_temp_info *infos,
                          TCGTempSet *temps_used, TCGArg arg)
{
    init_ts_info(infos, temps_used, arg_temp(arg));
}

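/*
 * Among the copies of TS, prefer a global if one exists, then a local
 * temp, then TS itself; globals and local temps have the longest
 * lifetimes, so they make the best canonical representative.
 */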
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
{
    TCGTemp *i;

    /* If this is already a global, we can't do better.  */
    if (ts->temp_global) {
        return ts;
    }

    /* Search for a global first.  */
    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
        if (i->temp_global) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local.  */
    if (!ts->temp_local) {
        for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
            if (i->temp_local) {
                return i;
            }
        }
    }

    /* Failing to find a better representation, return the same temp.  */
    return ts;
}

static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
{
    TCGTemp *i;

    if (ts1 == ts2) {
        return true;
    }

    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
        return false;
    }

    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
        if (i == ts2) {
            return true;
        }
    }

    return false;
}

static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}

static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
{
    const TCGOpDef *def;
    TCGOpcode new_op;
    tcg_target_ulong mask;
    struct tcg_temp_info *di = arg_info(dst);

    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_dupi_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_movi_i64;
    } else {
        new_op = INDEX_op_movi_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = val;

    reset_temp(dst);
    di->is_const = true;
    di->val = val;
    mask = val;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;
}

static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    const TCGOpDef *def;
    struct tcg_temp_info *di;
    struct tcg_temp_info *si;
    tcg_target_ulong mask;
    TCGOpcode new_op;

    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(s, op);
        return;
    }

    reset_ts(dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);
    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_mov_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_mov_i64;
    } else {
        new_op = INDEX_op_mov_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = src;

    mask = si->mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;

    if (src_ts->type == dst_ts->type) {
        struct tcg_temp_info *ni = ts_info(si->next_copy);

        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;
    }
}

static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    case INDEX_op_clz_i32:
        return (uint32_t)x ? clz32(x) : y;

    case INDEX_op_clz_i64:
        return x ? clz64(x) : y;

    case INDEX_op_ctz_i32:
        return (uint32_t)x ? ctz32(x) : y;

    case INDEX_op_ctz_i64:
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop_i32:
        return ctpop32(x);

    case INDEX_op_ctpop_i64:
        return ctpop64(x);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    CASE_OP_32_64(bswap16):
        return bswap16(x);

    CASE_OP_32_64(bswap32):
        return bswap32(x);

    case INDEX_op_bswap64_i64:
        return bswap64(x);

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined.  */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}

static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    TCGArg res = do_constant_folding_2(op, x, y);
    if (!(def->flags & TCG_OPF_64BIT)) {
        res = (int32_t)res;
    }
    return res;
}

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    tcg_target_ulong xv = arg_info(x)->val;
    tcg_target_ulong yv = arg_info(y)->val;
    if (arg_is_const(x) && arg_is_const(y)) {
        const TCGOpDef *def = &tcg_op_defs[op];
        tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
        if (def->flags & TCG_OPF_64BIT) {
            return do_constant_folding_cond_64(xv, yv, c);
        } else {
            return do_constant_folding_cond_32(xv, yv, c);
        }
    } else if (args_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (arg_is_const(y) && yv == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    }
    return 2;
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_info(bl)->val;
        tcg_target_ulong bhv = arg_info(bh)->val;
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_info(al)->val;
            tcg_target_ulong ahv = arg_info(ah)->val;
            uint64_t a = deposit64(alv, 32, 32, ahv);
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += arg_is_const(a1);
    sum -= arg_is_const(a2);

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts.  */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += arg_is_const(p1[0]);
    sum += arg_is_const(p1[1]);
    sum -= arg_is_const(p2[0]);
    sum -= arg_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

/* Propagate constants and copies, fold constant expressions.  */
void tcg_optimize(TCGContext *s)
{
    int nb_temps, nb_globals;
    TCGOp *op, *op_next, *prev_mb = NULL;
    struct tcg_temp_info *infos;
    TCGTempSet temps_used;

    /* Each temp has a tcg_temp_info attached via its state_ptr.
       If the temp holds a constant then its value is kept in the info's
       val field.  If the temp is a copy of other temps then the copies
       are available through the info's doubly linked circular list.  */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    bitmap_zero(temps_used.l, nb_temps);
    infos = tcg_malloc(sizeof(struct tcg_temp_info) * nb_temps);

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        tcg_target_ulong mask, partmask, affected;
        int nb_oargs, nb_iargs, i;
        TCGArg tmp;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        /* Count the arguments, and initialize the temps that are
           going to be used.  */
        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                TCGTemp *ts = arg_temp(op->args[i]);
                if (ts) {
                    init_ts_info(infos, &temps_used, ts);
                }
            }
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                init_arg_info(infos, &temps_used, op->args[i]);
            }
        }

        /* Do copy propagation.  */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            if (ts && ts_is_copy(ts)) {
                op->args[i] = temp_arg(find_better_copy(s, ts));
            }
        }

        /* For commutative operations make constant second argument.  */
        switch (opc) {
        CASE_OP_32_64_VEC(add):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(op->args[0], &op->args[1], &op->args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &op->args[0], &op->args[1])) {
                op->args[2] = tcg_swap_cond(op->args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
                op->args[3] = tcg_swap_cond(op->args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &op->args[1], &op->args[2])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation.  */
            if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
                op->args[5] = tcg_invert_cond(op->args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(op->args[0], &op->args[2], &op->args[4]);
            swap_commutative(op->args[1], &op->args[3], &op->args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(op->args[0], &op->args[2], &op->args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&op->args[0], &op->args[2])) {
                op->args[4] = tcg_swap_cond(op->args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&op->args[1], &op->args[3])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
           and "sub r, 0, a => neg r, a" cases.  */
        switch (opc) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        CASE_OP_32_64_VEC(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (arg_is_const(op->args[2])) {
                    /* Proceed with possible constant folding.  */
                    break;
                }
                if (opc == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else if (opc == INDEX_op_sub_i64) {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                } else if (TCG_TARGET_HAS_neg_vec) {
                    TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
                    unsigned vece = TCGOP_VECE(op);
                    neg_op = INDEX_op_neg_vec;
                    have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
                } else {
                    break;
                }
                if (!have_neg) {
                    break;
                }
                if (arg_is_const(op->args[1])
                    && arg_info(op->args[1])->val == 0) {
                    op->opc = neg_op;
                    reset_temp(op->args[0]);
                    op->args[1] = op->args[2];
                    continue;
                }
            }
            break;
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64(nand):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64_VEC(andc):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64_VEC(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_VECTOR) {
                    not_op = INDEX_op_not_vec;
                    have_not = TCG_TARGET_HAS_not_vec;
                } else if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                op->opc = not_op;
                reset_temp(op->args[0]);
                op->args[1] = op->args[i];
                continue;
            }
        default:
            break;
        }

        /* Simplify expression for "op r, a, const => mov r, a" cases.  */
        switch (opc) {
        CASE_OP_32_64_VEC(add):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify using known-zero bits.  Currently only ops with a single
           output argument are supported.  */
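        /*
         * As used below: "mask" approximates which bits of this op's result
         * may still be nonzero, and "affected" approximates which bits of
         * args[1] can change the result; affected == 0 means the op reduces
         * to a plain copy of args[1].
         */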
        mask = -1;
        affected = -1;
        switch (opc) {
        CASE_OP_32_64(ext8s):
            if ((arg_info(op->args[1])->mask & 0x80) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = arg_info(op->args[2])->mask;
            if (arg_is_const(op->args[2])) {
        and_const:
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            mask = arg_info(op->args[1])->mask & mask;
            break;

        case INDEX_op_ext_i32_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        case INDEX_op_extu_i32_i64:
            /* We do not compute affected as it is a size changing op.  */
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros does not imply known-ones.  Therefore unless
               op->args[2] is constant, we can't infer anything from it.  */
            if (arg_is_const(op->args[2])) {
                mask = ~arg_info(op->args[2])->mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set.  */
            mask = arg_info(op->args[1])->mask;
            break;

        case INDEX_op_sar_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_extrl_i64_i32:
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;
        case INDEX_op_extrh_i64_i32:
            mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
            break;

        CASE_OP_32_64(shl):
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
                mask = arg_info(op->args[1])->mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost.  */
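            /*
             * Illustrative example: an operand mask of 0x18 has bit 3 as its
             * lowest possibly-set bit, so -(0x18 & -0x18) = -0x8 sets every
             * bit from bit 3 upward, covering all values the negation of
             * such an operand can take.
             */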
            mask = -(arg_info(op->args[1])->mask
                     & -arg_info(op->args[1])->mask);
            break;

        CASE_OP_32_64(deposit):
            mask = deposit64(arg_info(op->args[1])->mask,
                             op->args[3], op->args[4],
                             arg_info(op->args[2])->mask);
            break;

        CASE_OP_32_64(extract):
            mask = extract64(arg_info(op->args[1])->mask,
                             op->args[2], op->args[3]);
            if (op->args[2] == 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;
        CASE_OP_32_64(sextract):
            mask = sextract64(arg_info(op->args[1])->mask,
                              op->args[2], op->args[3]);
            if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
            break;

        case INDEX_op_clz_i32:
        case INDEX_op_ctz_i32:
            mask = arg_info(op->args[2])->mask | 31;
            break;

        case INDEX_op_clz_i64:
        case INDEX_op_ctz_i64:
            mask = arg_info(op->args[2])->mask | 63;
            break;

        case INDEX_op_ctpop_i32:
            mask = 32 | 31;
            break;
        case INDEX_op_ctpop_i64:
            mask = 64 | 63;
            break;

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
                MemOp mop = get_memop(oi);
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }

        /* 32-bit ops generate 32-bit results.  For the result-is-zero test
           below, we can ignore high bits, but for further optimizations we
           need to record that the high bits contain garbage.  */
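        /*
         * partmask below is the 32-bit view used only for the is-zero check;
         * mask itself keeps the garbage high bits so later users of this
         * temp see them as unknown.
         */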
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }

        if (partmask == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, op, op->args[0], 0);
            continue;
        }
        if (affected == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases.  */
        switch (opc) {
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if (arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases.  */
        switch (opc) {
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(and):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases.  */
        switch (opc) {
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(xor):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted into arguments by the
           register allocator where needed and possible.  Also detect
           copies.  */
        switch (opc) {
        CASE_OP_32_64_VEC(mov):
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            break;
        CASE_OP_32_64(movi):
        case INDEX_op_dupi_vec:
            tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
            break;

        case INDEX_op_dup_vec:
            if (arg_is_const(op->args[1])) {
                tmp = arg_info(op->args[1])->val;
                tmp = dup_const(TCGOP_VECE(op), tmp);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        case INDEX_op_dup2_vec:
            assert(TCG_TARGET_REG_BITS == 32);
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[1])->val;
                if (tmp == arg_info(op->args[2])->val) {
                    tcg_opt_gen_movi(s, op, op->args[0], tmp);
                    break;
                }
            } else if (args_are_copies(op->args[1], op->args[2])) {
                op->opc = INDEX_op_dup_vec;
                TCGOP_VECE(op) = MO_32;
                nb_iargs = 1;
            }
            goto do_default;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        CASE_OP_32_64(ctpop):
        CASE_OP_32_64(bswap16):
        CASE_OP_32_64(bswap32):
        case INDEX_op_bswap64_i64:
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
        case INDEX_op_ext_i32_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            if (arg_is_const(op->args[1])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
                                          arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(clz):
        CASE_OP_32_64(ctz):
            if (arg_is_const(op->args[1])) {
                TCGArg v = arg_info(op->args[1])->val;
                if (v != 0) {
                    tmp = do_constant_folding(opc, v, 0);
                    tcg_opt_gen_movi(s, op, op->args[0], tmp);
                } else {
                    tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = deposit64(arg_info(op->args[1])->val,
                                op->args[3], op->args[4],
                                arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract):
            if (arg_is_const(op->args[1])) {
                tmp = extract64(arg_info(op->args[1])->val,
                                op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(sextract):
            if (arg_is_const(op->args[1])) {
                tmp = sextract64(arg_info(op->args[1])->val,
                                 op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract2):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                TCGArg v1 = arg_info(op->args[1])->val;
                TCGArg v2 = arg_info(op->args[2])->val;

                if (opc == INDEX_op_extract2_i64) {
                    tmp = (v1 >> op->args[3]) | (v2 << (64 - op->args[3]));
                } else {
                    tmp = (int32_t)(((uint32_t)v1 >> op->args[3]) |
                                    ((uint32_t)v2 << (32 - op->args[3])));
                }
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, op->args[0],
                                           op->args[1], op->args[2]);
            if (tmp != 2) {
                if (tmp) {
                    bitmap_zero(temps_used.l, nb_temps);
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[3];
                } else {
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[5]);
            if (tmp != 2) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
                break;
            }
            if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
                tcg_target_ulong tv = arg_info(op->args[3])->val;
                tcg_target_ulong fv = arg_info(op->args[4])->val;
                TCGCond cond = op->args[5];
                if (fv == 1 && tv == 0) {
                    cond = tcg_invert_cond(cond);
                } else if (!(tv == 1 && fv == 0)) {
                    goto do_default;
                }
                op->args[3] = cond;
                op->opc = opc = (opc == INDEX_op_movcond_i32
                                 ? INDEX_op_setcond_i32
                                 : INDEX_op_setcond_i64);
                nb_iargs = 2;
            }
            goto do_default;

        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
                && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
                uint32_t al = arg_info(op->args[2])->val;
                uint32_t ah = arg_info(op->args[3])->val;
                uint32_t bl = arg_info(op->args[4])->val;
                uint32_t bh = arg_info(op->args[5])->val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, op, rl, (int32_t)a);
                tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
                uint32_t a = arg_info(op->args[2])->val;
                uint32_t b = arg_info(op->args[3])->val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, op, rl, (int32_t)r);
                tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
                                            op->args[4]);
            if (tmp != 2) {
                if (tmp) {
            do_brcond_true:
                    bitmap_zero(temps_used.l, nb_temps);
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[5];
                } else {
            do_brcond_false:
                    tcg_op_remove(s, op);
                }
            } else if ((op->args[4] == TCG_COND_LT
                        || op->args[4] == TCG_COND_GE)
                       && arg_is_const(op->args[2])
                       && arg_info(op->args[2])->val == 0
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
            do_brcond_high:
                bitmap_zero(temps_used.l, nb_temps);
                op->opc = INDEX_op_brcond_i32;
                op->args[0] = op->args[1];
                op->args[1] = op->args[3];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_brcond_low:
                bitmap_zero(temps_used.l, nb_temps);
                op->opc = INDEX_op_brcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
                                            op->args[5]);
            if (tmp != 2) {
            do_setcond_const:
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
            } else if ((op->args[5] == TCG_COND_LT
                        || op->args[5] == TCG_COND_GE)
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0
                       && arg_is_const(op->args[4])
                       && arg_info(op->args[4])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
            do_setcond_high:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_setcond_low:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[2] = op->args[3];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_call:
            if (!(op->args[nb_oargs + nb_iargs + 1]
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    if (test_bit(i, temps_used.l)) {
                        reset_ts(&s->temps[i]);
                    }
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute the operation result) so no propagation
               is done.  We trash everything if the operation is the end
               of a basic block, otherwise we only trash the output args.
               "mask" is the non-zero bits mask for the first output arg.  */
            if (def->flags & TCG_OPF_BB_END) {
                bitmap_zero(temps_used.l, nb_temps);
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(op->args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far).  */
                    if (i == 0) {
                        arg_info(op->args[i])->mask = mask;
                    }
                }
            }
            break;
        }

        /* Eliminate duplicate and redundant fence instructions.  */
        if (prev_mb) {
            switch (opc) {
            case INDEX_op_mb:
                /* Merge two barriers of the same type into one,
                 * or a weaker barrier into a stronger one,
                 * or two weaker barriers into a stronger one.
                 *   mb X; mb Y => mb X|Y
                 *   mb; strl => mb; st
                 *   ldaq; mb => ld; mb
                 *   ldaq; strl => ld; mb; st
                 * Other combinations are also merged into a strong
                 * barrier.  This is stricter than specified but for
                 * the purposes of TCG is better than not optimizing.
                 */
                prev_mb->args[0] |= op->args[0];
                tcg_op_remove(s, op);
                break;

            default:
                /* Opcodes that end the block stop the optimization.  */
                if ((def->flags & TCG_OPF_BB_END) == 0) {
                    break;
                }
                /* fallthru */
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_call:
                /* Opcodes that touch guest memory stop the optimization.  */
                prev_mb = NULL;
                break;
            }
        } else if (opc == INDEX_op_mb) {
            prev_mb = op;
        }
    }
}