/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)

typedef struct TempOptInfo {
    bool is_const;
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    uint64_t val;
    uint64_t mask;
} TempOptInfo;

static inline TempOptInfo *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}

static inline TempOptInfo *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}

static inline bool ts_is_const(TCGTemp *ts)
{
    return ts_info(ts)->is_const;
}

static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}

static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}

/* Reset TEMP's state, possibly removing the temp from the list of copies. */
static void reset_ts(TCGTemp *ts)
{
    TempOptInfo *ti = ts_info(ts);
    TempOptInfo *pi = ts_info(ti->prev_copy);
    TempOptInfo *ni = ts_info(ti->next_copy);

    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->is_const = false;
    ti->mask = -1;
}

static void reset_temp(TCGArg arg)
{
    reset_ts(arg_temp(arg));
}

/* Initialize and activate a temporary. */
static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
{
    size_t idx = temp_idx(ts);
    TempOptInfo *ti;

    if (test_bit(idx, temps_used->l)) {
        return;
    }
    set_bit(idx, temps_used->l);

    ti = ts->state_ptr;
    if (ti == NULL) {
        ti = tcg_malloc(sizeof(TempOptInfo));
        ts->state_ptr = ti;
    }

    ti->next_copy = ts;
    ti->prev_copy = ts;
    if (ts->kind == TEMP_CONST) {
        ti->is_const = true;
        ti->val = ts->val;
        ti->mask = ts->val;
        if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
            /* High bits of a 32-bit quantity are garbage. */
            ti->mask |= ~0xffffffffull;
        }
    } else {
        ti->is_const = false;
        ti->mask = -1;
    }
}
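
/*
 * Copies of a temp are threaded through the next_copy/prev_copy fields
 * as a circular doubly linked list, so a temp that is alone in its list
 * (next_copy == ts) is not a copy of anything.  "mask" is a conservative
 * superset of the bits that may be nonzero: for example, a constant 0xff
 * has mask 0xff (all higher bits known zero), while mask == -1 means
 * nothing is known about the value.
 */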

static void init_arg_info(TCGTempSet *temps_used, TCGArg arg)
{
    init_ts_info(temps_used, arg_temp(arg));
}

static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
{
    TCGTemp *i, *g, *l;

    /* If this is already readonly, we can't do better. */
    if (temp_readonly(ts)) {
        return ts;
    }

    g = l = NULL;
    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
        if (temp_readonly(i)) {
            return i;
        } else if (i->kind > ts->kind) {
            if (i->kind == TEMP_GLOBAL) {
                g = i;
            } else if (i->kind == TEMP_LOCAL) {
                l = i;
            }
        }
    }

    /* If we didn't find a better representation, return the same temp. */
    return g ? g : l ? l : ts;
}

static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
{
    TCGTemp *i;

    if (ts1 == ts2) {
        return true;
    }

    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
        return false;
    }

    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
        if (i == ts2) {
            return true;
        }
    }

    return false;
}

static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}

static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    const TCGOpDef *def;
    TempOptInfo *di;
    TempOptInfo *si;
    uint64_t mask;
    TCGOpcode new_op;

    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(s, op);
        return;
    }

    reset_ts(dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);
    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_mov_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_mov_i64;
    } else {
        new_op = INDEX_op_mov_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
    op->args[0] = dst;
    op->args[1] = src;

    mask = si->mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage. */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;

    if (src_ts->type == dst_ts->type) {
        TempOptInfo *ni = ts_info(si->next_copy);

        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;
    }
}

static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
                             TCGOp *op, TCGArg dst, uint64_t val)
{
    const TCGOpDef *def = &tcg_op_defs[op->opc];
    TCGType type;
    TCGTemp *tv;

    if (def->flags & TCG_OPF_VECTOR) {
        type = TCGOP_VECL(op) + TCG_TYPE_V64;
    } else if (def->flags & TCG_OPF_64BIT) {
        type = TCG_TYPE_I64;
    } else {
        type = TCG_TYPE_I32;
    }

    /* Convert movi to mov with constant temp. */
    tv = tcg_constant_internal(type, val);
    init_ts_info(temps_used, tv);
    tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
}
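
/*
 * Evaluate one operation on constant operands.  Shift and rotate counts
 * are masked to the operand width, and division by zero is replaced by
 * division by 1: the results of such operations are undefined at the
 * TCG level, so any well-defined host computation will do.
 */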

static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    case INDEX_op_clz_i32:
        return (uint32_t)x ? clz32(x) : y;

    case INDEX_op_clz_i64:
        return x ? clz64(x) : y;

    case INDEX_op_ctz_i32:
        return (uint32_t)x ? ctz32(x) : y;

    case INDEX_op_ctz_i64:
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop_i32:
        return ctpop32(x);

    case INDEX_op_ctpop_i64:
        return ctpop64(x);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    CASE_OP_32_64(bswap16):
        return bswap16(x);

    CASE_OP_32_64(bswap32):
        return bswap32(x);

    case INDEX_op_bswap64_i64:
        return bswap64(x);

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined. */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}
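
/*
 * As above, but sign-extend the truncated result of a 32-bit op
 * into the 64-bit container.
 */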

static uint64_t do_constant_folding(TCGOpcode op, uint64_t x, uint64_t y)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    uint64_t res = do_constant_folding_2(op, x, y);
    if (!(def->flags & TCG_OPF_64BIT)) {
        res = (int32_t)res;
    }
    return res;
}

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can. */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    uint64_t xv = arg_info(x)->val;
    uint64_t yv = arg_info(y)->val;

    if (arg_is_const(x) && arg_is_const(y)) {
        const TCGOpDef *def = &tcg_op_defs[op];
        tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
        if (def->flags & TCG_OPF_64BIT) {
            return do_constant_folding_cond_64(xv, yv, c);
        } else {
            return do_constant_folding_cond_32(xv, yv, c);
        }
    } else if (args_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (arg_is_const(y) && yv == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    }
    return 2;
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can. */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_info(bl)->val;
        tcg_target_ulong bhv = arg_info(bh)->val;
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_info(al)->val;
            tcg_target_ulong ahv = arg_info(ah)->val;
            uint64_t a = deposit64(alv, 32, 32, ahv);
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}
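
/*
 * For example, "add t0, $5, t1" becomes "add t0, t1, $5", and
 * "add t0, t1, t0" becomes "add t0, t0, t1", so that a constant
 * operand is second and two-address hosts can overwrite the
 * destination in place.  Returns true if a swap was performed.
 */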

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += arg_is_const(a1);
    sum -= arg_is_const(a2);

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += arg_is_const(p1[0]);
    sum += arg_is_const(p1[1]);
    sum -= arg_is_const(p2[0]);
    sum -= arg_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

/* Propagate constants and copies, fold constant expressions. */
void tcg_optimize(TCGContext *s)
{
    int nb_temps, nb_globals, i;
    TCGOp *op, *op_next, *prev_mb = NULL;
    TCGTempSet temps_used;

    /* Each temp has a TempOptInfo reached through its state_ptr.
       If the temp holds a constant, its value is kept there.
       If the temp is a copy of other temps, those copies are
       available through its doubly linked circular list. */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;

    memset(&temps_used, 0, sizeof(temps_used));
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        uint64_t mask, partmask, affected, tmp;
        int nb_oargs, nb_iargs;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        /* Count the arguments, and initialize the temps that are
           going to be used */
        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                TCGTemp *ts = arg_temp(op->args[i]);
                if (ts) {
                    init_ts_info(&temps_used, ts);
                }
            }
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                init_arg_info(&temps_used, op->args[i]);
            }
        }

        /* Do copy propagation */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            if (ts && ts_is_copy(ts)) {
                op->args[i] = temp_arg(find_better_copy(s, ts));
            }
        }

        /* For commutative operations make constant second argument */
        switch (opc) {
        CASE_OP_32_64_VEC(add):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(op->args[0], &op->args[1], &op->args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &op->args[0], &op->args[1])) {
                op->args[2] = tcg_swap_cond(op->args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
                op->args[3] = tcg_swap_cond(op->args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
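            /* Put a constant comparison operand second, as for
               brcond/setcond above, swapping the condition to match. */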
            if (swap_commutative(-1, &op->args[1], &op->args[2])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation. */
            if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
                op->args[5] = tcg_invert_cond(op->args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(op->args[0], &op->args[2], &op->args[4]);
            swap_commutative(op->args[1], &op->args[3], &op->args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(op->args[0], &op->args[2], &op->args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&op->args[0], &op->args[2])) {
                op->args[4] = tcg_swap_cond(op->args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&op->args[1], &op->args[3])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for the "shift/rot r, 0, a => movi r, 0"
           and "sub r, 0, a => neg r, a" cases. */
        switch (opc) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
                continue;
            }
            break;
        CASE_OP_32_64_VEC(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (arg_is_const(op->args[2])) {
                    /* Proceed with possible constant folding. */
                    break;
                }
                if (opc == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else if (opc == INDEX_op_sub_i64) {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                } else if (TCG_TARGET_HAS_neg_vec) {
                    TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
                    unsigned vece = TCGOP_VECE(op);
                    neg_op = INDEX_op_neg_vec;
                    have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
                } else {
                    break;
                }
                if (!have_neg) {
                    break;
                }
                if (arg_is_const(op->args[1])
                    && arg_info(op->args[1])->val == 0) {
                    op->opc = neg_op;
                    reset_temp(op->args[0]);
                    op->args[1] = op->args[2];
                    continue;
                }
            }
            break;
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64(nand):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64_VEC(andc):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64_VEC(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_VECTOR) {
                    not_op = INDEX_op_not_vec;
                    have_not = TCG_TARGET_HAS_not_vec;
                } else if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                op->opc = not_op;
                reset_temp(op->args[0]);
                op->args[1] = op->args[i];
                continue;
            }
        default:
            break;
        }

        /* Simplify expression for "op r, a, const => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(add):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify using known-zero bits.  Currently only ops with a single
           output argument are supported. */
        mask = -1;
        affected = -1;
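        /* For example, "and t0, t1, $0xff" leaves at most the low byte
           nonzero, so the result mask is (mask of t1) & 0xff.  If the op
           discards no possibly-set input bit (affected == 0) it
           degenerates to a mov; if the result has no possibly-set bit
           (partmask == 0) it folds to zero.  Both checks follow below. */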
        switch (opc) {
        CASE_OP_32_64(ext8s):
            if ((arg_info(op->args[1])->mask & 0x80) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = arg_info(op->args[2])->mask;
            if (arg_is_const(op->args[2])) {
        and_const:
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            mask = arg_info(op->args[1])->mask & mask;
            break;

        case INDEX_op_ext_i32_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        case INDEX_op_extu_i32_i64:
            /* We do not compute affected as it is a size changing op. */
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;

        CASE_OP_32_64(andc):
            /* Known zeros do not imply known ones.  Therefore unless
               op->args[2] is constant, we can't infer anything from it. */
            if (arg_is_const(op->args[2])) {
                mask = ~arg_info(op->args[2])->mask;
                goto and_const;
            }
            /* But we do know that no bits outside args[1]'s mask
               may be set. */
            mask = arg_info(op->args[1])->mask;
            break;

        case INDEX_op_sar_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_extrl_i64_i32:
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;
        case INDEX_op_extrh_i64_i32:
            mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
            break;

        CASE_OP_32_64(shl):
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
                mask = arg_info(op->args[1])->mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost. */
            mask = -(arg_info(op->args[1])->mask
                     & -arg_info(op->args[1])->mask);
            break;

        CASE_OP_32_64(deposit):
            mask = deposit64(arg_info(op->args[1])->mask,
                             op->args[3], op->args[4],
                             arg_info(op->args[2])->mask);
            break;

        CASE_OP_32_64(extract):
            mask = extract64(arg_info(op->args[1])->mask,
                             op->args[2], op->args[3]);
            if (op->args[2] == 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;
        CASE_OP_32_64(sextract):
            mask = sextract64(arg_info(op->args[1])->mask,
                              op->args[2], op->args[3]);
            if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
            break;

        case INDEX_op_clz_i32:
        case INDEX_op_ctz_i32:
            mask = arg_info(op->args[2])->mask | 31;
            break;

        case INDEX_op_clz_i64:
        case INDEX_op_ctz_i64:
            mask = arg_info(op->args[2])->mask | 63;
            break;

        case INDEX_op_ctpop_i32:
            mask = 32 | 31;
            break;
        case INDEX_op_ctpop_i64:
            mask = 64 | 63;
            break;

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;
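
        /* Unsigned guest loads zero-extend: e.g. a 2-byte load has
           MO_SIZE == 1, giving mask = (2ULL << 15) - 1 = 0xffff. */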
        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
                MemOp mop = get_memop(oi);
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }

        /* 32-bit ops generate 32-bit results.  For the purposes of the
           "result is zero" test below we can ignore the high bits, but
           for further optimizations we need to record that the high
           bits contain garbage. */
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }

        if (partmask == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
            continue;
        }
        if (affected == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if (arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(and):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(xor):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted for arguments by the
           register allocator where needed and possible.  Also detect
           copies. */
        switch (opc) {
        CASE_OP_32_64_VEC(mov):
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            break;
        CASE_OP_32_64(movi):
        case INDEX_op_dupi_vec:
            tcg_opt_gen_movi(s, &temps_used, op, op->args[0], op->args[1]);
            break;

        case INDEX_op_dup_vec:
            if (arg_is_const(op->args[1])) {
                tmp = arg_info(op->args[1])->val;
                tmp = dup_const(TCGOP_VECE(op), tmp);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        case INDEX_op_dup2_vec:
            assert(TCG_TARGET_REG_BITS == 32);
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[1])->val;
                if (tmp == arg_info(op->args[2])->val) {
                    tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                    break;
                }
            } else if (args_are_copies(op->args[1], op->args[2])) {
                op->opc = INDEX_op_dup_vec;
                TCGOP_VECE(op) = MO_32;
                nb_iargs = 1;
            }
            goto do_default;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        CASE_OP_32_64(ctpop):
        CASE_OP_32_64(bswap16):
        CASE_OP_32_64(bswap32):
        case INDEX_op_bswap64_i64:
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
        case INDEX_op_ext_i32_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            if (arg_is_const(op->args[1])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
                                          arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;
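
        /* A constant nonzero input folds completely; a constant zero
           input makes the result the fallback value in args[2]. */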
        CASE_OP_32_64(clz):
        CASE_OP_32_64(ctz):
            if (arg_is_const(op->args[1])) {
                TCGArg v = arg_info(op->args[1])->val;
                if (v != 0) {
                    tmp = do_constant_folding(opc, v, 0);
                    tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                } else {
                    tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = deposit64(arg_info(op->args[1])->val,
                                op->args[3], op->args[4],
                                arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract):
            if (arg_is_const(op->args[1])) {
                tmp = extract64(arg_info(op->args[1])->val,
                                op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(sextract):
            if (arg_is_const(op->args[1])) {
                tmp = sextract64(arg_info(op->args[1])->val,
                                 op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract2):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                uint64_t v1 = arg_info(op->args[1])->val;
                uint64_t v2 = arg_info(op->args[2])->val;
                int shr = op->args[3];

                if (opc == INDEX_op_extract2_i64) {
                    tmp = (v1 >> shr) | (v2 << (64 - shr));
                } else {
                    tmp = (int32_t)(((uint32_t)v1 >> shr) |
                                    ((uint32_t)v2 << (32 - shr)));
                }
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, op->args[0],
                                           op->args[1], op->args[2]);
            if (tmp != 2) {
                if (tmp) {
                    memset(&temps_used, 0, sizeof(temps_used));
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[3];
                } else {
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[5]);
            if (tmp != 2) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
                break;
            }
            if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
                uint64_t tv = arg_info(op->args[3])->val;
                uint64_t fv = arg_info(op->args[4])->val;
                TCGCond cond = op->args[5];

                if (fv == 1 && tv == 0) {
                    cond = tcg_invert_cond(cond);
                } else if (!(tv == 1 && fv == 0)) {
                    goto do_default;
                }
                op->args[3] = cond;
                op->opc = opc = (opc == INDEX_op_movcond_i32
                                 ? INDEX_op_setcond_i32
                                 : INDEX_op_setcond_i64);
                nb_iargs = 2;
            }
            goto do_default;
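
        /* Fold double-word arithmetic: the mov inserted before the op
           carries the high half of the 64-bit result. */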
        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
                && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
                uint32_t al = arg_info(op->args[2])->val;
                uint32_t ah = arg_info(op->args[3])->val;
                uint32_t bl = arg_info(op->args[4])->val;
                uint32_t bh = arg_info(op->args[5])->val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)a);
                tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(a >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
                uint32_t a = arg_info(op->args[2])->val;
                uint32_t b = arg_info(op->args[3])->val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)r);
                tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(r >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
                                            op->args[4]);
            if (tmp != 2) {
                if (tmp) {
        do_brcond_true:
                    memset(&temps_used, 0, sizeof(temps_used));
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[5];
                } else {
        do_brcond_false:
                    tcg_op_remove(s, op);
                }
            } else if ((op->args[4] == TCG_COND_LT
                        || op->args[4] == TCG_COND_GE)
                       && arg_is_const(op->args[2])
                       && arg_info(op->args[2])->val == 0
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
        do_brcond_high:
                memset(&temps_used, 0, sizeof(temps_used));
                op->opc = INDEX_op_brcond_i32;
                op->args[0] = op->args[1];
                op->args[1] = op->args[3];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
        do_brcond_low:
                memset(&temps_used, 0, sizeof(temps_used));
                op->opc = INDEX_op_brcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
                                            op->args[5]);
            if (tmp != 2) {
        do_setcond_const:
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
            } else if ((op->args[5] == TCG_COND_LT
                        || op->args[5] == TCG_COND_GE)
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0
                       && arg_is_const(op->args[4])
                       && arg_info(op->args[4])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
        do_setcond_high:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
        do_setcond_low:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[2] = op->args[3];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;
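
        /* Unless the call is known not to write globals, it may modify
           any of them, so drop everything known about the globals. */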
        case INDEX_op_call:
            if (!(op->args[nb_oargs + nb_iargs + 1]
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    if (test_bit(i, temps_used.l)) {
                        reset_ts(&s->temps[i]);
                    }
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute its result), so no propagation is done.
               We trash everything if the operation is the end of a basic
               block, otherwise we only trash the output args.  "mask" is
               the non-zero bits mask for the first output arg. */
            if (def->flags & TCG_OPF_BB_END) {
                memset(&temps_used, 0, sizeof(temps_used));
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(op->args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far). */
                    if (i == 0) {
                        arg_info(op->args[i])->mask = mask;
                    }
                }
            }
            break;
        }

        /* Eliminate duplicate and redundant fence instructions. */
        if (prev_mb) {
            switch (opc) {
            case INDEX_op_mb:
                /* Merge two barriers of the same type into one,
                 * or a weaker barrier into a stronger one,
                 * or two weaker barriers into a stronger one.
                 *   mb X; mb Y => mb X|Y
                 *   mb; strl => mb; st
                 *   ldaq; mb => ld; mb
                 *   ldaq; strl => ld; mb; st
                 * Other combinations are also merged into a strong
                 * barrier.  This is stricter than specified but for
                 * the purposes of TCG is better than not optimizing.
                 */
                prev_mb->args[0] |= op->args[0];
                tcg_op_remove(s, op);
                break;

            default:
                /* Opcodes that end the block stop the optimization. */
                if ((def->flags & TCG_OPF_BB_END) == 0) {
                    break;
                }
                /* fallthru */
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_call:
                /* Opcodes that touch guest memory stop the optimization. */
                prev_mb = NULL;
                break;
            }
        } else if (opc == INDEX_op_mb) {
            prev_mb = op;
        }
    }
}
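
/*
 * tcg_optimize() is invoked once per translation block from
 * tcg_gen_code(), ahead of liveness analysis and register allocation.
 */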