/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg-internal.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)

typedef struct TempOptInfo {
    bool is_const;        /* the temp is known to hold the constant VAL */
    TCGTemp *prev_copy;   /* doubly linked circular list of copies */
    TCGTemp *next_copy;
    uint64_t val;
    uint64_t mask;        /* bits that may be nonzero (known-zero info) */
} TempOptInfo;

static inline TempOptInfo *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}

static inline TempOptInfo *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}

static inline bool ts_is_const(TCGTemp *ts)
{
    return ts_info(ts)->is_const;
}

static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}

static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}

/* Reset TEMP's state, possibly removing the temp from the list of copies.  */
static void reset_ts(TCGTemp *ts)
{
    TempOptInfo *ti = ts_info(ts);
    TempOptInfo *pi = ts_info(ti->prev_copy);
    TempOptInfo *ni = ts_info(ti->next_copy);

    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->is_const = false;
    ti->mask = -1;
}

static void reset_temp(TCGArg arg)
{
    reset_ts(arg_temp(arg));
}

/* Initialize and activate a temporary.  */
static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
{
    size_t idx = temp_idx(ts);
    TempOptInfo *ti;

    if (test_bit(idx, temps_used->l)) {
        return;
    }
    set_bit(idx, temps_used->l);

    ti = ts->state_ptr;
    if (ti == NULL) {
        ti = tcg_malloc(sizeof(TempOptInfo));
        ts->state_ptr = ti;
    }

    ti->next_copy = ts;
    ti->prev_copy = ts;
    if (ts->kind == TEMP_CONST) {
        ti->is_const = true;
        ti->val = ts->val;
        ti->mask = ts->val;
        if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
            /* High bits of a 32-bit quantity are garbage.  */
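            /* Example: a 32-bit constant 0x80000000 kept in a 64-bit host
               register may hold anything in bits 63:32, so those bits must
               remain "possibly nonzero" in the mask.  */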
            ti->mask |= ~0xffffffffull;
        }
    } else {
        ti->is_const = false;
        ti->mask = -1;
    }
}

static void init_arg_info(TCGTempSet *temps_used, TCGArg arg)
{
    init_ts_info(temps_used, arg_temp(arg));
}

static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
{
    TCGTemp *i, *g, *l;

    /* If this is already readonly, we can't do better.  */
    if (temp_readonly(ts)) {
        return ts;
    }

    g = l = NULL;
    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
        if (temp_readonly(i)) {
            return i;
        } else if (i->kind > ts->kind) {
            if (i->kind == TEMP_GLOBAL) {
                g = i;
            } else if (i->kind == TEMP_LOCAL) {
                l = i;
            }
        }
    }

    /* If we didn't find a better representation, return the same temp.  */
    return g ? g : l ? l : ts;
}

static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
{
    TCGTemp *i;

    if (ts1 == ts2) {
        return true;
    }

    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
        return false;
    }

    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
        if (i == ts2) {
            return true;
        }
    }

    return false;
}

static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}

static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    const TCGOpDef *def;
    TempOptInfo *di;
    TempOptInfo *si;
    uint64_t mask;
    TCGOpcode new_op;

    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(s, op);
        return;
    }

    reset_ts(dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);
    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_mov_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_mov_i64;
    } else {
        new_op = INDEX_op_mov_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = src;

    mask = si->mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;

    if (src_ts->type == dst_ts->type) {
        TempOptInfo *ni = ts_info(si->next_copy);

        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;
    }
}

static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
                             TCGOp *op, TCGArg dst, uint64_t val)
{
    const TCGOpDef *def = &tcg_op_defs[op->opc];
    TCGType type;
    TCGTemp *tv;

    if (def->flags & TCG_OPF_VECTOR) {
        type = TCGOP_VECL(op) + TCG_TYPE_V64;
    } else if (def->flags & TCG_OPF_64BIT) {
        type = TCG_TYPE_I64;
    } else {
        type = TCG_TYPE_I32;
    }

    /* Convert movi to mov with constant temp.  */
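    /* tcg_constant_internal() returns a shared, read-only TEMP_CONST temp
       for this (type, value) pair, so the mov below also records DST as a
       copy of that constant.  */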
    tv = tcg_constant_internal(type, val);
    init_ts_info(temps_used, tv);
    tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
}

static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    case INDEX_op_clz_i32:
        return (uint32_t)x ? clz32(x) : y;

    case INDEX_op_clz_i64:
        return x ? clz64(x) : y;

    case INDEX_op_ctz_i32:
        return (uint32_t)x ? ctz32(x) : y;

    case INDEX_op_ctz_i64:
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop_i32:
        return ctpop32(x);

    case INDEX_op_ctpop_i64:
        return ctpop64(x);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    CASE_OP_32_64(bswap16):
        x = bswap16(x);
        return y & TCG_BSWAP_OS ? (int16_t)x : x;

    CASE_OP_32_64(bswap32):
        x = bswap32(x);
        return y & TCG_BSWAP_OS ? (int32_t)x : x;

    case INDEX_op_bswap64_i64:
        return bswap64(x);

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined.
           ("y ? : 1" is the GNU ?: extension, i.e. "y ? y : 1".)  */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}

static uint64_t do_constant_folding(TCGOpcode op, uint64_t x, uint64_t y)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    uint64_t res = do_constant_folding_2(op, x, y);
    if (!(def->flags & TCG_OPF_64BIT)) {
        res = (int32_t)res;
    }
    return res;
}

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    uint64_t xv = arg_info(x)->val;
    uint64_t yv = arg_info(y)->val;

    if (arg_is_const(x) && arg_is_const(y)) {
        const TCGOpDef *def = &tcg_op_defs[op];
        tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
        if (def->flags & TCG_OPF_64BIT) {
            return do_constant_folding_cond_64(xv, yv, c);
        } else {
            return do_constant_folding_cond_32(xv, yv, c);
        }
    } else if (args_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (arg_is_const(y) && yv == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    }
    return 2;
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_info(bl)->val;
        tcg_target_ulong bhv = arg_info(bh)->val;
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_info(al)->val;
            tcg_target_ulong ahv = arg_info(ah)->val;
            uint64_t a = deposit64(alv, 32, 32, ahv);
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += arg_is_const(a1);
    sum -= arg_is_const(a2);

    /* Prefer the constant in the second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts.  */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += arg_is_const(p1[0]);
    sum += arg_is_const(p1[1]);
    sum -= arg_is_const(p2[0]);
    sum -= arg_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

/* Propagate constants and copies, fold constant expressions.  */
void tcg_optimize(TCGContext *s)
{
    int nb_temps, nb_globals, i;
    TCGOp *op, *op_next, *prev_mb = NULL;
    TCGTempSet temps_used;

    /* Each temp has a TempOptInfo.  If the temp holds a constant, its
       value is kept there.  If the temp is a copy of other temps, the
       copies are reachable through its doubly linked circular list.  */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;

    memset(&temps_used, 0, sizeof(temps_used));
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        uint64_t mask, partmask, affected, tmp;
        int nb_oargs, nb_iargs;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        /* Count the arguments, and initialize the temps that are
           going to be used.  */
        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                TCGTemp *ts = arg_temp(op->args[i]);
                if (ts) {
                    init_ts_info(&temps_used, ts);
                }
            }
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                init_arg_info(&temps_used, op->args[i]);
            }
        }

        /* Do copy propagation.  */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            if (ts && ts_is_copy(ts)) {
                op->args[i] = temp_arg(find_better_copy(s, ts));
            }
        }

        /* For commutative operations make constant second argument.  */
        switch (opc) {
        CASE_OP_32_64_VEC(add):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(op->args[0], &op->args[1], &op->args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &op->args[0], &op->args[1])) {
                op->args[2] = tcg_swap_cond(op->args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
                op->args[3] = tcg_swap_cond(op->args[3]);
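                /* e.g. "setcond_i32 r, $5, a, lt" becomes
                   "setcond_i32 r, a, $5, gt".  */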
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &op->args[1], &op->args[2])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation.  */
            if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
                op->args[5] = tcg_invert_cond(op->args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(op->args[0], &op->args[2], &op->args[4]);
            swap_commutative(op->args[1], &op->args[3], &op->args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(op->args[0], &op->args[2], &op->args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&op->args[0], &op->args[2])) {
                op->args[4] = tcg_swap_cond(op->args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&op->args[1], &op->args[3])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
           and "sub r, 0, a => neg r, a" cases.  */
        switch (opc) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
                continue;
            }
            break;
        CASE_OP_32_64_VEC(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (arg_is_const(op->args[2])) {
                    /* Proceed with possible constant folding.  */
                    break;
                }
                if (opc == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else if (opc == INDEX_op_sub_i64) {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                } else if (TCG_TARGET_HAS_neg_vec) {
                    TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
                    unsigned vece = TCGOP_VECE(op);
                    neg_op = INDEX_op_neg_vec;
                    have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
                } else {
                    break;
                }
                if (!have_neg) {
                    break;
                }
                if (arg_is_const(op->args[1])
                    && arg_info(op->args[1])->val == 0) {
                    op->opc = neg_op;
                    reset_temp(op->args[0]);
                    op->args[1] = op->args[2];
                    continue;
                }
            }
            break;
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64(nand):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64_VEC(andc):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64_VEC(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_VECTOR) {
                    not_op = INDEX_op_not_vec;
                    have_not = TCG_TARGET_HAS_not_vec;
                } else if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                op->opc = not_op;
                reset_temp(op->args[0]);
                op->args[1] = op->args[i];
                continue;
            }
        default:
            break;
        }

        /* Simplify expression for "op r, a, const => mov r, a" cases.  */
        switch (opc) {
        CASE_OP_32_64_VEC(add):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify using known-zero bits.  Currently only ops with a single
           output argument are supported.  */
        mask = -1;
        affected = -1;
        switch (opc) {
        CASE_OP_32_64(ext8s):
            if ((arg_info(op->args[1])->mask & 0x80) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = arg_info(op->args[2])->mask;
            if (arg_is_const(op->args[2])) {
        and_const:
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            mask = arg_info(op->args[1])->mask & mask;
            break;

        case INDEX_op_ext_i32_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        case INDEX_op_extu_i32_i64:
            /* We do not compute affected as it is a size-changing op.  */
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros do not imply known-ones.  Therefore unless
               op->args[2] is constant, we can't infer anything from it.  */
            if (arg_is_const(op->args[2])) {
                mask = ~arg_info(op->args[2])->mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set.  */
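            /* Example: with args[1]'s mask 0x00ff, the andc result mask
               stays 0x00ff regardless of args[2].  */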
            mask = arg_info(op->args[1])->mask;
            break;

        case INDEX_op_sar_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_extrl_i64_i32:
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;
        case INDEX_op_extrh_i64_i32:
            mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
            break;

        CASE_OP_32_64(shl):
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
                mask = arg_info(op->args[1])->mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost set bit.  */
            mask = -(arg_info(op->args[1])->mask
                     & -arg_info(op->args[1])->mask);
            break;

        CASE_OP_32_64(deposit):
            mask = deposit64(arg_info(op->args[1])->mask,
                             op->args[3], op->args[4],
                             arg_info(op->args[2])->mask);
            break;

        CASE_OP_32_64(extract):
            mask = extract64(arg_info(op->args[1])->mask,
                             op->args[2], op->args[3]);
            if (op->args[2] == 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;
        CASE_OP_32_64(sextract):
            mask = sextract64(arg_info(op->args[1])->mask,
                              op->args[2], op->args[3]);
            if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
            break;

        case INDEX_op_clz_i32:
        case INDEX_op_ctz_i32:
            mask = arg_info(op->args[2])->mask | 31;
            break;

        case INDEX_op_clz_i64:
        case INDEX_op_ctz_i64:
            mask = arg_info(op->args[2])->mask | 63;
            break;

        case INDEX_op_ctpop_i32:
            mask = 32 | 31;
            break;
        case INDEX_op_ctpop_i64:
            mask = 64 | 63;
            break;

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
                MemOp mop = get_memop(oi);
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        CASE_OP_32_64(bswap16):
            mask = arg_info(op->args[1])->mask;
            if (mask <= 0xffff) {
                op->args[2] |= TCG_BSWAP_IZ;
            }
            mask = bswap16(mask);
            switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
            case TCG_BSWAP_OZ:
                break;
            case TCG_BSWAP_OS:
                mask = (int16_t)mask;
                break;
            default:    /* undefined high bits */
                mask |= MAKE_64BIT_MASK(16, 48);
                break;
            }
            break;

        case INDEX_op_bswap32_i64:
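            /* As for bswap16 above, but swapping a full 32-bit value
               within a 64-bit result.  */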
            mask = arg_info(op->args[1])->mask;
            if (mask <= 0xffffffffu) {
                op->args[2] |= TCG_BSWAP_IZ;
            }
            mask = bswap32(mask);
            switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
            case TCG_BSWAP_OZ:
                break;
            case TCG_BSWAP_OS:
                mask = (int32_t)mask;
                break;
            default:    /* undefined high bits */
                mask |= MAKE_64BIT_MASK(32, 32);
                break;
            }
            break;

        default:
            break;
        }

        /* 32-bit ops generate 32-bit results.  For the result-is-zero test
           below, we can ignore high bits, but for further optimizations we
           need to record that the high bits contain garbage.  */
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }

        if (partmask == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
            continue;
        }
        if (affected == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases.  */
        switch (opc) {
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if (arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases.  */
        switch (opc) {
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(and):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases.  */
        switch (opc) {
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(xor):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted for arguments by the
           register allocator where needed and possible.  Also detect
           copies.  */
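        /* Illustrative example: if t0 is known to hold the constant 6,
           "add_i32 t1, t0, t0" folds below into a mov of the constant 12
           via tcg_opt_gen_movi().  */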
        switch (opc) {
        CASE_OP_32_64_VEC(mov):
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            break;

        case INDEX_op_dup_vec:
            if (arg_is_const(op->args[1])) {
                tmp = arg_info(op->args[1])->val;
                tmp = dup_const(TCGOP_VECE(op), tmp);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        case INDEX_op_dup2_vec:
            assert(TCG_TARGET_REG_BITS == 32);
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0],
                                 deposit64(arg_info(op->args[1])->val, 32, 32,
                                           arg_info(op->args[2])->val));
                break;
            } else if (args_are_copies(op->args[1], op->args[2])) {
                op->opc = INDEX_op_dup_vec;
                TCGOP_VECE(op) = MO_32;
                nb_iargs = 1;
            }
            goto do_default;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        CASE_OP_32_64(ctpop):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
        case INDEX_op_ext_i32_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            if (arg_is_const(op->args[1])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(bswap16):
        CASE_OP_32_64(bswap32):
        case INDEX_op_bswap64_i64:
            if (arg_is_const(op->args[1])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
                                          op->args[2]);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
                                          arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(clz):
        CASE_OP_32_64(ctz):
            if (arg_is_const(op->args[1])) {
                TCGArg v = arg_info(op->args[1])->val;
                if (v != 0) {
                    tmp = do_constant_folding(opc, v, 0);
                    tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                } else {
                    tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = deposit64(arg_info(op->args[1])->val,
                                op->args[3], op->args[4],
                                arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract):
            if (arg_is_const(op->args[1])) {
                tmp = extract64(arg_info(op->args[1])->val,
                                op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

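        /* As for extract above, but the result is sign-extended from the
           top bit of the field.  */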
        CASE_OP_32_64(sextract):
            if (arg_is_const(op->args[1])) {
                tmp = sextract64(arg_info(op->args[1])->val,
                                 op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract2):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                uint64_t v1 = arg_info(op->args[1])->val;
                uint64_t v2 = arg_info(op->args[2])->val;
                int shr = op->args[3];

                if (opc == INDEX_op_extract2_i64) {
                    tmp = (v1 >> shr) | (v2 << (64 - shr));
                } else {
                    tmp = (int32_t)(((uint32_t)v1 >> shr) |
                                    ((uint32_t)v2 << (32 - shr)));
                }
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, op->args[0],
                                           op->args[1], op->args[2]);
            if (tmp != 2) {
                if (tmp) {
                    memset(&temps_used, 0, sizeof(temps_used));
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[3];
                } else {
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[5]);
            if (tmp != 2) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
                break;
            }
            if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
                uint64_t tv = arg_info(op->args[3])->val;
                uint64_t fv = arg_info(op->args[4])->val;
                TCGCond cond = op->args[5];

                if (fv == 1 && tv == 0) {
                    cond = tcg_invert_cond(cond);
                } else if (!(tv == 1 && fv == 0)) {
                    goto do_default;
                }
                op->args[3] = cond;
                op->opc = opc = (opc == INDEX_op_movcond_i32
                                 ? INDEX_op_setcond_i32
                                 : INDEX_op_setcond_i64);
                nb_iargs = 2;
            }
            goto do_default;

        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
                && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
                uint32_t al = arg_info(op->args[2])->val;
                uint32_t ah = arg_info(op->args[3])->val;
                uint32_t bl = arg_info(op->args[4])->val;
                uint32_t bh = arg_info(op->args[5])->val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)a);
                tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(a >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
                uint32_t a = arg_info(op->args[2])->val;
                uint32_t b = arg_info(op->args[3])->val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)r);
                tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(r >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
                                            op->args[4]);
            if (tmp != 2) {
                if (tmp) {
            do_brcond_true:
                    memset(&temps_used, 0, sizeof(temps_used));
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[5];
                } else {
            do_brcond_false:
                    tcg_op_remove(s, op);
                }
            } else if ((op->args[4] == TCG_COND_LT
                        || op->args[4] == TCG_COND_GE)
                       && arg_is_const(op->args[2])
                       && arg_info(op->args[2])->val == 0
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
            do_brcond_high:
                memset(&temps_used, 0, sizeof(temps_used));
                op->opc = INDEX_op_brcond_i32;
                op->args[0] = op->args[1];
                op->args[1] = op->args[3];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_brcond_low:
                memset(&temps_used, 0, sizeof(temps_used));
                op->opc = INDEX_op_brcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified.  */
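                /* If the low halves are known unequal, the whole NE is
                   known true; if they are known equal, only the high
                   half matters.  */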
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
                                            op->args[5]);
            if (tmp != 2) {
            do_setcond_const:
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
            } else if ((op->args[5] == TCG_COND_LT
                        || op->args[5] == TCG_COND_GE)
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0
                       && arg_is_const(op->args[4])
                       && arg_info(op->args[4])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
            do_setcond_high:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_setcond_low:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[2] = op->args[3];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_call:
            if (!(tcg_call_flags(op)
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    if (test_bit(i, temps_used.l)) {
                        reset_ts(&s->temps[i]);
                    }
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute the operation result), so no propagation
               is done.  We trash everything if the operation is the end
               of a basic block, otherwise we only trash the output args.
               "mask" is the non-zero bits mask for the first output arg.  */
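            /* Example: a branch or label ends the basic block; since
               another path may reach the following op, all constant and
               copy information must be discarded there.  */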
            if (def->flags & TCG_OPF_BB_END) {
                memset(&temps_used, 0, sizeof(temps_used));
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(op->args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far).  */
                    if (i == 0) {
                        arg_info(op->args[i])->mask = mask;
                    }
                }
            }
            break;
        }

        /* Eliminate duplicate and redundant fence instructions.  */
        if (prev_mb) {
            switch (opc) {
            case INDEX_op_mb:
                /* Merge two barriers of the same type into one,
                 * or a weaker barrier into a stronger one,
                 * or two weaker barriers into a stronger one.
                 *   mb X; mb Y => mb X|Y
                 *   mb; strl => mb; st
                 *   ldaq; mb => ld; mb
                 *   ldaq; strl => ld; mb; st
                 * Other combinations are also merged into a strong
                 * barrier.  This is stricter than specified but for
                 * the purposes of TCG is better than not optimizing.
                 */
                prev_mb->args[0] |= op->args[0];
                tcg_op_remove(s, op);
                break;

            default:
                /* Opcodes that end the block stop the optimization.  */
                if ((def->flags & TCG_OPF_BB_END) == 0) {
                    break;
                }
                /* fallthru */
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_call:
                /* Opcodes that touch guest memory stop the optimization.  */
                prev_mb = NULL;
                break;
            }
        } else if (opc == INDEX_op_mb) {
            prev_mb = op;
        }
    }
}