/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "exec/cpu-common.h"
#include "tcg-op.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)

/* Per-temporary optimizer state: known constant value, known-zero bits
   mask, and membership in a circular doubly linked list of copies.  */
struct tcg_temp_info {
    bool is_const;
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    tcg_target_ulong val;
    tcg_target_ulong mask;
};

static inline struct tcg_temp_info *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}

static inline struct tcg_temp_info *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}

static inline bool ts_is_const(TCGTemp *ts)
{
    return ts_info(ts)->is_const;
}

static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}

static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}

/* Reset TEMP's state, possibly removing the temp from the list of copies.  */
static void reset_ts(TCGTemp *ts)
{
    struct tcg_temp_info *ti = ts_info(ts);
    struct tcg_temp_info *pi = ts_info(ti->prev_copy);
    struct tcg_temp_info *ni = ts_info(ti->next_copy);

    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->is_const = false;
    ti->mask = -1;
}

static void reset_temp(TCGArg arg)
{
    reset_ts(arg_temp(arg));
}

/* Initialize and activate a temporary.  */
static void init_ts_info(struct tcg_temp_info *infos,
                         TCGTempSet *temps_used, TCGTemp *ts)
{
    size_t idx = temp_idx(ts);
    if (!test_bit(idx, temps_used->l)) {
        struct tcg_temp_info *ti = &infos[idx];

        ts->state_ptr = ti;
        ti->next_copy = ts;
        ti->prev_copy = ts;
        ti->is_const = false;
        ti->mask = -1;
        set_bit(idx, temps_used->l);
    }
}

static void init_arg_info(struct tcg_temp_info *infos,
                          TCGTempSet *temps_used, TCGArg arg)
{
    init_ts_info(infos, temps_used, arg_temp(arg));
}

static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
{
    TCGTemp *i;

    /* If this is already a global, we can't do better.  */
    if (ts->temp_global) {
        return ts;
    }

    /* Search for a global first.  */
    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
        if (i->temp_global) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local.  */
    if (!ts->temp_local) {
        for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
            if (i->temp_local) {
                return i;
            }
        }
    }

    /* Failed to find a better representation; return the same temp.  */
    return ts;
}

static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
{
    TCGTemp *i;

    if (ts1 == ts2) {
        return true;
    }

    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
        return false;
    }

    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
        if (i == ts2) {
            return true;
        }
    }

    return false;
}

static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}

static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
{
    const TCGOpDef *def;
    TCGOpcode new_op;
    tcg_target_ulong mask;
    struct tcg_temp_info *di = arg_info(dst);

    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_dupi_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_movi_i64;
    } else {
        new_op = INDEX_op_movi_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = val;

    reset_temp(dst);
    di->is_const = true;
    di->val = val;
    mask = val;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;
}

static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    const TCGOpDef *def;
    struct tcg_temp_info *di;
    struct tcg_temp_info *si;
    tcg_target_ulong mask;
    TCGOpcode new_op;

    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(s, op);
        return;
    }

    reset_ts(dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);
    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_mov_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_mov_i64;
    } else {
        new_op = INDEX_op_mov_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = src;

    mask = si->mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;

    if (src_ts->type == dst_ts->type) {
        struct tcg_temp_info *ni = ts_info(si->next_copy);

        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;
    }
}

static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    case INDEX_op_clz_i32:
        return (uint32_t)x ? clz32(x) : y;

    case INDEX_op_clz_i64:
        return x ? clz64(x) : y;

    case INDEX_op_ctz_i32:
        return (uint32_t)x ? ctz32(x) : y;

    case INDEX_op_ctz_i64:
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop_i32:
        return ctpop32(x);

    case INDEX_op_ctpop_i64:
        return ctpop64(x);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    CASE_OP_32_64(bswap16):
        return bswap16(x);

    CASE_OP_32_64(bswap32):
        return bswap32(x);

    case INDEX_op_bswap64_i64:
        return bswap64(x);

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined.  */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}

static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    TCGArg res = do_constant_folding_2(op, x, y);
    if (!(def->flags & TCG_OPF_64BIT)) {
        res = (int32_t)res;
    }
    return res;
}

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    tcg_target_ulong xv = arg_info(x)->val;
    tcg_target_ulong yv = arg_info(y)->val;
    if (arg_is_const(x) && arg_is_const(y)) {
        const TCGOpDef *def = &tcg_op_defs[op];
        tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
        if (def->flags & TCG_OPF_64BIT) {
            return do_constant_folding_cond_64(xv, yv, c);
        } else {
            return do_constant_folding_cond_32(xv, yv, c);
        }
    } else if (args_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (arg_is_const(y) && yv == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    }
    return 2;
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_info(bl)->val;
        tcg_target_ulong bhv = arg_info(bh)->val;
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_info(al)->val;
            tcg_target_ulong ahv = arg_info(ah)->val;
            uint64_t a = deposit64(alv, 32, 32, ahv);
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += arg_is_const(a1);
    sum -= arg_is_const(a2);

    /* Prefer the constant in the second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts.  */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += arg_is_const(p1[0]);
    sum += arg_is_const(p1[1]);
    sum -= arg_is_const(p2[0]);
    sum -= arg_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

/* Propagate constants and copies, fold constant expressions.  */
void tcg_optimize(TCGContext *s)
{
    int nb_temps, nb_globals;
    TCGOp *op, *op_next, *prev_mb = NULL;
    struct tcg_temp_info *infos;
    TCGTempSet temps_used;

    /* The INFOS array has an element for each temp.
       If a temp holds a constant, its value is kept in that element.
       If a temp is a copy of other temps, the other copies are
       available through a doubly linked circular list.  */
    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    bitmap_zero(temps_used.l, nb_temps);
    infos = tcg_malloc(sizeof(struct tcg_temp_info) * nb_temps);

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        tcg_target_ulong mask, partmask, affected;
        int nb_oargs, nb_iargs, i;
        TCGArg tmp;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        /* Count the arguments, and initialize the temps that are
           going to be used.  */
        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                TCGTemp *ts = arg_temp(op->args[i]);
                if (ts) {
                    init_ts_info(infos, &temps_used, ts);
                }
            }
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                init_arg_info(infos, &temps_used, op->args[i]);
            }
        }

        /* Do copy propagation.  */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            if (ts && ts_is_copy(ts)) {
                op->args[i] = temp_arg(find_better_copy(s, ts));
            }
        }

        /* For commutative operations, make the constant the second argument.  */
        switch (opc) {
        CASE_OP_32_64_VEC(add):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(op->args[0], &op->args[1], &op->args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &op->args[0], &op->args[1])) {
                op->args[2] = tcg_swap_cond(op->args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
                op->args[3] = tcg_swap_cond(op->args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &op->args[1], &op->args[2])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation.  */
            if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
                op->args[5] = tcg_invert_cond(op->args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(op->args[0], &op->args[2], &op->args[4]);
            swap_commutative(op->args[1], &op->args[3], &op->args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(op->args[0], &op->args[2], &op->args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&op->args[0], &op->args[2])) {
                op->args[4] = tcg_swap_cond(op->args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&op->args[1], &op->args[3])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for the "shift/rot r, 0, a => movi r, 0"
           and "sub r, 0, a => neg r, a" cases.  */
        switch (opc) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        CASE_OP_32_64_VEC(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (arg_is_const(op->args[2])) {
                    /* Proceed with possible constant folding.  */
                    break;
                }
                if (opc == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else if (opc == INDEX_op_sub_i64) {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                } else if (TCG_TARGET_HAS_neg_vec) {
                    TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
                    unsigned vece = TCGOP_VECE(op);
                    neg_op = INDEX_op_neg_vec;
                    have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
                } else {
                    break;
                }
                if (!have_neg) {
                    break;
                }
                if (arg_is_const(op->args[1])
                    && arg_info(op->args[1])->val == 0) {
                    op->opc = neg_op;
                    reset_temp(op->args[0]);
                    op->args[1] = op->args[2];
                    continue;
                }
            }
            break;
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64(nand):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64_VEC(andc):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64_VEC(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_VECTOR) {
                    not_op = INDEX_op_not_vec;
                    have_not = TCG_TARGET_HAS_not_vec;
                } else if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                op->opc = not_op;
                reset_temp(op->args[0]);
                op->args[1] = op->args[i];
                continue;
            }
        default:
            break;
        }

        /* Simplify expressions for "op r, a, const => mov r, a" cases.  */
        switch (opc) {
        CASE_OP_32_64_VEC(add):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify using known-zero bits.  Currently only ops with a single
           output argument are supported.  */
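        /*
         * Note on the conventions below: "mask" accumulates the bits of the
         * first output that may still be nonzero after the op, and
         * "affected" accumulates the input bits that can influence the
         * result.  For example, after "and_i32 r, a, $0xff" the mask of r
         * is at most 0xff; if a's mask already fits within 0xff, then
         * "affected" becomes 0 and the op degenerates into a mov (see the
         * affected == 0 check further down).
         */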
        mask = -1;
        affected = -1;
        switch (opc) {
        CASE_OP_32_64(ext8s):
            if ((arg_info(op->args[1])->mask & 0x80) != 0) {
                break;
            }
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
                break;
            }
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = arg_info(op->args[2])->mask;
            if (arg_is_const(op->args[2])) {
        and_const:
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            mask = arg_info(op->args[1])->mask & mask;
            break;

        case INDEX_op_ext_i32_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_extu_i32_i64:
            /* We do not compute affected as it is a size changing op.  */
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros does not imply known-ones.  Therefore unless
               op->args[2] is constant, we can't infer anything from it.  */
            if (arg_is_const(op->args[2])) {
                mask = ~arg_info(op->args[2])->mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set.  */
            mask = arg_info(op->args[1])->mask;
            break;

        case INDEX_op_sar_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_extrl_i64_i32:
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;
        case INDEX_op_extrh_i64_i32:
            mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
            break;

        CASE_OP_32_64(shl):
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
                mask = arg_info(op->args[1])->mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost one.  */
            mask = -(arg_info(op->args[1])->mask
                     & -arg_info(op->args[1])->mask);
            break;

        CASE_OP_32_64(deposit):
            mask = deposit64(arg_info(op->args[1])->mask,
                             op->args[3], op->args[4],
                             arg_info(op->args[2])->mask);
            break;

        CASE_OP_32_64(extract):
            mask = extract64(arg_info(op->args[1])->mask,
                             op->args[2], op->args[3]);
            if (op->args[2] == 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;
        CASE_OP_32_64(sextract):
            mask = sextract64(arg_info(op->args[1])->mask,
                              op->args[2], op->args[3]);
            if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
            break;

        case INDEX_op_clz_i32:
        case INDEX_op_ctz_i32:
            mask = arg_info(op->args[2])->mask | 31;
            break;

        case INDEX_op_clz_i64:
        case INDEX_op_ctz_i64:
            mask = arg_info(op->args[2])->mask | 63;
            break;

        case INDEX_op_ctpop_i32:
            mask = 32 | 31;
            break;
        case INDEX_op_ctpop_i64:
            mask = 64 | 63;
            break;

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
                TCGMemOp mop = get_memop(oi);
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }

        /* 32-bit ops generate 32-bit results.  For the result-is-zero test
           below, we can ignore high bits, but for further optimizations we
           need to record that the high bits contain garbage.  */
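        /*
         * For example, or_i32 above set mask to the union of its input
         * masks; only the low 32 bits of that value are meaningful, so
         * partmask keeps just those bits for the zero test, while mask
         * marks the high 32 bits as unknown for any later 64-bit reader
         * of the temp.
         */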
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }

        if (partmask == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, op, op->args[0], 0);
            continue;
        }
        if (affected == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            continue;
        }

        /* Simplify expressions for "op r, a, 0 => movi r, 0" cases.  */
        switch (opc) {
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if (arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "op r, a, a => mov r, a" cases.  */
        switch (opc) {
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(and):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "op r, a, a => movi r, 0" cases.  */
        switch (opc) {
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(xor):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted into arguments by the
           register allocator where needed and possible.  Also detect
           copies.  */
        switch (opc) {
        CASE_OP_32_64_VEC(mov):
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            break;
        CASE_OP_32_64(movi):
        case INDEX_op_dupi_vec:
            tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
            break;

        case INDEX_op_dup_vec:
            if (arg_is_const(op->args[1])) {
                tmp = arg_info(op->args[1])->val;
                tmp = dup_const(TCGOP_VECE(op), tmp);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        CASE_OP_32_64(ctpop):
        CASE_OP_32_64(bswap16):
        CASE_OP_32_64(bswap32):
        case INDEX_op_bswap64_i64:
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
        case INDEX_op_ext_i32_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            if (arg_is_const(op->args[1])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
                                          arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(clz):
        CASE_OP_32_64(ctz):
            if (arg_is_const(op->args[1])) {
                TCGArg v = arg_info(op->args[1])->val;
                if (v != 0) {
                    tmp = do_constant_folding(opc, v, 0);
                    tcg_opt_gen_movi(s, op, op->args[0], tmp);
                } else {
                    tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = deposit64(arg_info(op->args[1])->val,
                                op->args[3], op->args[4],
                                arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract):
            if (arg_is_const(op->args[1])) {
                tmp = extract64(arg_info(op->args[1])->val,
                                op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(sextract):
            if (arg_is_const(op->args[1])) {
                tmp = sextract64(arg_info(op->args[1])->val,
                                 op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract2):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                TCGArg v1 = arg_info(op->args[1])->val;
                TCGArg v2 = arg_info(op->args[2])->val;

                if (opc == INDEX_op_extract2_i64) {
                    tmp = (v1 >> op->args[3]) | (v2 << (64 - op->args[3]));
                } else {
                    tmp = (v1 >> op->args[3]) | (v2 << (32 - op->args[3]));
                    tmp = (int32_t)tmp;
                }
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, op->args[0],
                                           op->args[1], op->args[2]);
            if (tmp != 2) {
                if (tmp) {
                    bitmap_zero(temps_used.l, nb_temps);
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[3];
                } else {
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[5]);
            if (tmp != 2) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
                break;
            }
            if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
                tcg_target_ulong tv = arg_info(op->args[3])->val;
                tcg_target_ulong fv = arg_info(op->args[4])->val;
                TCGCond cond = op->args[5];
                if (fv == 1 && tv == 0) {
                    cond = tcg_invert_cond(cond);
                } else if (!(tv == 1 && fv == 0)) {
                    goto do_default;
                }
                op->args[3] = cond;
                op->opc = opc = (opc == INDEX_op_movcond_i32
                                 ? INDEX_op_setcond_i32
                                 : INDEX_op_setcond_i64);
                nb_iargs = 2;
            }
            goto do_default;

        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
                && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
                uint32_t al = arg_info(op->args[2])->val;
                uint32_t ah = arg_info(op->args[3])->val;
                uint32_t bl = arg_info(op->args[4])->val;
                uint32_t bh = arg_info(op->args[5])->val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, op, rl, (int32_t)a);
                tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
                uint32_t a = arg_info(op->args[2])->val;
                uint32_t b = arg_info(op->args[3])->val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, op, rl, (int32_t)r);
                tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
                                            op->args[4]);
            if (tmp != 2) {
                if (tmp) {
            do_brcond_true:
                    bitmap_zero(temps_used.l, nb_temps);
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[5];
                } else {
            do_brcond_false:
                    tcg_op_remove(s, op);
                }
            } else if ((op->args[4] == TCG_COND_LT
                        || op->args[4] == TCG_COND_GE)
                       && arg_is_const(op->args[2])
                       && arg_info(op->args[2])->val == 0
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
            do_brcond_high:
                bitmap_zero(temps_used.l, nb_temps);
                op->opc = INDEX_op_brcond_i32;
                op->args[0] = op->args[1];
                op->args[1] = op->args[3];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_brcond_low:
                bitmap_zero(temps_used.l, nb_temps);
                op->opc = INDEX_op_brcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
                                            op->args[5]);
            if (tmp != 2) {
            do_setcond_const:
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
            } else if ((op->args[5] == TCG_COND_LT
                        || op->args[5] == TCG_COND_GE)
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0
                       && arg_is_const(op->args[4])
                       && arg_info(op->args[4])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
            do_setcond_high:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_setcond_low:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[2] = op->args[3];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_call:
            if (!(op->args[nb_oargs + nb_iargs + 1]
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    if (test_bit(i, temps_used.l)) {
                        reset_ts(&s->temps[i]);
                    }
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute the operation result), so no propagation
               is done.  We trash everything if the operation is the end
               of a basic block, otherwise we only trash the output args.
               "mask" is the non-zero bits mask for the first output arg.  */
            if (def->flags & TCG_OPF_BB_END) {
                bitmap_zero(temps_used.l, nb_temps);
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(op->args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far).  */
                    if (i == 0) {
                        arg_info(op->args[i])->mask = mask;
                    }
                }
            }
            break;
        }

        /* Eliminate duplicate and redundant fence instructions.  */
        if (prev_mb) {
            switch (opc) {
            case INDEX_op_mb:
                /* Merge two barriers of the same type into one,
                 * or a weaker barrier into a stronger one,
                 * or two weaker barriers into a stronger one.
                 *   mb X; mb Y => mb X|Y
                 *   mb; strl => mb; st
                 *   ldaq; mb => ld; mb
                 *   ldaq; strl => ld; mb; st
                 * Other combinations are also merged into a strong
                 * barrier.  This is stricter than specified but for
                 * the purposes of TCG is better than not optimizing.
                 */
                prev_mb->args[0] |= op->args[0];
                tcg_op_remove(s, op);
                break;

            default:
                /* Opcodes that end the block stop the optimization.  */
                if ((def->flags & TCG_OPF_BB_END) == 0) {
                    break;
                }
                /* fallthru */
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_call:
                /* Opcodes that touch guest memory stop the optimization.  */
                prev_mb = NULL;
                break;
            }
        } else if (opc == INDEX_op_mb) {
            prev_mb = op;
        }
    }
}