1 /* 2 * Optimizations for Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2010 Samsung Electronics. 5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include "qemu/osdep.h" 27 #include "qemu-common.h" 28 #include "exec/cpu-common.h" 29 #include "tcg-op.h" 30 31 #define CASE_OP_32_64(x) \ 32 glue(glue(case INDEX_op_, x), _i32): \ 33 glue(glue(case INDEX_op_, x), _i64) 34 35 struct tcg_temp_info { 36 bool is_const; 37 uint16_t prev_copy; 38 uint16_t next_copy; 39 tcg_target_ulong val; 40 tcg_target_ulong mask; 41 }; 42 43 static struct tcg_temp_info temps[TCG_MAX_TEMPS]; 44 static TCGTempSet temps_used; 45 46 static inline bool temp_is_const(TCGArg arg) 47 { 48 return temps[arg].is_const; 49 } 50 51 static inline bool temp_is_copy(TCGArg arg) 52 { 53 return temps[arg].next_copy != arg; 54 } 55 56 /* Reset TEMP's state, possibly removing the temp for the list of copies. */ 57 static void reset_temp(TCGArg temp) 58 { 59 temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; 60 temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; 61 temps[temp].next_copy = temp; 62 temps[temp].prev_copy = temp; 63 temps[temp].is_const = false; 64 temps[temp].mask = -1; 65 } 66 67 /* Reset all temporaries, given that there are NB_TEMPS of them. */ 68 static void reset_all_temps(int nb_temps) 69 { 70 bitmap_zero(temps_used.l, nb_temps); 71 } 72 73 /* Initialize and activate a temporary. */ 74 static void init_temp_info(TCGArg temp) 75 { 76 if (!test_bit(temp, temps_used.l)) { 77 temps[temp].next_copy = temp; 78 temps[temp].prev_copy = temp; 79 temps[temp].is_const = false; 80 temps[temp].mask = -1; 81 set_bit(temp, temps_used.l); 82 } 83 } 84 85 static int op_bits(TCGOpcode op) 86 { 87 const TCGOpDef *def = &tcg_op_defs[op]; 88 return def->flags & TCG_OPF_64BIT ? 64 : 32; 89 } 90 91 static TCGOpcode op_to_mov(TCGOpcode op) 92 { 93 switch (op_bits(op)) { 94 case 32: 95 return INDEX_op_mov_i32; 96 case 64: 97 return INDEX_op_mov_i64; 98 default: 99 fprintf(stderr, "op_to_mov: unexpected return value of " 100 "function op_bits.\n"); 101 tcg_abort(); 102 } 103 } 104 105 static TCGOpcode op_to_movi(TCGOpcode op) 106 { 107 switch (op_bits(op)) { 108 case 32: 109 return INDEX_op_movi_i32; 110 case 64: 111 return INDEX_op_movi_i64; 112 default: 113 fprintf(stderr, "op_to_movi: unexpected return value of " 114 "function op_bits.\n"); 115 tcg_abort(); 116 } 117 } 118 119 static TCGArg find_better_copy(TCGContext *s, TCGArg temp) 120 { 121 TCGArg i; 122 123 /* If this is already a global, we can't do better. */ 124 if (temp < s->nb_globals) { 125 return temp; 126 } 127 128 /* Search for a global first. */ 129 for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) { 130 if (i < s->nb_globals) { 131 return i; 132 } 133 } 134 135 /* If it is a temp, search for a temp local. */ 136 if (!s->temps[temp].temp_local) { 137 for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) { 138 if (s->temps[i].temp_local) { 139 return i; 140 } 141 } 142 } 143 144 /* Failure to find a better representation, return the same temp. */ 145 return temp; 146 } 147 148 static bool temps_are_copies(TCGArg arg1, TCGArg arg2) 149 { 150 TCGArg i; 151 152 if (arg1 == arg2) { 153 return true; 154 } 155 156 if (!temp_is_copy(arg1) || !temp_is_copy(arg2)) { 157 return false; 158 } 159 160 for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) { 161 if (i == arg2) { 162 return true; 163 } 164 } 165 166 return false; 167 } 168 169 static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args, 170 TCGArg dst, TCGArg val) 171 { 172 TCGOpcode new_op = op_to_movi(op->opc); 173 tcg_target_ulong mask; 174 175 op->opc = new_op; 176 177 reset_temp(dst); 178 temps[dst].is_const = true; 179 temps[dst].val = val; 180 mask = val; 181 if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) { 182 /* High bits of the destination are now garbage. */ 183 mask |= ~0xffffffffull; 184 } 185 temps[dst].mask = mask; 186 187 args[0] = dst; 188 args[1] = val; 189 } 190 191 static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, 192 TCGArg dst, TCGArg src) 193 { 194 if (temps_are_copies(dst, src)) { 195 tcg_op_remove(s, op); 196 return; 197 } 198 199 TCGOpcode new_op = op_to_mov(op->opc); 200 tcg_target_ulong mask; 201 202 op->opc = new_op; 203 204 reset_temp(dst); 205 mask = temps[src].mask; 206 if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) { 207 /* High bits of the destination are now garbage. */ 208 mask |= ~0xffffffffull; 209 } 210 temps[dst].mask = mask; 211 212 if (s->temps[src].type == s->temps[dst].type) { 213 temps[dst].next_copy = temps[src].next_copy; 214 temps[dst].prev_copy = src; 215 temps[temps[dst].next_copy].prev_copy = dst; 216 temps[src].next_copy = dst; 217 temps[dst].is_const = temps[src].is_const; 218 temps[dst].val = temps[src].val; 219 } 220 221 args[0] = dst; 222 args[1] = src; 223 } 224 225 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) 226 { 227 uint64_t l64, h64; 228 229 switch (op) { 230 CASE_OP_32_64(add): 231 return x + y; 232 233 CASE_OP_32_64(sub): 234 return x - y; 235 236 CASE_OP_32_64(mul): 237 return x * y; 238 239 CASE_OP_32_64(and): 240 return x & y; 241 242 CASE_OP_32_64(or): 243 return x | y; 244 245 CASE_OP_32_64(xor): 246 return x ^ y; 247 248 case INDEX_op_shl_i32: 249 return (uint32_t)x << (y & 31); 250 251 case INDEX_op_shl_i64: 252 return (uint64_t)x << (y & 63); 253 254 case INDEX_op_shr_i32: 255 return (uint32_t)x >> (y & 31); 256 257 case INDEX_op_shr_i64: 258 return (uint64_t)x >> (y & 63); 259 260 case INDEX_op_sar_i32: 261 return (int32_t)x >> (y & 31); 262 263 case INDEX_op_sar_i64: 264 return (int64_t)x >> (y & 63); 265 266 case INDEX_op_rotr_i32: 267 return ror32(x, y & 31); 268 269 case INDEX_op_rotr_i64: 270 return ror64(x, y & 63); 271 272 case INDEX_op_rotl_i32: 273 return rol32(x, y & 31); 274 275 case INDEX_op_rotl_i64: 276 return rol64(x, y & 63); 277 278 CASE_OP_32_64(not): 279 return ~x; 280 281 CASE_OP_32_64(neg): 282 return -x; 283 284 CASE_OP_32_64(andc): 285 return x & ~y; 286 287 CASE_OP_32_64(orc): 288 return x | ~y; 289 290 CASE_OP_32_64(eqv): 291 return ~(x ^ y); 292 293 CASE_OP_32_64(nand): 294 return ~(x & y); 295 296 CASE_OP_32_64(nor): 297 return ~(x | y); 298 299 case INDEX_op_clz_i32: 300 return (uint32_t)x ? clz32(x) : y; 301 302 case INDEX_op_clz_i64: 303 return x ? clz64(x) : y; 304 305 case INDEX_op_ctz_i32: 306 return (uint32_t)x ? ctz32(x) : y; 307 308 case INDEX_op_ctz_i64: 309 return x ? ctz64(x) : y; 310 311 case INDEX_op_ctpop_i32: 312 return ctpop32(x); 313 314 case INDEX_op_ctpop_i64: 315 return ctpop64(x); 316 317 CASE_OP_32_64(ext8s): 318 return (int8_t)x; 319 320 CASE_OP_32_64(ext16s): 321 return (int16_t)x; 322 323 CASE_OP_32_64(ext8u): 324 return (uint8_t)x; 325 326 CASE_OP_32_64(ext16u): 327 return (uint16_t)x; 328 329 case INDEX_op_ext_i32_i64: 330 case INDEX_op_ext32s_i64: 331 return (int32_t)x; 332 333 case INDEX_op_extu_i32_i64: 334 case INDEX_op_extrl_i64_i32: 335 case INDEX_op_ext32u_i64: 336 return (uint32_t)x; 337 338 case INDEX_op_extrh_i64_i32: 339 return (uint64_t)x >> 32; 340 341 case INDEX_op_muluh_i32: 342 return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; 343 case INDEX_op_mulsh_i32: 344 return ((int64_t)(int32_t)x * (int32_t)y) >> 32; 345 346 case INDEX_op_muluh_i64: 347 mulu64(&l64, &h64, x, y); 348 return h64; 349 case INDEX_op_mulsh_i64: 350 muls64(&l64, &h64, x, y); 351 return h64; 352 353 case INDEX_op_div_i32: 354 /* Avoid crashing on divide by zero, otherwise undefined. */ 355 return (int32_t)x / ((int32_t)y ? : 1); 356 case INDEX_op_divu_i32: 357 return (uint32_t)x / ((uint32_t)y ? : 1); 358 case INDEX_op_div_i64: 359 return (int64_t)x / ((int64_t)y ? : 1); 360 case INDEX_op_divu_i64: 361 return (uint64_t)x / ((uint64_t)y ? : 1); 362 363 case INDEX_op_rem_i32: 364 return (int32_t)x % ((int32_t)y ? : 1); 365 case INDEX_op_remu_i32: 366 return (uint32_t)x % ((uint32_t)y ? : 1); 367 case INDEX_op_rem_i64: 368 return (int64_t)x % ((int64_t)y ? : 1); 369 case INDEX_op_remu_i64: 370 return (uint64_t)x % ((uint64_t)y ? : 1); 371 372 default: 373 fprintf(stderr, 374 "Unrecognized operation %d in do_constant_folding.\n", op); 375 tcg_abort(); 376 } 377 } 378 379 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y) 380 { 381 TCGArg res = do_constant_folding_2(op, x, y); 382 if (op_bits(op) == 32) { 383 res = (int32_t)res; 384 } 385 return res; 386 } 387 388 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) 389 { 390 switch (c) { 391 case TCG_COND_EQ: 392 return x == y; 393 case TCG_COND_NE: 394 return x != y; 395 case TCG_COND_LT: 396 return (int32_t)x < (int32_t)y; 397 case TCG_COND_GE: 398 return (int32_t)x >= (int32_t)y; 399 case TCG_COND_LE: 400 return (int32_t)x <= (int32_t)y; 401 case TCG_COND_GT: 402 return (int32_t)x > (int32_t)y; 403 case TCG_COND_LTU: 404 return x < y; 405 case TCG_COND_GEU: 406 return x >= y; 407 case TCG_COND_LEU: 408 return x <= y; 409 case TCG_COND_GTU: 410 return x > y; 411 default: 412 tcg_abort(); 413 } 414 } 415 416 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) 417 { 418 switch (c) { 419 case TCG_COND_EQ: 420 return x == y; 421 case TCG_COND_NE: 422 return x != y; 423 case TCG_COND_LT: 424 return (int64_t)x < (int64_t)y; 425 case TCG_COND_GE: 426 return (int64_t)x >= (int64_t)y; 427 case TCG_COND_LE: 428 return (int64_t)x <= (int64_t)y; 429 case TCG_COND_GT: 430 return (int64_t)x > (int64_t)y; 431 case TCG_COND_LTU: 432 return x < y; 433 case TCG_COND_GEU: 434 return x >= y; 435 case TCG_COND_LEU: 436 return x <= y; 437 case TCG_COND_GTU: 438 return x > y; 439 default: 440 tcg_abort(); 441 } 442 } 443 444 static bool do_constant_folding_cond_eq(TCGCond c) 445 { 446 switch (c) { 447 case TCG_COND_GT: 448 case TCG_COND_LTU: 449 case TCG_COND_LT: 450 case TCG_COND_GTU: 451 case TCG_COND_NE: 452 return 0; 453 case TCG_COND_GE: 454 case TCG_COND_GEU: 455 case TCG_COND_LE: 456 case TCG_COND_LEU: 457 case TCG_COND_EQ: 458 return 1; 459 default: 460 tcg_abort(); 461 } 462 } 463 464 /* Return 2 if the condition can't be simplified, and the result 465 of the condition (0 or 1) if it can */ 466 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, 467 TCGArg y, TCGCond c) 468 { 469 if (temp_is_const(x) && temp_is_const(y)) { 470 switch (op_bits(op)) { 471 case 32: 472 return do_constant_folding_cond_32(temps[x].val, temps[y].val, c); 473 case 64: 474 return do_constant_folding_cond_64(temps[x].val, temps[y].val, c); 475 default: 476 tcg_abort(); 477 } 478 } else if (temps_are_copies(x, y)) { 479 return do_constant_folding_cond_eq(c); 480 } else if (temp_is_const(y) && temps[y].val == 0) { 481 switch (c) { 482 case TCG_COND_LTU: 483 return 0; 484 case TCG_COND_GEU: 485 return 1; 486 default: 487 return 2; 488 } 489 } 490 return 2; 491 } 492 493 /* Return 2 if the condition can't be simplified, and the result 494 of the condition (0 or 1) if it can */ 495 static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c) 496 { 497 TCGArg al = p1[0], ah = p1[1]; 498 TCGArg bl = p2[0], bh = p2[1]; 499 500 if (temp_is_const(bl) && temp_is_const(bh)) { 501 uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val; 502 503 if (temp_is_const(al) && temp_is_const(ah)) { 504 uint64_t a; 505 a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val; 506 return do_constant_folding_cond_64(a, b, c); 507 } 508 if (b == 0) { 509 switch (c) { 510 case TCG_COND_LTU: 511 return 0; 512 case TCG_COND_GEU: 513 return 1; 514 default: 515 break; 516 } 517 } 518 } 519 if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) { 520 return do_constant_folding_cond_eq(c); 521 } 522 return 2; 523 } 524 525 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) 526 { 527 TCGArg a1 = *p1, a2 = *p2; 528 int sum = 0; 529 sum += temp_is_const(a1); 530 sum -= temp_is_const(a2); 531 532 /* Prefer the constant in second argument, and then the form 533 op a, a, b, which is better handled on non-RISC hosts. */ 534 if (sum > 0 || (sum == 0 && dest == a2)) { 535 *p1 = a2; 536 *p2 = a1; 537 return true; 538 } 539 return false; 540 } 541 542 static bool swap_commutative2(TCGArg *p1, TCGArg *p2) 543 { 544 int sum = 0; 545 sum += temp_is_const(p1[0]); 546 sum += temp_is_const(p1[1]); 547 sum -= temp_is_const(p2[0]); 548 sum -= temp_is_const(p2[1]); 549 if (sum > 0) { 550 TCGArg t; 551 t = p1[0], p1[0] = p2[0], p2[0] = t; 552 t = p1[1], p1[1] = p2[1], p2[1] = t; 553 return true; 554 } 555 return false; 556 } 557 558 /* Propagate constants and copies, fold constant expressions. */ 559 void tcg_optimize(TCGContext *s) 560 { 561 int oi, oi_next, nb_temps, nb_globals; 562 TCGArg *prev_mb_args = NULL; 563 564 /* Array VALS has an element for each temp. 565 If this temp holds a constant then its value is kept in VALS' element. 566 If this temp is a copy of other ones then the other copies are 567 available through the doubly linked circular list. */ 568 569 nb_temps = s->nb_temps; 570 nb_globals = s->nb_globals; 571 reset_all_temps(nb_temps); 572 573 for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) { 574 tcg_target_ulong mask, partmask, affected; 575 int nb_oargs, nb_iargs, i; 576 TCGArg tmp; 577 578 TCGOp * const op = &s->gen_op_buf[oi]; 579 TCGArg * const args = &s->gen_opparam_buf[op->args]; 580 TCGOpcode opc = op->opc; 581 const TCGOpDef *def = &tcg_op_defs[opc]; 582 583 oi_next = op->next; 584 585 /* Count the arguments, and initialize the temps that are 586 going to be used */ 587 if (opc == INDEX_op_call) { 588 nb_oargs = op->callo; 589 nb_iargs = op->calli; 590 for (i = 0; i < nb_oargs + nb_iargs; i++) { 591 tmp = args[i]; 592 if (tmp != TCG_CALL_DUMMY_ARG) { 593 init_temp_info(tmp); 594 } 595 } 596 } else { 597 nb_oargs = def->nb_oargs; 598 nb_iargs = def->nb_iargs; 599 for (i = 0; i < nb_oargs + nb_iargs; i++) { 600 init_temp_info(args[i]); 601 } 602 } 603 604 /* Do copy propagation */ 605 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 606 if (temp_is_copy(args[i])) { 607 args[i] = find_better_copy(s, args[i]); 608 } 609 } 610 611 /* For commutative operations make constant second argument */ 612 switch (opc) { 613 CASE_OP_32_64(add): 614 CASE_OP_32_64(mul): 615 CASE_OP_32_64(and): 616 CASE_OP_32_64(or): 617 CASE_OP_32_64(xor): 618 CASE_OP_32_64(eqv): 619 CASE_OP_32_64(nand): 620 CASE_OP_32_64(nor): 621 CASE_OP_32_64(muluh): 622 CASE_OP_32_64(mulsh): 623 swap_commutative(args[0], &args[1], &args[2]); 624 break; 625 CASE_OP_32_64(brcond): 626 if (swap_commutative(-1, &args[0], &args[1])) { 627 args[2] = tcg_swap_cond(args[2]); 628 } 629 break; 630 CASE_OP_32_64(setcond): 631 if (swap_commutative(args[0], &args[1], &args[2])) { 632 args[3] = tcg_swap_cond(args[3]); 633 } 634 break; 635 CASE_OP_32_64(movcond): 636 if (swap_commutative(-1, &args[1], &args[2])) { 637 args[5] = tcg_swap_cond(args[5]); 638 } 639 /* For movcond, we canonicalize the "false" input reg to match 640 the destination reg so that the tcg backend can implement 641 a "move if true" operation. */ 642 if (swap_commutative(args[0], &args[4], &args[3])) { 643 args[5] = tcg_invert_cond(args[5]); 644 } 645 break; 646 CASE_OP_32_64(add2): 647 swap_commutative(args[0], &args[2], &args[4]); 648 swap_commutative(args[1], &args[3], &args[5]); 649 break; 650 CASE_OP_32_64(mulu2): 651 CASE_OP_32_64(muls2): 652 swap_commutative(args[0], &args[2], &args[3]); 653 break; 654 case INDEX_op_brcond2_i32: 655 if (swap_commutative2(&args[0], &args[2])) { 656 args[4] = tcg_swap_cond(args[4]); 657 } 658 break; 659 case INDEX_op_setcond2_i32: 660 if (swap_commutative2(&args[1], &args[3])) { 661 args[5] = tcg_swap_cond(args[5]); 662 } 663 break; 664 default: 665 break; 666 } 667 668 /* Simplify expressions for "shift/rot r, 0, a => movi r, 0", 669 and "sub r, 0, a => neg r, a" case. */ 670 switch (opc) { 671 CASE_OP_32_64(shl): 672 CASE_OP_32_64(shr): 673 CASE_OP_32_64(sar): 674 CASE_OP_32_64(rotl): 675 CASE_OP_32_64(rotr): 676 if (temp_is_const(args[1]) && temps[args[1]].val == 0) { 677 tcg_opt_gen_movi(s, op, args, args[0], 0); 678 continue; 679 } 680 break; 681 CASE_OP_32_64(sub): 682 { 683 TCGOpcode neg_op; 684 bool have_neg; 685 686 if (temp_is_const(args[2])) { 687 /* Proceed with possible constant folding. */ 688 break; 689 } 690 if (opc == INDEX_op_sub_i32) { 691 neg_op = INDEX_op_neg_i32; 692 have_neg = TCG_TARGET_HAS_neg_i32; 693 } else { 694 neg_op = INDEX_op_neg_i64; 695 have_neg = TCG_TARGET_HAS_neg_i64; 696 } 697 if (!have_neg) { 698 break; 699 } 700 if (temp_is_const(args[1]) && temps[args[1]].val == 0) { 701 op->opc = neg_op; 702 reset_temp(args[0]); 703 args[1] = args[2]; 704 continue; 705 } 706 } 707 break; 708 CASE_OP_32_64(xor): 709 CASE_OP_32_64(nand): 710 if (!temp_is_const(args[1]) 711 && temp_is_const(args[2]) && temps[args[2]].val == -1) { 712 i = 1; 713 goto try_not; 714 } 715 break; 716 CASE_OP_32_64(nor): 717 if (!temp_is_const(args[1]) 718 && temp_is_const(args[2]) && temps[args[2]].val == 0) { 719 i = 1; 720 goto try_not; 721 } 722 break; 723 CASE_OP_32_64(andc): 724 if (!temp_is_const(args[2]) 725 && temp_is_const(args[1]) && temps[args[1]].val == -1) { 726 i = 2; 727 goto try_not; 728 } 729 break; 730 CASE_OP_32_64(orc): 731 CASE_OP_32_64(eqv): 732 if (!temp_is_const(args[2]) 733 && temp_is_const(args[1]) && temps[args[1]].val == 0) { 734 i = 2; 735 goto try_not; 736 } 737 break; 738 try_not: 739 { 740 TCGOpcode not_op; 741 bool have_not; 742 743 if (def->flags & TCG_OPF_64BIT) { 744 not_op = INDEX_op_not_i64; 745 have_not = TCG_TARGET_HAS_not_i64; 746 } else { 747 not_op = INDEX_op_not_i32; 748 have_not = TCG_TARGET_HAS_not_i32; 749 } 750 if (!have_not) { 751 break; 752 } 753 op->opc = not_op; 754 reset_temp(args[0]); 755 args[1] = args[i]; 756 continue; 757 } 758 default: 759 break; 760 } 761 762 /* Simplify expression for "op r, a, const => mov r, a" cases */ 763 switch (opc) { 764 CASE_OP_32_64(add): 765 CASE_OP_32_64(sub): 766 CASE_OP_32_64(shl): 767 CASE_OP_32_64(shr): 768 CASE_OP_32_64(sar): 769 CASE_OP_32_64(rotl): 770 CASE_OP_32_64(rotr): 771 CASE_OP_32_64(or): 772 CASE_OP_32_64(xor): 773 CASE_OP_32_64(andc): 774 if (!temp_is_const(args[1]) 775 && temp_is_const(args[2]) && temps[args[2]].val == 0) { 776 tcg_opt_gen_mov(s, op, args, args[0], args[1]); 777 continue; 778 } 779 break; 780 CASE_OP_32_64(and): 781 CASE_OP_32_64(orc): 782 CASE_OP_32_64(eqv): 783 if (!temp_is_const(args[1]) 784 && temp_is_const(args[2]) && temps[args[2]].val == -1) { 785 tcg_opt_gen_mov(s, op, args, args[0], args[1]); 786 continue; 787 } 788 break; 789 default: 790 break; 791 } 792 793 /* Simplify using known-zero bits. Currently only ops with a single 794 output argument is supported. */ 795 mask = -1; 796 affected = -1; 797 switch (opc) { 798 CASE_OP_32_64(ext8s): 799 if ((temps[args[1]].mask & 0x80) != 0) { 800 break; 801 } 802 CASE_OP_32_64(ext8u): 803 mask = 0xff; 804 goto and_const; 805 CASE_OP_32_64(ext16s): 806 if ((temps[args[1]].mask & 0x8000) != 0) { 807 break; 808 } 809 CASE_OP_32_64(ext16u): 810 mask = 0xffff; 811 goto and_const; 812 case INDEX_op_ext32s_i64: 813 if ((temps[args[1]].mask & 0x80000000) != 0) { 814 break; 815 } 816 case INDEX_op_ext32u_i64: 817 mask = 0xffffffffU; 818 goto and_const; 819 820 CASE_OP_32_64(and): 821 mask = temps[args[2]].mask; 822 if (temp_is_const(args[2])) { 823 and_const: 824 affected = temps[args[1]].mask & ~mask; 825 } 826 mask = temps[args[1]].mask & mask; 827 break; 828 829 case INDEX_op_ext_i32_i64: 830 if ((temps[args[1]].mask & 0x80000000) != 0) { 831 break; 832 } 833 case INDEX_op_extu_i32_i64: 834 /* We do not compute affected as it is a size changing op. */ 835 mask = (uint32_t)temps[args[1]].mask; 836 break; 837 838 CASE_OP_32_64(andc): 839 /* Known-zeros does not imply known-ones. Therefore unless 840 args[2] is constant, we can't infer anything from it. */ 841 if (temp_is_const(args[2])) { 842 mask = ~temps[args[2]].mask; 843 goto and_const; 844 } 845 /* But we certainly know nothing outside args[1] may be set. */ 846 mask = temps[args[1]].mask; 847 break; 848 849 case INDEX_op_sar_i32: 850 if (temp_is_const(args[2])) { 851 tmp = temps[args[2]].val & 31; 852 mask = (int32_t)temps[args[1]].mask >> tmp; 853 } 854 break; 855 case INDEX_op_sar_i64: 856 if (temp_is_const(args[2])) { 857 tmp = temps[args[2]].val & 63; 858 mask = (int64_t)temps[args[1]].mask >> tmp; 859 } 860 break; 861 862 case INDEX_op_shr_i32: 863 if (temp_is_const(args[2])) { 864 tmp = temps[args[2]].val & 31; 865 mask = (uint32_t)temps[args[1]].mask >> tmp; 866 } 867 break; 868 case INDEX_op_shr_i64: 869 if (temp_is_const(args[2])) { 870 tmp = temps[args[2]].val & 63; 871 mask = (uint64_t)temps[args[1]].mask >> tmp; 872 } 873 break; 874 875 case INDEX_op_extrl_i64_i32: 876 mask = (uint32_t)temps[args[1]].mask; 877 break; 878 case INDEX_op_extrh_i64_i32: 879 mask = (uint64_t)temps[args[1]].mask >> 32; 880 break; 881 882 CASE_OP_32_64(shl): 883 if (temp_is_const(args[2])) { 884 tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1); 885 mask = temps[args[1]].mask << tmp; 886 } 887 break; 888 889 CASE_OP_32_64(neg): 890 /* Set to 1 all bits to the left of the rightmost. */ 891 mask = -(temps[args[1]].mask & -temps[args[1]].mask); 892 break; 893 894 CASE_OP_32_64(deposit): 895 mask = deposit64(temps[args[1]].mask, args[3], args[4], 896 temps[args[2]].mask); 897 break; 898 899 CASE_OP_32_64(extract): 900 mask = extract64(temps[args[1]].mask, args[2], args[3]); 901 if (args[2] == 0) { 902 affected = temps[args[1]].mask & ~mask; 903 } 904 break; 905 CASE_OP_32_64(sextract): 906 mask = sextract64(temps[args[1]].mask, args[2], args[3]); 907 if (args[2] == 0 && (tcg_target_long)mask >= 0) { 908 affected = temps[args[1]].mask & ~mask; 909 } 910 break; 911 912 CASE_OP_32_64(or): 913 CASE_OP_32_64(xor): 914 mask = temps[args[1]].mask | temps[args[2]].mask; 915 break; 916 917 case INDEX_op_clz_i32: 918 case INDEX_op_ctz_i32: 919 mask = temps[args[2]].mask | 31; 920 break; 921 922 case INDEX_op_clz_i64: 923 case INDEX_op_ctz_i64: 924 mask = temps[args[2]].mask | 63; 925 break; 926 927 case INDEX_op_ctpop_i32: 928 mask = 32 | 31; 929 break; 930 case INDEX_op_ctpop_i64: 931 mask = 64 | 63; 932 break; 933 934 CASE_OP_32_64(setcond): 935 case INDEX_op_setcond2_i32: 936 mask = 1; 937 break; 938 939 CASE_OP_32_64(movcond): 940 mask = temps[args[3]].mask | temps[args[4]].mask; 941 break; 942 943 CASE_OP_32_64(ld8u): 944 mask = 0xff; 945 break; 946 CASE_OP_32_64(ld16u): 947 mask = 0xffff; 948 break; 949 case INDEX_op_ld32u_i64: 950 mask = 0xffffffffu; 951 break; 952 953 CASE_OP_32_64(qemu_ld): 954 { 955 TCGMemOpIdx oi = args[nb_oargs + nb_iargs]; 956 TCGMemOp mop = get_memop(oi); 957 if (!(mop & MO_SIGN)) { 958 mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1; 959 } 960 } 961 break; 962 963 default: 964 break; 965 } 966 967 /* 32-bit ops generate 32-bit results. For the result is zero test 968 below, we can ignore high bits, but for further optimizations we 969 need to record that the high bits contain garbage. */ 970 partmask = mask; 971 if (!(def->flags & TCG_OPF_64BIT)) { 972 mask |= ~(tcg_target_ulong)0xffffffffu; 973 partmask &= 0xffffffffu; 974 affected &= 0xffffffffu; 975 } 976 977 if (partmask == 0) { 978 tcg_debug_assert(nb_oargs == 1); 979 tcg_opt_gen_movi(s, op, args, args[0], 0); 980 continue; 981 } 982 if (affected == 0) { 983 tcg_debug_assert(nb_oargs == 1); 984 tcg_opt_gen_mov(s, op, args, args[0], args[1]); 985 continue; 986 } 987 988 /* Simplify expression for "op r, a, 0 => movi r, 0" cases */ 989 switch (opc) { 990 CASE_OP_32_64(and): 991 CASE_OP_32_64(mul): 992 CASE_OP_32_64(muluh): 993 CASE_OP_32_64(mulsh): 994 if ((temp_is_const(args[2]) && temps[args[2]].val == 0)) { 995 tcg_opt_gen_movi(s, op, args, args[0], 0); 996 continue; 997 } 998 break; 999 default: 1000 break; 1001 } 1002 1003 /* Simplify expression for "op r, a, a => mov r, a" cases */ 1004 switch (opc) { 1005 CASE_OP_32_64(or): 1006 CASE_OP_32_64(and): 1007 if (temps_are_copies(args[1], args[2])) { 1008 tcg_opt_gen_mov(s, op, args, args[0], args[1]); 1009 continue; 1010 } 1011 break; 1012 default: 1013 break; 1014 } 1015 1016 /* Simplify expression for "op r, a, a => movi r, 0" cases */ 1017 switch (opc) { 1018 CASE_OP_32_64(andc): 1019 CASE_OP_32_64(sub): 1020 CASE_OP_32_64(xor): 1021 if (temps_are_copies(args[1], args[2])) { 1022 tcg_opt_gen_movi(s, op, args, args[0], 0); 1023 continue; 1024 } 1025 break; 1026 default: 1027 break; 1028 } 1029 1030 /* Propagate constants through copy operations and do constant 1031 folding. Constants will be substituted to arguments by register 1032 allocator where needed and possible. Also detect copies. */ 1033 switch (opc) { 1034 CASE_OP_32_64(mov): 1035 tcg_opt_gen_mov(s, op, args, args[0], args[1]); 1036 break; 1037 CASE_OP_32_64(movi): 1038 tcg_opt_gen_movi(s, op, args, args[0], args[1]); 1039 break; 1040 1041 CASE_OP_32_64(not): 1042 CASE_OP_32_64(neg): 1043 CASE_OP_32_64(ext8s): 1044 CASE_OP_32_64(ext8u): 1045 CASE_OP_32_64(ext16s): 1046 CASE_OP_32_64(ext16u): 1047 CASE_OP_32_64(ctpop): 1048 case INDEX_op_ext32s_i64: 1049 case INDEX_op_ext32u_i64: 1050 case INDEX_op_ext_i32_i64: 1051 case INDEX_op_extu_i32_i64: 1052 case INDEX_op_extrl_i64_i32: 1053 case INDEX_op_extrh_i64_i32: 1054 if (temp_is_const(args[1])) { 1055 tmp = do_constant_folding(opc, temps[args[1]].val, 0); 1056 tcg_opt_gen_movi(s, op, args, args[0], tmp); 1057 break; 1058 } 1059 goto do_default; 1060 1061 CASE_OP_32_64(add): 1062 CASE_OP_32_64(sub): 1063 CASE_OP_32_64(mul): 1064 CASE_OP_32_64(or): 1065 CASE_OP_32_64(and): 1066 CASE_OP_32_64(xor): 1067 CASE_OP_32_64(shl): 1068 CASE_OP_32_64(shr): 1069 CASE_OP_32_64(sar): 1070 CASE_OP_32_64(rotl): 1071 CASE_OP_32_64(rotr): 1072 CASE_OP_32_64(andc): 1073 CASE_OP_32_64(orc): 1074 CASE_OP_32_64(eqv): 1075 CASE_OP_32_64(nand): 1076 CASE_OP_32_64(nor): 1077 CASE_OP_32_64(muluh): 1078 CASE_OP_32_64(mulsh): 1079 CASE_OP_32_64(div): 1080 CASE_OP_32_64(divu): 1081 CASE_OP_32_64(rem): 1082 CASE_OP_32_64(remu): 1083 if (temp_is_const(args[1]) && temp_is_const(args[2])) { 1084 tmp = do_constant_folding(opc, temps[args[1]].val, 1085 temps[args[2]].val); 1086 tcg_opt_gen_movi(s, op, args, args[0], tmp); 1087 break; 1088 } 1089 goto do_default; 1090 1091 CASE_OP_32_64(clz): 1092 CASE_OP_32_64(ctz): 1093 if (temp_is_const(args[1])) { 1094 TCGArg v = temps[args[1]].val; 1095 if (v != 0) { 1096 tmp = do_constant_folding(opc, v, 0); 1097 tcg_opt_gen_movi(s, op, args, args[0], tmp); 1098 } else { 1099 tcg_opt_gen_mov(s, op, args, args[0], args[2]); 1100 } 1101 break; 1102 } 1103 goto do_default; 1104 1105 CASE_OP_32_64(deposit): 1106 if (temp_is_const(args[1]) && temp_is_const(args[2])) { 1107 tmp = deposit64(temps[args[1]].val, args[3], args[4], 1108 temps[args[2]].val); 1109 tcg_opt_gen_movi(s, op, args, args[0], tmp); 1110 break; 1111 } 1112 goto do_default; 1113 1114 CASE_OP_32_64(extract): 1115 if (temp_is_const(args[1])) { 1116 tmp = extract64(temps[args[1]].val, args[2], args[3]); 1117 tcg_opt_gen_movi(s, op, args, args[0], tmp); 1118 break; 1119 } 1120 goto do_default; 1121 1122 CASE_OP_32_64(sextract): 1123 if (temp_is_const(args[1])) { 1124 tmp = sextract64(temps[args[1]].val, args[2], args[3]); 1125 tcg_opt_gen_movi(s, op, args, args[0], tmp); 1126 break; 1127 } 1128 goto do_default; 1129 1130 CASE_OP_32_64(setcond): 1131 tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]); 1132 if (tmp != 2) { 1133 tcg_opt_gen_movi(s, op, args, args[0], tmp); 1134 break; 1135 } 1136 goto do_default; 1137 1138 CASE_OP_32_64(brcond): 1139 tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]); 1140 if (tmp != 2) { 1141 if (tmp) { 1142 reset_all_temps(nb_temps); 1143 op->opc = INDEX_op_br; 1144 args[0] = args[3]; 1145 } else { 1146 tcg_op_remove(s, op); 1147 } 1148 break; 1149 } 1150 goto do_default; 1151 1152 CASE_OP_32_64(movcond): 1153 tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]); 1154 if (tmp != 2) { 1155 tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]); 1156 break; 1157 } 1158 if (temp_is_const(args[3]) && temp_is_const(args[4])) { 1159 tcg_target_ulong tv = temps[args[3]].val; 1160 tcg_target_ulong fv = temps[args[4]].val; 1161 TCGCond cond = args[5]; 1162 if (fv == 1 && tv == 0) { 1163 cond = tcg_invert_cond(cond); 1164 } else if (!(tv == 1 && fv == 0)) { 1165 goto do_default; 1166 } 1167 args[3] = cond; 1168 op->opc = opc = (opc == INDEX_op_movcond_i32 1169 ? INDEX_op_setcond_i32 1170 : INDEX_op_setcond_i64); 1171 nb_iargs = 2; 1172 } 1173 goto do_default; 1174 1175 case INDEX_op_add2_i32: 1176 case INDEX_op_sub2_i32: 1177 if (temp_is_const(args[2]) && temp_is_const(args[3]) 1178 && temp_is_const(args[4]) && temp_is_const(args[5])) { 1179 uint32_t al = temps[args[2]].val; 1180 uint32_t ah = temps[args[3]].val; 1181 uint32_t bl = temps[args[4]].val; 1182 uint32_t bh = temps[args[5]].val; 1183 uint64_t a = ((uint64_t)ah << 32) | al; 1184 uint64_t b = ((uint64_t)bh << 32) | bl; 1185 TCGArg rl, rh; 1186 TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2); 1187 TCGArg *args2 = &s->gen_opparam_buf[op2->args]; 1188 1189 if (opc == INDEX_op_add2_i32) { 1190 a += b; 1191 } else { 1192 a -= b; 1193 } 1194 1195 rl = args[0]; 1196 rh = args[1]; 1197 tcg_opt_gen_movi(s, op, args, rl, (int32_t)a); 1198 tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(a >> 32)); 1199 1200 /* We've done all we need to do with the movi. Skip it. */ 1201 oi_next = op2->next; 1202 break; 1203 } 1204 goto do_default; 1205 1206 case INDEX_op_mulu2_i32: 1207 if (temp_is_const(args[2]) && temp_is_const(args[3])) { 1208 uint32_t a = temps[args[2]].val; 1209 uint32_t b = temps[args[3]].val; 1210 uint64_t r = (uint64_t)a * b; 1211 TCGArg rl, rh; 1212 TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2); 1213 TCGArg *args2 = &s->gen_opparam_buf[op2->args]; 1214 1215 rl = args[0]; 1216 rh = args[1]; 1217 tcg_opt_gen_movi(s, op, args, rl, (int32_t)r); 1218 tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(r >> 32)); 1219 1220 /* We've done all we need to do with the movi. Skip it. */ 1221 oi_next = op2->next; 1222 break; 1223 } 1224 goto do_default; 1225 1226 case INDEX_op_brcond2_i32: 1227 tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]); 1228 if (tmp != 2) { 1229 if (tmp) { 1230 do_brcond_true: 1231 reset_all_temps(nb_temps); 1232 op->opc = INDEX_op_br; 1233 args[0] = args[5]; 1234 } else { 1235 do_brcond_false: 1236 tcg_op_remove(s, op); 1237 } 1238 } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE) 1239 && temp_is_const(args[2]) && temps[args[2]].val == 0 1240 && temp_is_const(args[3]) && temps[args[3]].val == 0) { 1241 /* Simplify LT/GE comparisons vs zero to a single compare 1242 vs the high word of the input. */ 1243 do_brcond_high: 1244 reset_all_temps(nb_temps); 1245 op->opc = INDEX_op_brcond_i32; 1246 args[0] = args[1]; 1247 args[1] = args[3]; 1248 args[2] = args[4]; 1249 args[3] = args[5]; 1250 } else if (args[4] == TCG_COND_EQ) { 1251 /* Simplify EQ comparisons where one of the pairs 1252 can be simplified. */ 1253 tmp = do_constant_folding_cond(INDEX_op_brcond_i32, 1254 args[0], args[2], TCG_COND_EQ); 1255 if (tmp == 0) { 1256 goto do_brcond_false; 1257 } else if (tmp == 1) { 1258 goto do_brcond_high; 1259 } 1260 tmp = do_constant_folding_cond(INDEX_op_brcond_i32, 1261 args[1], args[3], TCG_COND_EQ); 1262 if (tmp == 0) { 1263 goto do_brcond_false; 1264 } else if (tmp != 1) { 1265 goto do_default; 1266 } 1267 do_brcond_low: 1268 reset_all_temps(nb_temps); 1269 op->opc = INDEX_op_brcond_i32; 1270 args[1] = args[2]; 1271 args[2] = args[4]; 1272 args[3] = args[5]; 1273 } else if (args[4] == TCG_COND_NE) { 1274 /* Simplify NE comparisons where one of the pairs 1275 can be simplified. */ 1276 tmp = do_constant_folding_cond(INDEX_op_brcond_i32, 1277 args[0], args[2], TCG_COND_NE); 1278 if (tmp == 0) { 1279 goto do_brcond_high; 1280 } else if (tmp == 1) { 1281 goto do_brcond_true; 1282 } 1283 tmp = do_constant_folding_cond(INDEX_op_brcond_i32, 1284 args[1], args[3], TCG_COND_NE); 1285 if (tmp == 0) { 1286 goto do_brcond_low; 1287 } else if (tmp == 1) { 1288 goto do_brcond_true; 1289 } 1290 goto do_default; 1291 } else { 1292 goto do_default; 1293 } 1294 break; 1295 1296 case INDEX_op_setcond2_i32: 1297 tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]); 1298 if (tmp != 2) { 1299 do_setcond_const: 1300 tcg_opt_gen_movi(s, op, args, args[0], tmp); 1301 } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE) 1302 && temp_is_const(args[3]) && temps[args[3]].val == 0 1303 && temp_is_const(args[4]) && temps[args[4]].val == 0) { 1304 /* Simplify LT/GE comparisons vs zero to a single compare 1305 vs the high word of the input. */ 1306 do_setcond_high: 1307 reset_temp(args[0]); 1308 temps[args[0]].mask = 1; 1309 op->opc = INDEX_op_setcond_i32; 1310 args[1] = args[2]; 1311 args[2] = args[4]; 1312 args[3] = args[5]; 1313 } else if (args[5] == TCG_COND_EQ) { 1314 /* Simplify EQ comparisons where one of the pairs 1315 can be simplified. */ 1316 tmp = do_constant_folding_cond(INDEX_op_setcond_i32, 1317 args[1], args[3], TCG_COND_EQ); 1318 if (tmp == 0) { 1319 goto do_setcond_const; 1320 } else if (tmp == 1) { 1321 goto do_setcond_high; 1322 } 1323 tmp = do_constant_folding_cond(INDEX_op_setcond_i32, 1324 args[2], args[4], TCG_COND_EQ); 1325 if (tmp == 0) { 1326 goto do_setcond_high; 1327 } else if (tmp != 1) { 1328 goto do_default; 1329 } 1330 do_setcond_low: 1331 reset_temp(args[0]); 1332 temps[args[0]].mask = 1; 1333 op->opc = INDEX_op_setcond_i32; 1334 args[2] = args[3]; 1335 args[3] = args[5]; 1336 } else if (args[5] == TCG_COND_NE) { 1337 /* Simplify NE comparisons where one of the pairs 1338 can be simplified. */ 1339 tmp = do_constant_folding_cond(INDEX_op_setcond_i32, 1340 args[1], args[3], TCG_COND_NE); 1341 if (tmp == 0) { 1342 goto do_setcond_high; 1343 } else if (tmp == 1) { 1344 goto do_setcond_const; 1345 } 1346 tmp = do_constant_folding_cond(INDEX_op_setcond_i32, 1347 args[2], args[4], TCG_COND_NE); 1348 if (tmp == 0) { 1349 goto do_setcond_low; 1350 } else if (tmp == 1) { 1351 goto do_setcond_const; 1352 } 1353 goto do_default; 1354 } else { 1355 goto do_default; 1356 } 1357 break; 1358 1359 case INDEX_op_call: 1360 if (!(args[nb_oargs + nb_iargs + 1] 1361 & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) { 1362 for (i = 0; i < nb_globals; i++) { 1363 if (test_bit(i, temps_used.l)) { 1364 reset_temp(i); 1365 } 1366 } 1367 } 1368 goto do_reset_output; 1369 1370 default: 1371 do_default: 1372 /* Default case: we know nothing about operation (or were unable 1373 to compute the operation result) so no propagation is done. 1374 We trash everything if the operation is the end of a basic 1375 block, otherwise we only trash the output args. "mask" is 1376 the non-zero bits mask for the first output arg. */ 1377 if (def->flags & TCG_OPF_BB_END) { 1378 reset_all_temps(nb_temps); 1379 } else { 1380 do_reset_output: 1381 for (i = 0; i < nb_oargs; i++) { 1382 reset_temp(args[i]); 1383 /* Save the corresponding known-zero bits mask for the 1384 first output argument (only one supported so far). */ 1385 if (i == 0) { 1386 temps[args[i]].mask = mask; 1387 } 1388 } 1389 } 1390 break; 1391 } 1392 1393 /* Eliminate duplicate and redundant fence instructions. */ 1394 if (prev_mb_args) { 1395 switch (opc) { 1396 case INDEX_op_mb: 1397 /* Merge two barriers of the same type into one, 1398 * or a weaker barrier into a stronger one, 1399 * or two weaker barriers into a stronger one. 1400 * mb X; mb Y => mb X|Y 1401 * mb; strl => mb; st 1402 * ldaq; mb => ld; mb 1403 * ldaq; strl => ld; mb; st 1404 * Other combinations are also merged into a strong 1405 * barrier. This is stricter than specified but for 1406 * the purposes of TCG is better than not optimizing. 1407 */ 1408 prev_mb_args[0] |= args[0]; 1409 tcg_op_remove(s, op); 1410 break; 1411 1412 default: 1413 /* Opcodes that end the block stop the optimization. */ 1414 if ((def->flags & TCG_OPF_BB_END) == 0) { 1415 break; 1416 } 1417 /* fallthru */ 1418 case INDEX_op_qemu_ld_i32: 1419 case INDEX_op_qemu_ld_i64: 1420 case INDEX_op_qemu_st_i32: 1421 case INDEX_op_qemu_st_i64: 1422 case INDEX_op_call: 1423 /* Opcodes that touch guest memory stop the optimization. */ 1424 prev_mb_args = NULL; 1425 break; 1426 } 1427 } else if (opc == INDEX_op_mb) { 1428 prev_mb_args = args; 1429 } 1430 } 1431 } 1432