/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "exec/cpu-common.h"
#include "tcg-op.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

struct tcg_temp_info {
    bool is_const;          /* temp is known to hold the constant VAL */
    uint16_t prev_copy;     /* doubly linked circular list of temps known */
    uint16_t next_copy;     /* to hold the same value */
    tcg_target_ulong val;   /* the constant, valid when is_const is true */
    tcg_target_ulong mask;  /* bits that may be nonzero; a 0 bit is known zero */
};

static struct tcg_temp_info temps[TCG_MAX_TEMPS];
static TCGTempSet temps_used;

static inline bool temp_is_const(TCGArg arg)
{
    return temps[arg].is_const;
}

static inline bool temp_is_copy(TCGArg arg)
{
    return temps[arg].next_copy != arg;
}

/* Reset TEMP's state, possibly removing the temp from the list of copies.  */
static void reset_temp(TCGArg temp)
{
    temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
    temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
    temps[temp].next_copy = temp;
    temps[temp].prev_copy = temp;
    temps[temp].is_const = false;
    temps[temp].mask = -1;
}

/* Reset all temporaries, given that there are NB_TEMPS of them.  */
static void reset_all_temps(int nb_temps)
{
    bitmap_zero(temps_used.l, nb_temps);
}

/* Initialize and activate a temporary.  */
static void init_temp_info(TCGArg temp)
{
    if (!test_bit(temp, temps_used.l)) {
        temps[temp].next_copy = temp;
        temps[temp].prev_copy = temp;
        temps[temp].is_const = false;
        temps[temp].mask = -1;
        set_bit(temp, temps_used.l);
    }
}
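
/* Helpers that map an opcode to the width of its operands and to the
 * mov/movi opcode of the same width.  They are used below when an operation
 * is rewritten into a plain copy or into a constant load.
 */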
static int op_bits(TCGOpcode op)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    return def->flags & TCG_OPF_64BIT ? 64 : 32;
}

static TCGOpcode op_to_mov(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_mov_i32;
    case 64:
        return INDEX_op_mov_i64;
    default:
        fprintf(stderr, "op_to_mov: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGOpcode op_to_movi(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_movi_i32;
    case 64:
        return INDEX_op_movi_i64;
    default:
        fprintf(stderr, "op_to_movi: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
{
    TCGArg i;

    /* If this is already a global, we can't do better. */
    if (temp < s->nb_globals) {
        return temp;
    }

    /* Search for a global first. */
    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
        if (i < s->nb_globals) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local. */
    if (!s->temps[temp].temp_local) {
        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
            if (s->temps[i].temp_local) {
                return i;
            }
        }
    }

    /* Failing to find a better representation, return the same temp. */
    return temp;
}

static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
{
    TCGArg i;

    if (arg1 == arg2) {
        return true;
    }

    if (!temp_is_copy(arg1) || !temp_is_copy(arg2)) {
        return false;
    }

    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
        if (i == arg2) {
            return true;
        }
    }

    return false;
}

static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
                             TCGArg dst, TCGArg val)
{
    TCGOpcode new_op = op_to_movi(op->opc);
    tcg_target_ulong mask;

    op->opc = new_op;

    reset_temp(dst);
    temps[dst].is_const = true;
    temps[dst].val = val;
    mask = val;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
        /* High bits of the destination are now garbage. */
        mask |= ~0xffffffffull;
    }
    temps[dst].mask = mask;

    args[0] = dst;
    args[1] = val;
}
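
/* Rewrite the current op in place into a mov from SRC to DST, or remove it
 * entirely if DST and SRC are already known to be copies.  DST inherits
 * SRC's known-zero-bits mask and, when the two temps have the same type,
 * also the constant value and a place in SRC's circular list of copies.
 */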
static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
                            TCGArg dst, TCGArg src)
{
    if (temps_are_copies(dst, src)) {
        tcg_op_remove(s, op);
        return;
    }

    TCGOpcode new_op = op_to_mov(op->opc);
    tcg_target_ulong mask;

    op->opc = new_op;

    reset_temp(dst);
    mask = temps[src].mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage. */
        mask |= ~0xffffffffull;
    }
    temps[dst].mask = mask;

    if (s->temps[src].type == s->temps[dst].type) {
        temps[dst].next_copy = temps[src].next_copy;
        temps[dst].prev_copy = src;
        temps[temps[dst].next_copy].prev_copy = dst;
        temps[src].next_copy = dst;
        temps[dst].is_const = temps[src].is_const;
        temps[dst].val = temps[src].val;
    }

    args[0] = dst;
    args[1] = src;
}

static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined. */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);
    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}

static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    TCGArg res = do_constant_folding_2(op, x, y);
    if (op_bits(op) == 32) {
        res = (int32_t)res;
    }
    return res;
}

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    if (temp_is_const(x) && temp_is_const(y)) {
        switch (op_bits(op)) {
        case 32:
            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
        case 64:
            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
        default:
            tcg_abort();
        }
    } else if (temps_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (temp_is_const(y) && temps[y].val == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    }
    return 2;
}
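
/* The *_cond2 folder below operates on 64-bit values represented as
 * (low, high) pairs of 32-bit temps, as produced by brcond2/setcond2 for
 * 64-bit comparisons when the host only has 32-bit registers.
 */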
/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (temp_is_const(bl) && temp_is_const(bh)) {
        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;

        if (temp_is_const(al) && temp_is_const(ah)) {
            uint64_t a;
            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += temp_is_const(a1);
    sum -= temp_is_const(a2);

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += temp_is_const(p1[0]);
    sum += temp_is_const(p1[1]);
    sum -= temp_is_const(p2[0]);
    sum -= temp_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

/* Propagate constants and copies, fold constant expressions. */
void tcg_optimize(TCGContext *s)
{
    int oi, oi_next, nb_temps, nb_globals;
    TCGArg *prev_mb_args = NULL;

    /* Array VALS has an element for each temp.
       If this temp holds a constant then its value is kept in VALS' element.
       If this temp is a copy of other ones then the other copies are
       available through the doubly linked circular list. */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    reset_all_temps(nb_temps);

    for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
        tcg_target_ulong mask, partmask, affected;
        int nb_oargs, nb_iargs, i;
        TCGArg tmp;

        TCGOp * const op = &s->gen_op_buf[oi];
        TCGArg * const args = &s->gen_opparam_buf[op->args];
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        oi_next = op->next;

        /* Count the arguments, and initialize the temps that are
           going to be used */
        if (opc == INDEX_op_call) {
            nb_oargs = op->callo;
            nb_iargs = op->calli;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                tmp = args[i];
                if (tmp != TCG_CALL_DUMMY_ARG) {
                    init_temp_info(tmp);
                }
            }
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                init_temp_info(args[i]);
            }
        }

        /* Do copy propagation */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            if (temp_is_copy(args[i])) {
                args[i] = find_better_copy(s, args[i]);
            }
        }

        /* For commutative operations make constant second argument */
        switch (opc) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(and):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(args[0], &args[1], &args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &args[0], &args[1])) {
                args[2] = tcg_swap_cond(args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(args[0], &args[1], &args[2])) {
                args[3] = tcg_swap_cond(args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &args[1], &args[2])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation.  */
            if (swap_commutative(args[0], &args[4], &args[3])) {
                args[5] = tcg_invert_cond(args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(args[0], &args[2], &args[4]);
            swap_commutative(args[1], &args[3], &args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(args[0], &args[2], &args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&args[0], &args[2])) {
                args[4] = tcg_swap_cond(args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&args[1], &args[3])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
           and "sub r, 0, a => neg r, a" cases.  */
        switch (opc) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
                tcg_opt_gen_movi(s, op, args, args[0], 0);
                continue;
            }
            break;
        CASE_OP_32_64(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (temp_is_const(args[2])) {
                    /* Proceed with possible constant folding. */
                    break;
                }
                if (opc == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                }
                if (!have_neg) {
                    break;
                }
                if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
                    op->opc = neg_op;
                    reset_temp(args[0]);
                    args[1] = args[2];
                    continue;
                }
            }
            break;
        CASE_OP_32_64(xor):
        CASE_OP_32_64(nand):
            if (!temp_is_const(args[1])
                && temp_is_const(args[2]) && temps[args[2]].val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (!temp_is_const(args[1])
                && temp_is_const(args[2]) && temps[args[2]].val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(andc):
            if (!temp_is_const(args[2])
                && temp_is_const(args[1]) && temps[args[1]].val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (!temp_is_const(args[2])
                && temp_is_const(args[1]) && temps[args[1]].val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                op->opc = not_op;
                reset_temp(args[0]);
                args[1] = args[i];
                continue;
            }
        default:
            break;
        }

        /* Simplify expression for "op r, a, const => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(andc):
            if (!temp_is_const(args[1])
                && temp_is_const(args[2]) && temps[args[2]].val == 0) {
                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
                continue;
            }
            break;
        CASE_OP_32_64(and):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (!temp_is_const(args[1])
                && temp_is_const(args[2]) && temps[args[2]].val == -1) {
                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
                continue;
            }
            break;
        default:
            break;
        }
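
        /* In the tracking below, a 1 bit in "mask" means the corresponding
         * result bit may be nonzero, while a 0 bit is known to be zero.
         * A zero "affected" mask means none of the possibly-nonzero bits of
         * the first input are cleared by the operation, so the whole op
         * reduces to a mov from that input.
         */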
        /* Simplify using known-zero bits.  Currently only ops with a single
           output argument are supported. */
        mask = -1;
        affected = -1;
        switch (opc) {
        CASE_OP_32_64(ext8s):
            if ((temps[args[1]].mask & 0x80) != 0) {
                break;
            }
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((temps[args[1]].mask & 0x8000) != 0) {
                break;
            }
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((temps[args[1]].mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = temps[args[2]].mask;
            if (temp_is_const(args[2])) {
        and_const:
                affected = temps[args[1]].mask & ~mask;
            }
            mask = temps[args[1]].mask & mask;
            break;

        case INDEX_op_ext_i32_i64:
            if ((temps[args[1]].mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_extu_i32_i64:
            /* We do not compute affected as it is a size changing op. */
            mask = (uint32_t)temps[args[1]].mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros does not imply known-ones.  Therefore unless
               args[2] is constant, we can't infer anything from it. */
            if (temp_is_const(args[2])) {
                mask = ~temps[args[2]].mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set. */
            mask = temps[args[1]].mask;
            break;

        case INDEX_op_sar_i32:
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & 31;
                mask = (int32_t)temps[args[1]].mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & 63;
                mask = (int64_t)temps[args[1]].mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & 31;
                mask = (uint32_t)temps[args[1]].mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & 63;
                mask = (uint64_t)temps[args[1]].mask >> tmp;
            }
            break;

        case INDEX_op_extrl_i64_i32:
            mask = (uint32_t)temps[args[1]].mask;
            break;
        case INDEX_op_extrh_i64_i32:
            mask = (uint64_t)temps[args[1]].mask >> 32;
            break;

        CASE_OP_32_64(shl):
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1);
                mask = temps[args[1]].mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost.  */
            mask = -(temps[args[1]].mask & -temps[args[1]].mask);
            break;

        CASE_OP_32_64(deposit):
            mask = deposit64(temps[args[1]].mask, args[3], args[4],
                             temps[args[2]].mask);
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = temps[args[1]].mask | temps[args[2]].mask;
            break;

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = temps[args[3]].mask | temps[args[4]].mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOpIdx oi = args[nb_oargs + nb_iargs];
                TCGMemOp mop = get_memop(oi);
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }
        /* 32-bit ops generate 32-bit results.  For the purposes of the
           "result is zero" test below, we can ignore high bits, but for
           further optimizations we need to record that the high bits
           contain garbage.  */
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }

        if (partmask == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, op, args, args[0], 0);
            continue;
        }
        if (affected == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_mov(s, op, args, args[0], args[1]);
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64(and):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if ((temp_is_const(args[2]) && temps[args[2]].val == 0)) {
                tcg_opt_gen_movi(s, op, args, args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
            if (temps_are_copies(args[1], args[2])) {
                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64(andc):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(xor):
            if (temps_are_copies(args[1], args[2])) {
                tcg_opt_gen_movi(s, op, args, args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted to arguments by the
           register allocator where needed and possible.  Also detect
           copies. */
        switch (opc) {
        CASE_OP_32_64(mov):
            tcg_opt_gen_mov(s, op, args, args[0], args[1]);
            break;
        CASE_OP_32_64(movi):
            tcg_opt_gen_movi(s, op, args, args[0], args[1]);
            break;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
        case INDEX_op_ext_i32_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            if (temp_is_const(args[1])) {
                tmp = do_constant_folding(opc, temps[args[1]].val, 0);
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (temp_is_const(args[1]) && temp_is_const(args[2])) {
                tmp = do_constant_folding(opc, temps[args[1]].val,
                                          temps[args[2]].val);
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (temp_is_const(args[1]) && temp_is_const(args[2])) {
                tmp = deposit64(temps[args[1]].val, args[3], args[4],
                                temps[args[2]].val);
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;
        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]);
            if (tmp != 2) {
                if (tmp) {
                    reset_all_temps(nb_temps);
                    op->opc = INDEX_op_br;
                    args[0] = args[3];
                } else {
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]);
            if (tmp != 2) {
                tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]);
                break;
            }
            goto do_default;

        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (temp_is_const(args[2]) && temp_is_const(args[3])
                && temp_is_const(args[4]) && temp_is_const(args[5])) {
                uint32_t al = temps[args[2]].val;
                uint32_t ah = temps[args[3]].val;
                uint32_t bl = temps[args[4]].val;
                uint32_t bh = temps[args[5]].val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
                TCGArg *args2 = &s->gen_opparam_buf[op2->args];

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                rl = args[0];
                rh = args[1];
                tcg_opt_gen_movi(s, op, args, rl, (int32_t)a);
                tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(a >> 32));

                /* We've done all we need to do with the movi.  Skip it. */
                oi_next = op2->next;
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (temp_is_const(args[2]) && temp_is_const(args[3])) {
                uint32_t a = temps[args[2]].val;
                uint32_t b = temps[args[3]].val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
                TCGArg *args2 = &s->gen_opparam_buf[op2->args];

                rl = args[0];
                rh = args[1];
                tcg_opt_gen_movi(s, op, args, rl, (int32_t)r);
                tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(r >> 32));

                /* We've done all we need to do with the movi.  Skip it. */
                oi_next = op2->next;
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
            if (tmp != 2) {
                if (tmp) {
            do_brcond_true:
                    reset_all_temps(nb_temps);
                    op->opc = INDEX_op_br;
                    args[0] = args[5];
                } else {
            do_brcond_false:
                    tcg_op_remove(s, op);
                }
            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
                       && temp_is_const(args[2]) && temps[args[2]].val == 0
                       && temp_is_const(args[3]) && temps[args[3]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
            do_brcond_high:
                reset_all_temps(nb_temps);
                op->opc = INDEX_op_brcond_i32;
                args[0] = args[1];
                args[1] = args[3];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[0], args[2], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[1], args[3], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_brcond_low:
                reset_all_temps(nb_temps);
                op->opc = INDEX_op_brcond_i32;
                args[1] = args[2];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[0], args[2], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[1], args[3], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
            if (tmp != 2) {
            do_setcond_const:
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
                       && temp_is_const(args[3]) && temps[args[3]].val == 0
                       && temp_is_const(args[4]) && temps[args[4]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
            do_setcond_high:
                reset_temp(args[0]);
                temps[args[0]].mask = 1;
                op->opc = INDEX_op_setcond_i32;
                args[1] = args[2];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[1], args[3], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[2], args[4], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_setcond_low:
                reset_temp(args[0]);
                temps[args[0]].mask = 1;
                op->opc = INDEX_op_setcond_i32;
                args[2] = args[3];
                args[3] = args[5];
            } else if (args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[1], args[3], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[2], args[4], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_call:
            if (!(args[nb_oargs + nb_iargs + 1]
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    if (test_bit(i, temps_used.l)) {
                        reset_temp(i);
                    }
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute the operation result) so no propagation
               is done.  We trash everything if the operation is the end
               of a basic block, otherwise we only trash the output args.
               "mask" is the non-zero bits mask for the first output arg.  */
            if (def->flags & TCG_OPF_BB_END) {
                reset_all_temps(nb_temps);
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far). */
                    if (i == 0) {
                        temps[args[i]].mask = mask;
                    }
                }
            }
            break;
        }

        /* Eliminate duplicate and redundant fence instructions.  */
        if (prev_mb_args) {
            switch (opc) {
            case INDEX_op_mb:
                /* Merge two barriers of the same type into one,
                 * or a weaker barrier into a stronger one,
                 * or two weaker barriers into a stronger one.
                 *   mb X; mb Y => mb X|Y
                 *   mb; strl => mb; st
                 *   ldaq; mb => ld; mb
                 *   ldaq; strl => ld; mb; st
                 * Other combinations are also merged into a strong
                 * barrier.  This is stricter than specified but for
                 * the purposes of TCG is better than not optimizing.
                 */
                prev_mb_args[0] |= args[0];
                tcg_op_remove(s, op);
                break;

            default:
                /* Opcodes that end the block stop the optimization.  */
                if ((def->flags & TCG_OPF_BB_END) == 0) {
                    break;
                }
                /* fallthru */
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_call:
                /* Opcodes that touch guest memory stop the optimization.  */
                prev_mb_args = NULL;
                break;
            }
        } else if (opc == INDEX_op_mb) {
            prev_mb_args = args;
        }
    }
}