/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "exec/cpu-common.h"
#include "tcg-op.h"

#define CASE_OP_32_64(x)                                \
        glue(glue(case INDEX_op_, x), _i32):            \
        glue(glue(case INDEX_op_, x), _i64)

struct tcg_temp_info {
    bool is_const;
    uint16_t prev_copy;
    uint16_t next_copy;
    tcg_target_ulong val;
    tcg_target_ulong mask;
};

static struct tcg_temp_info temps[TCG_MAX_TEMPS];
static TCGTempSet temps_used;

static inline bool temp_is_const(TCGArg arg)
{
    return temps[arg].is_const;
}

static inline bool temp_is_copy(TCGArg arg)
{
    return temps[arg].next_copy != arg;
}

/* Reset TEMP's state, possibly removing the temp from the list of copies. */
static void reset_temp(TCGArg temp)
{
    temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
    temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
    temps[temp].next_copy = temp;
    temps[temp].prev_copy = temp;
    temps[temp].is_const = false;
    temps[temp].mask = -1;
}

/* Reset all temporaries, given that there are NB_TEMPS of them. */
static void reset_all_temps(int nb_temps)
{
    bitmap_zero(temps_used.l, nb_temps);
}

/* Initialize and activate a temporary. */
static void init_temp_info(TCGArg temp)
{
    if (!test_bit(temp, temps_used.l)) {
        temps[temp].next_copy = temp;
        temps[temp].prev_copy = temp;
        temps[temp].is_const = false;
        temps[temp].mask = -1;
        set_bit(temp, temps_used.l);
    }
}
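
/*
 * Note on the copy tracking above: next_copy/prev_copy link each temp into
 * a circular doubly-linked list of temps currently known to hold the same
 * value; a temp that is not a copy points back at itself.  For example,
 * after propagating "mov t2, t1" and "mov t3, t1" the ring is
 * t1 -> t3 -> t2 -> t1 (new copies are spliced in right after the source),
 * and reset_temp(t2) splices t2 back out, leaving t1 -> t3 -> t1.
 */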

static int op_bits(TCGOpcode op)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    return def->flags & TCG_OPF_64BIT ? 64 : 32;
}

static TCGOpcode op_to_mov(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_mov_i32;
    case 64:
        return INDEX_op_mov_i64;
    default:
        fprintf(stderr, "op_to_mov: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGOpcode op_to_movi(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_movi_i32;
    case 64:
        return INDEX_op_movi_i64;
    default:
        fprintf(stderr, "op_to_movi: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
{
    TCGArg i;

    /* If this is already a global, we can't do better. */
    if (temp < s->nb_globals) {
        return temp;
    }

    /* Search for a global first. */
    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
        if (i < s->nb_globals) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local. */
    if (!s->temps[temp].temp_local) {
        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
            if (s->temps[i].temp_local) {
                return i;
            }
        }
    }

    /* Failing to find a better representation, return the same temp. */
    return temp;
}

static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
{
    TCGArg i;

    if (arg1 == arg2) {
        return true;
    }

    if (!temp_is_copy(arg1) || !temp_is_copy(arg2)) {
        return false;
    }

    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
        if (i == arg2) {
            return true;
        }
    }

    return false;
}

static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
                             TCGArg dst, TCGArg val)
{
    TCGOpcode new_op = op_to_movi(op->opc);
    tcg_target_ulong mask;

    op->opc = new_op;

    reset_temp(dst);
    temps[dst].is_const = true;
    temps[dst].val = val;
    mask = val;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
        /* High bits of the destination are now garbage. */
        mask |= ~0xffffffffull;
    }
    temps[dst].mask = mask;

    args[0] = dst;
    args[1] = val;
}
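
/*
 * temps[].mask records the bits that may be nonzero (the "known-zero" bits
 * are the zero bits of the mask).  For a constant the mask is just the
 * value, e.g. after "movi_i32 t0, 0x00ff" only the low 8 bits of t0 can be
 * set, plus, on a 64-bit host, the high 32 bits which now hold garbage.
 */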

static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
                            TCGArg dst, TCGArg src)
{
    if (temps_are_copies(dst, src)) {
        tcg_op_remove(s, op);
        return;
    }

    TCGOpcode new_op = op_to_mov(op->opc);
    tcg_target_ulong mask;

    op->opc = new_op;

    reset_temp(dst);
    mask = temps[src].mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage. */
        mask |= ~0xffffffffull;
    }
    temps[dst].mask = mask;

    if (s->temps[src].type == s->temps[dst].type) {
        temps[dst].next_copy = temps[src].next_copy;
        temps[dst].prev_copy = src;
        temps[temps[dst].next_copy].prev_copy = dst;
        temps[src].next_copy = dst;
        temps[dst].is_const = temps[src].is_const;
        temps[dst].val = temps[src].val;
    }

    args[0] = dst;
    args[1] = src;
}

static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined. */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}
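
/*
 * do_constant_folding_2 works in the full TCGArg width; the wrapper below
 * truncates the result of a 32-bit op back to 32 bits (sign-extended).
 * For example, on a 64-bit host folding "add_i32 0xffffffff + 1" yields
 * 0x100000000 above, which becomes (int32_t)0 once truncated.
 */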

static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    TCGArg res = do_constant_folding_2(op, x, y);
    if (op_bits(op) == 32) {
        res = (int32_t)res;
    }
    return res;
}

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    if (temp_is_const(x) && temp_is_const(y)) {
        switch (op_bits(op)) {
        case 32:
            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
        case 64:
            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
        default:
            tcg_abort();
        }
    } else if (temps_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (temp_is_const(y) && temps[y].val == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    } else {
        return 2;
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (temp_is_const(bl) && temp_is_const(bh)) {
        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;

        if (temp_is_const(al) && temp_is_const(ah)) {
            uint64_t a;
            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}
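
/*
 * Operand-order canonicalization for commutative ops: a constant operand
 * is moved to the second slot (e.g. "add_i32 t0, $5, t1" becomes
 * "add_i32 t0, t1, $5"), and otherwise an operand equal to the destination
 * is moved first, giving the "op a, a, b" form that two-address hosts
 * handle best.
 */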

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += temp_is_const(a1);
    sum -= temp_is_const(a2);

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += temp_is_const(p1[0]);
    sum += temp_is_const(p1[1]);
    sum -= temp_is_const(p2[0]);
    sum -= temp_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

/* Propagate constants and copies, fold constant expressions. */
void tcg_optimize(TCGContext *s)
{
    int oi, oi_next, nb_temps, nb_globals;
    TCGArg *prev_mb_args = NULL;

    /* Array TEMPS has an element for each temp.
       If this temp holds a constant then its value is kept in TEMPS' element.
       If this temp is a copy of other ones then the other copies are
       available through the doubly linked circular list. */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    reset_all_temps(nb_temps);

    for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
        tcg_target_ulong mask, partmask, affected;
        int nb_oargs, nb_iargs, i;
        TCGArg tmp;

        TCGOp * const op = &s->gen_op_buf[oi];
        TCGArg * const args = &s->gen_opparam_buf[op->args];
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        oi_next = op->next;

        /* Count the arguments, and initialize the temps that are
           going to be used */
        if (opc == INDEX_op_call) {
            nb_oargs = op->callo;
            nb_iargs = op->calli;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                tmp = args[i];
                if (tmp != TCG_CALL_DUMMY_ARG) {
                    init_temp_info(tmp);
                }
            }
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                init_temp_info(args[i]);
            }
        }

        /* Do copy propagation */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            if (temp_is_copy(args[i])) {
                args[i] = find_better_copy(s, args[i]);
            }
        }

        /* For commutative operations make constant second argument */
        switch (opc) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(and):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(args[0], &args[1], &args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &args[0], &args[1])) {
                args[2] = tcg_swap_cond(args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(args[0], &args[1], &args[2])) {
                args[3] = tcg_swap_cond(args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &args[1], &args[2])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation. */
            if (swap_commutative(args[0], &args[4], &args[3])) {
                args[5] = tcg_invert_cond(args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(args[0], &args[2], &args[4]);
            swap_commutative(args[1], &args[3], &args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(args[0], &args[2], &args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&args[0], &args[2])) {
                args[4] = tcg_swap_cond(args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&args[1], &args[3])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            break;
        default:
            break;
        }
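
        /*
         * Besides the rewrites named in the comment below, the following
         * switch turns ops with a constant all-ones or all-zero operand
         * into a plain NOT where the host supports it, e.g.
         * "xor r, a, -1 => not r, a" and "orc r, 0, a => not r, a".
         */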

        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
           and "sub r, 0, a => neg r, a" cases. */
        switch (opc) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
                tcg_opt_gen_movi(s, op, args, args[0], 0);
                continue;
            }
            break;
        CASE_OP_32_64(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (temp_is_const(args[2])) {
                    /* Proceed with possible constant folding. */
                    break;
                }
                if (opc == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                }
                if (!have_neg) {
                    break;
                }
                if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
                    op->opc = neg_op;
                    reset_temp(args[0]);
                    args[1] = args[2];
                    continue;
                }
            }
            break;
        CASE_OP_32_64(xor):
        CASE_OP_32_64(nand):
            if (!temp_is_const(args[1])
                && temp_is_const(args[2]) && temps[args[2]].val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (!temp_is_const(args[1])
                && temp_is_const(args[2]) && temps[args[2]].val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(andc):
            if (!temp_is_const(args[2])
                && temp_is_const(args[1]) && temps[args[1]].val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (!temp_is_const(args[2])
                && temp_is_const(args[1]) && temps[args[1]].val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                op->opc = not_op;
                reset_temp(args[0]);
                args[1] = args[i];
                continue;
            }
        default:
            break;
        }

        /* Simplify expression for "op r, a, const => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(andc):
            if (!temp_is_const(args[1])
                && temp_is_const(args[2]) && temps[args[2]].val == 0) {
                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
                continue;
            }
            break;
        CASE_OP_32_64(and):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (!temp_is_const(args[1])
                && temp_is_const(args[2]) && temps[args[2]].val == -1) {
                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
                continue;
            }
            break;
        default:
            break;
        }
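
        /*
         * The known-zero-bits pass below computes, for the output:
         *   mask     - bits of the result that may still be nonzero;
         *   affected - input bits that the operation can actually change
         *              (only tracked for the AND-like cases).
         * A zero result mask proves the result is zero, and affected == 0
         * proves the op leaves args[1] unchanged; both are rewritten right
         * after the switch.
         */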

        /* Simplify using known-zero bits. Currently only ops with a single
           output argument are supported. */
        mask = -1;
        affected = -1;
        switch (opc) {
        CASE_OP_32_64(ext8s):
            if ((temps[args[1]].mask & 0x80) != 0) {
                break;
            }
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((temps[args[1]].mask & 0x8000) != 0) {
                break;
            }
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((temps[args[1]].mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = temps[args[2]].mask;
            if (temp_is_const(args[2])) {
        and_const:
                affected = temps[args[1]].mask & ~mask;
            }
            mask = temps[args[1]].mask & mask;
            break;

        case INDEX_op_ext_i32_i64:
            if ((temps[args[1]].mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_extu_i32_i64:
            /* We do not compute affected as it is a size changing op. */
            mask = (uint32_t)temps[args[1]].mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros does not imply known-ones.  Therefore unless
               args[2] is constant, we can't infer anything from it. */
            if (temp_is_const(args[2])) {
                mask = ~temps[args[2]].mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set. */
            mask = temps[args[1]].mask;
            break;

        case INDEX_op_sar_i32:
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & 31;
                mask = (int32_t)temps[args[1]].mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & 63;
                mask = (int64_t)temps[args[1]].mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & 31;
                mask = (uint32_t)temps[args[1]].mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & 63;
                mask = (uint64_t)temps[args[1]].mask >> tmp;
            }
            break;

        case INDEX_op_extrl_i64_i32:
            mask = (uint32_t)temps[args[1]].mask;
            break;
        case INDEX_op_extrh_i64_i32:
            mask = (uint64_t)temps[args[1]].mask >> 32;
            break;

        CASE_OP_32_64(shl):
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1);
                mask = temps[args[1]].mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost. */
            mask = -(temps[args[1]].mask & -temps[args[1]].mask);
            break;

        CASE_OP_32_64(deposit):
            mask = deposit64(temps[args[1]].mask, args[3], args[4],
                             temps[args[2]].mask);
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = temps[args[1]].mask | temps[args[2]].mask;
            break;

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = temps[args[3]].mask | temps[args[4]].mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOpIdx oi = args[nb_oargs + nb_iargs];
                TCGMemOp mop = get_memop(oi);
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }
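
        /*
         * Worked example: for "and_i32 t0, t1, $0xff" the switch above sets
         * mask = temps[t1].mask & 0xff and affected = temps[t1].mask & ~0xff.
         * If every possibly-nonzero bit of t1 already lies within 0xff then
         * affected is 0, and the AND is replaced below by "mov t0, t1".
         */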

        /* 32-bit ops generate 32-bit results.  For the result-is-zero test
           below, we can ignore high bits, but for further optimizations we
           need to record that the high bits contain garbage. */
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }

        if (partmask == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, op, args, args[0], 0);
            continue;
        }
        if (affected == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_mov(s, op, args, args[0], args[1]);
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64(and):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if ((temp_is_const(args[2]) && temps[args[2]].val == 0)) {
                tcg_opt_gen_movi(s, op, args, args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
            if (temps_are_copies(args[1], args[2])) {
                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64(andc):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(xor):
            if (temps_are_copies(args[1], args[2])) {
                tcg_opt_gen_movi(s, op, args, args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted into arguments by the
           register allocator where needed and possible.  Also detect
           copies. */
        switch (opc) {
        CASE_OP_32_64(mov):
            tcg_opt_gen_mov(s, op, args, args[0], args[1]);
            break;
        CASE_OP_32_64(movi):
            tcg_opt_gen_movi(s, op, args, args[0], args[1]);
            break;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
        case INDEX_op_ext_i32_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            if (temp_is_const(args[1])) {
                tmp = do_constant_folding(opc, temps[args[1]].val, 0);
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (temp_is_const(args[1]) && temp_is_const(args[2])) {
                tmp = do_constant_folding(opc, temps[args[1]].val,
                                          temps[args[2]].val);
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (temp_is_const(args[1]) && temp_is_const(args[2])) {
                tmp = deposit64(temps[args[1]].val, args[3], args[4],
                                temps[args[2]].val);
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;
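
        /*
         * Example of the deposit fold above: deposit64(0x1234, 8, 8, 0xab)
         * replaces bits [8, 16) of 0x1234 with 0xab, giving 0xab34.
         */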

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]);
            if (tmp != 2) {
                if (tmp) {
                    reset_all_temps(nb_temps);
                    op->opc = INDEX_op_br;
                    args[0] = args[3];
                } else {
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]);
            if (tmp != 2) {
                tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]);
                break;
            }
            goto do_default;

        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (temp_is_const(args[2]) && temp_is_const(args[3])
                && temp_is_const(args[4]) && temp_is_const(args[5])) {
                uint32_t al = temps[args[2]].val;
                uint32_t ah = temps[args[3]].val;
                uint32_t bl = temps[args[4]].val;
                uint32_t bh = temps[args[5]].val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
                TCGArg *args2 = &s->gen_opparam_buf[op2->args];

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                rl = args[0];
                rh = args[1];
                tcg_opt_gen_movi(s, op, args, rl, (int32_t)a);
                tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(a >> 32));

                /* We've done all we need to do with the movi.  Skip it. */
                oi_next = op2->next;
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (temp_is_const(args[2]) && temp_is_const(args[3])) {
                uint32_t a = temps[args[2]].val;
                uint32_t b = temps[args[3]].val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
                TCGArg *args2 = &s->gen_opparam_buf[op2->args];

                rl = args[0];
                rh = args[1];
                tcg_opt_gen_movi(s, op, args, rl, (int32_t)r);
                tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(r >> 32));

                /* We've done all we need to do with the movi.  Skip it. */
                oi_next = op2->next;
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
            if (tmp != 2) {
                if (tmp) {
        do_brcond_true:
                    reset_all_temps(nb_temps);
                    op->opc = INDEX_op_br;
                    args[0] = args[5];
                } else {
        do_brcond_false:
                    tcg_op_remove(s, op);
                }
            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
                       && temp_is_const(args[2]) && temps[args[2]].val == 0
                       && temp_is_const(args[3]) && temps[args[3]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
        do_brcond_high:
                reset_all_temps(nb_temps);
                op->opc = INDEX_op_brcond_i32;
                args[0] = args[1];
                args[1] = args[3];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[0], args[2], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[1], args[3], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
        do_brcond_low:
                reset_all_temps(nb_temps);
                op->opc = INDEX_op_brcond_i32;
                args[1] = args[2];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[0], args[2], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[1], args[3], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
            if (tmp != 2) {
        do_setcond_const:
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
                       && temp_is_const(args[3]) && temps[args[3]].val == 0
                       && temp_is_const(args[4]) && temps[args[4]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
        do_setcond_high:
                reset_temp(args[0]);
                temps[args[0]].mask = 1;
                op->opc = INDEX_op_setcond_i32;
                args[1] = args[2];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[1], args[3], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[2], args[4], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
        do_setcond_low:
                reset_temp(args[0]);
                temps[args[0]].mask = 1;
                op->opc = INDEX_op_setcond_i32;
                args[2] = args[3];
                args[3] = args[5];
            } else if (args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[1], args[3], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[2], args[4], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_call:
            if (!(args[nb_oargs + nb_iargs + 1]
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    if (test_bit(i, temps_used.l)) {
                        reset_temp(i);
                    }
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute the operation result) so no propagation
               is done.  We trash everything if the operation is the end
               of a basic block, otherwise we only trash the output args.
               "mask" is the non-zero bits mask for the first output arg. */
            if (def->flags & TCG_OPF_BB_END) {
                reset_all_temps(nb_temps);
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far). */
                    if (i == 0) {
                        temps[args[i]].mask = mask;
                    }
                }
            }
            break;
        }

        /* Eliminate duplicate and redundant fence instructions. */
        if (prev_mb_args) {
            switch (opc) {
            case INDEX_op_mb:
                /* Merge two barriers of the same type into one,
                 * or a weaker barrier into a stronger one,
                 * or two weaker barriers into a stronger one.
                 *   mb X; mb Y => mb X|Y
                 *   mb; strl => mb; st
                 *   ldaq; mb => ld; mb
                 *   ldaq; strl => ld; mb; st
                 * Other combinations are also merged into a strong
                 * barrier.  This is stricter than specified but for
                 * the purposes of TCG is better than not optimizing.
                 */
                prev_mb_args[0] |= args[0];
                tcg_op_remove(s, op);
                break;

            default:
                /* Opcodes that end the block stop the optimization. */
                if ((def->flags & TCG_OPF_BB_END) == 0) {
                    break;
                }
                /* fallthru */
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_call:
                /* Opcodes that touch guest memory stop the optimization. */
                prev_mb_args = NULL;
                break;
            }
        } else if (opc == INDEX_op_mb) {
            prev_mb_args = args;
        }
    }
}