1 /* 2 * Optimizations for Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2010 Samsung Electronics. 5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include "config.h" 27 28 #include <stdlib.h> 29 #include <stdio.h> 30 31 #include "qemu-common.h" 32 #include "tcg-op.h" 33 34 #define CASE_OP_32_64(x) \ 35 glue(glue(case INDEX_op_, x), _i32): \ 36 glue(glue(case INDEX_op_, x), _i64) 37 38 typedef enum { 39 TCG_TEMP_UNDEF = 0, 40 TCG_TEMP_CONST, 41 TCG_TEMP_COPY, 42 } tcg_temp_state; 43 44 struct tcg_temp_info { 45 tcg_temp_state state; 46 uint16_t prev_copy; 47 uint16_t next_copy; 48 tcg_target_ulong val; 49 tcg_target_ulong mask; 50 }; 51 52 static struct tcg_temp_info temps[TCG_MAX_TEMPS]; 53 54 /* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one copy, remove 55 the copy flag from the left temp. */ 56 static void reset_temp(TCGArg temp) 57 { 58 if (temps[temp].state == TCG_TEMP_COPY) { 59 if (temps[temp].prev_copy == temps[temp].next_copy) { 60 temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF; 61 } else { 62 temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; 63 temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; 64 } 65 } 66 temps[temp].state = TCG_TEMP_UNDEF; 67 temps[temp].mask = -1; 68 } 69 70 static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op, 71 TCGOpcode opc, int nargs) 72 { 73 int oi = s->gen_next_op_idx; 74 int pi = s->gen_next_parm_idx; 75 int prev = old_op->prev; 76 int next = old_op - s->gen_op_buf; 77 TCGOp *new_op; 78 79 tcg_debug_assert(oi < OPC_BUF_SIZE); 80 tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE); 81 s->gen_next_op_idx = oi + 1; 82 s->gen_next_parm_idx = pi + nargs; 83 84 new_op = &s->gen_op_buf[oi]; 85 *new_op = (TCGOp){ 86 .opc = opc, 87 .args = pi, 88 .prev = prev, 89 .next = next 90 }; 91 if (prev >= 0) { 92 s->gen_op_buf[prev].next = oi; 93 } else { 94 s->gen_first_op_idx = oi; 95 } 96 old_op->prev = oi; 97 98 return new_op; 99 } 100 101 /* Reset all temporaries, given that there are NB_TEMPS of them. */ 102 static void reset_all_temps(int nb_temps) 103 { 104 int i; 105 for (i = 0; i < nb_temps; i++) { 106 temps[i].state = TCG_TEMP_UNDEF; 107 temps[i].mask = -1; 108 } 109 } 110 111 static int op_bits(TCGOpcode op) 112 { 113 const TCGOpDef *def = &tcg_op_defs[op]; 114 return def->flags & TCG_OPF_64BIT ? 64 : 32; 115 } 116 117 static TCGOpcode op_to_mov(TCGOpcode op) 118 { 119 switch (op_bits(op)) { 120 case 32: 121 return INDEX_op_mov_i32; 122 case 64: 123 return INDEX_op_mov_i64; 124 default: 125 fprintf(stderr, "op_to_mov: unexpected return value of " 126 "function op_bits.\n"); 127 tcg_abort(); 128 } 129 } 130 131 static TCGOpcode op_to_movi(TCGOpcode op) 132 { 133 switch (op_bits(op)) { 134 case 32: 135 return INDEX_op_movi_i32; 136 case 64: 137 return INDEX_op_movi_i64; 138 default: 139 fprintf(stderr, "op_to_movi: unexpected return value of " 140 "function op_bits.\n"); 141 tcg_abort(); 142 } 143 } 144 145 static TCGArg find_better_copy(TCGContext *s, TCGArg temp) 146 { 147 TCGArg i; 148 149 /* If this is already a global, we can't do better. */ 150 if (temp < s->nb_globals) { 151 return temp; 152 } 153 154 /* Search for a global first. */ 155 for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) { 156 if (i < s->nb_globals) { 157 return i; 158 } 159 } 160 161 /* If it is a temp, search for a temp local. */ 162 if (!s->temps[temp].temp_local) { 163 for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) { 164 if (s->temps[i].temp_local) { 165 return i; 166 } 167 } 168 } 169 170 /* Failure to find a better representation, return the same temp. */ 171 return temp; 172 } 173 174 static bool temps_are_copies(TCGArg arg1, TCGArg arg2) 175 { 176 TCGArg i; 177 178 if (arg1 == arg2) { 179 return true; 180 } 181 182 if (temps[arg1].state != TCG_TEMP_COPY 183 || temps[arg2].state != TCG_TEMP_COPY) { 184 return false; 185 } 186 187 for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) { 188 if (i == arg2) { 189 return true; 190 } 191 } 192 193 return false; 194 } 195 196 static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, 197 TCGOpcode old_op, TCGArg dst, TCGArg src) 198 { 199 TCGOpcode new_op = op_to_mov(old_op); 200 tcg_target_ulong mask; 201 202 op->opc = new_op; 203 204 reset_temp(dst); 205 mask = temps[src].mask; 206 if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) { 207 /* High bits of the destination are now garbage. */ 208 mask |= ~0xffffffffull; 209 } 210 temps[dst].mask = mask; 211 212 assert(temps[src].state != TCG_TEMP_CONST); 213 214 if (s->temps[src].type == s->temps[dst].type) { 215 if (temps[src].state != TCG_TEMP_COPY) { 216 temps[src].state = TCG_TEMP_COPY; 217 temps[src].next_copy = src; 218 temps[src].prev_copy = src; 219 } 220 temps[dst].state = TCG_TEMP_COPY; 221 temps[dst].next_copy = temps[src].next_copy; 222 temps[dst].prev_copy = src; 223 temps[temps[dst].next_copy].prev_copy = dst; 224 temps[src].next_copy = dst; 225 } 226 227 args[0] = dst; 228 args[1] = src; 229 } 230 231 static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args, 232 TCGOpcode old_op, TCGArg dst, TCGArg val) 233 { 234 TCGOpcode new_op = op_to_movi(old_op); 235 tcg_target_ulong mask; 236 237 op->opc = new_op; 238 239 reset_temp(dst); 240 temps[dst].state = TCG_TEMP_CONST; 241 temps[dst].val = val; 242 mask = val; 243 if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) { 244 /* High bits of the destination are now garbage. */ 245 mask |= ~0xffffffffull; 246 } 247 temps[dst].mask = mask; 248 249 args[0] = dst; 250 args[1] = val; 251 } 252 253 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) 254 { 255 uint64_t l64, h64; 256 257 switch (op) { 258 CASE_OP_32_64(add): 259 return x + y; 260 261 CASE_OP_32_64(sub): 262 return x - y; 263 264 CASE_OP_32_64(mul): 265 return x * y; 266 267 CASE_OP_32_64(and): 268 return x & y; 269 270 CASE_OP_32_64(or): 271 return x | y; 272 273 CASE_OP_32_64(xor): 274 return x ^ y; 275 276 case INDEX_op_shl_i32: 277 return (uint32_t)x << (y & 31); 278 279 case INDEX_op_shl_i64: 280 return (uint64_t)x << (y & 63); 281 282 case INDEX_op_shr_i32: 283 return (uint32_t)x >> (y & 31); 284 285 case INDEX_op_trunc_shr_i32: 286 case INDEX_op_shr_i64: 287 return (uint64_t)x >> (y & 63); 288 289 case INDEX_op_sar_i32: 290 return (int32_t)x >> (y & 31); 291 292 case INDEX_op_sar_i64: 293 return (int64_t)x >> (y & 63); 294 295 case INDEX_op_rotr_i32: 296 return ror32(x, y & 31); 297 298 case INDEX_op_rotr_i64: 299 return ror64(x, y & 63); 300 301 case INDEX_op_rotl_i32: 302 return rol32(x, y & 31); 303 304 case INDEX_op_rotl_i64: 305 return rol64(x, y & 63); 306 307 CASE_OP_32_64(not): 308 return ~x; 309 310 CASE_OP_32_64(neg): 311 return -x; 312 313 CASE_OP_32_64(andc): 314 return x & ~y; 315 316 CASE_OP_32_64(orc): 317 return x | ~y; 318 319 CASE_OP_32_64(eqv): 320 return ~(x ^ y); 321 322 CASE_OP_32_64(nand): 323 return ~(x & y); 324 325 CASE_OP_32_64(nor): 326 return ~(x | y); 327 328 CASE_OP_32_64(ext8s): 329 return (int8_t)x; 330 331 CASE_OP_32_64(ext16s): 332 return (int16_t)x; 333 334 CASE_OP_32_64(ext8u): 335 return (uint8_t)x; 336 337 CASE_OP_32_64(ext16u): 338 return (uint16_t)x; 339 340 case INDEX_op_ext32s_i64: 341 return (int32_t)x; 342 343 case INDEX_op_ext32u_i64: 344 return (uint32_t)x; 345 346 case INDEX_op_muluh_i32: 347 return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; 348 case INDEX_op_mulsh_i32: 349 return ((int64_t)(int32_t)x * (int32_t)y) >> 32; 350 351 case INDEX_op_muluh_i64: 352 mulu64(&l64, &h64, x, y); 353 return h64; 354 case INDEX_op_mulsh_i64: 355 muls64(&l64, &h64, x, y); 356 return h64; 357 358 case INDEX_op_div_i32: 359 /* Avoid crashing on divide by zero, otherwise undefined. */ 360 return (int32_t)x / ((int32_t)y ? : 1); 361 case INDEX_op_divu_i32: 362 return (uint32_t)x / ((uint32_t)y ? : 1); 363 case INDEX_op_div_i64: 364 return (int64_t)x / ((int64_t)y ? : 1); 365 case INDEX_op_divu_i64: 366 return (uint64_t)x / ((uint64_t)y ? : 1); 367 368 case INDEX_op_rem_i32: 369 return (int32_t)x % ((int32_t)y ? : 1); 370 case INDEX_op_remu_i32: 371 return (uint32_t)x % ((uint32_t)y ? : 1); 372 case INDEX_op_rem_i64: 373 return (int64_t)x % ((int64_t)y ? : 1); 374 case INDEX_op_remu_i64: 375 return (uint64_t)x % ((uint64_t)y ? : 1); 376 377 default: 378 fprintf(stderr, 379 "Unrecognized operation %d in do_constant_folding.\n", op); 380 tcg_abort(); 381 } 382 } 383 384 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y) 385 { 386 TCGArg res = do_constant_folding_2(op, x, y); 387 if (op_bits(op) == 32) { 388 res &= 0xffffffff; 389 } 390 return res; 391 } 392 393 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) 394 { 395 switch (c) { 396 case TCG_COND_EQ: 397 return x == y; 398 case TCG_COND_NE: 399 return x != y; 400 case TCG_COND_LT: 401 return (int32_t)x < (int32_t)y; 402 case TCG_COND_GE: 403 return (int32_t)x >= (int32_t)y; 404 case TCG_COND_LE: 405 return (int32_t)x <= (int32_t)y; 406 case TCG_COND_GT: 407 return (int32_t)x > (int32_t)y; 408 case TCG_COND_LTU: 409 return x < y; 410 case TCG_COND_GEU: 411 return x >= y; 412 case TCG_COND_LEU: 413 return x <= y; 414 case TCG_COND_GTU: 415 return x > y; 416 default: 417 tcg_abort(); 418 } 419 } 420 421 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) 422 { 423 switch (c) { 424 case TCG_COND_EQ: 425 return x == y; 426 case TCG_COND_NE: 427 return x != y; 428 case TCG_COND_LT: 429 return (int64_t)x < (int64_t)y; 430 case TCG_COND_GE: 431 return (int64_t)x >= (int64_t)y; 432 case TCG_COND_LE: 433 return (int64_t)x <= (int64_t)y; 434 case TCG_COND_GT: 435 return (int64_t)x > (int64_t)y; 436 case TCG_COND_LTU: 437 return x < y; 438 case TCG_COND_GEU: 439 return x >= y; 440 case TCG_COND_LEU: 441 return x <= y; 442 case TCG_COND_GTU: 443 return x > y; 444 default: 445 tcg_abort(); 446 } 447 } 448 449 static bool do_constant_folding_cond_eq(TCGCond c) 450 { 451 switch (c) { 452 case TCG_COND_GT: 453 case TCG_COND_LTU: 454 case TCG_COND_LT: 455 case TCG_COND_GTU: 456 case TCG_COND_NE: 457 return 0; 458 case TCG_COND_GE: 459 case TCG_COND_GEU: 460 case TCG_COND_LE: 461 case TCG_COND_LEU: 462 case TCG_COND_EQ: 463 return 1; 464 default: 465 tcg_abort(); 466 } 467 } 468 469 /* Return 2 if the condition can't be simplified, and the result 470 of the condition (0 or 1) if it can */ 471 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, 472 TCGArg y, TCGCond c) 473 { 474 if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) { 475 switch (op_bits(op)) { 476 case 32: 477 return do_constant_folding_cond_32(temps[x].val, temps[y].val, c); 478 case 64: 479 return do_constant_folding_cond_64(temps[x].val, temps[y].val, c); 480 default: 481 tcg_abort(); 482 } 483 } else if (temps_are_copies(x, y)) { 484 return do_constant_folding_cond_eq(c); 485 } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) { 486 switch (c) { 487 case TCG_COND_LTU: 488 return 0; 489 case TCG_COND_GEU: 490 return 1; 491 default: 492 return 2; 493 } 494 } else { 495 return 2; 496 } 497 } 498 499 /* Return 2 if the condition can't be simplified, and the result 500 of the condition (0 or 1) if it can */ 501 static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c) 502 { 503 TCGArg al = p1[0], ah = p1[1]; 504 TCGArg bl = p2[0], bh = p2[1]; 505 506 if (temps[bl].state == TCG_TEMP_CONST 507 && temps[bh].state == TCG_TEMP_CONST) { 508 uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val; 509 510 if (temps[al].state == TCG_TEMP_CONST 511 && temps[ah].state == TCG_TEMP_CONST) { 512 uint64_t a; 513 a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val; 514 return do_constant_folding_cond_64(a, b, c); 515 } 516 if (b == 0) { 517 switch (c) { 518 case TCG_COND_LTU: 519 return 0; 520 case TCG_COND_GEU: 521 return 1; 522 default: 523 break; 524 } 525 } 526 } 527 if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) { 528 return do_constant_folding_cond_eq(c); 529 } 530 return 2; 531 } 532 533 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) 534 { 535 TCGArg a1 = *p1, a2 = *p2; 536 int sum = 0; 537 sum += temps[a1].state == TCG_TEMP_CONST; 538 sum -= temps[a2].state == TCG_TEMP_CONST; 539 540 /* Prefer the constant in second argument, and then the form 541 op a, a, b, which is better handled on non-RISC hosts. */ 542 if (sum > 0 || (sum == 0 && dest == a2)) { 543 *p1 = a2; 544 *p2 = a1; 545 return true; 546 } 547 return false; 548 } 549 550 static bool swap_commutative2(TCGArg *p1, TCGArg *p2) 551 { 552 int sum = 0; 553 sum += temps[p1[0]].state == TCG_TEMP_CONST; 554 sum += temps[p1[1]].state == TCG_TEMP_CONST; 555 sum -= temps[p2[0]].state == TCG_TEMP_CONST; 556 sum -= temps[p2[1]].state == TCG_TEMP_CONST; 557 if (sum > 0) { 558 TCGArg t; 559 t = p1[0], p1[0] = p2[0], p2[0] = t; 560 t = p1[1], p1[1] = p2[1], p2[1] = t; 561 return true; 562 } 563 return false; 564 } 565 566 /* Propagate constants and copies, fold constant expressions. */ 567 static void tcg_constant_folding(TCGContext *s) 568 { 569 int oi, oi_next, nb_temps, nb_globals; 570 571 /* Array VALS has an element for each temp. 572 If this temp holds a constant then its value is kept in VALS' element. 573 If this temp is a copy of other ones then the other copies are 574 available through the doubly linked circular list. */ 575 576 nb_temps = s->nb_temps; 577 nb_globals = s->nb_globals; 578 reset_all_temps(nb_temps); 579 580 for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) { 581 tcg_target_ulong mask, partmask, affected; 582 int nb_oargs, nb_iargs, i; 583 TCGArg tmp; 584 585 TCGOp * const op = &s->gen_op_buf[oi]; 586 TCGArg * const args = &s->gen_opparam_buf[op->args]; 587 TCGOpcode opc = op->opc; 588 const TCGOpDef *def = &tcg_op_defs[opc]; 589 590 oi_next = op->next; 591 if (opc == INDEX_op_call) { 592 nb_oargs = op->callo; 593 nb_iargs = op->calli; 594 } else { 595 nb_oargs = def->nb_oargs; 596 nb_iargs = def->nb_iargs; 597 } 598 599 /* Do copy propagation */ 600 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 601 if (temps[args[i]].state == TCG_TEMP_COPY) { 602 args[i] = find_better_copy(s, args[i]); 603 } 604 } 605 606 /* For commutative operations make constant second argument */ 607 switch (opc) { 608 CASE_OP_32_64(add): 609 CASE_OP_32_64(mul): 610 CASE_OP_32_64(and): 611 CASE_OP_32_64(or): 612 CASE_OP_32_64(xor): 613 CASE_OP_32_64(eqv): 614 CASE_OP_32_64(nand): 615 CASE_OP_32_64(nor): 616 CASE_OP_32_64(muluh): 617 CASE_OP_32_64(mulsh): 618 swap_commutative(args[0], &args[1], &args[2]); 619 break; 620 CASE_OP_32_64(brcond): 621 if (swap_commutative(-1, &args[0], &args[1])) { 622 args[2] = tcg_swap_cond(args[2]); 623 } 624 break; 625 CASE_OP_32_64(setcond): 626 if (swap_commutative(args[0], &args[1], &args[2])) { 627 args[3] = tcg_swap_cond(args[3]); 628 } 629 break; 630 CASE_OP_32_64(movcond): 631 if (swap_commutative(-1, &args[1], &args[2])) { 632 args[5] = tcg_swap_cond(args[5]); 633 } 634 /* For movcond, we canonicalize the "false" input reg to match 635 the destination reg so that the tcg backend can implement 636 a "move if true" operation. */ 637 if (swap_commutative(args[0], &args[4], &args[3])) { 638 args[5] = tcg_invert_cond(args[5]); 639 } 640 break; 641 CASE_OP_32_64(add2): 642 swap_commutative(args[0], &args[2], &args[4]); 643 swap_commutative(args[1], &args[3], &args[5]); 644 break; 645 CASE_OP_32_64(mulu2): 646 CASE_OP_32_64(muls2): 647 swap_commutative(args[0], &args[2], &args[3]); 648 break; 649 case INDEX_op_brcond2_i32: 650 if (swap_commutative2(&args[0], &args[2])) { 651 args[4] = tcg_swap_cond(args[4]); 652 } 653 break; 654 case INDEX_op_setcond2_i32: 655 if (swap_commutative2(&args[1], &args[3])) { 656 args[5] = tcg_swap_cond(args[5]); 657 } 658 break; 659 default: 660 break; 661 } 662 663 /* Simplify expressions for "shift/rot r, 0, a => movi r, 0", 664 and "sub r, 0, a => neg r, a" case. */ 665 switch (opc) { 666 CASE_OP_32_64(shl): 667 CASE_OP_32_64(shr): 668 CASE_OP_32_64(sar): 669 CASE_OP_32_64(rotl): 670 CASE_OP_32_64(rotr): 671 if (temps[args[1]].state == TCG_TEMP_CONST 672 && temps[args[1]].val == 0) { 673 tcg_opt_gen_movi(s, op, args, opc, args[0], 0); 674 continue; 675 } 676 break; 677 CASE_OP_32_64(sub): 678 { 679 TCGOpcode neg_op; 680 bool have_neg; 681 682 if (temps[args[2]].state == TCG_TEMP_CONST) { 683 /* Proceed with possible constant folding. */ 684 break; 685 } 686 if (opc == INDEX_op_sub_i32) { 687 neg_op = INDEX_op_neg_i32; 688 have_neg = TCG_TARGET_HAS_neg_i32; 689 } else { 690 neg_op = INDEX_op_neg_i64; 691 have_neg = TCG_TARGET_HAS_neg_i64; 692 } 693 if (!have_neg) { 694 break; 695 } 696 if (temps[args[1]].state == TCG_TEMP_CONST 697 && temps[args[1]].val == 0) { 698 op->opc = neg_op; 699 reset_temp(args[0]); 700 args[1] = args[2]; 701 continue; 702 } 703 } 704 break; 705 CASE_OP_32_64(xor): 706 CASE_OP_32_64(nand): 707 if (temps[args[1]].state != TCG_TEMP_CONST 708 && temps[args[2]].state == TCG_TEMP_CONST 709 && temps[args[2]].val == -1) { 710 i = 1; 711 goto try_not; 712 } 713 break; 714 CASE_OP_32_64(nor): 715 if (temps[args[1]].state != TCG_TEMP_CONST 716 && temps[args[2]].state == TCG_TEMP_CONST 717 && temps[args[2]].val == 0) { 718 i = 1; 719 goto try_not; 720 } 721 break; 722 CASE_OP_32_64(andc): 723 if (temps[args[2]].state != TCG_TEMP_CONST 724 && temps[args[1]].state == TCG_TEMP_CONST 725 && temps[args[1]].val == -1) { 726 i = 2; 727 goto try_not; 728 } 729 break; 730 CASE_OP_32_64(orc): 731 CASE_OP_32_64(eqv): 732 if (temps[args[2]].state != TCG_TEMP_CONST 733 && temps[args[1]].state == TCG_TEMP_CONST 734 && temps[args[1]].val == 0) { 735 i = 2; 736 goto try_not; 737 } 738 break; 739 try_not: 740 { 741 TCGOpcode not_op; 742 bool have_not; 743 744 if (def->flags & TCG_OPF_64BIT) { 745 not_op = INDEX_op_not_i64; 746 have_not = TCG_TARGET_HAS_not_i64; 747 } else { 748 not_op = INDEX_op_not_i32; 749 have_not = TCG_TARGET_HAS_not_i32; 750 } 751 if (!have_not) { 752 break; 753 } 754 op->opc = not_op; 755 reset_temp(args[0]); 756 args[1] = args[i]; 757 continue; 758 } 759 default: 760 break; 761 } 762 763 /* Simplify expression for "op r, a, const => mov r, a" cases */ 764 switch (opc) { 765 CASE_OP_32_64(add): 766 CASE_OP_32_64(sub): 767 CASE_OP_32_64(shl): 768 CASE_OP_32_64(shr): 769 CASE_OP_32_64(sar): 770 CASE_OP_32_64(rotl): 771 CASE_OP_32_64(rotr): 772 CASE_OP_32_64(or): 773 CASE_OP_32_64(xor): 774 CASE_OP_32_64(andc): 775 if (temps[args[1]].state != TCG_TEMP_CONST 776 && temps[args[2]].state == TCG_TEMP_CONST 777 && temps[args[2]].val == 0) { 778 goto do_mov3; 779 } 780 break; 781 CASE_OP_32_64(and): 782 CASE_OP_32_64(orc): 783 CASE_OP_32_64(eqv): 784 if (temps[args[1]].state != TCG_TEMP_CONST 785 && temps[args[2]].state == TCG_TEMP_CONST 786 && temps[args[2]].val == -1) { 787 goto do_mov3; 788 } 789 break; 790 do_mov3: 791 if (temps_are_copies(args[0], args[1])) { 792 tcg_op_remove(s, op); 793 } else { 794 tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]); 795 } 796 continue; 797 default: 798 break; 799 } 800 801 /* Simplify using known-zero bits. Currently only ops with a single 802 output argument is supported. */ 803 mask = -1; 804 affected = -1; 805 switch (opc) { 806 CASE_OP_32_64(ext8s): 807 if ((temps[args[1]].mask & 0x80) != 0) { 808 break; 809 } 810 CASE_OP_32_64(ext8u): 811 mask = 0xff; 812 goto and_const; 813 CASE_OP_32_64(ext16s): 814 if ((temps[args[1]].mask & 0x8000) != 0) { 815 break; 816 } 817 CASE_OP_32_64(ext16u): 818 mask = 0xffff; 819 goto and_const; 820 case INDEX_op_ext32s_i64: 821 if ((temps[args[1]].mask & 0x80000000) != 0) { 822 break; 823 } 824 case INDEX_op_ext32u_i64: 825 mask = 0xffffffffU; 826 goto and_const; 827 828 CASE_OP_32_64(and): 829 mask = temps[args[2]].mask; 830 if (temps[args[2]].state == TCG_TEMP_CONST) { 831 and_const: 832 affected = temps[args[1]].mask & ~mask; 833 } 834 mask = temps[args[1]].mask & mask; 835 break; 836 837 CASE_OP_32_64(andc): 838 /* Known-zeros does not imply known-ones. Therefore unless 839 args[2] is constant, we can't infer anything from it. */ 840 if (temps[args[2]].state == TCG_TEMP_CONST) { 841 mask = ~temps[args[2]].mask; 842 goto and_const; 843 } 844 /* But we certainly know nothing outside args[1] may be set. */ 845 mask = temps[args[1]].mask; 846 break; 847 848 case INDEX_op_sar_i32: 849 if (temps[args[2]].state == TCG_TEMP_CONST) { 850 tmp = temps[args[2]].val & 31; 851 mask = (int32_t)temps[args[1]].mask >> tmp; 852 } 853 break; 854 case INDEX_op_sar_i64: 855 if (temps[args[2]].state == TCG_TEMP_CONST) { 856 tmp = temps[args[2]].val & 63; 857 mask = (int64_t)temps[args[1]].mask >> tmp; 858 } 859 break; 860 861 case INDEX_op_shr_i32: 862 if (temps[args[2]].state == TCG_TEMP_CONST) { 863 tmp = temps[args[2]].val & 31; 864 mask = (uint32_t)temps[args[1]].mask >> tmp; 865 } 866 break; 867 case INDEX_op_shr_i64: 868 if (temps[args[2]].state == TCG_TEMP_CONST) { 869 tmp = temps[args[2]].val & 63; 870 mask = (uint64_t)temps[args[1]].mask >> tmp; 871 } 872 break; 873 874 case INDEX_op_trunc_shr_i32: 875 mask = (uint64_t)temps[args[1]].mask >> args[2]; 876 break; 877 878 CASE_OP_32_64(shl): 879 if (temps[args[2]].state == TCG_TEMP_CONST) { 880 tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1); 881 mask = temps[args[1]].mask << tmp; 882 } 883 break; 884 885 CASE_OP_32_64(neg): 886 /* Set to 1 all bits to the left of the rightmost. */ 887 mask = -(temps[args[1]].mask & -temps[args[1]].mask); 888 break; 889 890 CASE_OP_32_64(deposit): 891 mask = deposit64(temps[args[1]].mask, args[3], args[4], 892 temps[args[2]].mask); 893 break; 894 895 CASE_OP_32_64(or): 896 CASE_OP_32_64(xor): 897 mask = temps[args[1]].mask | temps[args[2]].mask; 898 break; 899 900 CASE_OP_32_64(setcond): 901 case INDEX_op_setcond2_i32: 902 mask = 1; 903 break; 904 905 CASE_OP_32_64(movcond): 906 mask = temps[args[3]].mask | temps[args[4]].mask; 907 break; 908 909 CASE_OP_32_64(ld8u): 910 mask = 0xff; 911 break; 912 CASE_OP_32_64(ld16u): 913 mask = 0xffff; 914 break; 915 case INDEX_op_ld32u_i64: 916 mask = 0xffffffffu; 917 break; 918 919 CASE_OP_32_64(qemu_ld): 920 { 921 TCGMemOp mop = args[nb_oargs + nb_iargs]; 922 if (!(mop & MO_SIGN)) { 923 mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1; 924 } 925 } 926 break; 927 928 default: 929 break; 930 } 931 932 /* 32-bit ops generate 32-bit results. For the result is zero test 933 below, we can ignore high bits, but for further optimizations we 934 need to record that the high bits contain garbage. */ 935 partmask = mask; 936 if (!(def->flags & TCG_OPF_64BIT)) { 937 mask |= ~(tcg_target_ulong)0xffffffffu; 938 partmask &= 0xffffffffu; 939 affected &= 0xffffffffu; 940 } 941 942 if (partmask == 0) { 943 assert(nb_oargs == 1); 944 tcg_opt_gen_movi(s, op, args, opc, args[0], 0); 945 continue; 946 } 947 if (affected == 0) { 948 assert(nb_oargs == 1); 949 if (temps_are_copies(args[0], args[1])) { 950 tcg_op_remove(s, op); 951 } else if (temps[args[1]].state != TCG_TEMP_CONST) { 952 tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]); 953 } else { 954 tcg_opt_gen_movi(s, op, args, opc, 955 args[0], temps[args[1]].val); 956 } 957 continue; 958 } 959 960 /* Simplify expression for "op r, a, 0 => movi r, 0" cases */ 961 switch (opc) { 962 CASE_OP_32_64(and): 963 CASE_OP_32_64(mul): 964 CASE_OP_32_64(muluh): 965 CASE_OP_32_64(mulsh): 966 if ((temps[args[2]].state == TCG_TEMP_CONST 967 && temps[args[2]].val == 0)) { 968 tcg_opt_gen_movi(s, op, args, opc, args[0], 0); 969 continue; 970 } 971 break; 972 default: 973 break; 974 } 975 976 /* Simplify expression for "op r, a, a => mov r, a" cases */ 977 switch (opc) { 978 CASE_OP_32_64(or): 979 CASE_OP_32_64(and): 980 if (temps_are_copies(args[1], args[2])) { 981 if (temps_are_copies(args[0], args[1])) { 982 tcg_op_remove(s, op); 983 } else { 984 tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]); 985 } 986 continue; 987 } 988 break; 989 default: 990 break; 991 } 992 993 /* Simplify expression for "op r, a, a => movi r, 0" cases */ 994 switch (opc) { 995 CASE_OP_32_64(andc): 996 CASE_OP_32_64(sub): 997 CASE_OP_32_64(xor): 998 if (temps_are_copies(args[1], args[2])) { 999 tcg_opt_gen_movi(s, op, args, opc, args[0], 0); 1000 continue; 1001 } 1002 break; 1003 default: 1004 break; 1005 } 1006 1007 /* Propagate constants through copy operations and do constant 1008 folding. Constants will be substituted to arguments by register 1009 allocator where needed and possible. Also detect copies. */ 1010 switch (opc) { 1011 CASE_OP_32_64(mov): 1012 if (temps_are_copies(args[0], args[1])) { 1013 tcg_op_remove(s, op); 1014 break; 1015 } 1016 if (temps[args[1]].state != TCG_TEMP_CONST) { 1017 tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]); 1018 break; 1019 } 1020 /* Source argument is constant. Rewrite the operation and 1021 let movi case handle it. */ 1022 args[1] = temps[args[1]].val; 1023 /* fallthrough */ 1024 CASE_OP_32_64(movi): 1025 tcg_opt_gen_movi(s, op, args, opc, args[0], args[1]); 1026 break; 1027 1028 CASE_OP_32_64(not): 1029 CASE_OP_32_64(neg): 1030 CASE_OP_32_64(ext8s): 1031 CASE_OP_32_64(ext8u): 1032 CASE_OP_32_64(ext16s): 1033 CASE_OP_32_64(ext16u): 1034 case INDEX_op_ext32s_i64: 1035 case INDEX_op_ext32u_i64: 1036 if (temps[args[1]].state == TCG_TEMP_CONST) { 1037 tmp = do_constant_folding(opc, temps[args[1]].val, 0); 1038 tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); 1039 break; 1040 } 1041 goto do_default; 1042 1043 case INDEX_op_trunc_shr_i32: 1044 if (temps[args[1]].state == TCG_TEMP_CONST) { 1045 tmp = do_constant_folding(opc, temps[args[1]].val, args[2]); 1046 tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); 1047 break; 1048 } 1049 goto do_default; 1050 1051 CASE_OP_32_64(add): 1052 CASE_OP_32_64(sub): 1053 CASE_OP_32_64(mul): 1054 CASE_OP_32_64(or): 1055 CASE_OP_32_64(and): 1056 CASE_OP_32_64(xor): 1057 CASE_OP_32_64(shl): 1058 CASE_OP_32_64(shr): 1059 CASE_OP_32_64(sar): 1060 CASE_OP_32_64(rotl): 1061 CASE_OP_32_64(rotr): 1062 CASE_OP_32_64(andc): 1063 CASE_OP_32_64(orc): 1064 CASE_OP_32_64(eqv): 1065 CASE_OP_32_64(nand): 1066 CASE_OP_32_64(nor): 1067 CASE_OP_32_64(muluh): 1068 CASE_OP_32_64(mulsh): 1069 CASE_OP_32_64(div): 1070 CASE_OP_32_64(divu): 1071 CASE_OP_32_64(rem): 1072 CASE_OP_32_64(remu): 1073 if (temps[args[1]].state == TCG_TEMP_CONST 1074 && temps[args[2]].state == TCG_TEMP_CONST) { 1075 tmp = do_constant_folding(opc, temps[args[1]].val, 1076 temps[args[2]].val); 1077 tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); 1078 break; 1079 } 1080 goto do_default; 1081 1082 CASE_OP_32_64(deposit): 1083 if (temps[args[1]].state == TCG_TEMP_CONST 1084 && temps[args[2]].state == TCG_TEMP_CONST) { 1085 tmp = deposit64(temps[args[1]].val, args[3], args[4], 1086 temps[args[2]].val); 1087 tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); 1088 break; 1089 } 1090 goto do_default; 1091 1092 CASE_OP_32_64(setcond): 1093 tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]); 1094 if (tmp != 2) { 1095 tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); 1096 break; 1097 } 1098 goto do_default; 1099 1100 CASE_OP_32_64(brcond): 1101 tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]); 1102 if (tmp != 2) { 1103 if (tmp) { 1104 reset_all_temps(nb_temps); 1105 op->opc = INDEX_op_br; 1106 args[0] = args[3]; 1107 } else { 1108 tcg_op_remove(s, op); 1109 } 1110 break; 1111 } 1112 goto do_default; 1113 1114 CASE_OP_32_64(movcond): 1115 tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]); 1116 if (tmp != 2) { 1117 if (temps_are_copies(args[0], args[4-tmp])) { 1118 tcg_op_remove(s, op); 1119 } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) { 1120 tcg_opt_gen_movi(s, op, args, opc, 1121 args[0], temps[args[4-tmp]].val); 1122 } else { 1123 tcg_opt_gen_mov(s, op, args, opc, args[0], args[4-tmp]); 1124 } 1125 break; 1126 } 1127 goto do_default; 1128 1129 case INDEX_op_add2_i32: 1130 case INDEX_op_sub2_i32: 1131 if (temps[args[2]].state == TCG_TEMP_CONST 1132 && temps[args[3]].state == TCG_TEMP_CONST 1133 && temps[args[4]].state == TCG_TEMP_CONST 1134 && temps[args[5]].state == TCG_TEMP_CONST) { 1135 uint32_t al = temps[args[2]].val; 1136 uint32_t ah = temps[args[3]].val; 1137 uint32_t bl = temps[args[4]].val; 1138 uint32_t bh = temps[args[5]].val; 1139 uint64_t a = ((uint64_t)ah << 32) | al; 1140 uint64_t b = ((uint64_t)bh << 32) | bl; 1141 TCGArg rl, rh; 1142 TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2); 1143 TCGArg *args2 = &s->gen_opparam_buf[op2->args]; 1144 1145 if (opc == INDEX_op_add2_i32) { 1146 a += b; 1147 } else { 1148 a -= b; 1149 } 1150 1151 rl = args[0]; 1152 rh = args[1]; 1153 tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)a); 1154 tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(a >> 32)); 1155 1156 /* We've done all we need to do with the movi. Skip it. */ 1157 oi_next = op2->next; 1158 break; 1159 } 1160 goto do_default; 1161 1162 case INDEX_op_mulu2_i32: 1163 if (temps[args[2]].state == TCG_TEMP_CONST 1164 && temps[args[3]].state == TCG_TEMP_CONST) { 1165 uint32_t a = temps[args[2]].val; 1166 uint32_t b = temps[args[3]].val; 1167 uint64_t r = (uint64_t)a * b; 1168 TCGArg rl, rh; 1169 TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2); 1170 TCGArg *args2 = &s->gen_opparam_buf[op2->args]; 1171 1172 rl = args[0]; 1173 rh = args[1]; 1174 tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)r); 1175 tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(r >> 32)); 1176 1177 /* We've done all we need to do with the movi. Skip it. */ 1178 oi_next = op2->next; 1179 break; 1180 } 1181 goto do_default; 1182 1183 case INDEX_op_brcond2_i32: 1184 tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]); 1185 if (tmp != 2) { 1186 if (tmp) { 1187 do_brcond_true: 1188 reset_all_temps(nb_temps); 1189 op->opc = INDEX_op_br; 1190 args[0] = args[5]; 1191 } else { 1192 do_brcond_false: 1193 tcg_op_remove(s, op); 1194 } 1195 } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE) 1196 && temps[args[2]].state == TCG_TEMP_CONST 1197 && temps[args[3]].state == TCG_TEMP_CONST 1198 && temps[args[2]].val == 0 1199 && temps[args[3]].val == 0) { 1200 /* Simplify LT/GE comparisons vs zero to a single compare 1201 vs the high word of the input. */ 1202 do_brcond_high: 1203 reset_all_temps(nb_temps); 1204 op->opc = INDEX_op_brcond_i32; 1205 args[0] = args[1]; 1206 args[1] = args[3]; 1207 args[2] = args[4]; 1208 args[3] = args[5]; 1209 } else if (args[4] == TCG_COND_EQ) { 1210 /* Simplify EQ comparisons where one of the pairs 1211 can be simplified. */ 1212 tmp = do_constant_folding_cond(INDEX_op_brcond_i32, 1213 args[0], args[2], TCG_COND_EQ); 1214 if (tmp == 0) { 1215 goto do_brcond_false; 1216 } else if (tmp == 1) { 1217 goto do_brcond_high; 1218 } 1219 tmp = do_constant_folding_cond(INDEX_op_brcond_i32, 1220 args[1], args[3], TCG_COND_EQ); 1221 if (tmp == 0) { 1222 goto do_brcond_false; 1223 } else if (tmp != 1) { 1224 goto do_default; 1225 } 1226 do_brcond_low: 1227 reset_all_temps(nb_temps); 1228 op->opc = INDEX_op_brcond_i32; 1229 args[1] = args[2]; 1230 args[2] = args[4]; 1231 args[3] = args[5]; 1232 } else if (args[4] == TCG_COND_NE) { 1233 /* Simplify NE comparisons where one of the pairs 1234 can be simplified. */ 1235 tmp = do_constant_folding_cond(INDEX_op_brcond_i32, 1236 args[0], args[2], TCG_COND_NE); 1237 if (tmp == 0) { 1238 goto do_brcond_high; 1239 } else if (tmp == 1) { 1240 goto do_brcond_true; 1241 } 1242 tmp = do_constant_folding_cond(INDEX_op_brcond_i32, 1243 args[1], args[3], TCG_COND_NE); 1244 if (tmp == 0) { 1245 goto do_brcond_low; 1246 } else if (tmp == 1) { 1247 goto do_brcond_true; 1248 } 1249 goto do_default; 1250 } else { 1251 goto do_default; 1252 } 1253 break; 1254 1255 case INDEX_op_setcond2_i32: 1256 tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]); 1257 if (tmp != 2) { 1258 do_setcond_const: 1259 tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); 1260 } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE) 1261 && temps[args[3]].state == TCG_TEMP_CONST 1262 && temps[args[4]].state == TCG_TEMP_CONST 1263 && temps[args[3]].val == 0 1264 && temps[args[4]].val == 0) { 1265 /* Simplify LT/GE comparisons vs zero to a single compare 1266 vs the high word of the input. */ 1267 do_setcond_high: 1268 reset_temp(args[0]); 1269 temps[args[0]].mask = 1; 1270 op->opc = INDEX_op_setcond_i32; 1271 args[1] = args[2]; 1272 args[2] = args[4]; 1273 args[3] = args[5]; 1274 } else if (args[5] == TCG_COND_EQ) { 1275 /* Simplify EQ comparisons where one of the pairs 1276 can be simplified. */ 1277 tmp = do_constant_folding_cond(INDEX_op_setcond_i32, 1278 args[1], args[3], TCG_COND_EQ); 1279 if (tmp == 0) { 1280 goto do_setcond_const; 1281 } else if (tmp == 1) { 1282 goto do_setcond_high; 1283 } 1284 tmp = do_constant_folding_cond(INDEX_op_setcond_i32, 1285 args[2], args[4], TCG_COND_EQ); 1286 if (tmp == 0) { 1287 goto do_setcond_high; 1288 } else if (tmp != 1) { 1289 goto do_default; 1290 } 1291 do_setcond_low: 1292 reset_temp(args[0]); 1293 temps[args[0]].mask = 1; 1294 op->opc = INDEX_op_setcond_i32; 1295 args[2] = args[3]; 1296 args[3] = args[5]; 1297 } else if (args[5] == TCG_COND_NE) { 1298 /* Simplify NE comparisons where one of the pairs 1299 can be simplified. */ 1300 tmp = do_constant_folding_cond(INDEX_op_setcond_i32, 1301 args[1], args[3], TCG_COND_NE); 1302 if (tmp == 0) { 1303 goto do_setcond_high; 1304 } else if (tmp == 1) { 1305 goto do_setcond_const; 1306 } 1307 tmp = do_constant_folding_cond(INDEX_op_setcond_i32, 1308 args[2], args[4], TCG_COND_NE); 1309 if (tmp == 0) { 1310 goto do_setcond_low; 1311 } else if (tmp == 1) { 1312 goto do_setcond_const; 1313 } 1314 goto do_default; 1315 } else { 1316 goto do_default; 1317 } 1318 break; 1319 1320 case INDEX_op_call: 1321 if (!(args[nb_oargs + nb_iargs + 1] 1322 & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) { 1323 for (i = 0; i < nb_globals; i++) { 1324 reset_temp(i); 1325 } 1326 } 1327 goto do_reset_output; 1328 1329 default: 1330 do_default: 1331 /* Default case: we know nothing about operation (or were unable 1332 to compute the operation result) so no propagation is done. 1333 We trash everything if the operation is the end of a basic 1334 block, otherwise we only trash the output args. "mask" is 1335 the non-zero bits mask for the first output arg. */ 1336 if (def->flags & TCG_OPF_BB_END) { 1337 reset_all_temps(nb_temps); 1338 } else { 1339 do_reset_output: 1340 for (i = 0; i < nb_oargs; i++) { 1341 reset_temp(args[i]); 1342 /* Save the corresponding known-zero bits mask for the 1343 first output argument (only one supported so far). */ 1344 if (i == 0) { 1345 temps[args[i]].mask = mask; 1346 } 1347 } 1348 } 1349 break; 1350 } 1351 } 1352 } 1353 1354 void tcg_optimize(TCGContext *s) 1355 { 1356 tcg_constant_folding(s); 1357 } 1358