/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "config.h"

#include <stdlib.h>
#include <stdio.h>

#include "qemu-common.h"
#include "tcg-op.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

typedef enum {
    TCG_TEMP_UNDEF = 0,
    TCG_TEMP_CONST,
    TCG_TEMP_COPY,
} tcg_temp_state;

struct tcg_temp_info {
    tcg_temp_state state;
    uint16_t prev_copy;
    uint16_t next_copy;
    tcg_target_ulong val;
    tcg_target_ulong mask;
};

static struct tcg_temp_info temps[TCG_MAX_TEMPS];
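
/* Usage notes for the structures above (derived from the code below):
 *
 *   - Temps that are copies of one another are linked into a doubly
 *     linked circular list through prev_copy/next_copy.  E.g. after
 *     "mov t1, t0; mov t2, t0" the list is t0 <-> t1 <-> t2 <-> t0,
 *     so every member can enumerate all of its copies.
 *
 *   - "mask" tracks known-zero bits: a 0 bit in temps[i].mask means that
 *     bit of the value is known to be zero, while -1 means nothing is
 *     known.  E.g. after an ext8u the mask is 0xff.
 */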
/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP only had one copy, remove
   the copy flag from the left temp.  */
static void reset_temp(TCGArg temp)
{
    if (temps[temp].state == TCG_TEMP_COPY) {
        if (temps[temp].prev_copy == temps[temp].next_copy) {
            temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
        } else {
            temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
            temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
        }
    }
    temps[temp].state = TCG_TEMP_UNDEF;
    temps[temp].mask = -1;
}

static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
                               TCGOpcode opc, int nargs)
{
    int oi = s->gen_next_op_idx;
    int pi = s->gen_next_parm_idx;
    int prev = old_op->prev;
    int next = old_op - s->gen_op_buf;
    TCGOp *new_op;

    tcg_debug_assert(oi < OPC_BUF_SIZE);
    tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
    s->gen_next_op_idx = oi + 1;
    s->gen_next_parm_idx = pi + nargs;

    new_op = &s->gen_op_buf[oi];
    *new_op = (TCGOp){
        .opc = opc,
        .args = pi,
        .prev = prev,
        .next = next
    };
    if (prev >= 0) {
        s->gen_op_buf[prev].next = oi;
    } else {
        s->gen_first_op_idx = oi;
    }
    old_op->prev = oi;

    return new_op;
}

/* Reset all temporaries, given that there are NB_TEMPS of them. */
static void reset_all_temps(int nb_temps)
{
    int i;
    for (i = 0; i < nb_temps; i++) {
        temps[i].state = TCG_TEMP_UNDEF;
        temps[i].mask = -1;
    }
}

static int op_bits(TCGOpcode op)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    return def->flags & TCG_OPF_64BIT ? 64 : 32;
}

static TCGOpcode op_to_mov(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_mov_i32;
    case 64:
        return INDEX_op_mov_i64;
    default:
        fprintf(stderr, "op_to_mov: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGOpcode op_to_movi(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_movi_i32;
    case 64:
        return INDEX_op_movi_i64;
    default:
        fprintf(stderr, "op_to_movi: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
{
    TCGArg i;

    /* If this is already a global, we can't do better. */
    if (temp < s->nb_globals) {
        return temp;
    }

    /* Search for a global first. */
    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
        if (i < s->nb_globals) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local. */
    if (!s->temps[temp].temp_local) {
        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
            if (s->temps[i].temp_local) {
                return i;
            }
        }
    }

    /* Failure to find a better representation, return the same temp. */
    return temp;
}

static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
{
    TCGArg i;

    if (arg1 == arg2) {
        return true;
    }

    if (temps[arg1].state != TCG_TEMP_COPY
        || temps[arg2].state != TCG_TEMP_COPY) {
        return false;
    }

    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
        if (i == arg2) {
            return true;
        }
    }

    return false;
}

static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
                            TCGOpcode old_op, TCGArg dst, TCGArg src)
{
    TCGOpcode new_op = op_to_mov(old_op);
    tcg_target_ulong mask;

    op->opc = new_op;

    reset_temp(dst);
    mask = temps[src].mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage. */
        mask |= ~0xffffffffull;
    }
    temps[dst].mask = mask;

    assert(temps[src].state != TCG_TEMP_CONST);

    if (s->temps[src].type == s->temps[dst].type) {
        if (temps[src].state != TCG_TEMP_COPY) {
            temps[src].state = TCG_TEMP_COPY;
            temps[src].next_copy = src;
            temps[src].prev_copy = src;
        }
        temps[dst].state = TCG_TEMP_COPY;
        temps[dst].next_copy = temps[src].next_copy;
        temps[dst].prev_copy = src;
        temps[temps[dst].next_copy].prev_copy = dst;
        temps[src].next_copy = dst;
    }

    args[0] = dst;
    args[1] = src;
}
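
/* Example of the mask handling above: on a 64-bit host a mov_i32 defines
 * only the low 32 bits of the destination, so even if the source had high
 * bits known zero, the destination's high bits must be treated as unknown
 * (hence mask |= ~0xffffffffull).
 */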
static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
                             TCGOpcode old_op, TCGArg dst, TCGArg val)
{
    TCGOpcode new_op = op_to_movi(old_op);
    tcg_target_ulong mask;

    op->opc = new_op;

    reset_temp(dst);
    temps[dst].state = TCG_TEMP_CONST;
    temps[dst].val = val;
    mask = val;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
        /* High bits of the destination are now garbage. */
        mask |= ~0xffffffffull;
    }
    temps[dst].mask = mask;

    args[0] = dst;
    args[1] = val;
}

static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_trunc_shr_i32:
    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined. */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}
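
/* Note on the division cases above: "(y ? : 1)" is the GNU C "elvis"
 * shorthand for "(y ? y : 1)".  It only keeps the host from trapping on
 * a divide by zero while folding; as the comment says, the guest-visible
 * result of dividing by zero is undefined at this level anyway.
 */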
static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    TCGArg res = do_constant_folding_2(op, x, y);
    if (op_bits(op) == 32) {
        res &= 0xffffffff;
    }
    return res;
}

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
        switch (op_bits(op)) {
        case 32:
            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
        case 64:
            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
        default:
            tcg_abort();
        }
    } else if (temps_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    } else {
        return 2;
    }
}
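
/* Example of the y == 0 special case above: for unsigned comparisons,
 * "x <u 0" is never true and "x >=u 0" is always true, so LTU folds to 0
 * and GEU folds to 1 even though x itself is not a constant.
 */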
/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (temps[bl].state == TCG_TEMP_CONST
        && temps[bh].state == TCG_TEMP_CONST) {
        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;

        if (temps[al].state == TCG_TEMP_CONST
            && temps[ah].state == TCG_TEMP_CONST) {
            uint64_t a;
            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += temps[a1].state == TCG_TEMP_CONST;
    sum -= temps[a2].state == TCG_TEMP_CONST;

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += temps[p1[0]].state == TCG_TEMP_CONST;
    sum += temps[p1[1]].state == TCG_TEMP_CONST;
    sum -= temps[p2[0]].state == TCG_TEMP_CONST;
    sum -= temps[p2[1]].state == TCG_TEMP_CONST;
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}
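
/* Canonicalization example: swap_commutative() rewrites
 *     add_i32 t0, $5, t1   ->   add_i32 t0, t1, $5
 * and, when the destination aliases the second input,
 *     add_i32 t0, t1, t0   ->   add_i32 t0, t0, t1
 * so the simplifications below only need to look for a constant (or the
 * destination) in the second operand.
 */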
/* Propagate constants and copies, fold constant expressions. */
static void tcg_constant_folding(TCGContext *s)
{
    int oi, oi_next, nb_temps, nb_globals;

    /* The temps[] array has an element for each temp.
       If a temp holds a constant then its value is kept in that element.
       If a temp is a copy of other ones then the other copies are
       available through the doubly linked circular list. */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    reset_all_temps(nb_temps);

    for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
        tcg_target_ulong mask, partmask, affected;
        int nb_oargs, nb_iargs, i;
        TCGArg tmp;

        TCGOp * const op = &s->gen_op_buf[oi];
        TCGArg * const args = &s->gen_opparam_buf[op->args];
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        oi_next = op->next;
        if (opc == INDEX_op_call) {
            nb_oargs = op->callo;
            nb_iargs = op->calli;
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
        }

        /* Do copy propagation */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            if (temps[args[i]].state == TCG_TEMP_COPY) {
                args[i] = find_better_copy(s, args[i]);
            }
        }

        /* For commutative operations make constant second argument */
        switch (opc) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(and):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(args[0], &args[1], &args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &args[0], &args[1])) {
                args[2] = tcg_swap_cond(args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(args[0], &args[1], &args[2])) {
                args[3] = tcg_swap_cond(args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &args[1], &args[2])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation. */
            if (swap_commutative(args[0], &args[4], &args[3])) {
                args[5] = tcg_invert_cond(args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(args[0], &args[2], &args[4]);
            swap_commutative(args[1], &args[3], &args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(args[0], &args[2], &args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&args[0], &args[2])) {
                args[4] = tcg_swap_cond(args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&args[1], &args[3])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
           and "sub r, 0, a => neg r, a" case. */
        switch (opc) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[1]].val == 0) {
                tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
                continue;
            }
            break;
        CASE_OP_32_64(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (temps[args[2]].state == TCG_TEMP_CONST) {
                    /* Proceed with possible constant folding. */
                    break;
                }
                if (opc == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                }
                if (!have_neg) {
                    break;
                }
                if (temps[args[1]].state == TCG_TEMP_CONST
                    && temps[args[1]].val == 0) {
                    op->opc = neg_op;
                    reset_temp(args[0]);
                    args[1] = args[2];
                    continue;
                }
            }
            break;
        CASE_OP_32_64(xor):
        CASE_OP_32_64(nand):
            if (temps[args[1]].state != TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (temps[args[1]].state != TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(andc):
            if (temps[args[2]].state != TCG_TEMP_CONST
                && temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[1]].val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (temps[args[2]].state != TCG_TEMP_CONST
                && temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[1]].val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                op->opc = not_op;
                reset_temp(args[0]);
                args[1] = args[i];
                continue;
            }
        default:
            break;
        }
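
        /* The switch above strength-reduced ops to cheaper equivalents
         * ("sub r, 0, a" => neg, "xor r, a, -1" => not) when the backend
         * provides them; the switch below removes ops that are identities
         * on their first input.
         */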
        /* Simplify expression for "op r, a, const => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(andc):
            if (temps[args[1]].state != TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0) {
                goto do_mov3;
            }
            break;
        CASE_OP_32_64(and):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (temps[args[1]].state != TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == -1) {
                goto do_mov3;
            }
            break;
        do_mov3:
            if (temps_are_copies(args[0], args[1])) {
                tcg_op_remove(s, op);
            } else {
                tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
            }
            continue;
        default:
            break;
        }

        /* Simplify using known-zero bits.  Currently only ops with a single
           output argument are supported. */
        mask = -1;
        affected = -1;
        switch (opc) {
        CASE_OP_32_64(ext8s):
            if ((temps[args[1]].mask & 0x80) != 0) {
                break;
            }
            /* fall through */
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((temps[args[1]].mask & 0x8000) != 0) {
                break;
            }
            /* fall through */
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((temps[args[1]].mask & 0x80000000) != 0) {
                break;
            }
            /* fall through */
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = temps[args[2]].mask;
            if (temps[args[2]].state == TCG_TEMP_CONST) {
        and_const:
                affected = temps[args[1]].mask & ~mask;
            }
            mask = temps[args[1]].mask & mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros does not imply known-ones.  Therefore unless
               args[2] is constant, we can't infer anything from it. */
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                mask = ~temps[args[2]].mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set. */
            mask = temps[args[1]].mask;
            break;

        case INDEX_op_sar_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & 31;
                mask = (int32_t)temps[args[1]].mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & 63;
                mask = (int64_t)temps[args[1]].mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & 31;
                mask = (uint32_t)temps[args[1]].mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & 63;
                mask = (uint64_t)temps[args[1]].mask >> tmp;
            }
            break;

        case INDEX_op_trunc_shr_i32:
            mask = (uint64_t)temps[args[1]].mask >> args[2];
            break;

        CASE_OP_32_64(shl):
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1);
                mask = temps[args[1]].mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost. */
            mask = -(temps[args[1]].mask & -temps[args[1]].mask);
            break;

        CASE_OP_32_64(deposit):
            mask = deposit64(temps[args[1]].mask, args[3], args[4],
                             temps[args[2]].mask);
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = temps[args[1]].mask | temps[args[2]].mask;
            break;

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = temps[args[3]].mask | temps[args[4]].mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOpIdx oi = args[nb_oargs + nb_iargs];
                TCGMemOp mop = get_memop(oi);
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }
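
        /* At this point "mask" holds the bits of the result that may be
         * nonzero, and "affected" the input bits that can still influence
         * it.  Example: for "and_i32 r, a, $0xff" we get
         * affected = temps[a].mask & ~0xff, so if every possibly-set bit
         * of a already lies within 0xff, affected is 0 and the op is just
         * a copy of a.
         */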
        /* 32-bit ops generate 32-bit results.  For the result-is-zero test
           below, we can ignore high bits, but for further optimizations we
           need to record that the high bits contain garbage. */
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }

        if (partmask == 0) {
            assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
            continue;
        }
        if (affected == 0) {
            assert(nb_oargs == 1);
            if (temps_are_copies(args[0], args[1])) {
                tcg_op_remove(s, op);
            } else if (temps[args[1]].state != TCG_TEMP_CONST) {
                tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
            } else {
                tcg_opt_gen_movi(s, op, args, opc,
                                 args[0], temps[args[1]].val);
            }
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64(and):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if ((temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0)) {
                tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
            if (temps_are_copies(args[1], args[2])) {
                if (temps_are_copies(args[0], args[1])) {
                    tcg_op_remove(s, op);
                } else if (temps[args[1]].state != TCG_TEMP_CONST) {
                    tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
                } else {
                    tcg_opt_gen_movi(s, op, args, opc,
                                     args[0], temps[args[1]].val);
                }
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64(andc):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(xor):
            if (temps_are_copies(args[1], args[2])) {
                tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }
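
        /* Example of the last rule: "xor_i32 r, a, a" is always zero, so
         * it becomes "movi_i32 r, $0" no matter what is known about a.
         * The switch below then performs full constant folding, e.g.
         * "add_i32 r, $1, $2" becomes "movi_i32 r, $3".
         */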
        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted to arguments by register
           allocator where needed and possible.  Also detect copies. */
        switch (opc) {
        CASE_OP_32_64(mov):
            if (temps_are_copies(args[0], args[1])) {
                tcg_op_remove(s, op);
                break;
            }
            if (temps[args[1]].state != TCG_TEMP_CONST) {
                tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
                break;
            }
            /* Source argument is constant.  Rewrite the operation and
               let movi case handle it. */
            args[1] = temps[args[1]].val;
            /* fallthrough */
        CASE_OP_32_64(movi):
            tcg_opt_gen_movi(s, op, args, opc, args[0], args[1]);
            break;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                tmp = do_constant_folding(opc, temps[args[1]].val, 0);
                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
                break;
            }
            goto do_default;

        case INDEX_op_trunc_shr_i32:
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                tmp = do_constant_folding(opc, temps[args[1]].val, args[2]);
                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = do_constant_folding(opc, temps[args[1]].val,
                                          temps[args[2]].val);
                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = deposit64(temps[args[1]].val, args[3], args[4],
                                temps[args[2]].val);
                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]);
            if (tmp != 2) {
                if (tmp) {
                    reset_all_temps(nb_temps);
                    op->opc = INDEX_op_br;
                    args[0] = args[3];
                } else {
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]);
            if (tmp != 2) {
                if (temps_are_copies(args[0], args[4-tmp])) {
                    tcg_op_remove(s, op);
                } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
                    tcg_opt_gen_movi(s, op, args, opc,
                                     args[0], temps[args[4-tmp]].val);
                } else {
                    tcg_opt_gen_mov(s, op, args, opc, args[0], args[4-tmp]);
                }
                break;
            }
            goto do_default;
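
        /* The double-word cases below fold 64-bit arithmetic expressed as
         * pairs of 32-bit halves.  With all-constant inputs, the original
         * op is rewritten in place into a movi of the low half and a second
         * movi for the high half is spliced in via insert_op_before().
         */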
        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[3]].state == TCG_TEMP_CONST
                && temps[args[4]].state == TCG_TEMP_CONST
                && temps[args[5]].state == TCG_TEMP_CONST) {
                uint32_t al = temps[args[2]].val;
                uint32_t ah = temps[args[3]].val;
                uint32_t bl = temps[args[4]].val;
                uint32_t bh = temps[args[5]].val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
                TCGArg *args2 = &s->gen_opparam_buf[op2->args];

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                rl = args[0];
                rh = args[1];
                tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)a);
                tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(a >> 32));

                /* We've done all we need to do with the movi.  Skip it. */
                oi_next = op2->next;
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[3]].state == TCG_TEMP_CONST) {
                uint32_t a = temps[args[2]].val;
                uint32_t b = temps[args[3]].val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
                TCGArg *args2 = &s->gen_opparam_buf[op2->args];

                rl = args[0];
                rh = args[1];
                tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)r);
                tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(r >> 32));

                /* We've done all we need to do with the movi.  Skip it. */
                oi_next = op2->next;
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
            if (tmp != 2) {
                if (tmp) {
            do_brcond_true:
                    reset_all_temps(nb_temps);
                    op->opc = INDEX_op_br;
                    args[0] = args[5];
                } else {
            do_brcond_false:
                    tcg_op_remove(s, op);
                }
            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
                       && temps[args[2]].state == TCG_TEMP_CONST
                       && temps[args[3]].state == TCG_TEMP_CONST
                       && temps[args[2]].val == 0
                       && temps[args[3]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
            do_brcond_high:
                reset_all_temps(nb_temps);
                op->opc = INDEX_op_brcond_i32;
                args[0] = args[1];
                args[1] = args[3];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[0], args[2], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[1], args[3], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_brcond_low:
                reset_all_temps(nb_temps);
                op->opc = INDEX_op_brcond_i32;
                args[1] = args[2];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[0], args[2], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[1], args[3], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;
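
        /* setcond2 below mirrors the brcond2 simplifications above, except
         * that a decided condition becomes a movi of 0 or 1 rather than an
         * unconditional branch or a removed op.
         */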
        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
            if (tmp != 2) {
            do_setcond_const:
                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
                       && temps[args[3]].state == TCG_TEMP_CONST
                       && temps[args[4]].state == TCG_TEMP_CONST
                       && temps[args[3]].val == 0
                       && temps[args[4]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
            do_setcond_high:
                reset_temp(args[0]);
                temps[args[0]].mask = 1;
                op->opc = INDEX_op_setcond_i32;
                args[1] = args[2];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[1], args[3], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[2], args[4], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_setcond_low:
                reset_temp(args[0]);
                temps[args[0]].mask = 1;
                op->opc = INDEX_op_setcond_i32;
                args[2] = args[3];
                args[3] = args[5];
            } else if (args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[1], args[3], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[2], args[4], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_call:
            if (!(args[nb_oargs + nb_iargs + 1]
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    reset_temp(i);
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute the operation result) so no propagation
               is done.  We trash everything if the operation is the end
               of a basic block, otherwise we only trash the output args.
               "mask" is the non-zero bits mask for the first output arg. */
            if (def->flags & TCG_OPF_BB_END) {
                reset_all_temps(nb_temps);
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far). */
                    if (i == 0) {
                        temps[args[i]].mask = mask;
                    }
                }
            }
            break;
        }
    }
}
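
/* Entry point of the pass; called from the code generator (assuming the
 * build enables the TCG optimizer).
 */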
void tcg_optimize(TCGContext *s)
{
    tcg_constant_folding(s);
}