/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "config.h"

#include <stdlib.h>
#include <stdio.h>

#include "qemu-common.h"
#include "tcg-op.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

typedef enum {
    TCG_TEMP_UNDEF = 0,
    TCG_TEMP_CONST,
    TCG_TEMP_COPY,
} tcg_temp_state;

struct tcg_temp_info {
    tcg_temp_state state;
    uint16_t prev_copy;
    uint16_t next_copy;
    tcg_target_ulong val;
    tcg_target_ulong mask;
};

static struct tcg_temp_info temps[TCG_MAX_TEMPS];

/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP was a copy, unlink it
   from its copy list; if only one other temp remains in the list, clear
   that temp's copy flag as well. */
static void reset_temp(TCGArg temp)
{
    if (temps[temp].state == TCG_TEMP_COPY) {
        if (temps[temp].prev_copy == temps[temp].next_copy) {
            temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
        } else {
            temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
            temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
        }
    }
    temps[temp].state = TCG_TEMP_UNDEF;
    temps[temp].mask = -1;
}

/* Reset all temporaries, given that there are NB_TEMPS of them. */
static void reset_all_temps(int nb_temps)
{
    int i;
    for (i = 0; i < nb_temps; i++) {
        temps[i].state = TCG_TEMP_UNDEF;
        temps[i].mask = -1;
    }
}

static int op_bits(TCGOpcode op)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    return def->flags & TCG_OPF_64BIT ? 64 : 32;
}

static TCGOpcode op_to_movi(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_movi_i32;
    case 64:
        return INDEX_op_movi_i64;
    default:
        fprintf(stderr, "op_to_movi: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}
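
/* Illustrative sketch for the helper below (the temp names are
   hypothetical, not produced by this code): suppose the copy list for a
   plain temp tmp5 is the circular chain

       tmp5 -> local7 -> global2 -> tmp5      (via next_copy)

   Then find_better_copy(s, tmp5) walks the list and returns global2,
   since globals outlive basic blocks and temp locals outlive plain
   temps.  Preferring the longest-lived representative keeps the
   propagated value usable for as long as possible. */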
static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
{
    TCGArg i;

    /* If this is already a global, we can't do better. */
    if (temp < s->nb_globals) {
        return temp;
    }

    /* Search for a global first. */
    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
        if (i < s->nb_globals) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local. */
    if (!s->temps[temp].temp_local) {
        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
            if (s->temps[i].temp_local) {
                return i;
            }
        }
    }

    /* Failing to find a better representation, return the same temp. */
    return temp;
}

static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
{
    TCGArg i;

    if (arg1 == arg2) {
        return true;
    }

    if (temps[arg1].state != TCG_TEMP_COPY
        || temps[arg2].state != TCG_TEMP_COPY) {
        return false;
    }

    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
        if (i == arg2) {
            return true;
        }
    }

    return false;
}

static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
                            TCGArg dst, TCGArg src)
{
    reset_temp(dst);
    temps[dst].mask = temps[src].mask;
    assert(temps[src].state != TCG_TEMP_CONST);

    if (s->temps[src].type == s->temps[dst].type) {
        if (temps[src].state != TCG_TEMP_COPY) {
            temps[src].state = TCG_TEMP_COPY;
            temps[src].next_copy = src;
            temps[src].prev_copy = src;
        }
        temps[dst].state = TCG_TEMP_COPY;
        temps[dst].next_copy = temps[src].next_copy;
        temps[dst].prev_copy = src;
        temps[temps[dst].next_copy].prev_copy = dst;
        temps[src].next_copy = dst;
    }

    gen_args[0] = dst;
    gen_args[1] = src;
}

static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
{
    reset_temp(dst);
    temps[dst].state = TCG_TEMP_CONST;
    temps[dst].val = val;
    temps[dst].mask = val;
    gen_args[0] = dst;
    gen_args[1] = val;
}

static TCGOpcode op_to_mov(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_mov_i32;
    case 64:
        return INDEX_op_mov_i64;
    default:
        fprintf(stderr, "op_to_mov: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}
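
/* Worked example for the folding helpers below, assuming a 32-bit shl:
   do_constant_folding_2(INDEX_op_shl_i32, 0x80000001, 33) masks the
   shift count to 33 & 31 = 1 and yields 0x00000002 after the 32-bit
   truncation applied by do_constant_folding().  Masking the count keeps
   the host's shift well defined even when the guest count is out of
   range; likewise the division cases substitute 1 for a zero divisor
   merely to avoid a host trap, the guest result being undefined. */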
static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_trunc_shr_i32:
    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined. */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}

static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    TCGArg res = do_constant_folding_2(op, x, y);
    if (op_bits(op) == 32) {
        res &= 0xffffffff;
    }
    return res;
}

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}
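
/* The condition folders below use a tri-state result: 0 and 1 are the
   folded truth value, while 2 means "unknown, keep the comparison".
   Hedged example with hypothetical temps: if t1 is unknown and t2 is
   known to hold 0, then "setcond_i32 t0, t1, t2, ltu" folds to 0
   because no unsigned value is below zero, whereas the same operands
   with TCG_COND_EQ must return 2. */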
/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can. */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
        switch (op_bits(op)) {
        case 32:
            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
        case 64:
            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
        default:
            tcg_abort();
        }
    } else if (temps_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    } else {
        return 2;
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can. */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (temps[bl].state == TCG_TEMP_CONST
        && temps[bh].state == TCG_TEMP_CONST) {
        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;

        if (temps[al].state == TCG_TEMP_CONST
            && temps[ah].state == TCG_TEMP_CONST) {
            uint64_t a;
            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += temps[a1].state == TCG_TEMP_CONST;
    sum -= temps[a2].state == TCG_TEMP_CONST;

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += temps[p1[0]].state == TCG_TEMP_CONST;
    sum += temps[p1[1]].state == TCG_TEMP_CONST;
    sum -= temps[p2[0]].state == TCG_TEMP_CONST;
    sum -= temps[p2[1]].state == TCG_TEMP_CONST;
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}
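
/* Example of the canonicalization performed by swap_commutative above
   (hypothetical temps): with t1 known constant, "add_i32 t0, t1, t2"
   becomes "add_i32 t0, t2, t1", putting the constant second where
   backends expect an immediate; and "add_i32 t0, t2, t0" becomes
   "add_i32 t0, t0, t2" so non-RISC hosts can use a two-operand add.
   For comparisons, an operand swap must be paired with tcg_swap_cond(),
   e.g. LT becomes GT. */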
/* Propagate constants and copies, fold constant expressions. */
static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                    TCGArg *args, TCGOpDef *tcg_op_defs)
{
    int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
    tcg_target_ulong mask, affected;
    TCGOpcode op;
    const TCGOpDef *def;
    TCGArg *gen_args;
    TCGArg tmp;

    /* The TEMPS array has an element for each temp.
       If the temp holds a constant, its value is kept in the element's
       VAL field.  If the temp is a copy of other temps, the copies are
       available through the doubly linked circular list. */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    reset_all_temps(nb_temps);

    nb_ops = tcg_opc_ptr - s->gen_opc_buf;
    gen_args = args;
    for (op_index = 0; op_index < nb_ops; op_index++) {
        op = s->gen_opc_buf[op_index];
        def = &tcg_op_defs[op];
        /* Do copy propagation */
        if (op == INDEX_op_call) {
            int nb_oargs = args[0] >> 16;
            int nb_iargs = args[0] & 0xffff;
            for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) {
                if (temps[args[i]].state == TCG_TEMP_COPY) {
                    args[i] = find_better_copy(s, args[i]);
                }
            }
        } else {
            for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
                if (temps[args[i]].state == TCG_TEMP_COPY) {
                    args[i] = find_better_copy(s, args[i]);
                }
            }
        }

        /* For commutative operations make constant second argument */
        switch (op) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(and):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(args[0], &args[1], &args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &args[0], &args[1])) {
                args[2] = tcg_swap_cond(args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(args[0], &args[1], &args[2])) {
                args[3] = tcg_swap_cond(args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &args[1], &args[2])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation. */
            if (swap_commutative(args[0], &args[4], &args[3])) {
                args[5] = tcg_invert_cond(args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(args[0], &args[2], &args[4]);
            swap_commutative(args[1], &args[3], &args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(args[0], &args[2], &args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&args[0], &args[2])) {
                args[4] = tcg_swap_cond(args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&args[1], &args[3])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            break;
        default:
            break;
        }
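
        /* At this point copy propagation and canonicalization have run
           for the current op.  As a hedged illustration (hypothetical
           temps): if t2 was recorded as a copy of global g0 and t1
           holds a known constant, "xor_i32 t0, t1, t2" has by now
           become "xor_i32 t0, g0, t1", with the constant operand second
           where the pattern matches below look for it. */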
        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0"
           and "sub r, 0, a => neg r, a" cases. */
        switch (op) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[1]].val == 0) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0);
                args += 3;
                gen_args += 2;
                continue;
            }
            break;
        CASE_OP_32_64(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (temps[args[2]].state == TCG_TEMP_CONST) {
                    /* Proceed with possible constant folding. */
                    break;
                }
                if (op == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                }
                if (!have_neg) {
                    break;
                }
                if (temps[args[1]].state == TCG_TEMP_CONST
                    && temps[args[1]].val == 0) {
                    s->gen_opc_buf[op_index] = neg_op;
                    reset_temp(args[0]);
                    gen_args[0] = args[0];
                    gen_args[1] = args[2];
                    args += 3;
                    gen_args += 2;
                    continue;
                }
            }
            break;
        CASE_OP_32_64(xor):
        CASE_OP_32_64(nand):
            if (temps[args[1]].state != TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (temps[args[1]].state != TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(andc):
            if (temps[args[2]].state != TCG_TEMP_CONST
                && temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[1]].val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (temps[args[2]].state != TCG_TEMP_CONST
                && temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[1]].val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                s->gen_opc_buf[op_index] = not_op;
                reset_temp(args[0]);
                gen_args[0] = args[0];
                gen_args[1] = args[i];
                args += 3;
                gen_args += 2;
                continue;
            }
        default:
            break;
        }

        /* Simplify expression for "op r, a, const => mov r, a" cases */
        switch (op) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(andc):
            if (temps[args[1]].state != TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0) {
                goto do_mov3;
            }
            break;
        CASE_OP_32_64(and):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (temps[args[1]].state != TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == -1) {
                goto do_mov3;
            }
            break;
        do_mov3:
            if (temps_are_copies(args[0], args[1])) {
                s->gen_opc_buf[op_index] = INDEX_op_nop;
            } else {
                s->gen_opc_buf[op_index] = op_to_mov(op);
                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                gen_args += 2;
            }
            args += 3;
            continue;
        default:
            break;
        }
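
        /* Examples of the identity simplifications above (hypothetical
           temps): when t2 is known to hold 0, "or_i32 t0, t1, t2"
           becomes "mov_i32 t0, t1"; when t2 holds -1, the same happens
           for and/orc/eqv.  Either form drops to a nop entirely if t0
           is already known to be a copy of t1. */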
        /* Simplify using known-zero bits.  Currently only ops with a single
           output argument are supported. */
        mask = -1;
        affected = -1;
        switch (op) {
        CASE_OP_32_64(ext8s):
            if ((temps[args[1]].mask & 0x80) != 0) {
                break;
            }
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((temps[args[1]].mask & 0x8000) != 0) {
                break;
            }
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((temps[args[1]].mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = temps[args[2]].mask;
            if (temps[args[2]].state == TCG_TEMP_CONST) {
        and_const:
                affected = temps[args[1]].mask & ~mask;
            }
            mask = temps[args[1]].mask & mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros do not imply known-ones.  Therefore unless
               args[2] is constant, we can't infer anything from it. */
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                mask = ~temps[args[2]].mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set. */
            mask = temps[args[1]].mask;
            break;

        case INDEX_op_sar_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & 31;
                mask = (int32_t)temps[args[1]].mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & 63;
                mask = (int64_t)temps[args[1]].mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & 31;
                mask = (uint32_t)temps[args[1]].mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & 63;
                mask = (uint64_t)temps[args[1]].mask >> tmp;
            }
            break;

        case INDEX_op_trunc_shr_i32:
            mask = (uint64_t)temps[args[1]].mask >> args[2];
            break;

        CASE_OP_32_64(shl):
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1);
                mask = temps[args[1]].mask << tmp;
            }
            break;
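
        /* A worked mask example (hypothetical temps): if t1 has mask
           0xff (at most the low byte may be nonzero) and t2 is the
           constant 4, then "shr_i32 t0, t1, t2" above gives t0 the mask
           0xff >> 4 = 0x0f, while "shl_i32 t0, t1, t2" gives
           0xff << 4 = 0xff0.  The mask is a superset of the bits that
           may be set, so it stays conservative even when t1's actual
           value is unknown. */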
        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost one. */
            mask = -(temps[args[1]].mask & -temps[args[1]].mask);
            break;

        CASE_OP_32_64(deposit):
            mask = deposit64(temps[args[1]].mask, args[3], args[4],
                             temps[args[2]].mask);
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = temps[args[1]].mask | temps[args[2]].mask;
            break;

        CASE_OP_32_64(setcond):
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = temps[args[3]].mask | temps[args[4]].mask;
            break;

        CASE_OP_32_64(ld8u):
        case INDEX_op_qemu_ld8u:
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
        case INDEX_op_qemu_ld16u:
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
#if TCG_TARGET_REG_BITS == 64
        case INDEX_op_qemu_ld32u:
#endif
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOp mop = args[def->nb_oargs + def->nb_iargs];
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }

        /* 32-bit ops (non 64-bit ops and non load/store ops) generate
           32-bit results. */
        if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_64BIT))) {
            mask &= 0xffffffffu;
        }

        if (mask == 0) {
            assert(def->nb_oargs == 1);
            s->gen_opc_buf[op_index] = op_to_movi(op);
            tcg_opt_gen_movi(gen_args, args[0], 0);
            args += def->nb_oargs + def->nb_iargs + def->nb_cargs;
            gen_args += 2;
            continue;
        }
        if (affected == 0) {
            assert(def->nb_oargs == 1);
            if (temps_are_copies(args[0], args[1])) {
                s->gen_opc_buf[op_index] = INDEX_op_nop;
            } else if (temps[args[1]].state != TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_mov(op);
                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                gen_args += 2;
            } else {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], temps[args[1]].val);
                gen_args += 2;
            }
            args += def->nb_iargs + 1;
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
        switch (op) {
        CASE_OP_32_64(and):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if ((temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0)) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0);
                args += 3;
                gen_args += 2;
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases */
        switch (op) {
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
            if (temps_are_copies(args[1], args[2])) {
                if (temps_are_copies(args[0], args[1])) {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                } else {
                    s->gen_opc_buf[op_index] = op_to_mov(op);
                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                    gen_args += 2;
                }
                args += 3;
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases */
        switch (op) {
        CASE_OP_32_64(andc):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(xor):
            if (temps_are_copies(args[1], args[2])) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0);
                gen_args += 2;
                args += 3;
                continue;
            }
            break;
        default:
            break;
        }
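
        /* Hedged example of the folding performed below (hypothetical
           temps): after "movi_i32 t1, $6" and "movi_i32 t2, $7",
           "mul_i32 t0, t1, t2" is rewritten in place to
           "movi_i32 t0, $42", and t0 is recorded as holding the
           constant 42 so that later uses of t0 fold as well. */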
        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted for arguments by the
           register allocator where needed and possible.  Also detect
           copies. */
        switch (op) {
        CASE_OP_32_64(mov):
            if (temps_are_copies(args[0], args[1])) {
                args += 2;
                s->gen_opc_buf[op_index] = INDEX_op_nop;
                break;
            }
            if (temps[args[1]].state != TCG_TEMP_CONST) {
                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                gen_args += 2;
                args += 2;
                break;
            }
            /* Source argument is constant.  Rewrite the operation and
               let movi case handle it. */
            op = op_to_movi(op);
            s->gen_opc_buf[op_index] = op;
            args[1] = temps[args[1]].val;
            /* fallthrough */
        CASE_OP_32_64(movi):
            tcg_opt_gen_movi(gen_args, args[0], args[1]);
            gen_args += 2;
            args += 2;
            break;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tmp = do_constant_folding(op, temps[args[1]].val, 0);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 2;
                break;
            }
            goto do_default;

        case INDEX_op_trunc_shr_i32:
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tmp = do_constant_folding(op, temps[args[1]].val, args[2]);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 3;
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tmp = do_constant_folding(op, temps[args[1]].val,
                                          temps[args[2]].val);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 3;
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tmp = deposit64(temps[args[1]].val, args[3], args[4],
                                temps[args[2]].val);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 5;
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
            if (tmp != 2) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 4;
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
            if (tmp != 2) {
                if (tmp) {
                    reset_all_temps(nb_temps);
                    s->gen_opc_buf[op_index] = INDEX_op_br;
                    gen_args[0] = args[3];
                    gen_args += 1;
                } else {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                }
                args += 4;
                break;
            }
            goto do_default;
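
        /* Note on brcond above (a summary of the code, not an extra
           rule): when the condition folds to true the branch becomes an
           unconditional br, so all temp state is reset -- values
           established before the branch cannot be assumed at the label.
           When it folds to false, the dead branch becomes a nop and the
           temp state survives. */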
        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
            if (tmp != 2) {
                if (temps_are_copies(args[0], args[4-tmp])) {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
                    s->gen_opc_buf[op_index] = op_to_movi(op);
                    tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
                    gen_args += 2;
                } else {
                    s->gen_opc_buf[op_index] = op_to_mov(op);
                    tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
                    gen_args += 2;
                }
                args += 6;
                break;
            }
            goto do_default;

        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[3]].state == TCG_TEMP_CONST
                && temps[args[4]].state == TCG_TEMP_CONST
                && temps[args[5]].state == TCG_TEMP_CONST) {
                uint32_t al = temps[args[2]].val;
                uint32_t ah = temps[args[3]].val;
                uint32_t bl = temps[args[4]].val;
                uint32_t bh = temps[args[5]].val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;

                if (op == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                /* We emit the extra nop when we emit the add2/sub2. */
                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);

                rl = args[0];
                rh = args[1];
                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
                s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
                gen_args += 4;
                args += 6;
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[3]].state == TCG_TEMP_CONST) {
                uint32_t a = temps[args[2]].val;
                uint32_t b = temps[args[3]].val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;

                /* We emit the extra nop when we emit the mulu2. */
                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);

                rl = args[0];
                rh = args[1];
                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
                s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
                gen_args += 4;
                args += 4;
                break;
            }
            goto do_default;
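
        /* Illustration for the double-word cases above (hypothetical
           temps): if the low/high input pairs hold 0xffffffff:0 and
           1:0, add2_i32 folds to the 64-bit sum 0x100000000 and is
           emitted as "movi_i32 rl, $0" plus "movi_i32 rh, $1", using
           the current slot and the nop slot that was reserved when the
           add2 was generated. */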
        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
            if (tmp != 2) {
                if (tmp) {
                    reset_all_temps(nb_temps);
                    s->gen_opc_buf[op_index] = INDEX_op_br;
                    gen_args[0] = args[5];
                    gen_args += 1;
                } else {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                }
            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
                       && temps[args[2]].state == TCG_TEMP_CONST
                       && temps[args[3]].state == TCG_TEMP_CONST
                       && temps[args[2]].val == 0
                       && temps[args[3]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
                reset_all_temps(nb_temps);
                s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
                gen_args[0] = args[1];
                gen_args[1] = args[3];
                gen_args[2] = args[4];
                gen_args[3] = args[5];
                gen_args += 4;
            } else {
                goto do_default;
            }
            args += 6;
            break;

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
            if (tmp != 2) {
                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
                       && temps[args[3]].state == TCG_TEMP_CONST
                       && temps[args[4]].state == TCG_TEMP_CONST
                       && temps[args[3]].val == 0
                       && temps[args[4]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
                s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
                reset_temp(args[0]);
                gen_args[0] = args[0];
                gen_args[1] = args[2];
                gen_args[2] = args[4];
                gen_args[3] = args[5];
                gen_args += 4;
            } else {
                goto do_default;
            }
            args += 6;
            break;

        case INDEX_op_call:
            nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
            if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS |
                                            TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    reset_temp(i);
                }
            }
            for (i = 0; i < (args[0] >> 16); i++) {
                reset_temp(args[i + 1]);
            }
            i = nb_call_args + 3;
            while (i) {
                *gen_args = *args;
                args++;
                gen_args++;
                i--;
            }
            break;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute its result), so no propagation is done.
               We trash everything if the operation is the end of a basic
               block, otherwise we only trash the output args.  "mask" is
               the non-zero bits mask for the first output arg. */
            if (def->flags & TCG_OPF_BB_END) {
                reset_all_temps(nb_temps);
            } else {
                for (i = 0; i < def->nb_oargs; i++) {
                    reset_temp(args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far). */
                    if (i == 0) {
                        temps[args[i]].mask = mask;
                    }
                }
            }
            for (i = 0; i < def->nb_args; i++) {
                gen_args[i] = args[i];
            }
            args += def->nb_args;
            gen_args += def->nb_args;
            break;
        }
    }

    return gen_args;
}

TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
                     TCGArg *args, TCGOpDef *tcg_op_defs)
{
    TCGArg *res;
    res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
    return res;
}