/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "config.h"

#include <stdlib.h>
#include <stdio.h>

#include "qemu-common.h"
#include "tcg-op.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

typedef enum {
    TCG_TEMP_UNDEF = 0,
    TCG_TEMP_CONST,
    TCG_TEMP_COPY,
} tcg_temp_state;

struct tcg_temp_info {
    tcg_temp_state state;
    uint16_t prev_copy;
    uint16_t next_copy;
    tcg_target_ulong val;
    tcg_target_ulong mask;
};

static struct tcg_temp_info temps[TCG_MAX_TEMPS];

/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP was the only other
   member of its copy list, remove the copy flag from that remaining
   temp as well.  */
static void reset_temp(TCGArg temp)
{
    if (temps[temp].state == TCG_TEMP_COPY) {
        if (temps[temp].prev_copy == temps[temp].next_copy) {
            temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
        } else {
            temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
            temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
        }
    }
    temps[temp].state = TCG_TEMP_UNDEF;
    temps[temp].mask = -1;
}

/* Reset all temporaries, given that there are NB_TEMPS of them.  */
static void reset_all_temps(int nb_temps)
{
    int i;
    for (i = 0; i < nb_temps; i++) {
        temps[i].state = TCG_TEMP_UNDEF;
        temps[i].mask = -1;
    }
}
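/* For illustration (hypothetical temps): copies are tracked as a doubly
 * linked circular list threaded through prev_copy/next_copy.  New copies
 * are inserted right after the source, so after "mov t5, t2; mov t6, t2"
 * the ring for t2 is
 *
 *     t2.next_copy = t6, t6.next_copy = t5, t5.next_copy = t2
 *
 * which lets temps_are_copies() answer membership by walking the ring
 * and lets reset_temp() unlink one element in O(1).  */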
static int op_bits(TCGOpcode op)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    return def->flags & TCG_OPF_64BIT ? 64 : 32;
}

static TCGOpcode op_to_movi(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_movi_i32;
    case 64:
        return INDEX_op_movi_i64;
    default:
        fprintf(stderr, "op_to_movi: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
{
    TCGArg i;

    /* If this is already a global, we can't do better. */
    if (temp < s->nb_globals) {
        return temp;
    }

    /* Search for a global first. */
    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
        if (i < s->nb_globals) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local. */
    if (!s->temps[temp].temp_local) {
        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
            if (s->temps[i].temp_local) {
                return i;
            }
        }
    }

    /* Failing to find a better representation, return the same temp. */
    return temp;
}

static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
{
    TCGArg i;

    if (arg1 == arg2) {
        return true;
    }

    if (temps[arg1].state != TCG_TEMP_COPY
        || temps[arg2].state != TCG_TEMP_COPY) {
        return false;
    }

    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
        if (i == arg2) {
            return true;
        }
    }

    return false;
}

static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
                            TCGArg dst, TCGArg src)
{
    reset_temp(dst);
    temps[dst].mask = temps[src].mask;
    assert(temps[src].state != TCG_TEMP_CONST);

    if (s->temps[src].type == s->temps[dst].type) {
        if (temps[src].state != TCG_TEMP_COPY) {
            temps[src].state = TCG_TEMP_COPY;
            temps[src].next_copy = src;
            temps[src].prev_copy = src;
        }
        temps[dst].state = TCG_TEMP_COPY;
        temps[dst].next_copy = temps[src].next_copy;
        temps[dst].prev_copy = src;
        temps[temps[dst].next_copy].prev_copy = dst;
        temps[src].next_copy = dst;
    }

    gen_args[0] = dst;
    gen_args[1] = src;
}

static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
{
    reset_temp(dst);
    temps[dst].state = TCG_TEMP_CONST;
    temps[dst].val = val;
    temps[dst].mask = val;
    gen_args[0] = dst;
    gen_args[1] = val;
}
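/* Note how tcg_opt_gen_movi seeds the known-zero analysis: for a constant,
 * the set of possibly-set bits is exactly the value itself, hence
 * temps[dst].mask = val.  E.g. after "movi t0, 0xff" every bit above
 * bit 7 of t0 is known to be zero.  */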
static TCGOpcode op_to_mov(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_mov_i32;
    case 64:
        return INDEX_op_mov_i64;
    default:
        fprintf(stderr, "op_to_mov: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_trunc_shr_i32:
    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined. */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}

static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    TCGArg res = do_constant_folding_2(op, x, y);
    if (op_bits(op) == 32) {
        res &= 0xffffffff;
    }
    return res;
}
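/* Worked example of the truncation above: folding add_i32 with
 * x = 0xffffffff and y = 1 yields 0x100000000 in the host arithmetic of
 * a 64-bit build; the masking truncates it to the correct 32-bit result,
 * 0.  (On a 32-bit host TCGArg is 32 bits wide and wraps on its own.)  */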
static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
        switch (op_bits(op)) {
        case 32:
            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
        case 64:
            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
        default:
            tcg_abort();
        }
    } else if (temps_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    } else {
        return 2;
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (temps[bl].state == TCG_TEMP_CONST
        && temps[bh].state == TCG_TEMP_CONST) {
        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;

        if (temps[al].state == TCG_TEMP_CONST
            && temps[ah].state == TCG_TEMP_CONST) {
            uint64_t a;
            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += temps[a1].state == TCG_TEMP_CONST;
    sum -= temps[a2].state == TCG_TEMP_CONST;

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += temps[p1[0]].state == TCG_TEMP_CONST;
    sum += temps[p1[1]].state == TCG_TEMP_CONST;
    sum -= temps[p2[0]].state == TCG_TEMP_CONST;
    sum -= temps[p2[1]].state == TCG_TEMP_CONST;
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}
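/* For illustration (hypothetical operands): given "add t0, $5, t1",
 * sum evaluates to +1 (constant first, non-constant second), so the
 * operands are swapped to give "add t0, t1, $5".  The boolean result
 * tells callers that handle comparisons to also adjust the condition
 * code after the swap.  */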
/* Propagate constants and copies, fold constant expressions. */
static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                    TCGArg *args, TCGOpDef *tcg_op_defs)
{
    int nb_ops, op_index, nb_temps, nb_globals;
    TCGArg *gen_args;

    /* The TEMPS array has an element for each temp.
       If a temp holds a constant then its value is kept in the element's
       VAL field.  If a temp is a copy of other temps then the other
       copies are available through the doubly linked circular list.  */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    reset_all_temps(nb_temps);

    nb_ops = tcg_opc_ptr - s->gen_opc_buf;
    gen_args = args;
    for (op_index = 0; op_index < nb_ops; op_index++) {
        TCGOpcode op = s->gen_opc_buf[op_index];
        const TCGOpDef *def = &tcg_op_defs[op];
        tcg_target_ulong mask, affected;
        int nb_oargs, nb_iargs, nb_args, i;
        TCGArg tmp;

        if (op == INDEX_op_call) {
            *gen_args++ = tmp = *args++;
            nb_oargs = tmp >> 16;
            nb_iargs = tmp & 0xffff;
            nb_args = nb_oargs + nb_iargs + def->nb_cargs;
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_args = def->nb_args;
        }

        /* Do copy propagation */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            if (temps[args[i]].state == TCG_TEMP_COPY) {
                args[i] = find_better_copy(s, args[i]);
            }
        }

        /* For commutative operations make constant second argument */
        switch (op) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(and):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(args[0], &args[1], &args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &args[0], &args[1])) {
                args[2] = tcg_swap_cond(args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(args[0], &args[1], &args[2])) {
                args[3] = tcg_swap_cond(args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &args[1], &args[2])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation.  */
            if (swap_commutative(args[0], &args[4], &args[3])) {
                args[5] = tcg_invert_cond(args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(args[0], &args[2], &args[4]);
            swap_commutative(args[1], &args[3], &args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(args[0], &args[2], &args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&args[0], &args[2])) {
                args[4] = tcg_swap_cond(args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&args[1], &args[3])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            break;
        default:
            break;
        }
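        /* Note the asymmetry above: swapping the comparison operands of
         * brcond/setcond requires tcg_swap_cond (LT becomes GT), while
         * swapping the two value operands of movcond requires
         * tcg_invert_cond (LT becomes GE), because there the comparison
         * itself is unchanged and only the selected input flips.  */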
        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
           and "sub r, 0, a => neg r, a" case.  */
        switch (op) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[1]].val == 0) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0);
                args += 3;
                gen_args += 2;
                continue;
            }
            break;
        CASE_OP_32_64(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (temps[args[2]].state == TCG_TEMP_CONST) {
                    /* Proceed with possible constant folding. */
                    break;
                }
                if (op == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                }
                if (!have_neg) {
                    break;
                }
                if (temps[args[1]].state == TCG_TEMP_CONST
                    && temps[args[1]].val == 0) {
                    s->gen_opc_buf[op_index] = neg_op;
                    reset_temp(args[0]);
                    gen_args[0] = args[0];
                    gen_args[1] = args[2];
                    args += 3;
                    gen_args += 2;
                    continue;
                }
            }
            break;
        CASE_OP_32_64(xor):
        CASE_OP_32_64(nand):
            if (temps[args[1]].state != TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (temps[args[1]].state != TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(andc):
            if (temps[args[2]].state != TCG_TEMP_CONST
                && temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[1]].val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (temps[args[2]].state != TCG_TEMP_CONST
                && temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[1]].val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                s->gen_opc_buf[op_index] = not_op;
                reset_temp(args[0]);
                gen_args[0] = args[0];
                gen_args[1] = args[i];
                args += 3;
                gen_args += 2;
                continue;
            }
        default:
            break;
        }

        /* Simplify expression for "op r, a, const => mov r, a" cases */
        switch (op) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(andc):
            if (temps[args[1]].state != TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0) {
                goto do_mov3;
            }
            break;
        CASE_OP_32_64(and):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (temps[args[1]].state != TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == -1) {
                goto do_mov3;
            }
            break;
        do_mov3:
            if (temps_are_copies(args[0], args[1])) {
                s->gen_opc_buf[op_index] = INDEX_op_nop;
            } else {
                s->gen_opc_buf[op_index] = op_to_mov(op);
                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                gen_args += 2;
            }
            args += 3;
            continue;
        default:
            break;
        }
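        /* Examples of the rewrites above: "xor r, a, $-1" computes ~a and
         * becomes "not r, a" when the backend supports it; "add r, a, $0",
         * "xor r, a, $0" and "and r, a, $-1" are identities on a and are
         * rewritten as "mov r, a", or dropped as a nop when r is already
         * a copy of a.  */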
        /* Simplify using known-zero bits.  Currently only ops with a
           single output argument are supported.  */
        mask = -1;
        affected = -1;
        switch (op) {
        CASE_OP_32_64(ext8s):
            if ((temps[args[1]].mask & 0x80) != 0) {
                break;
            }
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((temps[args[1]].mask & 0x8000) != 0) {
                break;
            }
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((temps[args[1]].mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = temps[args[2]].mask;
            if (temps[args[2]].state == TCG_TEMP_CONST) {
        and_const:
                affected = temps[args[1]].mask & ~mask;
            }
            mask = temps[args[1]].mask & mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros does not imply known-ones.  Therefore unless
               args[2] is constant, we can't infer anything from it.  */
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                mask = ~temps[args[2]].mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set. */
            mask = temps[args[1]].mask;
            break;

        case INDEX_op_sar_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & 31;
                mask = (int32_t)temps[args[1]].mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & 63;
                mask = (int64_t)temps[args[1]].mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & 31;
                mask = (uint32_t)temps[args[1]].mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & 63;
                mask = (uint64_t)temps[args[1]].mask >> tmp;
            }
            break;

        case INDEX_op_trunc_shr_i32:
            mask = (uint64_t)temps[args[1]].mask >> args[2];
            break;

        CASE_OP_32_64(shl):
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1);
                mask = temps[args[1]].mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost
               possibly-set bit.  */
            mask = -(temps[args[1]].mask & -temps[args[1]].mask);
            break;
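        /* Worked example for the neg mask: if args[1] has mask 0x6 (only
         * bits 1 and 2 may be set), then mask & -mask = 0x2 and the new
         * mask is -(0x2) = ...fffffffe.  Since -x = ~x + 1, every bit
         * below the lowest possibly-set bit of x is still zero in -x,
         * while all higher bits must be treated as unknown.  */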
        CASE_OP_32_64(deposit):
            mask = deposit64(temps[args[1]].mask, args[3], args[4],
                             temps[args[2]].mask);
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = temps[args[1]].mask | temps[args[2]].mask;
            break;

        CASE_OP_32_64(setcond):
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = temps[args[3]].mask | temps[args[4]].mask;
            break;

        CASE_OP_32_64(ld8u):
        case INDEX_op_qemu_ld8u:
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
        case INDEX_op_qemu_ld16u:
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
#if TCG_TARGET_REG_BITS == 64
        case INDEX_op_qemu_ld32u:
#endif
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOp mop = args[nb_oargs + nb_iargs];
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }

        /* 32-bit ops (non-64-bit ops and non-load/store ops) generate
           32-bit results.  */
        if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_64BIT))) {
            mask &= 0xffffffffu;
        }

        if (mask == 0) {
            assert(nb_oargs == 1);
            s->gen_opc_buf[op_index] = op_to_movi(op);
            tcg_opt_gen_movi(gen_args, args[0], 0);
            args += nb_args;
            gen_args += 2;
            continue;
        }
        if (affected == 0) {
            assert(nb_oargs == 1);
            if (temps_are_copies(args[0], args[1])) {
                s->gen_opc_buf[op_index] = INDEX_op_nop;
            } else if (temps[args[1]].state != TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_mov(op);
                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                gen_args += 2;
            } else {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], temps[args[1]].val);
                gen_args += 2;
            }
            args += nb_args;
            continue;
        }
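        /* Worked example for "affected": take "and r, a, $0xff00ffff"
         * where a already has mask 0x0000ffff.  The and_const path
         * computes affected = 0x0000ffff & ~0xff00ffff = 0, meaning the
         * and cannot clear any bit that may be set in a, so the block
         * above just reduced the op to "mov r, a" (or a nop when r is
         * already a copy of a).  */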
        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
        switch (op) {
        CASE_OP_32_64(and):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if ((temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0)) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0);
                args += 3;
                gen_args += 2;
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases */
        switch (op) {
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
            if (temps_are_copies(args[1], args[2])) {
                if (temps_are_copies(args[0], args[1])) {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                } else {
                    s->gen_opc_buf[op_index] = op_to_mov(op);
                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                    gen_args += 2;
                }
                args += 3;
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases */
        switch (op) {
        CASE_OP_32_64(andc):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(xor):
            if (temps_are_copies(args[1], args[2])) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0);
                gen_args += 2;
                args += 3;
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted for arguments by the
           register allocator where needed and possible.  Also detect
           copies.  */
        switch (op) {
        CASE_OP_32_64(mov):
            if (temps_are_copies(args[0], args[1])) {
                args += 2;
                s->gen_opc_buf[op_index] = INDEX_op_nop;
                break;
            }
            if (temps[args[1]].state != TCG_TEMP_CONST) {
                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                gen_args += 2;
                args += 2;
                break;
            }
            /* Source argument is constant.  Rewrite the operation and
               let the movi case handle it. */
            op = op_to_movi(op);
            s->gen_opc_buf[op_index] = op;
            args[1] = temps[args[1]].val;
            /* fallthrough */
        CASE_OP_32_64(movi):
            tcg_opt_gen_movi(gen_args, args[0], args[1]);
            gen_args += 2;
            args += 2;
            break;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tmp = do_constant_folding(op, temps[args[1]].val, 0);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 2;
                break;
            }
            goto do_default;

        case INDEX_op_trunc_shr_i32:
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tmp = do_constant_folding(op, temps[args[1]].val, args[2]);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 3;
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tmp = do_constant_folding(op, temps[args[1]].val,
                                          temps[args[2]].val);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 3;
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tmp = deposit64(temps[args[1]].val, args[3], args[4],
                                temps[args[2]].val);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 5;
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
            if (tmp != 2) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 4;
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
            if (tmp != 2) {
                if (tmp) {
                    reset_all_temps(nb_temps);
                    s->gen_opc_buf[op_index] = INDEX_op_br;
                    gen_args[0] = args[3];
                    gen_args += 1;
                } else {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                }
                args += 4;
                break;
            }
            goto do_default;
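        /* When the condition of a movcond is known, one of the two value
         * inputs is selected outright: args[3] if true (tmp == 1), args[4]
         * if false (tmp == 0); hence the args[4 - tmp] indexing below.  */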
        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
            if (tmp != 2) {
                if (temps_are_copies(args[0], args[4-tmp])) {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
                    s->gen_opc_buf[op_index] = op_to_movi(op);
                    tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
                    gen_args += 2;
                } else {
                    s->gen_opc_buf[op_index] = op_to_mov(op);
                    tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
                    gen_args += 2;
                }
                args += 6;
                break;
            }
            goto do_default;

        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[3]].state == TCG_TEMP_CONST
                && temps[args[4]].state == TCG_TEMP_CONST
                && temps[args[5]].state == TCG_TEMP_CONST) {
                uint32_t al = temps[args[2]].val;
                uint32_t ah = temps[args[3]].val;
                uint32_t bl = temps[args[4]].val;
                uint32_t bh = temps[args[5]].val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;

                if (op == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                /* We emit the extra nop when we emit the add2/sub2.  */
                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);

                rl = args[0];
                rh = args[1];
                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
                s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
                gen_args += 4;
                args += 6;
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[3]].state == TCG_TEMP_CONST) {
                uint32_t a = temps[args[2]].val;
                uint32_t b = temps[args[3]].val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;

                /* We emit the extra nop when we emit the mulu2.  */
                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);

                rl = args[0];
                rh = args[1];
                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
                s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
                gen_args += 4;
                args += 4;
                break;
            }
            goto do_default;
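        /* Worked example of the double-word folding above: mulu2_i32 with
         * constant inputs 0x80000000 and 2 folds to r = 0x100000000 and is
         * emitted as two movi_i32 ops, rl = 0 and rh = 1, the second one
         * reusing the nop slot reserved after the mulu2.  */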
        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
            if (tmp != 2) {
                if (tmp) {
                    reset_all_temps(nb_temps);
                    s->gen_opc_buf[op_index] = INDEX_op_br;
                    gen_args[0] = args[5];
                    gen_args += 1;
                } else {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                }
            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
                       && temps[args[2]].state == TCG_TEMP_CONST
                       && temps[args[3]].state == TCG_TEMP_CONST
                       && temps[args[2]].val == 0
                       && temps[args[3]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
                reset_all_temps(nb_temps);
                s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
                gen_args[0] = args[1];
                gen_args[1] = args[3];
                gen_args[2] = args[4];
                gen_args[3] = args[5];
                gen_args += 4;
            } else {
                goto do_default;
            }
            args += 6;
            break;

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
            if (tmp != 2) {
                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
                       && temps[args[3]].state == TCG_TEMP_CONST
                       && temps[args[4]].state == TCG_TEMP_CONST
                       && temps[args[3]].val == 0
                       && temps[args[4]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
                s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
                reset_temp(args[0]);
                gen_args[0] = args[0];
                gen_args[1] = args[2];
                gen_args[2] = args[4];
                gen_args[3] = args[5];
                gen_args += 4;
            } else {
                goto do_default;
            }
            args += 6;
            break;

        case INDEX_op_call:
            if (!(args[nb_oargs + nb_iargs + 1]
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    reset_temp(i);
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute the operation result), so no propagation
               is done.  We trash everything if the operation is the end
               of a basic block, otherwise we only trash the output args.
               "mask" is the non-zero bits mask for the first output arg.  */
            if (def->flags & TCG_OPF_BB_END) {
                reset_all_temps(nb_temps);
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far).  */
                    if (i == 0) {
                        temps[args[i]].mask = mask;
                    }
                }
            }
            for (i = 0; i < nb_args; i++) {
                gen_args[i] = args[i];
            }
            args += nb_args;
            gen_args += nb_args;
            break;
        }
    }

    return gen_args;
}

TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
                     TCGArg *args, TCGOpDef *tcg_op_defs)
{
    TCGArg *res;
    res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
    return res;
}