/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "config.h"

#include <stdlib.h>
#include <stdio.h>

#include "qemu-common.h"
#include "tcg-op.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

typedef enum {
    TCG_TEMP_UNDEF = 0,
    TCG_TEMP_CONST,
    TCG_TEMP_COPY,
} tcg_temp_state;

struct tcg_temp_info {
    tcg_temp_state state;
    uint16_t prev_copy;
    uint16_t next_copy;
    tcg_target_ulong val;
    tcg_target_ulong mask;
};

static struct tcg_temp_info temps[TCG_MAX_TEMPS];

/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP had exactly one other
   copy, also remove the copy flag from that remaining temp. */
static void reset_temp(TCGArg temp)
{
    if (temps[temp].state == TCG_TEMP_COPY) {
        if (temps[temp].prev_copy == temps[temp].next_copy) {
            temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
        } else {
            temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
            temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
        }
    }
    temps[temp].state = TCG_TEMP_UNDEF;
    temps[temp].mask = -1;
}

/* Reset all temporaries, given that there are NB_TEMPS of them. */
static void reset_all_temps(int nb_temps)
{
    int i;
    for (i = 0; i < nb_temps; i++) {
        temps[i].state = TCG_TEMP_UNDEF;
        temps[i].mask = -1;
    }
}

static int op_bits(TCGOpcode op)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    return def->flags & TCG_OPF_64BIT ? 64 : 32;
}

static TCGOpcode op_to_movi(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_movi_i32;
    case 64:
        return INDEX_op_movi_i64;
    default:
        fprintf(stderr, "op_to_movi: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}
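/* Illustrative sketch of the copy tracking, not part of the algorithm
   itself: after "mov t1, t0" and "mov t2, t0", the three temps form one
   circular list through the next_copy/prev_copy fields, t0 -> t2 -> t1
   -> t0.  find_better_copy() below walks this ring to pick a canonical
   representative, preferring globals, then temp locals, over plain
   temps. */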
static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
{
    TCGArg i;

    /* If this is already a global, we can't do better. */
    if (temp < s->nb_globals) {
        return temp;
    }

    /* Search for a global first. */
    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
        if (i < s->nb_globals) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local. */
    if (!s->temps[temp].temp_local) {
        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
            if (s->temps[i].temp_local) {
                return i;
            }
        }
    }

    /* Failing to find a better representation, return the same temp. */
    return temp;
}

static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
{
    TCGArg i;

    if (arg1 == arg2) {
        return true;
    }

    if (temps[arg1].state != TCG_TEMP_COPY
        || temps[arg2].state != TCG_TEMP_COPY) {
        return false;
    }

    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
        if (i == arg2) {
            return true;
        }
    }

    return false;
}

static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
                            TCGArg dst, TCGArg src)
{
    reset_temp(dst);
    temps[dst].mask = temps[src].mask;
    assert(temps[src].state != TCG_TEMP_CONST);

    if (s->temps[src].type == s->temps[dst].type) {
        if (temps[src].state != TCG_TEMP_COPY) {
            temps[src].state = TCG_TEMP_COPY;
            temps[src].next_copy = src;
            temps[src].prev_copy = src;
        }
        temps[dst].state = TCG_TEMP_COPY;
        temps[dst].next_copy = temps[src].next_copy;
        temps[dst].prev_copy = src;
        temps[temps[dst].next_copy].prev_copy = dst;
        temps[src].next_copy = dst;
    }

    gen_args[0] = dst;
    gen_args[1] = src;
}

static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
{
    reset_temp(dst);
    temps[dst].state = TCG_TEMP_CONST;
    temps[dst].val = val;
    temps[dst].mask = val;
    gen_args[0] = dst;
    gen_args[1] = val;
}

static TCGOpcode op_to_mov(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_mov_i32;
    case 64:
        return INDEX_op_mov_i64;
    default:
        fprintf(stderr, "op_to_mov: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}
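/* Worked example (illustrative): folding INDEX_op_shl_i32 with x = 0x1234
   and y = 8 yields 0x123400.  Shift counts are masked first, so an
   oversized count such as 40 behaves as 40 & 31 = 8 instead of invoking
   undefined behaviour in C; 32-bit results are then truncated by the
   do_constant_folding() wrapper. */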
static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined. */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}

static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    TCGArg res = do_constant_folding_2(op, x, y);
    if (op_bits(op) == 32) {
        res &= 0xffffffff;
    }
    return res;
}

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}
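/* Example (illustrative): when the two operands are known to be equal,
   "x < x" (LT/LTU) and "x != x" (NE) are always false, while "x >= x"
   (GE/GEU) and "x == x" (EQ) are always true; do_constant_folding_cond_eq()
   above encodes exactly this table. */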
/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can. */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
        switch (op_bits(op)) {
        case 32:
            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
        case 64:
            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
        default:
            tcg_abort();
        }
    } else if (temps_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    } else {
        return 2;
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can. */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (temps[bl].state == TCG_TEMP_CONST
        && temps[bh].state == TCG_TEMP_CONST) {
        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;

        if (temps[al].state == TCG_TEMP_CONST
            && temps[ah].state == TCG_TEMP_CONST) {
            uint64_t a;
            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += temps[a1].state == TCG_TEMP_CONST;
    sum -= temps[a2].state == TCG_TEMP_CONST;

    /* Prefer the constant in the second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += temps[p1[0]].state == TCG_TEMP_CONST;
    sum += temps[p1[1]].state == TCG_TEMP_CONST;
    sum -= temps[p2[0]].state == TCG_TEMP_CONST;
    sum -= temps[p2[1]].state == TCG_TEMP_CONST;
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}
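/* Canonicalization example (illustrative): swap_commutative() turns
   "add t0, $5, t1" into "add t0, t1, $5", moving the constant to the
   second operand, and turns "add t0, t1, t0" into "add t0, t0, t1" so
   that the destination matches the first source -- the form non-RISC
   hosts handle best. */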
/* Propagate constants and copies, fold constant expressions. */
static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                    TCGArg *args, TCGOpDef *tcg_op_defs)
{
    int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
    tcg_target_ulong mask, affected;
    TCGOpcode op;
    const TCGOpDef *def;
    TCGArg *gen_args;
    TCGArg tmp;

    /* The TEMPS array has an element for each temp.
       If a temp holds a constant, its value is kept in that element's
       VAL field.  If a temp is a copy of other temps, the copies are
       reachable through a doubly linked circular list. */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    reset_all_temps(nb_temps);

    nb_ops = tcg_opc_ptr - s->gen_opc_buf;
    gen_args = args;
    for (op_index = 0; op_index < nb_ops; op_index++) {
        op = s->gen_opc_buf[op_index];
        def = &tcg_op_defs[op];
        /* Do copy propagation. */
        if (op == INDEX_op_call) {
            int nb_oargs = args[0] >> 16;
            int nb_iargs = args[0] & 0xffff;
            for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) {
                if (temps[args[i]].state == TCG_TEMP_COPY) {
                    args[i] = find_better_copy(s, args[i]);
                }
            }
        } else {
            for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
                if (temps[args[i]].state == TCG_TEMP_COPY) {
                    args[i] = find_better_copy(s, args[i]);
                }
            }
        }

        /* For commutative operations, make the constant the second
           argument. */
        switch (op) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(and):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(args[0], &args[1], &args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &args[0], &args[1])) {
                args[2] = tcg_swap_cond(args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(args[0], &args[1], &args[2])) {
                args[3] = tcg_swap_cond(args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &args[1], &args[2])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation. */
            if (swap_commutative(args[0], &args[4], &args[3])) {
                args[5] = tcg_invert_cond(args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(args[0], &args[2], &args[4]);
            swap_commutative(args[1], &args[3], &args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(args[0], &args[2], &args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&args[0], &args[2])) {
                args[4] = tcg_swap_cond(args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&args[1], &args[3])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            break;
        default:
            break;
        }
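        /* Example of the swaps above (illustrative): "brcond $0, t1, lt"
           becomes "brcond t1, $0, gt" -- the operands are exchanged and
           the condition mirrored by tcg_swap_cond(), which preserves the
           meaning of the comparison. */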
        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0"
           and "sub r, 0, a => neg r, a" cases. */
        switch (op) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[1]].val == 0) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0);
                args += 3;
                gen_args += 2;
                continue;
            }
            break;
        CASE_OP_32_64(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (temps[args[2]].state == TCG_TEMP_CONST) {
                    /* Proceed with possible constant folding. */
                    break;
                }
                if (op == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                }
                if (!have_neg) {
                    break;
                }
                if (temps[args[1]].state == TCG_TEMP_CONST
                    && temps[args[1]].val == 0) {
                    s->gen_opc_buf[op_index] = neg_op;
                    reset_temp(args[0]);
                    gen_args[0] = args[0];
                    gen_args[1] = args[2];
                    args += 3;
                    gen_args += 2;
                    continue;
                }
            }
            break;
        CASE_OP_32_64(xor):
        CASE_OP_32_64(nand):
            if (temps[args[1]].state != TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (temps[args[1]].state != TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(andc):
            if (temps[args[2]].state != TCG_TEMP_CONST
                && temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[1]].val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (temps[args[2]].state != TCG_TEMP_CONST
                && temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[1]].val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                s->gen_opc_buf[op_index] = not_op;
                reset_temp(args[0]);
                gen_args[0] = args[0];
                gen_args[1] = args[i];
                args += 3;
                gen_args += 2;
                continue;
            }
        default:
            break;
        }

        /* Simplify expressions for "op r, a, const => mov r, a" cases. */
        switch (op) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(andc):
            if (temps[args[1]].state != TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0) {
                goto do_mov3;
            }
            break;
        CASE_OP_32_64(and):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (temps[args[1]].state != TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == -1) {
                goto do_mov3;
            }
            break;
        do_mov3:
            if (temps_are_copies(args[0], args[1])) {
                s->gen_opc_buf[op_index] = INDEX_op_nop;
            } else {
                s->gen_opc_buf[op_index] = op_to_mov(op);
                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                gen_args += 2;
            }
            args += 3;
            continue;
        default:
            break;
        }
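        /* Example of the mask tracking below (illustrative): after
           "and t0, t1, $0xff", temps[t0].mask is at most 0xff, i.e. all
           bits above bit 7 are known zero.  A later "ext8u t2, t0" then
           clears no bit that could be set ("affected" computes to 0) and
           degenerates to a simple mov. */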
        /* Simplify using known-zero bits.  Currently only ops with a single
           output argument are supported. */
        mask = -1;
        affected = -1;
        switch (op) {
        CASE_OP_32_64(ext8s):
            if ((temps[args[1]].mask & 0x80) != 0) {
                break;
            }
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((temps[args[1]].mask & 0x8000) != 0) {
                break;
            }
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((temps[args[1]].mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = temps[args[2]].mask;
            if (temps[args[2]].state == TCG_TEMP_CONST) {
        and_const:
                affected = temps[args[1]].mask & ~mask;
            }
            mask = temps[args[1]].mask & mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros do not imply known-ones.  Therefore unless
               args[2] is constant, we can't infer anything from it. */
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                mask = ~temps[args[2]].mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set. */
            mask = temps[args[1]].mask;
            break;

        case INDEX_op_sar_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & 31;
                mask = (int32_t)temps[args[1]].mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & 63;
                mask = (int64_t)temps[args[1]].mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & 31;
                mask = (uint32_t)temps[args[1]].mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & 63;
                mask = (uint64_t)temps[args[1]].mask >> tmp;
            }
            break;

        CASE_OP_32_64(shl):
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1);
                mask = temps[args[1]].mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost one. */
            mask = -(temps[args[1]].mask & -temps[args[1]].mask);
            break;
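        /* Illustration of the neg trick above: if temps[args[1]].mask is
           0x30 (only bits 4-5 possibly set), then m & -m isolates the
           lowest such bit, 0x10, and -(0x10) = ...fffffff0 -- bits 0-3 of
           the negation are known zero, anything above may be set. */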
        CASE_OP_32_64(deposit):
            mask = deposit64(temps[args[1]].mask, args[3], args[4],
                             temps[args[2]].mask);
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = temps[args[1]].mask | temps[args[2]].mask;
            break;

        CASE_OP_32_64(setcond):
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = temps[args[3]].mask | temps[args[4]].mask;
            break;

        CASE_OP_32_64(ld8u):
        case INDEX_op_qemu_ld8u:
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
        case INDEX_op_qemu_ld16u:
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
#if TCG_TARGET_REG_BITS == 64
        case INDEX_op_qemu_ld32u:
#endif
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOp mop = args[def->nb_oargs + def->nb_iargs];
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }

        /* 32-bit ops (non 64-bit ops and non load/store ops) generate
           32-bit results. */
        if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_64BIT))) {
            mask &= 0xffffffffu;
        }

        if (mask == 0) {
            assert(def->nb_oargs == 1);
            s->gen_opc_buf[op_index] = op_to_movi(op);
            tcg_opt_gen_movi(gen_args, args[0], 0);
            args += def->nb_oargs + def->nb_iargs + def->nb_cargs;
            gen_args += 2;
            continue;
        }
        if (affected == 0) {
            assert(def->nb_oargs == 1);
            if (temps_are_copies(args[0], args[1])) {
                s->gen_opc_buf[op_index] = INDEX_op_nop;
            } else if (temps[args[1]].state != TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_mov(op);
                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                gen_args += 2;
            } else {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], temps[args[1]].val);
                gen_args += 2;
            }
            args += def->nb_iargs + 1;
            continue;
        }

        /* Simplify expressions for "op r, a, 0 => movi r, 0" cases. */
        switch (op) {
        CASE_OP_32_64(and):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if ((temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0)) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0);
                args += 3;
                gen_args += 2;
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "op r, a, a => mov r, a" cases. */
        switch (op) {
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
            if (temps_are_copies(args[1], args[2])) {
                if (temps_are_copies(args[0], args[1])) {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                } else {
                    s->gen_opc_buf[op_index] = op_to_mov(op);
                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                    gen_args += 2;
                }
                args += 3;
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "op r, a, a => movi r, 0" cases. */
        switch (op) {
        CASE_OP_32_64(andc):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(xor):
            if (temps_are_copies(args[1], args[2])) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0);
                gen_args += 2;
                args += 3;
                continue;
            }
            break;
        default:
            break;
        }
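        /* Copy/constant propagation example (illustrative): given
           "mov t1, t0", a later "mov t0, t1" is recognised through
           temps_are_copies() and becomes a nop below, while a mov whose
           source temp holds a known constant is rewritten into the
           matching movi. */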
        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted for arguments by the
           register allocator where needed and possible.  Also detect
           copies. */
        switch (op) {
        CASE_OP_32_64(mov):
            if (temps_are_copies(args[0], args[1])) {
                args += 2;
                s->gen_opc_buf[op_index] = INDEX_op_nop;
                break;
            }
            if (temps[args[1]].state != TCG_TEMP_CONST) {
                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                gen_args += 2;
                args += 2;
                break;
            }
            /* Source argument is constant.  Rewrite the operation and
               let the movi case handle it. */
            op = op_to_movi(op);
            s->gen_opc_buf[op_index] = op;
            args[1] = temps[args[1]].val;
            /* fallthrough */
        CASE_OP_32_64(movi):
            tcg_opt_gen_movi(gen_args, args[0], args[1]);
            gen_args += 2;
            args += 2;
            break;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tmp = do_constant_folding(op, temps[args[1]].val, 0);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 2;
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tmp = do_constant_folding(op, temps[args[1]].val,
                                          temps[args[2]].val);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 3;
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tmp = deposit64(temps[args[1]].val, args[3], args[4],
                                temps[args[2]].val);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 5;
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
            if (tmp != 2) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 4;
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
            if (tmp != 2) {
                if (tmp) {
                    reset_all_temps(nb_temps);
                    s->gen_opc_buf[op_index] = INDEX_op_br;
                    gen_args[0] = args[3];
                    gen_args += 1;
                } else {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                }
                args += 4;
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
            if (tmp != 2) {
                if (temps_are_copies(args[0], args[4-tmp])) {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
                    s->gen_opc_buf[op_index] = op_to_movi(op);
                    tcg_opt_gen_movi(gen_args, args[0],
                                     temps[args[4-tmp]].val);
                    gen_args += 2;
                } else {
                    s->gen_opc_buf[op_index] = op_to_mov(op);
                    tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
                    gen_args += 2;
                }
                args += 6;
                break;
            }
            goto do_default;
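        /* Double-word folding example (illustrative): add2 builds a 64-bit
           sum out of 32-bit halves.  With (al, ah) = (0xffffffff, 0) and
           (bl, bh) = (1, 0), a = 0xffffffff and b = 1, so a + b =
           0x100000000 and the result is rl = 0, rh = 1 -- the carry is
           handled for free by the 64-bit host addition below. */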
        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[3]].state == TCG_TEMP_CONST
                && temps[args[4]].state == TCG_TEMP_CONST
                && temps[args[5]].state == TCG_TEMP_CONST) {
                uint32_t al = temps[args[2]].val;
                uint32_t ah = temps[args[3]].val;
                uint32_t bl = temps[args[4]].val;
                uint32_t bh = temps[args[5]].val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;

                if (op == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                /* We emit the extra nop when we emit the add2/sub2. */
                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);

                rl = args[0];
                rh = args[1];
                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
                s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
                gen_args += 4;
                args += 6;
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[3]].state == TCG_TEMP_CONST) {
                uint32_t a = temps[args[2]].val;
                uint32_t b = temps[args[3]].val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;

                /* We emit the extra nop when we emit the mulu2. */
                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);

                rl = args[0];
                rh = args[1];
                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
                s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
                gen_args += 4;
                args += 4;
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
            if (tmp != 2) {
                if (tmp) {
                    reset_all_temps(nb_temps);
                    s->gen_opc_buf[op_index] = INDEX_op_br;
                    gen_args[0] = args[5];
                    gen_args += 1;
                } else {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                }
            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
                       && temps[args[2]].state == TCG_TEMP_CONST
                       && temps[args[3]].state == TCG_TEMP_CONST
                       && temps[args[2]].val == 0
                       && temps[args[3]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
                reset_all_temps(nb_temps);
                s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
                gen_args[0] = args[1];
                gen_args[1] = args[3];
                gen_args[2] = args[4];
                gen_args[3] = args[5];
                gen_args += 4;
            } else {
                goto do_default;
            }
            args += 6;
            break;
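        /* Why the LT/GE special case above is sound (illustrative): a
           signed comparison of a 64-bit value against zero only inspects
           the sign bit, which lives in the high word, so
           "brcond2 al, ah, $0, $0, lt" reduces to the 32-bit
           "brcond ah, $0, lt".  The setcond2 case below uses the same
           trick. */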
        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
            if (tmp != 2) {
                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
                       && temps[args[3]].state == TCG_TEMP_CONST
                       && temps[args[4]].state == TCG_TEMP_CONST
                       && temps[args[3]].val == 0
                       && temps[args[4]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
                s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
                reset_temp(args[0]);
                gen_args[0] = args[0];
                gen_args[1] = args[2];
                gen_args[2] = args[4];
                gen_args[3] = args[5];
                gen_args += 4;
            } else {
                goto do_default;
            }
            args += 6;
            break;

        case INDEX_op_call:
            nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
            if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS |
                                            TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    reset_temp(i);
                }
            }
            for (i = 0; i < (args[0] >> 16); i++) {
                reset_temp(args[i + 1]);
            }
            i = nb_call_args + 3;
            while (i) {
                *gen_args = *args;
                args++;
                gen_args++;
                i--;
            }
            break;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute the operation result) so no propagation
               is done.  We trash everything if the operation is the end
               of a basic block, otherwise we only trash the output args.
               "mask" is the non-zero bits mask for the first output arg. */
            if (def->flags & TCG_OPF_BB_END) {
                reset_all_temps(nb_temps);
            } else {
                for (i = 0; i < def->nb_oargs; i++) {
                    reset_temp(args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far). */
                    if (i == 0) {
                        temps[args[i]].mask = mask;
                    }
                }
            }
            for (i = 0; i < def->nb_args; i++) {
                gen_args[i] = args[i];
            }
            args += def->nb_args;
            gen_args += def->nb_args;
            break;
        }
    }

    return gen_args;
}

TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
                     TCGArg *args, TCGOpDef *tcg_op_defs)
{
    TCGArg *res;
    res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
    return res;
}