/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

#include "qemu-common.h"
#include "tcg-op.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

struct tcg_temp_info {
    bool is_const;
    uint16_t prev_copy;
    uint16_t next_copy;
    tcg_target_ulong val;
    tcg_target_ulong mask;
};

static struct tcg_temp_info temps[TCG_MAX_TEMPS];
static TCGTempSet temps_used;

static inline bool temp_is_const(TCGArg arg)
{
    return temps[arg].is_const;
}

static inline bool temp_is_copy(TCGArg arg)
{
    return temps[arg].next_copy != arg;
}

/* Reset TEMP's state, possibly removing the temp from the list of copies. */
static void reset_temp(TCGArg temp)
{
    temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
    temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
    temps[temp].next_copy = temp;
    temps[temp].prev_copy = temp;
    temps[temp].is_const = false;
    temps[temp].mask = -1;
}

/* Reset all temporaries, given that there are NB_TEMPS of them. */
static void reset_all_temps(int nb_temps)
{
    bitmap_zero(temps_used.l, nb_temps);
}
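/*
 * Illustrative note on the copy tracking above: next_copy/prev_copy
 * thread each temp into a circular doubly linked list of known copies,
 * with a lone temp pointing at itself.  For example, after
 * "mov t1, t0" and then "mov t2, t0" the next_copy ring is
 * t0 -> t2 -> t1 -> t0; reset_temp(t2) splices t2 back out, leaving
 * t0 -> t1 -> t0.
 */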
/* Initialize and activate a temporary. */
static void init_temp_info(TCGArg temp)
{
    if (!test_bit(temp, temps_used.l)) {
        temps[temp].next_copy = temp;
        temps[temp].prev_copy = temp;
        temps[temp].is_const = false;
        temps[temp].mask = -1;
        set_bit(temp, temps_used.l);
    }
}

static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
                               TCGOpcode opc, int nargs)
{
    int oi = s->gen_next_op_idx;
    int pi = s->gen_next_parm_idx;
    int prev = old_op->prev;
    int next = old_op - s->gen_op_buf;
    TCGOp *new_op;

    tcg_debug_assert(oi < OPC_BUF_SIZE);
    tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
    s->gen_next_op_idx = oi + 1;
    s->gen_next_parm_idx = pi + nargs;

    new_op = &s->gen_op_buf[oi];
    *new_op = (TCGOp){
        .opc = opc,
        .args = pi,
        .prev = prev,
        .next = next
    };
    if (prev >= 0) {
        s->gen_op_buf[prev].next = oi;
    } else {
        s->gen_first_op_idx = oi;
    }
    old_op->prev = oi;

    return new_op;
}

static int op_bits(TCGOpcode op)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    return def->flags & TCG_OPF_64BIT ? 64 : 32;
}

static TCGOpcode op_to_mov(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_mov_i32;
    case 64:
        return INDEX_op_mov_i64;
    default:
        fprintf(stderr, "op_to_mov: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGOpcode op_to_movi(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_movi_i32;
    case 64:
        return INDEX_op_movi_i64;
    default:
        fprintf(stderr, "op_to_movi: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
{
    TCGArg i;

    /* If this is already a global, we can't do better. */
    if (temp < s->nb_globals) {
        return temp;
    }

    /* Search for a global first. */
    for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
        if (i < s->nb_globals) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local. */
    if (!s->temps[temp].temp_local) {
        for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
            if (s->temps[i].temp_local) {
                return i;
            }
        }
    }

    /* Failing to find a better representation, return the same temp. */
    return temp;
}
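/*
 * Example of the preference order in find_better_copy: if a plain temp
 * is in a copy ring together with a global and a temp local, the
 * global wins, then the temp local.  Globals and temp locals have
 * longer lifetimes than plain temps, so canonicalizing on them keeps
 * the copy information useful for longer.
 */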
static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
{
    TCGArg i;

    if (arg1 == arg2) {
        return true;
    }

    if (!temp_is_copy(arg1) || !temp_is_copy(arg2)) {
        return false;
    }

    for (i = temps[arg1].next_copy; i != arg1; i = temps[i].next_copy) {
        if (i == arg2) {
            return true;
        }
    }

    return false;
}

static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
                             TCGArg dst, TCGArg val)
{
    TCGOpcode new_op = op_to_movi(op->opc);
    tcg_target_ulong mask;

    op->opc = new_op;

    reset_temp(dst);
    temps[dst].is_const = true;
    temps[dst].val = val;
    mask = val;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
        /* High bits of the destination are now garbage. */
        mask |= ~0xffffffffull;
    }
    temps[dst].mask = mask;

    args[0] = dst;
    args[1] = val;
}

static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
                            TCGArg dst, TCGArg src)
{
    if (temps_are_copies(dst, src)) {
        tcg_op_remove(s, op);
        return;
    }

    TCGOpcode new_op = op_to_mov(op->opc);
    tcg_target_ulong mask;

    op->opc = new_op;

    reset_temp(dst);
    mask = temps[src].mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage. */
        mask |= ~0xffffffffull;
    }
    temps[dst].mask = mask;

    if (s->temps[src].type == s->temps[dst].type) {
        temps[dst].next_copy = temps[src].next_copy;
        temps[dst].prev_copy = src;
        temps[temps[dst].next_copy].prev_copy = dst;
        temps[src].next_copy = dst;
        temps[dst].is_const = temps[src].is_const;
        temps[dst].val = temps[src].val;
    }

    args[0] = dst;
    args[1] = src;
}
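/*
 * Worked example of the mask bookkeeping above: on a 64-bit host,
 * "movi_i32 t0, 0x80" records val = 0x80 and mask = 0xffffffff00000080:
 * within the low 32 bits only bit 7 may be nonzero, while the high
 * half of the host register is treated as garbage.
 */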
static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined. */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}

static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    TCGArg res = do_constant_folding_2(op, x, y);
    if (op_bits(op) == 32) {
        res = (int32_t)res;
    }
    return res;
}
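/*
 * Example of the (int32_t) truncation above: on a 64-bit host, folding
 * "add_i32 $0xffffffff, $1" computes 0x100000000 in
 * do_constant_folding_2; the cast canonicalizes the result to 0,
 * matching what the 32-bit operation would produce.
 */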
static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can. */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    if (temp_is_const(x) && temp_is_const(y)) {
        switch (op_bits(op)) {
        case 32:
            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
        case 64:
            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
        default:
            tcg_abort();
        }
    } else if (temps_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (temp_is_const(y) && temps[y].val == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    } else {
        return 2;
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can. */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (temp_is_const(bl) && temp_is_const(bh)) {
        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;

        if (temp_is_const(al) && temp_is_const(ah)) {
            uint64_t a;
            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}
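/*
 * Note on the LTU/GEU shortcuts above: unsigned "x < 0" is always
 * false and unsigned "x >= 0" is always true, so comparisons against a
 * known zero fold even when the other operand is unknown.
 */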
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += temp_is_const(a1);
    sum -= temp_is_const(a2);

    /* Prefer the constant in the second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += temp_is_const(p1[0]);
    sum += temp_is_const(p1[1]);
    sum -= temp_is_const(p2[0]);
    sum -= temp_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

/* Propagate constants and copies, fold constant expressions. */
void tcg_optimize(TCGContext *s)
{
    int oi, oi_next, nb_temps, nb_globals;

    /* The TEMPS array has an element for each temp.
       If this temp holds a constant then its value is kept in the
       element's VAL field.  If this temp is a copy of other ones then
       the other copies are available through the doubly linked
       circular list. */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    reset_all_temps(nb_temps);

    for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
        tcg_target_ulong mask, partmask, affected;
        int nb_oargs, nb_iargs, i;
        TCGArg tmp;

        TCGOp * const op = &s->gen_op_buf[oi];
        TCGArg * const args = &s->gen_opparam_buf[op->args];
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        oi_next = op->next;

        /* Count the arguments, and initialize the temps that are
           going to be used. */
        if (opc == INDEX_op_call) {
            nb_oargs = op->callo;
            nb_iargs = op->calli;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                tmp = args[i];
                if (tmp != TCG_CALL_DUMMY_ARG) {
                    init_temp_info(tmp);
                }
            }
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                init_temp_info(args[i]);
            }
        }

        /* Do copy propagation. */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            if (temp_is_copy(args[i])) {
                args[i] = find_better_copy(s, args[i]);
            }
        }

        /* For commutative operations make the constant second argument. */
        switch (opc) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(and):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(args[0], &args[1], &args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &args[0], &args[1])) {
                args[2] = tcg_swap_cond(args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(args[0], &args[1], &args[2])) {
                args[3] = tcg_swap_cond(args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &args[1], &args[2])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation. */
            if (swap_commutative(args[0], &args[4], &args[3])) {
                args[5] = tcg_invert_cond(args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(args[0], &args[2], &args[4]);
            swap_commutative(args[1], &args[3], &args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(args[0], &args[2], &args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&args[0], &args[2])) {
                args[4] = tcg_swap_cond(args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&args[1], &args[3])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            break;
        default:
            break;
        }
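        /*
         * Illustrative example of the canonicalization above:
         * "brcond_i32 $5, t0, lt, L" becomes "brcond_i32 t0, $5, gt, L".
         * Swapping the operands keeps the constant second, and
         * tcg_swap_cond() preserves the semantics.
         */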
        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0"
           and "sub r, 0, a => neg r, a" cases. */
        switch (opc) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
                tcg_opt_gen_movi(s, op, args, args[0], 0);
                continue;
            }
            break;
        CASE_OP_32_64(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (temp_is_const(args[2])) {
                    /* Proceed with possible constant folding. */
                    break;
                }
                if (opc == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                }
                if (!have_neg) {
                    break;
                }
                if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
                    op->opc = neg_op;
                    reset_temp(args[0]);
                    args[1] = args[2];
                    continue;
                }
            }
            break;
        CASE_OP_32_64(xor):
        CASE_OP_32_64(nand):
            if (!temp_is_const(args[1])
                && temp_is_const(args[2]) && temps[args[2]].val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (!temp_is_const(args[1])
                && temp_is_const(args[2]) && temps[args[2]].val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(andc):
            if (!temp_is_const(args[2])
                && temp_is_const(args[1]) && temps[args[1]].val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (!temp_is_const(args[2])
                && temp_is_const(args[1]) && temps[args[1]].val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                op->opc = not_op;
                reset_temp(args[0]);
                args[1] = args[i];
                continue;
            }
        default:
            break;
        }

        /* Simplify expression for "op r, a, const => mov r, a" cases. */
        switch (opc) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(andc):
            if (!temp_is_const(args[1])
                && temp_is_const(args[2]) && temps[args[2]].val == 0) {
                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
                continue;
            }
            break;
        CASE_OP_32_64(and):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (!temp_is_const(args[1])
                && temp_is_const(args[2]) && temps[args[2]].val == -1) {
                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
                continue;
            }
            break;
        default:
            break;
        }
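        /*
         * Example of the identity folds above: "or t0, t1, $0" and
         * "xor t0, t1, $0" become "mov t0, t1", as does
         * "and t0, t1, $-1", since 0 and all-ones are the respective
         * identity elements.
         */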
        /* Simplify using known-zero bits.  Currently only ops with a single
           output argument are supported. */
        mask = -1;
        affected = -1;
        switch (opc) {
        CASE_OP_32_64(ext8s):
            if ((temps[args[1]].mask & 0x80) != 0) {
                break;
            }
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((temps[args[1]].mask & 0x8000) != 0) {
                break;
            }
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((temps[args[1]].mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = temps[args[2]].mask;
            if (temp_is_const(args[2])) {
        and_const:
                affected = temps[args[1]].mask & ~mask;
            }
            mask = temps[args[1]].mask & mask;
            break;

        case INDEX_op_ext_i32_i64:
            if ((temps[args[1]].mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_extu_i32_i64:
            /* We do not compute affected as it is a size changing op. */
            mask = (uint32_t)temps[args[1]].mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros does not imply known-ones.  Therefore unless
               args[2] is constant, we can't infer anything from it. */
            if (temp_is_const(args[2])) {
                mask = ~temps[args[2]].mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set. */
            mask = temps[args[1]].mask;
            break;

        case INDEX_op_sar_i32:
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & 31;
                mask = (int32_t)temps[args[1]].mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & 63;
                mask = (int64_t)temps[args[1]].mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & 31;
                mask = (uint32_t)temps[args[1]].mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & 63;
                mask = (uint64_t)temps[args[1]].mask >> tmp;
            }
            break;

        case INDEX_op_extrl_i64_i32:
            mask = (uint32_t)temps[args[1]].mask;
            break;
        case INDEX_op_extrh_i64_i32:
            mask = (uint64_t)temps[args[1]].mask >> 32;
            break;

        CASE_OP_32_64(shl):
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1);
                mask = temps[args[1]].mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost. */
            mask = -(temps[args[1]].mask & -temps[args[1]].mask);
            break;

        CASE_OP_32_64(deposit):
            mask = deposit64(temps[args[1]].mask, args[3], args[4],
                             temps[args[2]].mask);
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = temps[args[1]].mask | temps[args[2]].mask;
            break;

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = temps[args[3]].mask | temps[args[4]].mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOpIdx oi = args[nb_oargs + nb_iargs];
                TCGMemOp mop = get_memop(oi);
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }

        /* 32-bit ops generate 32-bit results.  For the result-is-zero test
           below, we can ignore high bits, but for further optimizations we
           need to record that the high bits contain garbage. */
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }

        if (partmask == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, op, args, args[0], 0);
            continue;
        }
        if (affected == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_mov(s, op, args, args[0], args[1]);
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases. */
        switch (opc) {
        CASE_OP_32_64(and):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if ((temp_is_const(args[2]) && temps[args[2]].val == 0)) {
                tcg_opt_gen_movi(s, op, args, args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases. */
        switch (opc) {
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
            if (temps_are_copies(args[1], args[2])) {
                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases. */
        switch (opc) {
        CASE_OP_32_64(andc):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(xor):
            if (temps_are_copies(args[1], args[2])) {
                tcg_opt_gen_movi(s, op, args, args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }
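        /*
         * Worked example of the "affected" test above: for
         * "and_i64 t0, t1, $0xff" where t1 is the result of an ext8u
         * (mask 0xff), affected = 0xff & ~0xff = 0, so the AND cannot
         * change any bit and the op is rewritten to "mov t0, t1".
         */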
        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted for arguments by the
           register allocator where needed and possible.  Also detect
           copies. */
        switch (opc) {
        CASE_OP_32_64(mov):
            tcg_opt_gen_mov(s, op, args, args[0], args[1]);
            break;
        CASE_OP_32_64(movi):
            tcg_opt_gen_movi(s, op, args, args[0], args[1]);
            break;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
        case INDEX_op_ext_i32_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            if (temp_is_const(args[1])) {
                tmp = do_constant_folding(opc, temps[args[1]].val, 0);
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (temp_is_const(args[1]) && temp_is_const(args[2])) {
                tmp = do_constant_folding(opc, temps[args[1]].val,
                                          temps[args[2]].val);
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (temp_is_const(args[1]) && temp_is_const(args[2])) {
                tmp = deposit64(temps[args[1]].val, args[3], args[4],
                                temps[args[2]].val);
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]);
            if (tmp != 2) {
                if (tmp) {
                    reset_all_temps(nb_temps);
                    op->opc = INDEX_op_br;
                    args[0] = args[3];
                } else {
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]);
            if (tmp != 2) {
                tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]);
                break;
            }
            goto do_default;

        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (temp_is_const(args[2]) && temp_is_const(args[3])
                && temp_is_const(args[4]) && temp_is_const(args[5])) {
                uint32_t al = temps[args[2]].val;
                uint32_t ah = temps[args[3]].val;
                uint32_t bl = temps[args[4]].val;
                uint32_t bh = temps[args[5]].val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
                TCGArg *args2 = &s->gen_opparam_buf[op2->args];

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                rl = args[0];
                rh = args[1];
                tcg_opt_gen_movi(s, op, args, rl, (int32_t)a);
                tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(a >> 32));

                /* We've done all we need to do with the movi.  Skip it. */
                oi_next = op2->next;
                break;
            }
            goto do_default;
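        /*
         * Note on the add2/sub2 fold above: the double-word result is
         * computed on the host and rewritten as two movi_i32 ops.  The
         * newly inserted op2 sets the high part, and the current op is
         * converted in place to set the low part.
         */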
        case INDEX_op_mulu2_i32:
            if (temp_is_const(args[2]) && temp_is_const(args[3])) {
                uint32_t a = temps[args[2]].val;
                uint32_t b = temps[args[3]].val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
                TCGArg *args2 = &s->gen_opparam_buf[op2->args];

                rl = args[0];
                rh = args[1];
                tcg_opt_gen_movi(s, op, args, rl, (int32_t)r);
                tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(r >> 32));

                /* We've done all we need to do with the movi.  Skip it. */
                oi_next = op2->next;
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
            if (tmp != 2) {
                if (tmp) {
        do_brcond_true:
                    reset_all_temps(nb_temps);
                    op->opc = INDEX_op_br;
                    args[0] = args[5];
                } else {
        do_brcond_false:
                    tcg_op_remove(s, op);
                }
            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
                       && temp_is_const(args[2]) && temps[args[2]].val == 0
                       && temp_is_const(args[3]) && temps[args[3]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
        do_brcond_high:
                reset_all_temps(nb_temps);
                op->opc = INDEX_op_brcond_i32;
                args[0] = args[1];
                args[1] = args[3];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[0], args[2], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[1], args[3], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
        do_brcond_low:
                reset_all_temps(nb_temps);
                op->opc = INDEX_op_brcond_i32;
                args[1] = args[2];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[0], args[2], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[1], args[3], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;
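        /*
         * Example of the brcond2 LT/GE simplification above: for a
         * 64-bit value split into (lo, hi), "brcond2 lo, hi, $0, $0, lt"
         * depends only on the sign of the high word, so it becomes
         * "brcond_i32 hi, $0, lt".
         */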
        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
            if (tmp != 2) {
        do_setcond_const:
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
                       && temp_is_const(args[3]) && temps[args[3]].val == 0
                       && temp_is_const(args[4]) && temps[args[4]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
        do_setcond_high:
                reset_temp(args[0]);
                temps[args[0]].mask = 1;
                op->opc = INDEX_op_setcond_i32;
                args[1] = args[2];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[1], args[3], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[2], args[4], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
        do_setcond_low:
                reset_temp(args[0]);
                temps[args[0]].mask = 1;
                op->opc = INDEX_op_setcond_i32;
                args[2] = args[3];
                args[3] = args[5];
            } else if (args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[1], args[3], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[2], args[4], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_call:
            if (!(args[nb_oargs + nb_iargs + 1]
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    if (test_bit(i, temps_used.l)) {
                        reset_temp(i);
                    }
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute the operation result) so no propagation
               is done.  We trash everything if the operation is the end
               of a basic block, otherwise we only trash the output args.
               "mask" is the non-zero bits mask for the first output arg. */
            if (def->flags & TCG_OPF_BB_END) {
                reset_all_temps(nb_temps);
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far). */
                    if (i == 0) {
                        temps[args[i]].mask = mask;
                    }
                }
            }
            break;
        }
    }
}