/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "exec/cpu-common.h"
#include "tcg-op.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

struct tcg_temp_info {
    bool is_const;
    uint16_t prev_copy;
    uint16_t next_copy;
    tcg_target_ulong val;
    tcg_target_ulong mask;
};

static struct tcg_temp_info temps[TCG_MAX_TEMPS];
static TCGTempSet temps_used;

static inline bool temp_is_const(TCGArg arg)
{
    return temps[arg].is_const;
}

static inline bool temp_is_copy(TCGArg arg)
{
    return temps[arg].next_copy != arg;
}

/* Reset TEMP's state, possibly removing the temp from the list of copies.  */
static void reset_temp(TCGArg temp)
{
    temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
    temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
    temps[temp].next_copy = temp;
    temps[temp].prev_copy = temp;
    temps[temp].is_const = false;
    temps[temp].mask = -1;
}

/* Reset all temporaries, given that there are NB_TEMPS of them.  */
static void reset_all_temps(int nb_temps)
{
    bitmap_zero(temps_used.l, nb_temps);
}
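
/* Copy tracking: temps known to hold the same value are linked into a
   doubly linked circular list via next_copy/prev_copy; a temp that is not
   a copy of anything simply points at itself.  For example, after
   "mov t1, t0; mov t2, t0" the three temps form one ring
   t0 -> t1 -> t2 -> t0.

   Known-zero tracking: "mask" is the set of bits that may still be
   non-zero; a bit that is clear in mask is known to be zero in the temp's
   value, and a mask of -1 means nothing is known.  */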

/* Initialize and activate a temporary.  */
static void init_temp_info(TCGArg temp)
{
    if (!test_bit(temp, temps_used.l)) {
        temps[temp].next_copy = temp;
        temps[temp].prev_copy = temp;
        temps[temp].is_const = false;
        temps[temp].mask = -1;
        set_bit(temp, temps_used.l);
    }
}

static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
                               TCGOpcode opc, int nargs)
{
    int oi = s->gen_next_op_idx;
    int pi = s->gen_next_parm_idx;
    int prev = old_op->prev;
    int next = old_op - s->gen_op_buf;
    TCGOp *new_op;

    tcg_debug_assert(oi < OPC_BUF_SIZE);
    tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
    s->gen_next_op_idx = oi + 1;
    s->gen_next_parm_idx = pi + nargs;

    new_op = &s->gen_op_buf[oi];
    *new_op = (TCGOp){
        .opc = opc,
        .args = pi,
        .prev = prev,
        .next = next
    };
    if (prev >= 0) {
        s->gen_op_buf[prev].next = oi;
    } else {
        s->gen_first_op_idx = oi;
    }
    old_op->prev = oi;

    return new_op;
}

static int op_bits(TCGOpcode op)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    return def->flags & TCG_OPF_64BIT ? 64 : 32;
}

static TCGOpcode op_to_mov(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_mov_i32;
    case 64:
        return INDEX_op_mov_i64;
    default:
        fprintf(stderr, "op_to_mov: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGOpcode op_to_movi(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_movi_i32;
    case 64:
        return INDEX_op_movi_i64;
    default:
        fprintf(stderr, "op_to_movi: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
{
    TCGArg i;

    /* If this is already a global, we can't do better. */
    if (temp < s->nb_globals) {
        return temp;
    }

    /* Search for a global first. */
    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
        if (i < s->nb_globals) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local. */
    if (!s->temps[temp].temp_local) {
        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
            if (s->temps[i].temp_local) {
                return i;
            }
        }
    }

    /* Failed to find a better representation; return the same temp. */
    return temp;
}

static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
{
    TCGArg i;

    if (arg1 == arg2) {
        return true;
    }

    if (!temp_is_copy(arg1) || !temp_is_copy(arg2)) {
        return false;
    }

    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
        if (i == arg2) {
            return true;
        }
    }

    return false;
}

static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
                             TCGArg dst, TCGArg val)
{
    TCGOpcode new_op = op_to_movi(op->opc);
    tcg_target_ulong mask;

    op->opc = new_op;

    reset_temp(dst);
    temps[dst].is_const = true;
    temps[dst].val = val;
    mask = val;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    temps[dst].mask = mask;

    args[0] = dst;
    args[1] = val;
}
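
/* Example of the in-place rewrite done by the main loop below: when both
   inputs of an op are known constants, e.g. "add_i32 t0, t1, t2" with
   t1 = 5 and t2 = 7, the op itself is overwritten via tcg_opt_gen_movi()
   and becomes "movi_i32 t0, 12"; t0 is then tracked as the constant 12
   for the rest of the basic block.  */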

static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
                            TCGArg dst, TCGArg src)
{
    if (temps_are_copies(dst, src)) {
        tcg_op_remove(s, op);
        return;
    }

    TCGOpcode new_op = op_to_mov(op->opc);
    tcg_target_ulong mask;

    op->opc = new_op;

    reset_temp(dst);
    mask = temps[src].mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    temps[dst].mask = mask;

    if (s->temps[src].type == s->temps[dst].type) {
        temps[dst].next_copy = temps[src].next_copy;
        temps[dst].prev_copy = src;
        temps[temps[dst].next_copy].prev_copy = dst;
        temps[src].next_copy = dst;
        temps[dst].is_const = temps[src].is_const;
        temps[dst].val = temps[src].val;
    }

    args[0] = dst;
    args[1] = src;
}

static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined. */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}

static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    TCGArg res = do_constant_folding_2(op, x, y);
    if (op_bits(op) == 32) {
        res = (int32_t)res;
    }
    return res;
}

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    if (temp_is_const(x) && temp_is_const(y)) {
        switch (op_bits(op)) {
        case 32:
            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
        case 64:
            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
        default:
            tcg_abort();
        }
    } else if (temps_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (temp_is_const(y) && temps[y].val == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    } else {
        return 2;
    }
}
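
/* The unsigned-versus-zero cases above hold even when x is not a constant:
   for example "setcond_i32 t0, t1, t2, ltu" with t2 known to be 0 can never
   be true, since no unsigned value is below zero, so the comparison folds
   to the constant 0 (and GEU similarly folds to 1).  */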

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (temp_is_const(bl) && temp_is_const(bh)) {
        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;

        if (temp_is_const(al) && temp_is_const(ah)) {
            uint64_t a;
            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += temp_is_const(a1);
    sum -= temp_is_const(a2);

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += temp_is_const(p1[0]);
    sum += temp_is_const(p1[1]);
    sum -= temp_is_const(p2[0]);
    sum -= temp_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}
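
/* Canonicalization example: with t1 holding the constant 5,
   "add_i32 t0, t1, t2" is rewritten as "add_i32 t0, t2, t1" so that the
   constant ends up in the second operand, which the later
   "op r, a, const" patterns and most backends expect.  For comparisons the
   operands may be swapped the same way, in which case the condition is
   adjusted with tcg_swap_cond() (or tcg_invert_cond() for the movcond
   special case).  */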

/* Propagate constants and copies, fold constant expressions. */
void tcg_optimize(TCGContext *s)
{
    int oi, oi_next, nb_temps, nb_globals;

    /* Array TEMPS has an element for each temp.
       If this temp holds a constant then its value is kept in the element's
       val field.  If this temp is a copy of other ones then the other
       copies are available through the doubly linked circular list. */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    reset_all_temps(nb_temps);

    for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
        tcg_target_ulong mask, partmask, affected;
        int nb_oargs, nb_iargs, i;
        TCGArg tmp;

        TCGOp * const op = &s->gen_op_buf[oi];
        TCGArg * const args = &s->gen_opparam_buf[op->args];
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        oi_next = op->next;

        /* Count the arguments, and initialize the temps that are
           going to be used. */
        if (opc == INDEX_op_call) {
            nb_oargs = op->callo;
            nb_iargs = op->calli;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                tmp = args[i];
                if (tmp != TCG_CALL_DUMMY_ARG) {
                    init_temp_info(tmp);
                }
            }
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                init_temp_info(args[i]);
            }
        }

        /* Do copy propagation */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            if (temp_is_copy(args[i])) {
                args[i] = find_better_copy(s, args[i]);
            }
        }

        /* For commutative operations make the constant the second argument */
        switch (opc) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(and):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(args[0], &args[1], &args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &args[0], &args[1])) {
                args[2] = tcg_swap_cond(args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(args[0], &args[1], &args[2])) {
                args[3] = tcg_swap_cond(args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &args[1], &args[2])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation. */
            if (swap_commutative(args[0], &args[4], &args[3])) {
                args[5] = tcg_invert_cond(args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(args[0], &args[2], &args[4]);
            swap_commutative(args[1], &args[3], &args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(args[0], &args[2], &args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&args[0], &args[2])) {
                args[4] = tcg_swap_cond(args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&args[1], &args[3])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
           and "sub r, 0, a => neg r, a" case. */
        switch (opc) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
                tcg_opt_gen_movi(s, op, args, args[0], 0);
                continue;
            }
            break;
        CASE_OP_32_64(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (temp_is_const(args[2])) {
                    /* Proceed with possible constant folding. */
                    break;
                }
                if (opc == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                }
                if (!have_neg) {
                    break;
                }
                if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
                    op->opc = neg_op;
                    reset_temp(args[0]);
                    args[1] = args[2];
                    continue;
                }
            }
            break;
        CASE_OP_32_64(xor):
        CASE_OP_32_64(nand):
            if (!temp_is_const(args[1])
                && temp_is_const(args[2]) && temps[args[2]].val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (!temp_is_const(args[1])
                && temp_is_const(args[2]) && temps[args[2]].val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(andc):
            if (!temp_is_const(args[2])
                && temp_is_const(args[1]) && temps[args[1]].val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (!temp_is_const(args[2])
                && temp_is_const(args[1]) && temps[args[1]].val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                op->opc = not_op;
                reset_temp(args[0]);
                args[1] = args[i];
                continue;
            }
        default:
            break;
        }

        /* Simplify expression for "op r, a, const => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(andc):
            if (!temp_is_const(args[1])
                && temp_is_const(args[2]) && temps[args[2]].val == 0) {
                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
                continue;
            }
            break;
        CASE_OP_32_64(and):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (!temp_is_const(args[1])
                && temp_is_const(args[2]) && temps[args[2]].val == -1) {
                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
                continue;
            }
            break;
        default:
            break;
        }
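
        /* Known-zero-bits example: after "and_i32 t0, t1, t2" with t2 known
           to be 0xff, only the low 8 bits of t0 can be non-zero.  A later
           "ext8u_i32 t3, t0" then finds that none of the bits it would clear
           can possibly be set ("affected" is 0 below) and is replaced by a
           simple mov.  */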

        /* Simplify using known-zero bits.  Currently only ops with a single
           output argument are supported. */
        mask = -1;
        affected = -1;
        switch (opc) {
        CASE_OP_32_64(ext8s):
            if ((temps[args[1]].mask & 0x80) != 0) {
                break;
            }
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((temps[args[1]].mask & 0x8000) != 0) {
                break;
            }
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((temps[args[1]].mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = temps[args[2]].mask;
            if (temp_is_const(args[2])) {
        and_const:
                affected = temps[args[1]].mask & ~mask;
            }
            mask = temps[args[1]].mask & mask;
            break;

        case INDEX_op_ext_i32_i64:
            if ((temps[args[1]].mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_extu_i32_i64:
            /* We do not compute affected as it is a size changing op. */
            mask = (uint32_t)temps[args[1]].mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros do not imply known-ones.  Therefore unless
               args[2] is constant, we can't infer anything from it. */
            if (temp_is_const(args[2])) {
                mask = ~temps[args[2]].mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set. */
            mask = temps[args[1]].mask;
            break;

        case INDEX_op_sar_i32:
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & 31;
                mask = (int32_t)temps[args[1]].mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & 63;
                mask = (int64_t)temps[args[1]].mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & 31;
                mask = (uint32_t)temps[args[1]].mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & 63;
                mask = (uint64_t)temps[args[1]].mask >> tmp;
            }
            break;

        case INDEX_op_extrl_i64_i32:
            mask = (uint32_t)temps[args[1]].mask;
            break;
        case INDEX_op_extrh_i64_i32:
            mask = (uint64_t)temps[args[1]].mask >> 32;
            break;

        CASE_OP_32_64(shl):
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1);
                mask = temps[args[1]].mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost possibly-set
               bit.  */
            mask = -(temps[args[1]].mask & -temps[args[1]].mask);
            break;

        CASE_OP_32_64(deposit):
            mask = deposit64(temps[args[1]].mask, args[3], args[4],
                             temps[args[2]].mask);
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = temps[args[1]].mask | temps[args[2]].mask;
            break;

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = temps[args[3]].mask | temps[args[4]].mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOpIdx oi = args[nb_oargs + nb_iargs];
                TCGMemOp mop = get_memop(oi);
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }

        /* 32-bit ops generate 32-bit results.  For the "result is zero" test
           below we can ignore the high bits, but for further optimizations
           we need to record that the high bits contain garbage. */
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }

        if (partmask == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, op, args, args[0], 0);
            continue;
        }
        if (affected == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_mov(s, op, args, args[0], args[1]);
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64(and):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if ((temp_is_const(args[2]) && temps[args[2]].val == 0)) {
                tcg_opt_gen_movi(s, op, args, args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
            if (temps_are_copies(args[1], args[2])) {
                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64(andc):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(xor):
            if (temps_are_copies(args[1], args[2])) {
                tcg_opt_gen_movi(s, op, args, args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted for arguments by the
           register allocator where needed and possible.  Also detect
           copies. */
        switch (opc) {
        CASE_OP_32_64(mov):
            tcg_opt_gen_mov(s, op, args, args[0], args[1]);
            break;
        CASE_OP_32_64(movi):
            tcg_opt_gen_movi(s, op, args, args[0], args[1]);
            break;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
        case INDEX_op_ext_i32_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            if (temp_is_const(args[1])) {
                tmp = do_constant_folding(opc, temps[args[1]].val, 0);
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (temp_is_const(args[1]) && temp_is_const(args[2])) {
                tmp = do_constant_folding(opc, temps[args[1]].val,
                                          temps[args[2]].val);
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (temp_is_const(args[1]) && temp_is_const(args[2])) {
                tmp = deposit64(temps[args[1]].val, args[3], args[4],
                                temps[args[2]].val);
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]);
            if (tmp != 2) {
                if (tmp) {
                    reset_all_temps(nb_temps);
                    op->opc = INDEX_op_br;
                    args[0] = args[3];
                } else {
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]);
            if (tmp != 2) {
                tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]);
                break;
            }
            goto do_default;
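
        /* Double-word folding: when all four 32-bit input halves below are
           constant, the pair op is replaced by two movi_i32 ops, one per
           output half.  A new op for the high half is inserted before the
           current one with insert_op_before(), and the current op is reused
           for the low half.  */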

        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (temp_is_const(args[2]) && temp_is_const(args[3])
                && temp_is_const(args[4]) && temp_is_const(args[5])) {
                uint32_t al = temps[args[2]].val;
                uint32_t ah = temps[args[3]].val;
                uint32_t bl = temps[args[4]].val;
                uint32_t bh = temps[args[5]].val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
                TCGArg *args2 = &s->gen_opparam_buf[op2->args];

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                rl = args[0];
                rh = args[1];
                tcg_opt_gen_movi(s, op, args, rl, (int32_t)a);
                tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(a >> 32));

                /* We've done all we need to do with the movi.  Skip it. */
                oi_next = op2->next;
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (temp_is_const(args[2]) && temp_is_const(args[3])) {
                uint32_t a = temps[args[2]].val;
                uint32_t b = temps[args[3]].val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
                TCGArg *args2 = &s->gen_opparam_buf[op2->args];

                rl = args[0];
                rh = args[1];
                tcg_opt_gen_movi(s, op, args, rl, (int32_t)r);
                tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(r >> 32));

                /* We've done all we need to do with the movi.  Skip it. */
                oi_next = op2->next;
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
            if (tmp != 2) {
                if (tmp) {
            do_brcond_true:
                    reset_all_temps(nb_temps);
                    op->opc = INDEX_op_br;
                    args[0] = args[5];
                } else {
            do_brcond_false:
                    tcg_op_remove(s, op);
                }
            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
                       && temp_is_const(args[2]) && temps[args[2]].val == 0
                       && temp_is_const(args[3]) && temps[args[3]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
            do_brcond_high:
                reset_all_temps(nb_temps);
                op->opc = INDEX_op_brcond_i32;
                args[0] = args[1];
                args[1] = args[3];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[0], args[2], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[1], args[3], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_brcond_low:
                reset_all_temps(nb_temps);
                op->opc = INDEX_op_brcond_i32;
                args[1] = args[2];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[0], args[2], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[1], args[3], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;
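
        /* setcond2 below mirrors brcond2 above: a signed LT/GE comparison of
           a 64-bit pair against the constant 0 depends only on the sign bit,
           so it collapses to a single 32-bit setcond on the high word, and
           EQ/NE comparisons are narrowed when one half folds to a known
           result.  */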

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
            if (tmp != 2) {
            do_setcond_const:
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
                       && temp_is_const(args[3]) && temps[args[3]].val == 0
                       && temp_is_const(args[4]) && temps[args[4]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
            do_setcond_high:
                reset_temp(args[0]);
                temps[args[0]].mask = 1;
                op->opc = INDEX_op_setcond_i32;
                args[1] = args[2];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[1], args[3], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[2], args[4], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_setcond_low:
                reset_temp(args[0]);
                temps[args[0]].mask = 1;
                op->opc = INDEX_op_setcond_i32;
                args[2] = args[3];
                args[3] = args[5];
            } else if (args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[1], args[3], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[2], args[4], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_call:
            if (!(args[nb_oargs + nb_iargs + 1]
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    if (test_bit(i, temps_used.l)) {
                        reset_temp(i);
                    }
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute its result), so no propagation is done.
               We trash everything if the operation is the end of a basic
               block, otherwise we only trash the output args.  "mask" is
               the non-zero bits mask for the first output arg. */
            if (def->flags & TCG_OPF_BB_END) {
                reset_all_temps(nb_temps);
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far). */
                    if (i == 0) {
                        temps[args[i]].mask = mask;
                    }
                }
            }
            break;
        }
    }
}