/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "config.h"

#include <stdlib.h>
#include <stdio.h>

#include "qemu-common.h"
#include "tcg-op.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

struct tcg_temp_info {
    bool is_const;
    uint16_t prev_copy;
    uint16_t next_copy;
    tcg_target_ulong val;
    tcg_target_ulong mask;
};

static struct tcg_temp_info temps[TCG_MAX_TEMPS];
static TCGTempSet temps_used;

static inline bool temp_is_const(TCGArg arg)
{
    return temps[arg].is_const;
}

static inline bool temp_is_copy(TCGArg arg)
{
    return temps[arg].next_copy != arg;
}

/* Reset TEMP's state, possibly removing the temp from the list of copies. */
static void reset_temp(TCGArg temp)
{
    temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
    temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
    temps[temp].next_copy = temp;
    temps[temp].prev_copy = temp;
    temps[temp].is_const = false;
    temps[temp].mask = -1;
}

/* Reset all temporaries, given that there are NB_TEMPS of them. */
static void reset_all_temps(int nb_temps)
{
    bitmap_zero(temps_used.l, nb_temps);
}
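/*
 * Copy tracking: next_copy/prev_copy link each temp into a circular,
 * doubly linked list of temps known to hold the same value.  A temp whose
 * next_copy points back at itself is not a copy of anything.  For example,
 * after "mov_i32 t2, t1" the list is t1 <-> t2 <-> t1, so either temp may
 * stand in for the other until one of them is overwritten.  Only temps of
 * the same type are linked together (see tcg_opt_gen_mov below).
 */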
/* Initialize and activate a temporary. */
static void init_temp_info(TCGArg temp)
{
    if (!test_bit(temp, temps_used.l)) {
        temps[temp].next_copy = temp;
        temps[temp].prev_copy = temp;
        temps[temp].is_const = false;
        temps[temp].mask = -1;
        set_bit(temp, temps_used.l);
    }
}

static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
                               TCGOpcode opc, int nargs)
{
    int oi = s->gen_next_op_idx;
    int pi = s->gen_next_parm_idx;
    int prev = old_op->prev;
    int next = old_op - s->gen_op_buf;
    TCGOp *new_op;

    tcg_debug_assert(oi < OPC_BUF_SIZE);
    tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
    s->gen_next_op_idx = oi + 1;
    s->gen_next_parm_idx = pi + nargs;

    new_op = &s->gen_op_buf[oi];
    *new_op = (TCGOp){
        .opc = opc,
        .args = pi,
        .prev = prev,
        .next = next
    };
    if (prev >= 0) {
        s->gen_op_buf[prev].next = oi;
    } else {
        s->gen_first_op_idx = oi;
    }
    old_op->prev = oi;

    return new_op;
}

static int op_bits(TCGOpcode op)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    return def->flags & TCG_OPF_64BIT ? 64 : 32;
}

static TCGOpcode op_to_mov(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_mov_i32;
    case 64:
        return INDEX_op_mov_i64;
    default:
        fprintf(stderr, "op_to_mov: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGOpcode op_to_movi(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_movi_i32;
    case 64:
        return INDEX_op_movi_i64;
    default:
        fprintf(stderr, "op_to_movi: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
{
    TCGArg i;

    /* If this is already a global, we can't do better. */
    if (temp < s->nb_globals) {
        return temp;
    }

    /* Search for a global first. */
    for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
        if (i < s->nb_globals) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local. */
    if (!s->temps[temp].temp_local) {
        for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
            if (s->temps[i].temp_local) {
                return i;
            }
        }
    }

    /* Failing to find a better representation, return the same temp. */
    return temp;
}

static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
{
    TCGArg i;

    if (arg1 == arg2) {
        return true;
    }

    if (!temp_is_copy(arg1) || !temp_is_copy(arg2)) {
        return false;
    }

    for (i = temps[arg1].next_copy; i != arg1; i = temps[i].next_copy) {
        if (i == arg2) {
            return true;
        }
    }

    return false;
}
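/*
 * The two helpers below rewrite an op in place as a movi or mov.  Beyond
 * patching opc and args, they keep the optimizer state consistent: the
 * destination's old copy-list membership is dropped, and its known-constant
 * value and known-zero mask are updated.  For a 32-bit move on a 64-bit
 * host the high bits of the destination register are undefined, which is
 * recorded by setting them in the mask.
 */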
static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
                             TCGArg dst, TCGArg val)
{
    TCGOpcode new_op = op_to_movi(op->opc);
    tcg_target_ulong mask;

    op->opc = new_op;

    reset_temp(dst);
    temps[dst].is_const = true;
    temps[dst].val = val;
    mask = val;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
        /* High bits of the destination are now garbage. */
        mask |= ~0xffffffffull;
    }
    temps[dst].mask = mask;

    args[0] = dst;
    args[1] = val;
}

static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
                            TCGArg dst, TCGArg src)
{
    if (temps_are_copies(dst, src)) {
        tcg_op_remove(s, op);
        return;
    }

    TCGOpcode new_op = op_to_mov(op->opc);
    tcg_target_ulong mask;

    op->opc = new_op;

    reset_temp(dst);
    mask = temps[src].mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage. */
        mask |= ~0xffffffffull;
    }
    temps[dst].mask = mask;

    if (s->temps[src].type == s->temps[dst].type) {
        temps[dst].next_copy = temps[src].next_copy;
        temps[dst].prev_copy = src;
        temps[temps[dst].next_copy].prev_copy = dst;
        temps[src].next_copy = dst;
        temps[dst].is_const = temps[src].is_const;
        temps[dst].val = temps[src].val;
    }

    args[0] = dst;
    args[1] = src;
}

static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined. */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}
static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    TCGArg res = do_constant_folding_2(op, x, y);
    if (op_bits(op) == 32) {
        res = (int32_t)res;
    }
    return res;
}

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can. */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    if (temp_is_const(x) && temp_is_const(y)) {
        switch (op_bits(op)) {
        case 32:
            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
        case 64:
            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
        default:
            tcg_abort();
        }
    } else if (temps_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (temp_is_const(y) && temps[y].val == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    } else {
        return 2;
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can. */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (temp_is_const(bl) && temp_is_const(bh)) {
        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;

        if (temp_is_const(al) && temp_is_const(ah)) {
            uint64_t a;
            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}
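/*
 * Both helpers above fold comparisons three ways: a constant 0 or 1 when
 * the result is known, or 2 when nothing can be concluded.  The unsigned
 * checks against a constant zero work even when the other operand is
 * unknown: no value is LTU zero and every value is GEU zero.
 */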
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += temp_is_const(a1);
    sum -= temp_is_const(a2);

    /* Prefer the constant in the second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += temp_is_const(p1[0]);
    sum += temp_is_const(p1[1]);
    sum -= temp_is_const(p2[0]);
    sum -= temp_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

/* Propagate constants and copies, fold constant expressions. */
void tcg_optimize(TCGContext *s)
{
    int oi, oi_next, nb_temps, nb_globals;

    /* The TEMPS array has an element for each temp.
       If the temp holds a constant, its value is kept in the element's
       VAL field.  If the temp is a copy of other temps, the other copies
       are reachable through the doubly linked circular list. */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    reset_all_temps(nb_temps);

    for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
        tcg_target_ulong mask, partmask, affected;
        int nb_oargs, nb_iargs, i;
        TCGArg tmp;

        TCGOp * const op = &s->gen_op_buf[oi];
        TCGArg * const args = &s->gen_opparam_buf[op->args];
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        oi_next = op->next;

        /* Count the arguments, and initialize the temps that are
           going to be used. */
        if (opc == INDEX_op_call) {
            nb_oargs = op->callo;
            nb_iargs = op->calli;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                tmp = args[i];
                if (tmp != TCG_CALL_DUMMY_ARG) {
                    init_temp_info(tmp);
                }
            }
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                init_temp_info(args[i]);
            }
        }

        /* Do copy propagation. */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            if (temp_is_copy(args[i])) {
                args[i] = find_better_copy(s, args[i]);
            }
        }

        /* For commutative operations, make the constant the second
           argument. */
        switch (opc) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(and):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(args[0], &args[1], &args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &args[0], &args[1])) {
                args[2] = tcg_swap_cond(args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(args[0], &args[1], &args[2])) {
                args[3] = tcg_swap_cond(args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &args[1], &args[2])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation. */
            if (swap_commutative(args[0], &args[4], &args[3])) {
                args[5] = tcg_invert_cond(args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(args[0], &args[2], &args[4]);
            swap_commutative(args[1], &args[3], &args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(args[0], &args[2], &args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&args[0], &args[2])) {
                args[4] = tcg_swap_cond(args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&args[1], &args[3])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            break;
        default:
            break;
        }
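        /* At this point an op such as "add_i32 t0, t1, t2" with t1 known
           constant has been rewritten with the constant input second, and
           a brcond with a known-constant first input has had its operands
           swapped and its condition mirrored (e.g. LT becomes GT) to
           preserve meaning. */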
        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0"
           and "sub r, 0, a => neg r, a" cases. */
        switch (opc) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
                tcg_opt_gen_movi(s, op, args, args[0], 0);
                continue;
            }
            break;
        CASE_OP_32_64(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (temp_is_const(args[2])) {
                    /* Proceed with possible constant folding. */
                    break;
                }
                if (opc == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                }
                if (!have_neg) {
                    break;
                }
                if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
                    op->opc = neg_op;
                    reset_temp(args[0]);
                    args[1] = args[2];
                    continue;
                }
            }
            break;
        CASE_OP_32_64(xor):
        CASE_OP_32_64(nand):
            if (!temp_is_const(args[1])
                && temp_is_const(args[2]) && temps[args[2]].val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (!temp_is_const(args[1])
                && temp_is_const(args[2]) && temps[args[2]].val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(andc):
            if (!temp_is_const(args[2])
                && temp_is_const(args[1]) && temps[args[1]].val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (!temp_is_const(args[2])
                && temp_is_const(args[1]) && temps[args[1]].val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                op->opc = not_op;
                reset_temp(args[0]);
                args[1] = args[i];
                continue;
            }
        default:
            break;
        }

        /* Simplify expression for "op r, a, const => mov r, a" cases. */
        switch (opc) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(andc):
            if (!temp_is_const(args[1])
                && temp_is_const(args[2]) && temps[args[2]].val == 0) {
                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
                continue;
            }
            break;
        CASE_OP_32_64(and):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (!temp_is_const(args[1])
                && temp_is_const(args[2]) && temps[args[2]].val == -1) {
                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
                continue;
            }
            break;
        default:
            break;
        }
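        /* For the switch below: a zero bit in "mask" marks a result bit
           known to be zero, while a set bit in "affected" marks a
           possibly-set input bit that the operation would discard.  When
           "affected" ends up zero the operation cannot change its input,
           so it is turned into a plain mov further down. */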
        /* Simplify using known-zero bits.  Currently only ops with a single
           output argument are supported. */
        mask = -1;
        affected = -1;
        switch (opc) {
        CASE_OP_32_64(ext8s):
            if ((temps[args[1]].mask & 0x80) != 0) {
                break;
            }
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((temps[args[1]].mask & 0x8000) != 0) {
                break;
            }
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((temps[args[1]].mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = temps[args[2]].mask;
            if (temp_is_const(args[2])) {
        and_const:
                affected = temps[args[1]].mask & ~mask;
            }
            mask = temps[args[1]].mask & mask;
            break;

        case INDEX_op_ext_i32_i64:
            if ((temps[args[1]].mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_extu_i32_i64:
            /* We do not compute affected as it is a size changing op. */
            mask = (uint32_t)temps[args[1]].mask;
            break;

        CASE_OP_32_64(andc):
            /* Known zeros do not imply known ones.  Therefore unless
               args[2] is constant, we can't infer anything from it. */
            if (temp_is_const(args[2])) {
                mask = ~temps[args[2]].mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set. */
            mask = temps[args[1]].mask;
            break;

        case INDEX_op_sar_i32:
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & 31;
                mask = (int32_t)temps[args[1]].mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & 63;
                mask = (int64_t)temps[args[1]].mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & 31;
                mask = (uint32_t)temps[args[1]].mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & 63;
                mask = (uint64_t)temps[args[1]].mask >> tmp;
            }
            break;

        case INDEX_op_extrl_i64_i32:
            mask = (uint32_t)temps[args[1]].mask;
            break;
        case INDEX_op_extrh_i64_i32:
            mask = (uint64_t)temps[args[1]].mask >> 32;
            break;

        CASE_OP_32_64(shl):
            if (temp_is_const(args[2])) {
                tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1);
                mask = temps[args[1]].mask << tmp;
            }
            break;
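        /* The neg case below uses the identity -(x & -x): x & -x isolates
           the lowest possibly-set bit of the input mask, and negating that
           sets it and every higher bit.  E.g. an input mask of 0b1100
           yields ...11111100: only bit 2 and above can be nonzero in the
           result. */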
        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost. */
            mask = -(temps[args[1]].mask & -temps[args[1]].mask);
            break;

        CASE_OP_32_64(deposit):
            mask = deposit64(temps[args[1]].mask, args[3], args[4],
                             temps[args[2]].mask);
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = temps[args[1]].mask | temps[args[2]].mask;
            break;

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = temps[args[3]].mask | temps[args[4]].mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOpIdx oi = args[nb_oargs + nb_iargs];
                TCGMemOp mop = get_memop(oi);
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }

        /* 32-bit ops generate 32-bit results.  For the result-is-zero test
           below, we can ignore high bits, but for further optimizations we
           need to record that the high bits contain garbage. */
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }

        if (partmask == 0) {
            assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, op, args, args[0], 0);
            continue;
        }
        if (affected == 0) {
            assert(nb_oargs == 1);
            tcg_opt_gen_mov(s, op, args, args[0], args[1]);
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases. */
        switch (opc) {
        CASE_OP_32_64(and):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if (temp_is_const(args[2]) && temps[args[2]].val == 0) {
                tcg_opt_gen_movi(s, op, args, args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases. */
        switch (opc) {
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
            if (temps_are_copies(args[1], args[2])) {
                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases. */
        switch (opc) {
        CASE_OP_32_64(andc):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(xor):
            if (temps_are_copies(args[1], args[2])) {
                tcg_opt_gen_movi(s, op, args, args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }
        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted for arguments by the
           register allocator where needed and possible.  Also detect
           copies. */
        switch (opc) {
        CASE_OP_32_64(mov):
            tcg_opt_gen_mov(s, op, args, args[0], args[1]);
            break;
        CASE_OP_32_64(movi):
            tcg_opt_gen_movi(s, op, args, args[0], args[1]);
            break;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
        case INDEX_op_ext_i32_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            if (temp_is_const(args[1])) {
                tmp = do_constant_folding(opc, temps[args[1]].val, 0);
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (temp_is_const(args[1]) && temp_is_const(args[2])) {
                tmp = do_constant_folding(opc, temps[args[1]].val,
                                          temps[args[2]].val);
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (temp_is_const(args[1]) && temp_is_const(args[2])) {
                tmp = deposit64(temps[args[1]].val, args[3], args[4],
                                temps[args[2]].val);
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]);
            if (tmp != 2) {
                if (tmp) {
                    reset_all_temps(nb_temps);
                    op->opc = INDEX_op_br;
                    args[0] = args[3];
                } else {
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]);
            if (tmp != 2) {
                tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]);
                break;
            }
            goto do_default;
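        /* Note the index trick in the movcond case above: when the
           condition folded to 1, args[4 - tmp] is args[3], the "true"
           input; when it folded to 0 it is args[4], the "false" input. */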
        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (temp_is_const(args[2]) && temp_is_const(args[3])
                && temp_is_const(args[4]) && temp_is_const(args[5])) {
                uint32_t al = temps[args[2]].val;
                uint32_t ah = temps[args[3]].val;
                uint32_t bl = temps[args[4]].val;
                uint32_t bh = temps[args[5]].val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
                TCGArg *args2 = &s->gen_opparam_buf[op2->args];

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                rl = args[0];
                rh = args[1];
                tcg_opt_gen_movi(s, op, args, rl, (int32_t)a);
                tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(a >> 32));

                /* We've done all we need to do with the movi.  Skip it. */
                oi_next = op2->next;
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (temp_is_const(args[2]) && temp_is_const(args[3])) {
                uint32_t a = temps[args[2]].val;
                uint32_t b = temps[args[3]].val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
                TCGArg *args2 = &s->gen_opparam_buf[op2->args];

                rl = args[0];
                rh = args[1];
                tcg_opt_gen_movi(s, op, args, rl, (int32_t)r);
                tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(r >> 32));

                /* We've done all we need to do with the movi.  Skip it. */
                oi_next = op2->next;
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
            if (tmp != 2) {
                if (tmp) {
        do_brcond_true:
                    reset_all_temps(nb_temps);
                    op->opc = INDEX_op_br;
                    args[0] = args[5];
                } else {
        do_brcond_false:
                    tcg_op_remove(s, op);
                }
            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
                       && temp_is_const(args[2]) && temps[args[2]].val == 0
                       && temp_is_const(args[3]) && temps[args[3]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
        do_brcond_high:
                reset_all_temps(nb_temps);
                op->opc = INDEX_op_brcond_i32;
                args[0] = args[1];
                args[1] = args[3];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[0], args[2], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[1], args[3], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
        do_brcond_low:
                reset_all_temps(nb_temps);
                op->opc = INDEX_op_brcond_i32;
                args[1] = args[2];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[0], args[2], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[1], args[3], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;
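        /* The double-word comparisons above exploit the word split:
           LT/GE against zero depend only on the sign bit, i.e. the high
           word, while EQ/NE hold for the pair exactly when they hold for
           both words, so folding either half can settle or narrow the
           comparison. */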
        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
            if (tmp != 2) {
        do_setcond_const:
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
                       && temp_is_const(args[3]) && temps[args[3]].val == 0
                       && temp_is_const(args[4]) && temps[args[4]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
        do_setcond_high:
                reset_temp(args[0]);
                temps[args[0]].mask = 1;
                op->opc = INDEX_op_setcond_i32;
                args[1] = args[2];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[1], args[3], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[2], args[4], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
        do_setcond_low:
                reset_temp(args[0]);
                temps[args[0]].mask = 1;
                op->opc = INDEX_op_setcond_i32;
                args[2] = args[3];
                args[3] = args[5];
            } else if (args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[1], args[3], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[2], args[4], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_call:
            if (!(args[nb_oargs + nb_iargs + 1]
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    if (test_bit(i, temps_used.l)) {
                        reset_temp(i);
                    }
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute the operation result), so no propagation
               is done.  We trash everything if the operation is the end
               of a basic block, otherwise we only trash the output args.
               "mask" is the non-zero bits mask for the first output arg. */
            if (def->flags & TCG_OPF_BB_END) {
                reset_all_temps(nb_temps);
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far). */
                    if (i == 0) {
                        temps[args[i]].mask = mask;
                    }
                }
            }
            break;
        }
    }
}