/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "config.h"

#include <stdlib.h>
#include <stdio.h>

#include "qemu-common.h"
#include "tcg-op.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

typedef enum {
    TCG_TEMP_UNDEF = 0,
    TCG_TEMP_CONST,
    TCG_TEMP_COPY,
} tcg_temp_state;

struct tcg_temp_info {
    tcg_temp_state state;
    uint16_t prev_copy;
    uint16_t next_copy;
    tcg_target_ulong val;
    tcg_target_ulong mask;
};

static struct tcg_temp_info temps[TCG_MAX_TEMPS];

/* Reset TEMP's state to TCG_TEMP_UNDEF.  If removing TEMP leaves its copy
   list with only a single member, clear that member's copy flag as well. */
static void reset_temp(TCGArg temp)
{
    if (temps[temp].state == TCG_TEMP_COPY) {
        if (temps[temp].prev_copy == temps[temp].next_copy) {
            temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
        } else {
            temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
            temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
        }
    }
    temps[temp].state = TCG_TEMP_UNDEF;
    temps[temp].mask = -1;
}

/* Reset all temporaries, given that there are NB_TEMPS of them. */
static void reset_all_temps(int nb_temps)
{
    int i;
    for (i = 0; i < nb_temps; i++) {
        temps[i].state = TCG_TEMP_UNDEF;
        temps[i].mask = -1;
    }
}

static int op_bits(TCGOpcode op)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    return def->flags & TCG_OPF_64BIT ? 64 : 32;
}

static TCGOpcode op_to_movi(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_movi_i32;
    case 64:
        return INDEX_op_movi_i64;
    default:
        fprintf(stderr, "op_to_movi: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}
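/* Illustration of the copy-list invariant maintained by the functions
   above and below (temp indices chosen arbitrarily): if t5, t6 and t7
   are all copies of one another, the prev_copy/next_copy fields form a
   doubly linked circular list:

       temps[5].next_copy = 6;  temps[5].prev_copy = 7;
       temps[6].next_copy = 7;  temps[6].prev_copy = 5;
       temps[7].next_copy = 5;  temps[7].prev_copy = 6;

   reset_temp() unlinks one element from such a ring; once only a single
   element would remain, its TCG_TEMP_COPY state is cleared too.  */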
static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
{
    TCGArg i;

    /* If this is already a global, we can't do better. */
    if (temp < s->nb_globals) {
        return temp;
    }

    /* Search for a global first. */
    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
        if (i < s->nb_globals) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local. */
    if (!s->temps[temp].temp_local) {
        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
            if (s->temps[i].temp_local) {
                return i;
            }
        }
    }

    /* If we failed to find a better representation, return the same temp. */
    return temp;
}

static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
{
    TCGArg i;

    if (arg1 == arg2) {
        return true;
    }

    if (temps[arg1].state != TCG_TEMP_COPY
        || temps[arg2].state != TCG_TEMP_COPY) {
        return false;
    }

    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
        if (i == arg2) {
            return true;
        }
    }

    return false;
}

static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
                            TCGArg dst, TCGArg src)
{
    reset_temp(dst);
    temps[dst].mask = temps[src].mask;
    assert(temps[src].state != TCG_TEMP_CONST);

    if (s->temps[src].type == s->temps[dst].type) {
        if (temps[src].state != TCG_TEMP_COPY) {
            temps[src].state = TCG_TEMP_COPY;
            temps[src].next_copy = src;
            temps[src].prev_copy = src;
        }
        temps[dst].state = TCG_TEMP_COPY;
        temps[dst].next_copy = temps[src].next_copy;
        temps[dst].prev_copy = src;
        temps[temps[dst].next_copy].prev_copy = dst;
        temps[src].next_copy = dst;
    }

    gen_args[0] = dst;
    gen_args[1] = src;
}

static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
{
    reset_temp(dst);
    temps[dst].state = TCG_TEMP_CONST;
    temps[dst].val = val;
    temps[dst].mask = val;
    gen_args[0] = dst;
    gen_args[1] = val;
}

static TCGOpcode op_to_mov(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_mov_i32;
    case 64:
        return INDEX_op_mov_i64;
    default:
        fprintf(stderr, "op_to_mov: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}
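/* Worked example for the folding helpers below: folding add_i32 with
   x = 0xffffffff and y = 1 computes 0x100000000 in TCGArg arithmetic on
   a 64-bit host; do_constant_folding() then truncates the result of any
   32-bit opcode to 32 bits, yielding 0, as the guest would observe.  */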
static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (uint32_t)y;

    case INDEX_op_shl_i64:
        return (uint64_t)x << (uint64_t)y;

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (uint32_t)y;

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (uint64_t)y;

    case INDEX_op_sar_i32:
        return (int32_t)x >> (int32_t)y;

    case INDEX_op_sar_i64:
        return (int64_t)x >> (int64_t)y;

    case INDEX_op_rotr_i32:
        /* Mask the shift counts so that a rotate by 0 does not become
           an undefined shift by 32.  */
        x = ((uint32_t)x << ((32 - y) & 31)) | ((uint32_t)x >> (y & 31));
        return x;

    case INDEX_op_rotr_i64:
        x = ((uint64_t)x << ((64 - y) & 63)) | ((uint64_t)x >> (y & 63));
        return x;

    case INDEX_op_rotl_i32:
        x = ((uint32_t)x << (y & 31)) | ((uint32_t)x >> ((32 - y) & 31));
        return x;

    case INDEX_op_rotl_i64:
        x = ((uint64_t)x << (y & 63)) | ((uint64_t)x >> ((64 - y) & 63));
        return x;

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined. */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}

static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    TCGArg res = do_constant_folding_2(op, x, y);
    if (op_bits(op) == 32) {
        res &= 0xffffffff;
    }
    return res;
}
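/* The helpers below evaluate a TCGCond on known constant operands.  For
   example, do_constant_folding_cond_32(0xffffffff, 0, TCG_COND_LT) is
   true because the operands compare as signed 32-bit values (-1 < 0),
   whereas TCG_COND_LTU on the same operands is false.  */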
static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
        switch (op_bits(op)) {
        case 32:
            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
        case 64:
            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
        default:
            tcg_abort();
        }
    } else if (temps_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
        /* Comparisons against zero can sometimes be decided from the
           condition alone: no unsigned value is below zero.  */
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    } else {
        return 2;
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (temps[bl].state == TCG_TEMP_CONST
        && temps[bh].state == TCG_TEMP_CONST) {
        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;

        if (temps[al].state == TCG_TEMP_CONST
            && temps[ah].state == TCG_TEMP_CONST) {
            uint64_t a;
            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += temps[a1].state == TCG_TEMP_CONST;
    sum -= temps[a2].state == TCG_TEMP_CONST;

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += temps[p1[0]].state == TCG_TEMP_CONST;
    sum += temps[p1[1]].state == TCG_TEMP_CONST;
    sum -= temps[p2[0]].state == TCG_TEMP_CONST;
    sum -= temps[p2[1]].state == TCG_TEMP_CONST;
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}
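/* Example of the canonicalization performed by the swap helpers above
   (applied in the main loop below): assuming t1 holds a constant,
   "add_i32 t0, t1, t2" becomes "add_i32 t0, t2, t1" so the constant
   appears second; with no constants involved, "add_i32 t0, t1, t0"
   becomes "add_i32 t0, t0, t1" so the output aliases the first input,
   a form two-operand (non-RISC) hosts handle better.  */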
/* Propagate constants and copies, fold constant expressions. */
static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                    TCGArg *args, TCGOpDef *tcg_op_defs)
{
    int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
    tcg_target_ulong mask, affected;
    TCGOpcode op;
    const TCGOpDef *def;
    TCGArg *gen_args;
    TCGArg tmp;

    /* Array TEMPS has an element for each temp.
       If this temp holds a constant then its value is kept in the
       element's VAL field.  If this temp is a copy of other ones then
       the other copies are available through the doubly linked
       circular list.  */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    reset_all_temps(nb_temps);

    nb_ops = tcg_opc_ptr - s->gen_opc_buf;
    gen_args = args;
    for (op_index = 0; op_index < nb_ops; op_index++) {
        op = s->gen_opc_buf[op_index];
        def = &tcg_op_defs[op];
        /* Do copy propagation */
        if (op == INDEX_op_call) {
            int nb_oargs = args[0] >> 16;
            int nb_iargs = args[0] & 0xffff;
            for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) {
                if (temps[args[i]].state == TCG_TEMP_COPY) {
                    args[i] = find_better_copy(s, args[i]);
                }
            }
        } else {
            for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
                if (temps[args[i]].state == TCG_TEMP_COPY) {
                    args[i] = find_better_copy(s, args[i]);
                }
            }
        }

        /* For commutative operations, make the constant the second
           argument.  */
        switch (op) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(and):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(args[0], &args[1], &args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &args[0], &args[1])) {
                args[2] = tcg_swap_cond(args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(args[0], &args[1], &args[2])) {
                args[3] = tcg_swap_cond(args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &args[1], &args[2])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation. */
            if (swap_commutative(args[0], &args[4], &args[3])) {
                args[5] = tcg_invert_cond(args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(args[0], &args[2], &args[4]);
            swap_commutative(args[1], &args[3], &args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(args[0], &args[2], &args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&args[0], &args[2])) {
                args[4] = tcg_swap_cond(args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&args[1], &args[3])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            break;
        default:
            break;
        }
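        /* Example of the simplification performed below: shifting or
           rotating a known zero always yields zero, so
           "shl_i32 t0, $0, t1" becomes "movi_i32 t0, $0"; and
           "sub_i32 t0, $0, t1" is a plain negation, so it becomes
           "neg_i32 t0, t1" whenever the host supports neg.  */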
        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
           and "sub r, 0, a => neg r, a" cases.  */
        switch (op) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[1]].val == 0) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0);
                args += 3;
                gen_args += 2;
                continue;
            }
            break;
        CASE_OP_32_64(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (temps[args[2]].state == TCG_TEMP_CONST) {
                    /* Proceed with possible constant folding. */
                    break;
                }
                if (op == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                }
                if (!have_neg) {
                    break;
                }
                if (temps[args[1]].state == TCG_TEMP_CONST
                    && temps[args[1]].val == 0) {
                    s->gen_opc_buf[op_index] = neg_op;
                    reset_temp(args[0]);
                    gen_args[0] = args[0];
                    gen_args[1] = args[2];
                    args += 3;
                    gen_args += 2;
                    continue;
                }
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "op r, a, 0 => mov r, a" cases.  */
        switch (op) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                /* Proceed with possible constant folding. */
                break;
            }
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0) {
                if (temps_are_copies(args[0], args[1])) {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                } else {
                    s->gen_opc_buf[op_index] = op_to_mov(op);
                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                    gen_args += 2;
                }
                args += 3;
                continue;
            }
            break;
        default:
            break;
        }
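        /* Example of the known-zero-bits tracking below: after
           "and_i32 t0, t1, $0xff", mask(t0) is at most 0xff, so a later
           "ext8u_i32 t2, t0" cannot change any bit ("affected" computes
           to zero) and is rewritten as a simple mov.  */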
        /* Simplify using known-zero bits */
        mask = -1;
        affected = -1;
        switch (op) {
        CASE_OP_32_64(ext8s):
            if ((temps[args[1]].mask & 0x80) != 0) {
                break;
            }
            /* Fall through: the sign bit is known zero, so the sign
               extension behaves like a zero extension.  */
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((temps[args[1]].mask & 0x8000) != 0) {
                break;
            }
            /* fall through */
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((temps[args[1]].mask & 0x80000000) != 0) {
                break;
            }
            /* fall through */
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = temps[args[2]].mask;
            if (temps[args[2]].state == TCG_TEMP_CONST) {
        and_const:
                affected = temps[args[1]].mask & ~mask;
            }
            mask = temps[args[1]].mask & mask;
            break;

        CASE_OP_32_64(sar):
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                mask = ((tcg_target_long)temps[args[1]].mask
                        >> temps[args[2]].val);
            }
            break;

        CASE_OP_32_64(shr):
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                mask = temps[args[1]].mask >> temps[args[2]].val;
            }
            break;

        CASE_OP_32_64(shl):
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                mask = temps[args[1]].mask << temps[args[2]].val;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost possibly-set
               bit: -x cannot have any set bits below the lowest set bit
               of x.  */
            mask = -(temps[args[1]].mask & -temps[args[1]].mask);
            break;

        CASE_OP_32_64(deposit):
            tmp = ((1ull << args[4]) - 1);
            mask = ((temps[args[1]].mask & ~(tmp << args[3]))
                    | ((temps[args[2]].mask & tmp) << args[3]));
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = temps[args[1]].mask | temps[args[2]].mask;
            break;

        CASE_OP_32_64(setcond):
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = temps[args[3]].mask | temps[args[4]].mask;
            break;

        default:
            break;
        }

        if (mask == 0) {
            /* Every output bit is known to be zero: the result is
               constant 0.  */
            assert(def->nb_oargs == 1);
            s->gen_opc_buf[op_index] = op_to_movi(op);
            tcg_opt_gen_movi(gen_args, args[0], 0);
            args += def->nb_oargs + def->nb_iargs + def->nb_cargs;
            gen_args += 2;
            continue;
        }
        if (affected == 0) {
            /* The operation cannot change any bit of its first input:
               the result is a plain copy (or an already-known constant). */
            assert(def->nb_oargs == 1);
            if (temps_are_copies(args[0], args[1])) {
                s->gen_opc_buf[op_index] = INDEX_op_nop;
            } else if (temps[args[1]].state != TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_mov(op);
                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                gen_args += 2;
            } else {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], temps[args[1]].val);
                gen_args += 2;
            }
            args += def->nb_iargs + 1;
            continue;
        }
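        /* Example of the simplification performed below:
           "and_i32 t0, t1, $0" and "mul_i32 t0, t1, $0" produce 0 no
           matter what t1 holds, so both become "movi_i32 t0, $0".  */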
        /* Simplify expressions for "op r, a, 0 => movi r, 0" cases.  */
        switch (op) {
        CASE_OP_32_64(and):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0);
                args += 3;
                gen_args += 2;
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "op r, a, a => mov r, a" cases.  */
        switch (op) {
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
            if (temps_are_copies(args[1], args[2])) {
                if (temps_are_copies(args[0], args[1])) {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                } else {
                    s->gen_opc_buf[op_index] = op_to_mov(op);
                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                    gen_args += 2;
                }
                args += 3;
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "op r, a, a => movi r, 0" cases.  */
        switch (op) {
        CASE_OP_32_64(sub):
        CASE_OP_32_64(xor):
            if (temps_are_copies(args[1], args[2])) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0);
                gen_args += 2;
                args += 3;
                continue;
            }
            break;
        default:
            break;
        }
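        /* Example for the constant-propagation switch below: if t1 and
           t2 are known to hold 2 and 3, "add_i32 t0, t1, t2" is
           rewritten as "movi_i32 t0, $5", and t0 is then known to hold
           5 for the rest of the basic block.  */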
        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted for arguments by the
           register allocator where needed and possible.  Also detect
           copies.  */
        switch (op) {
        CASE_OP_32_64(mov):
            if (temps_are_copies(args[0], args[1])) {
                args += 2;
                s->gen_opc_buf[op_index] = INDEX_op_nop;
                break;
            }
            if (temps[args[1]].state != TCG_TEMP_CONST) {
                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                gen_args += 2;
                args += 2;
                break;
            }
            /* Source argument is constant.  Rewrite the operation and
               let the movi case handle it.  */
            op = op_to_movi(op);
            s->gen_opc_buf[op_index] = op;
            args[1] = temps[args[1]].val;
            /* fallthrough */
        CASE_OP_32_64(movi):
            tcg_opt_gen_movi(gen_args, args[0], args[1]);
            gen_args += 2;
            args += 2;
            break;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tmp = do_constant_folding(op, temps[args[1]].val, 0);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 2;
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tmp = do_constant_folding(op, temps[args[1]].val,
                                          temps[args[2]].val);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 3;
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tmp = ((1ull << args[4]) - 1);
                tmp = (temps[args[1]].val & ~(tmp << args[3]))
                      | ((temps[args[2]].val & tmp) << args[3]);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 5;
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
            if (tmp != 2) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 4;
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
            if (tmp != 2) {
                if (tmp) {
                    reset_all_temps(nb_temps);
                    s->gen_opc_buf[op_index] = INDEX_op_br;
                    gen_args[0] = args[3];
                    gen_args += 1;
                } else {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                }
                args += 4;
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
            if (tmp != 2) {
                if (temps_are_copies(args[0], args[4-tmp])) {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
                    s->gen_opc_buf[op_index] = op_to_movi(op);
                    tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
                    gen_args += 2;
                } else {
                    s->gen_opc_buf[op_index] = op_to_mov(op);
                    tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
                    gen_args += 2;
                }
                args += 6;
                break;
            }
            goto do_default;

        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[3]].state == TCG_TEMP_CONST
                && temps[args[4]].state == TCG_TEMP_CONST
                && temps[args[5]].state == TCG_TEMP_CONST) {
                uint32_t al = temps[args[2]].val;
                uint32_t ah = temps[args[3]].val;
                uint32_t bl = temps[args[4]].val;
                uint32_t bh = temps[args[5]].val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;

                if (op == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                /* We emit the extra nop when we emit the add2/sub2.  */
                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);

                rl = args[0];
                rh = args[1];
                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
                s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
                gen_args += 4;
                args += 6;
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[3]].state == TCG_TEMP_CONST) {
                uint32_t a = temps[args[2]].val;
                uint32_t b = temps[args[3]].val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;

                /* We emit the extra nop when we emit the mulu2.  */
                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);

                rl = args[0];
                rh = args[1];
                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
                s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
                gen_args += 4;
                args += 4;
                break;
            }
            goto do_default;
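        /* For the double-word conditions below, note that a 64-bit
           signed LT/GE test against zero depends only on the sign bit,
           which lives in the high word; e.g.
           "brcond2_i32 al, ah, $0, $0, lt, L" can be reduced to
           "brcond_i32 ah, $0, lt, L".  */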
        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
            if (tmp != 2) {
                if (tmp) {
                    reset_all_temps(nb_temps);
                    s->gen_opc_buf[op_index] = INDEX_op_br;
                    gen_args[0] = args[5];
                    gen_args += 1;
                } else {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                }
            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
                       && temps[args[2]].state == TCG_TEMP_CONST
                       && temps[args[3]].state == TCG_TEMP_CONST
                       && temps[args[2]].val == 0
                       && temps[args[3]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
                reset_all_temps(nb_temps);
                s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
                gen_args[0] = args[1];
                gen_args[1] = args[3];
                gen_args[2] = args[4];
                gen_args[3] = args[5];
                gen_args += 4;
            } else {
                goto do_default;
            }
            args += 6;
            break;

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
            if (tmp != 2) {
                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
                       && temps[args[3]].state == TCG_TEMP_CONST
                       && temps[args[4]].state == TCG_TEMP_CONST
                       && temps[args[3]].val == 0
                       && temps[args[4]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
                s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
                reset_temp(args[0]);
                gen_args[0] = args[0];
                gen_args[1] = args[2];
                gen_args[2] = args[4];
                gen_args[3] = args[5];
                gen_args += 4;
            } else {
                goto do_default;
            }
            args += 6;
            break;

        case INDEX_op_call:
            nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
            if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS |
                                            TCG_CALL_NO_WRITE_GLOBALS))) {
                /* The call may read or write globals: forget everything
                   we knew about them.  */
                for (i = 0; i < nb_globals; i++) {
                    reset_temp(i);
                }
            }
            for (i = 0; i < (args[0] >> 16); i++) {
                reset_temp(args[i + 1]);
            }
            i = nb_call_args + 3;
            while (i) {
                *gen_args = *args;
                args++;
                gen_args++;
                i--;
            }
            break;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute its result), so no propagation is done.
               We trash everything if the operation is the end of a basic
               block, otherwise we only trash the output args.  "mask" is
               the non-zero bits mask for the first output arg.  */
            if (def->flags & TCG_OPF_BB_END) {
                reset_all_temps(nb_temps);
            } else {
                for (i = 0; i < def->nb_oargs; i++) {
                    reset_temp(args[i]);
                    /* Save the known-zero bits mask computed above for
                       the first output argument, as the comment above
                       describes (only one output mask is tracked).  */
                    if (i == 0) {
                        temps[args[i]].mask = mask;
                    }
                }
            }
            for (i = 0; i < def->nb_args; i++) {
                gen_args[i] = args[i];
            }
            args += def->nb_args;
            gen_args += def->nb_args;
            break;
        }
    }

    return gen_args;
}

TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
                     TCGArg *args, TCGOpDef *tcg_op_defs)
{
    return tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
}