1 /* 2 * Optimizations for Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2010 Samsung Electronics. 5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include "config.h" 27 28 #include <stdlib.h> 29 #include <stdio.h> 30 31 #include "qemu-common.h" 32 #include "tcg-op.h" 33 34 #define CASE_OP_32_64(x) \ 35 glue(glue(case INDEX_op_, x), _i32): \ 36 glue(glue(case INDEX_op_, x), _i64) 37 38 typedef enum { 39 TCG_TEMP_UNDEF = 0, 40 TCG_TEMP_CONST, 41 TCG_TEMP_COPY, 42 } tcg_temp_state; 43 44 struct tcg_temp_info { 45 tcg_temp_state state; 46 uint16_t prev_copy; 47 uint16_t next_copy; 48 tcg_target_ulong val; 49 tcg_target_ulong mask; 50 }; 51 52 static struct tcg_temp_info temps[TCG_MAX_TEMPS]; 53 54 /* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one copy, remove 55 the copy flag from the left temp. */ 56 static void reset_temp(TCGArg temp) 57 { 58 if (temps[temp].state == TCG_TEMP_COPY) { 59 if (temps[temp].prev_copy == temps[temp].next_copy) { 60 temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF; 61 } else { 62 temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; 63 temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; 64 } 65 } 66 temps[temp].state = TCG_TEMP_UNDEF; 67 temps[temp].mask = -1; 68 } 69 70 /* Reset all temporaries, given that there are NB_TEMPS of them. */ 71 static void reset_all_temps(int nb_temps) 72 { 73 int i; 74 for (i = 0; i < nb_temps; i++) { 75 temps[i].state = TCG_TEMP_UNDEF; 76 temps[i].mask = -1; 77 } 78 } 79 80 static int op_bits(TCGOpcode op) 81 { 82 const TCGOpDef *def = &tcg_op_defs[op]; 83 return def->flags & TCG_OPF_64BIT ? 64 : 32; 84 } 85 86 static TCGOpcode op_to_movi(TCGOpcode op) 87 { 88 switch (op_bits(op)) { 89 case 32: 90 return INDEX_op_movi_i32; 91 case 64: 92 return INDEX_op_movi_i64; 93 default: 94 fprintf(stderr, "op_to_movi: unexpected return value of " 95 "function op_bits.\n"); 96 tcg_abort(); 97 } 98 } 99 100 static TCGArg find_better_copy(TCGContext *s, TCGArg temp) 101 { 102 TCGArg i; 103 104 /* If this is already a global, we can't do better. */ 105 if (temp < s->nb_globals) { 106 return temp; 107 } 108 109 /* Search for a global first. */ 110 for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) { 111 if (i < s->nb_globals) { 112 return i; 113 } 114 } 115 116 /* If it is a temp, search for a temp local. */ 117 if (!s->temps[temp].temp_local) { 118 for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) { 119 if (s->temps[i].temp_local) { 120 return i; 121 } 122 } 123 } 124 125 /* Failure to find a better representation, return the same temp. */ 126 return temp; 127 } 128 129 static bool temps_are_copies(TCGArg arg1, TCGArg arg2) 130 { 131 TCGArg i; 132 133 if (arg1 == arg2) { 134 return true; 135 } 136 137 if (temps[arg1].state != TCG_TEMP_COPY 138 || temps[arg2].state != TCG_TEMP_COPY) { 139 return false; 140 } 141 142 for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) { 143 if (i == arg2) { 144 return true; 145 } 146 } 147 148 return false; 149 } 150 151 static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, 152 TCGArg dst, TCGArg src) 153 { 154 reset_temp(dst); 155 temps[dst].mask = temps[src].mask; 156 assert(temps[src].state != TCG_TEMP_CONST); 157 158 if (s->temps[src].type == s->temps[dst].type) { 159 if (temps[src].state != TCG_TEMP_COPY) { 160 temps[src].state = TCG_TEMP_COPY; 161 temps[src].next_copy = src; 162 temps[src].prev_copy = src; 163 } 164 temps[dst].state = TCG_TEMP_COPY; 165 temps[dst].next_copy = temps[src].next_copy; 166 temps[dst].prev_copy = src; 167 temps[temps[dst].next_copy].prev_copy = dst; 168 temps[src].next_copy = dst; 169 } 170 171 gen_args[0] = dst; 172 gen_args[1] = src; 173 } 174 175 static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val) 176 { 177 reset_temp(dst); 178 temps[dst].state = TCG_TEMP_CONST; 179 temps[dst].val = val; 180 temps[dst].mask = val; 181 gen_args[0] = dst; 182 gen_args[1] = val; 183 } 184 185 static TCGOpcode op_to_mov(TCGOpcode op) 186 { 187 switch (op_bits(op)) { 188 case 32: 189 return INDEX_op_mov_i32; 190 case 64: 191 return INDEX_op_mov_i64; 192 default: 193 fprintf(stderr, "op_to_mov: unexpected return value of " 194 "function op_bits.\n"); 195 tcg_abort(); 196 } 197 } 198 199 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) 200 { 201 switch (op) { 202 CASE_OP_32_64(add): 203 return x + y; 204 205 CASE_OP_32_64(sub): 206 return x - y; 207 208 CASE_OP_32_64(mul): 209 return x * y; 210 211 CASE_OP_32_64(and): 212 return x & y; 213 214 CASE_OP_32_64(or): 215 return x | y; 216 217 CASE_OP_32_64(xor): 218 return x ^ y; 219 220 case INDEX_op_shl_i32: 221 return (uint32_t)x << (uint32_t)y; 222 223 case INDEX_op_shl_i64: 224 return (uint64_t)x << (uint64_t)y; 225 226 case INDEX_op_shr_i32: 227 return (uint32_t)x >> (uint32_t)y; 228 229 case INDEX_op_shr_i64: 230 return (uint64_t)x >> (uint64_t)y; 231 232 case INDEX_op_sar_i32: 233 return (int32_t)x >> (int32_t)y; 234 235 case INDEX_op_sar_i64: 236 return (int64_t)x >> (int64_t)y; 237 238 case INDEX_op_rotr_i32: 239 x = ((uint32_t)x << (32 - y)) | ((uint32_t)x >> y); 240 return x; 241 242 case INDEX_op_rotr_i64: 243 x = ((uint64_t)x << (64 - y)) | ((uint64_t)x >> y); 244 return x; 245 246 case INDEX_op_rotl_i32: 247 x = ((uint32_t)x << y) | ((uint32_t)x >> (32 - y)); 248 return x; 249 250 case INDEX_op_rotl_i64: 251 x = ((uint64_t)x << y) | ((uint64_t)x >> (64 - y)); 252 return x; 253 254 CASE_OP_32_64(not): 255 return ~x; 256 257 CASE_OP_32_64(neg): 258 return -x; 259 260 CASE_OP_32_64(andc): 261 return x & ~y; 262 263 CASE_OP_32_64(orc): 264 return x | ~y; 265 266 CASE_OP_32_64(eqv): 267 return ~(x ^ y); 268 269 CASE_OP_32_64(nand): 270 return ~(x & y); 271 272 CASE_OP_32_64(nor): 273 return ~(x | y); 274 275 CASE_OP_32_64(ext8s): 276 return (int8_t)x; 277 278 CASE_OP_32_64(ext16s): 279 return (int16_t)x; 280 281 CASE_OP_32_64(ext8u): 282 return (uint8_t)x; 283 284 CASE_OP_32_64(ext16u): 285 return (uint16_t)x; 286 287 case INDEX_op_ext32s_i64: 288 return (int32_t)x; 289 290 case INDEX_op_ext32u_i64: 291 return (uint32_t)x; 292 293 default: 294 fprintf(stderr, 295 "Unrecognized operation %d in do_constant_folding.\n", op); 296 tcg_abort(); 297 } 298 } 299 300 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y) 301 { 302 TCGArg res = do_constant_folding_2(op, x, y); 303 if (op_bits(op) == 32) { 304 res &= 0xffffffff; 305 } 306 return res; 307 } 308 309 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) 310 { 311 switch (c) { 312 case TCG_COND_EQ: 313 return x == y; 314 case TCG_COND_NE: 315 return x != y; 316 case TCG_COND_LT: 317 return (int32_t)x < (int32_t)y; 318 case TCG_COND_GE: 319 return (int32_t)x >= (int32_t)y; 320 case TCG_COND_LE: 321 return (int32_t)x <= (int32_t)y; 322 case TCG_COND_GT: 323 return (int32_t)x > (int32_t)y; 324 case TCG_COND_LTU: 325 return x < y; 326 case TCG_COND_GEU: 327 return x >= y; 328 case TCG_COND_LEU: 329 return x <= y; 330 case TCG_COND_GTU: 331 return x > y; 332 default: 333 tcg_abort(); 334 } 335 } 336 337 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) 338 { 339 switch (c) { 340 case TCG_COND_EQ: 341 return x == y; 342 case TCG_COND_NE: 343 return x != y; 344 case TCG_COND_LT: 345 return (int64_t)x < (int64_t)y; 346 case TCG_COND_GE: 347 return (int64_t)x >= (int64_t)y; 348 case TCG_COND_LE: 349 return (int64_t)x <= (int64_t)y; 350 case TCG_COND_GT: 351 return (int64_t)x > (int64_t)y; 352 case TCG_COND_LTU: 353 return x < y; 354 case TCG_COND_GEU: 355 return x >= y; 356 case TCG_COND_LEU: 357 return x <= y; 358 case TCG_COND_GTU: 359 return x > y; 360 default: 361 tcg_abort(); 362 } 363 } 364 365 static bool do_constant_folding_cond_eq(TCGCond c) 366 { 367 switch (c) { 368 case TCG_COND_GT: 369 case TCG_COND_LTU: 370 case TCG_COND_LT: 371 case TCG_COND_GTU: 372 case TCG_COND_NE: 373 return 0; 374 case TCG_COND_GE: 375 case TCG_COND_GEU: 376 case TCG_COND_LE: 377 case TCG_COND_LEU: 378 case TCG_COND_EQ: 379 return 1; 380 default: 381 tcg_abort(); 382 } 383 } 384 385 /* Return 2 if the condition can't be simplified, and the result 386 of the condition (0 or 1) if it can */ 387 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, 388 TCGArg y, TCGCond c) 389 { 390 if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) { 391 switch (op_bits(op)) { 392 case 32: 393 return do_constant_folding_cond_32(temps[x].val, temps[y].val, c); 394 case 64: 395 return do_constant_folding_cond_64(temps[x].val, temps[y].val, c); 396 default: 397 tcg_abort(); 398 } 399 } else if (temps_are_copies(x, y)) { 400 return do_constant_folding_cond_eq(c); 401 } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) { 402 switch (c) { 403 case TCG_COND_LTU: 404 return 0; 405 case TCG_COND_GEU: 406 return 1; 407 default: 408 return 2; 409 } 410 } else { 411 return 2; 412 } 413 } 414 415 /* Return 2 if the condition can't be simplified, and the result 416 of the condition (0 or 1) if it can */ 417 static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c) 418 { 419 TCGArg al = p1[0], ah = p1[1]; 420 TCGArg bl = p2[0], bh = p2[1]; 421 422 if (temps[bl].state == TCG_TEMP_CONST 423 && temps[bh].state == TCG_TEMP_CONST) { 424 uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val; 425 426 if (temps[al].state == TCG_TEMP_CONST 427 && temps[ah].state == TCG_TEMP_CONST) { 428 uint64_t a; 429 a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val; 430 return do_constant_folding_cond_64(a, b, c); 431 } 432 if (b == 0) { 433 switch (c) { 434 case TCG_COND_LTU: 435 return 0; 436 case TCG_COND_GEU: 437 return 1; 438 default: 439 break; 440 } 441 } 442 } 443 if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) { 444 return do_constant_folding_cond_eq(c); 445 } 446 return 2; 447 } 448 449 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) 450 { 451 TCGArg a1 = *p1, a2 = *p2; 452 int sum = 0; 453 sum += temps[a1].state == TCG_TEMP_CONST; 454 sum -= temps[a2].state == TCG_TEMP_CONST; 455 456 /* Prefer the constant in second argument, and then the form 457 op a, a, b, which is better handled on non-RISC hosts. */ 458 if (sum > 0 || (sum == 0 && dest == a2)) { 459 *p1 = a2; 460 *p2 = a1; 461 return true; 462 } 463 return false; 464 } 465 466 static bool swap_commutative2(TCGArg *p1, TCGArg *p2) 467 { 468 int sum = 0; 469 sum += temps[p1[0]].state == TCG_TEMP_CONST; 470 sum += temps[p1[1]].state == TCG_TEMP_CONST; 471 sum -= temps[p2[0]].state == TCG_TEMP_CONST; 472 sum -= temps[p2[1]].state == TCG_TEMP_CONST; 473 if (sum > 0) { 474 TCGArg t; 475 t = p1[0], p1[0] = p2[0], p2[0] = t; 476 t = p1[1], p1[1] = p2[1], p2[1] = t; 477 return true; 478 } 479 return false; 480 } 481 482 /* Propagate constants and copies, fold constant expressions. */ 483 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, 484 TCGArg *args, TCGOpDef *tcg_op_defs) 485 { 486 int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args; 487 tcg_target_ulong mask, affected; 488 TCGOpcode op; 489 const TCGOpDef *def; 490 TCGArg *gen_args; 491 TCGArg tmp; 492 493 /* Array VALS has an element for each temp. 494 If this temp holds a constant then its value is kept in VALS' element. 495 If this temp is a copy of other ones then the other copies are 496 available through the doubly linked circular list. */ 497 498 nb_temps = s->nb_temps; 499 nb_globals = s->nb_globals; 500 reset_all_temps(nb_temps); 501 502 nb_ops = tcg_opc_ptr - s->gen_opc_buf; 503 gen_args = args; 504 for (op_index = 0; op_index < nb_ops; op_index++) { 505 op = s->gen_opc_buf[op_index]; 506 def = &tcg_op_defs[op]; 507 /* Do copy propagation */ 508 if (op == INDEX_op_call) { 509 int nb_oargs = args[0] >> 16; 510 int nb_iargs = args[0] & 0xffff; 511 for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) { 512 if (temps[args[i]].state == TCG_TEMP_COPY) { 513 args[i] = find_better_copy(s, args[i]); 514 } 515 } 516 } else { 517 for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) { 518 if (temps[args[i]].state == TCG_TEMP_COPY) { 519 args[i] = find_better_copy(s, args[i]); 520 } 521 } 522 } 523 524 /* For commutative operations make constant second argument */ 525 switch (op) { 526 CASE_OP_32_64(add): 527 CASE_OP_32_64(mul): 528 CASE_OP_32_64(and): 529 CASE_OP_32_64(or): 530 CASE_OP_32_64(xor): 531 CASE_OP_32_64(eqv): 532 CASE_OP_32_64(nand): 533 CASE_OP_32_64(nor): 534 swap_commutative(args[0], &args[1], &args[2]); 535 break; 536 CASE_OP_32_64(brcond): 537 if (swap_commutative(-1, &args[0], &args[1])) { 538 args[2] = tcg_swap_cond(args[2]); 539 } 540 break; 541 CASE_OP_32_64(setcond): 542 if (swap_commutative(args[0], &args[1], &args[2])) { 543 args[3] = tcg_swap_cond(args[3]); 544 } 545 break; 546 CASE_OP_32_64(movcond): 547 if (swap_commutative(-1, &args[1], &args[2])) { 548 args[5] = tcg_swap_cond(args[5]); 549 } 550 /* For movcond, we canonicalize the "false" input reg to match 551 the destination reg so that the tcg backend can implement 552 a "move if true" operation. */ 553 if (swap_commutative(args[0], &args[4], &args[3])) { 554 args[5] = tcg_invert_cond(args[5]); 555 } 556 break; 557 CASE_OP_32_64(add2): 558 swap_commutative(args[0], &args[2], &args[4]); 559 swap_commutative(args[1], &args[3], &args[5]); 560 break; 561 CASE_OP_32_64(mulu2): 562 CASE_OP_32_64(muls2): 563 swap_commutative(args[0], &args[2], &args[3]); 564 break; 565 case INDEX_op_brcond2_i32: 566 if (swap_commutative2(&args[0], &args[2])) { 567 args[4] = tcg_swap_cond(args[4]); 568 } 569 break; 570 case INDEX_op_setcond2_i32: 571 if (swap_commutative2(&args[1], &args[3])) { 572 args[5] = tcg_swap_cond(args[5]); 573 } 574 break; 575 default: 576 break; 577 } 578 579 /* Simplify expressions for "shift/rot r, 0, a => movi r, 0", 580 and "sub r, 0, a => neg r, a" case. */ 581 switch (op) { 582 CASE_OP_32_64(shl): 583 CASE_OP_32_64(shr): 584 CASE_OP_32_64(sar): 585 CASE_OP_32_64(rotl): 586 CASE_OP_32_64(rotr): 587 if (temps[args[1]].state == TCG_TEMP_CONST 588 && temps[args[1]].val == 0) { 589 s->gen_opc_buf[op_index] = op_to_movi(op); 590 tcg_opt_gen_movi(gen_args, args[0], 0); 591 args += 3; 592 gen_args += 2; 593 continue; 594 } 595 break; 596 CASE_OP_32_64(sub): 597 { 598 TCGOpcode neg_op; 599 bool have_neg; 600 601 if (temps[args[2]].state == TCG_TEMP_CONST) { 602 /* Proceed with possible constant folding. */ 603 break; 604 } 605 if (op == INDEX_op_sub_i32) { 606 neg_op = INDEX_op_neg_i32; 607 have_neg = TCG_TARGET_HAS_neg_i32; 608 } else { 609 neg_op = INDEX_op_neg_i64; 610 have_neg = TCG_TARGET_HAS_neg_i64; 611 } 612 if (!have_neg) { 613 break; 614 } 615 if (temps[args[1]].state == TCG_TEMP_CONST 616 && temps[args[1]].val == 0) { 617 s->gen_opc_buf[op_index] = neg_op; 618 reset_temp(args[0]); 619 gen_args[0] = args[0]; 620 gen_args[1] = args[2]; 621 args += 3; 622 gen_args += 2; 623 continue; 624 } 625 } 626 break; 627 default: 628 break; 629 } 630 631 /* Simplify expression for "op r, a, 0 => mov r, a" cases */ 632 switch (op) { 633 CASE_OP_32_64(add): 634 CASE_OP_32_64(sub): 635 CASE_OP_32_64(shl): 636 CASE_OP_32_64(shr): 637 CASE_OP_32_64(sar): 638 CASE_OP_32_64(rotl): 639 CASE_OP_32_64(rotr): 640 CASE_OP_32_64(or): 641 CASE_OP_32_64(xor): 642 if (temps[args[1]].state == TCG_TEMP_CONST) { 643 /* Proceed with possible constant folding. */ 644 break; 645 } 646 if (temps[args[2]].state == TCG_TEMP_CONST 647 && temps[args[2]].val == 0) { 648 if (temps_are_copies(args[0], args[1])) { 649 s->gen_opc_buf[op_index] = INDEX_op_nop; 650 } else { 651 s->gen_opc_buf[op_index] = op_to_mov(op); 652 tcg_opt_gen_mov(s, gen_args, args[0], args[1]); 653 gen_args += 2; 654 } 655 args += 3; 656 continue; 657 } 658 break; 659 default: 660 break; 661 } 662 663 /* Simplify using known-zero bits */ 664 mask = -1; 665 affected = -1; 666 switch (op) { 667 CASE_OP_32_64(ext8s): 668 if ((temps[args[1]].mask & 0x80) != 0) { 669 break; 670 } 671 CASE_OP_32_64(ext8u): 672 mask = 0xff; 673 goto and_const; 674 CASE_OP_32_64(ext16s): 675 if ((temps[args[1]].mask & 0x8000) != 0) { 676 break; 677 } 678 CASE_OP_32_64(ext16u): 679 mask = 0xffff; 680 goto and_const; 681 case INDEX_op_ext32s_i64: 682 if ((temps[args[1]].mask & 0x80000000) != 0) { 683 break; 684 } 685 case INDEX_op_ext32u_i64: 686 mask = 0xffffffffU; 687 goto and_const; 688 689 CASE_OP_32_64(and): 690 mask = temps[args[2]].mask; 691 if (temps[args[2]].state == TCG_TEMP_CONST) { 692 and_const: 693 affected = temps[args[1]].mask & ~mask; 694 } 695 mask = temps[args[1]].mask & mask; 696 break; 697 698 CASE_OP_32_64(sar): 699 if (temps[args[2]].state == TCG_TEMP_CONST) { 700 mask = ((tcg_target_long)temps[args[1]].mask 701 >> temps[args[2]].val); 702 } 703 break; 704 705 CASE_OP_32_64(shr): 706 if (temps[args[2]].state == TCG_TEMP_CONST) { 707 mask = temps[args[1]].mask >> temps[args[2]].val; 708 } 709 break; 710 711 CASE_OP_32_64(shl): 712 if (temps[args[2]].state == TCG_TEMP_CONST) { 713 mask = temps[args[1]].mask << temps[args[2]].val; 714 } 715 break; 716 717 CASE_OP_32_64(neg): 718 /* Set to 1 all bits to the left of the rightmost. */ 719 mask = -(temps[args[1]].mask & -temps[args[1]].mask); 720 break; 721 722 CASE_OP_32_64(deposit): 723 tmp = ((1ull << args[4]) - 1); 724 mask = ((temps[args[1]].mask & ~(tmp << args[3])) 725 | ((temps[args[2]].mask & tmp) << args[3])); 726 break; 727 728 CASE_OP_32_64(or): 729 CASE_OP_32_64(xor): 730 mask = temps[args[1]].mask | temps[args[2]].mask; 731 break; 732 733 CASE_OP_32_64(setcond): 734 mask = 1; 735 break; 736 737 CASE_OP_32_64(movcond): 738 mask = temps[args[3]].mask | temps[args[4]].mask; 739 break; 740 741 default: 742 break; 743 } 744 745 if (mask == 0) { 746 assert(def->nb_oargs == 1); 747 s->gen_opc_buf[op_index] = op_to_movi(op); 748 tcg_opt_gen_movi(gen_args, args[0], 0); 749 args += def->nb_oargs + def->nb_iargs + def->nb_cargs; 750 gen_args += 2; 751 continue; 752 } 753 if (affected == 0) { 754 assert(def->nb_oargs == 1); 755 if (temps_are_copies(args[0], args[1])) { 756 s->gen_opc_buf[op_index] = INDEX_op_nop; 757 } else if (temps[args[1]].state != TCG_TEMP_CONST) { 758 s->gen_opc_buf[op_index] = op_to_mov(op); 759 tcg_opt_gen_mov(s, gen_args, args[0], args[1]); 760 gen_args += 2; 761 } else { 762 s->gen_opc_buf[op_index] = op_to_movi(op); 763 tcg_opt_gen_movi(gen_args, args[0], temps[args[1]].val); 764 gen_args += 2; 765 } 766 args += def->nb_iargs + 1; 767 continue; 768 } 769 770 /* Simplify expression for "op r, a, 0 => movi r, 0" cases */ 771 switch (op) { 772 CASE_OP_32_64(and): 773 CASE_OP_32_64(mul): 774 if ((temps[args[2]].state == TCG_TEMP_CONST 775 && temps[args[2]].val == 0)) { 776 s->gen_opc_buf[op_index] = op_to_movi(op); 777 tcg_opt_gen_movi(gen_args, args[0], 0); 778 args += 3; 779 gen_args += 2; 780 continue; 781 } 782 break; 783 default: 784 break; 785 } 786 787 /* Simplify expression for "op r, a, a => mov r, a" cases */ 788 switch (op) { 789 CASE_OP_32_64(or): 790 CASE_OP_32_64(and): 791 if (temps_are_copies(args[1], args[2])) { 792 if (temps_are_copies(args[0], args[1])) { 793 s->gen_opc_buf[op_index] = INDEX_op_nop; 794 } else { 795 s->gen_opc_buf[op_index] = op_to_mov(op); 796 tcg_opt_gen_mov(s, gen_args, args[0], args[1]); 797 gen_args += 2; 798 } 799 args += 3; 800 continue; 801 } 802 break; 803 default: 804 break; 805 } 806 807 /* Simplify expression for "op r, a, a => movi r, 0" cases */ 808 switch (op) { 809 CASE_OP_32_64(sub): 810 CASE_OP_32_64(xor): 811 if (temps_are_copies(args[1], args[2])) { 812 s->gen_opc_buf[op_index] = op_to_movi(op); 813 tcg_opt_gen_movi(gen_args, args[0], 0); 814 gen_args += 2; 815 args += 3; 816 continue; 817 } 818 break; 819 default: 820 break; 821 } 822 823 /* Propagate constants through copy operations and do constant 824 folding. Constants will be substituted to arguments by register 825 allocator where needed and possible. Also detect copies. */ 826 switch (op) { 827 CASE_OP_32_64(mov): 828 if (temps_are_copies(args[0], args[1])) { 829 args += 2; 830 s->gen_opc_buf[op_index] = INDEX_op_nop; 831 break; 832 } 833 if (temps[args[1]].state != TCG_TEMP_CONST) { 834 tcg_opt_gen_mov(s, gen_args, args[0], args[1]); 835 gen_args += 2; 836 args += 2; 837 break; 838 } 839 /* Source argument is constant. Rewrite the operation and 840 let movi case handle it. */ 841 op = op_to_movi(op); 842 s->gen_opc_buf[op_index] = op; 843 args[1] = temps[args[1]].val; 844 /* fallthrough */ 845 CASE_OP_32_64(movi): 846 tcg_opt_gen_movi(gen_args, args[0], args[1]); 847 gen_args += 2; 848 args += 2; 849 break; 850 851 CASE_OP_32_64(not): 852 CASE_OP_32_64(neg): 853 CASE_OP_32_64(ext8s): 854 CASE_OP_32_64(ext8u): 855 CASE_OP_32_64(ext16s): 856 CASE_OP_32_64(ext16u): 857 case INDEX_op_ext32s_i64: 858 case INDEX_op_ext32u_i64: 859 if (temps[args[1]].state == TCG_TEMP_CONST) { 860 s->gen_opc_buf[op_index] = op_to_movi(op); 861 tmp = do_constant_folding(op, temps[args[1]].val, 0); 862 tcg_opt_gen_movi(gen_args, args[0], tmp); 863 gen_args += 2; 864 args += 2; 865 break; 866 } 867 goto do_default; 868 869 CASE_OP_32_64(add): 870 CASE_OP_32_64(sub): 871 CASE_OP_32_64(mul): 872 CASE_OP_32_64(or): 873 CASE_OP_32_64(and): 874 CASE_OP_32_64(xor): 875 CASE_OP_32_64(shl): 876 CASE_OP_32_64(shr): 877 CASE_OP_32_64(sar): 878 CASE_OP_32_64(rotl): 879 CASE_OP_32_64(rotr): 880 CASE_OP_32_64(andc): 881 CASE_OP_32_64(orc): 882 CASE_OP_32_64(eqv): 883 CASE_OP_32_64(nand): 884 CASE_OP_32_64(nor): 885 if (temps[args[1]].state == TCG_TEMP_CONST 886 && temps[args[2]].state == TCG_TEMP_CONST) { 887 s->gen_opc_buf[op_index] = op_to_movi(op); 888 tmp = do_constant_folding(op, temps[args[1]].val, 889 temps[args[2]].val); 890 tcg_opt_gen_movi(gen_args, args[0], tmp); 891 gen_args += 2; 892 args += 3; 893 break; 894 } 895 goto do_default; 896 897 CASE_OP_32_64(deposit): 898 if (temps[args[1]].state == TCG_TEMP_CONST 899 && temps[args[2]].state == TCG_TEMP_CONST) { 900 s->gen_opc_buf[op_index] = op_to_movi(op); 901 tmp = ((1ull << args[4]) - 1); 902 tmp = (temps[args[1]].val & ~(tmp << args[3])) 903 | ((temps[args[2]].val & tmp) << args[3]); 904 tcg_opt_gen_movi(gen_args, args[0], tmp); 905 gen_args += 2; 906 args += 5; 907 break; 908 } 909 goto do_default; 910 911 CASE_OP_32_64(setcond): 912 tmp = do_constant_folding_cond(op, args[1], args[2], args[3]); 913 if (tmp != 2) { 914 s->gen_opc_buf[op_index] = op_to_movi(op); 915 tcg_opt_gen_movi(gen_args, args[0], tmp); 916 gen_args += 2; 917 args += 4; 918 break; 919 } 920 goto do_default; 921 922 CASE_OP_32_64(brcond): 923 tmp = do_constant_folding_cond(op, args[0], args[1], args[2]); 924 if (tmp != 2) { 925 if (tmp) { 926 reset_all_temps(nb_temps); 927 s->gen_opc_buf[op_index] = INDEX_op_br; 928 gen_args[0] = args[3]; 929 gen_args += 1; 930 } else { 931 s->gen_opc_buf[op_index] = INDEX_op_nop; 932 } 933 args += 4; 934 break; 935 } 936 goto do_default; 937 938 CASE_OP_32_64(movcond): 939 tmp = do_constant_folding_cond(op, args[1], args[2], args[5]); 940 if (tmp != 2) { 941 if (temps_are_copies(args[0], args[4-tmp])) { 942 s->gen_opc_buf[op_index] = INDEX_op_nop; 943 } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) { 944 s->gen_opc_buf[op_index] = op_to_movi(op); 945 tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val); 946 gen_args += 2; 947 } else { 948 s->gen_opc_buf[op_index] = op_to_mov(op); 949 tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]); 950 gen_args += 2; 951 } 952 args += 6; 953 break; 954 } 955 goto do_default; 956 957 case INDEX_op_add2_i32: 958 case INDEX_op_sub2_i32: 959 if (temps[args[2]].state == TCG_TEMP_CONST 960 && temps[args[3]].state == TCG_TEMP_CONST 961 && temps[args[4]].state == TCG_TEMP_CONST 962 && temps[args[5]].state == TCG_TEMP_CONST) { 963 uint32_t al = temps[args[2]].val; 964 uint32_t ah = temps[args[3]].val; 965 uint32_t bl = temps[args[4]].val; 966 uint32_t bh = temps[args[5]].val; 967 uint64_t a = ((uint64_t)ah << 32) | al; 968 uint64_t b = ((uint64_t)bh << 32) | bl; 969 TCGArg rl, rh; 970 971 if (op == INDEX_op_add2_i32) { 972 a += b; 973 } else { 974 a -= b; 975 } 976 977 /* We emit the extra nop when we emit the add2/sub2. */ 978 assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); 979 980 rl = args[0]; 981 rh = args[1]; 982 s->gen_opc_buf[op_index] = INDEX_op_movi_i32; 983 s->gen_opc_buf[++op_index] = INDEX_op_movi_i32; 984 tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a); 985 tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32)); 986 gen_args += 4; 987 args += 6; 988 break; 989 } 990 goto do_default; 991 992 case INDEX_op_mulu2_i32: 993 if (temps[args[2]].state == TCG_TEMP_CONST 994 && temps[args[3]].state == TCG_TEMP_CONST) { 995 uint32_t a = temps[args[2]].val; 996 uint32_t b = temps[args[3]].val; 997 uint64_t r = (uint64_t)a * b; 998 TCGArg rl, rh; 999 1000 /* We emit the extra nop when we emit the mulu2. */ 1001 assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); 1002 1003 rl = args[0]; 1004 rh = args[1]; 1005 s->gen_opc_buf[op_index] = INDEX_op_movi_i32; 1006 s->gen_opc_buf[++op_index] = INDEX_op_movi_i32; 1007 tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r); 1008 tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32)); 1009 gen_args += 4; 1010 args += 4; 1011 break; 1012 } 1013 goto do_default; 1014 1015 case INDEX_op_brcond2_i32: 1016 tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]); 1017 if (tmp != 2) { 1018 if (tmp) { 1019 reset_all_temps(nb_temps); 1020 s->gen_opc_buf[op_index] = INDEX_op_br; 1021 gen_args[0] = args[5]; 1022 gen_args += 1; 1023 } else { 1024 s->gen_opc_buf[op_index] = INDEX_op_nop; 1025 } 1026 } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE) 1027 && temps[args[2]].state == TCG_TEMP_CONST 1028 && temps[args[3]].state == TCG_TEMP_CONST 1029 && temps[args[2]].val == 0 1030 && temps[args[3]].val == 0) { 1031 /* Simplify LT/GE comparisons vs zero to a single compare 1032 vs the high word of the input. */ 1033 reset_all_temps(nb_temps); 1034 s->gen_opc_buf[op_index] = INDEX_op_brcond_i32; 1035 gen_args[0] = args[1]; 1036 gen_args[1] = args[3]; 1037 gen_args[2] = args[4]; 1038 gen_args[3] = args[5]; 1039 gen_args += 4; 1040 } else { 1041 goto do_default; 1042 } 1043 args += 6; 1044 break; 1045 1046 case INDEX_op_setcond2_i32: 1047 tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]); 1048 if (tmp != 2) { 1049 s->gen_opc_buf[op_index] = INDEX_op_movi_i32; 1050 tcg_opt_gen_movi(gen_args, args[0], tmp); 1051 gen_args += 2; 1052 } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE) 1053 && temps[args[3]].state == TCG_TEMP_CONST 1054 && temps[args[4]].state == TCG_TEMP_CONST 1055 && temps[args[3]].val == 0 1056 && temps[args[4]].val == 0) { 1057 /* Simplify LT/GE comparisons vs zero to a single compare 1058 vs the high word of the input. */ 1059 s->gen_opc_buf[op_index] = INDEX_op_setcond_i32; 1060 reset_temp(args[0]); 1061 gen_args[0] = args[0]; 1062 gen_args[1] = args[2]; 1063 gen_args[2] = args[4]; 1064 gen_args[3] = args[5]; 1065 gen_args += 4; 1066 } else { 1067 goto do_default; 1068 } 1069 args += 6; 1070 break; 1071 1072 case INDEX_op_call: 1073 nb_call_args = (args[0] >> 16) + (args[0] & 0xffff); 1074 if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS | 1075 TCG_CALL_NO_WRITE_GLOBALS))) { 1076 for (i = 0; i < nb_globals; i++) { 1077 reset_temp(i); 1078 } 1079 } 1080 for (i = 0; i < (args[0] >> 16); i++) { 1081 reset_temp(args[i + 1]); 1082 } 1083 i = nb_call_args + 3; 1084 while (i) { 1085 *gen_args = *args; 1086 args++; 1087 gen_args++; 1088 i--; 1089 } 1090 break; 1091 1092 default: 1093 do_default: 1094 /* Default case: we know nothing about operation (or were unable 1095 to compute the operation result) so no propagation is done. 1096 We trash everything if the operation is the end of a basic 1097 block, otherwise we only trash the output args. "mask" is 1098 the non-zero bits mask for the first output arg. */ 1099 if (def->flags & TCG_OPF_BB_END) { 1100 reset_all_temps(nb_temps); 1101 } else { 1102 for (i = 0; i < def->nb_oargs; i++) { 1103 reset_temp(args[i]); 1104 } 1105 } 1106 for (i = 0; i < def->nb_args; i++) { 1107 gen_args[i] = args[i]; 1108 } 1109 args += def->nb_args; 1110 gen_args += def->nb_args; 1111 break; 1112 } 1113 } 1114 1115 return gen_args; 1116 } 1117 1118 TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr, 1119 TCGArg *args, TCGOpDef *tcg_op_defs) 1120 { 1121 TCGArg *res; 1122 res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs); 1123 return res; 1124 } 1125