1 /* 2 * Optimizations for Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2010 Samsung Electronics. 5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include "config.h" 27 28 #include <stdlib.h> 29 #include <stdio.h> 30 31 #include "qemu-common.h" 32 #include "tcg-op.h" 33 34 #define CASE_OP_32_64(x) \ 35 glue(glue(case INDEX_op_, x), _i32): \ 36 glue(glue(case INDEX_op_, x), _i64) 37 38 typedef enum { 39 TCG_TEMP_UNDEF = 0, 40 TCG_TEMP_CONST, 41 TCG_TEMP_COPY, 42 TCG_TEMP_HAS_COPY, 43 TCG_TEMP_ANY 44 } tcg_temp_state; 45 46 struct tcg_temp_info { 47 tcg_temp_state state; 48 uint16_t prev_copy; 49 uint16_t next_copy; 50 tcg_target_ulong val; 51 }; 52 53 static struct tcg_temp_info temps[TCG_MAX_TEMPS]; 54 55 /* Reset TEMP's state to TCG_TEMP_ANY. If TEMP was a representative of some 56 class of equivalent temp's, a new representative should be chosen in this 57 class. */ 58 static void reset_temp(TCGArg temp, int nb_temps, int nb_globals) 59 { 60 int i; 61 TCGArg new_base = (TCGArg)-1; 62 if (temps[temp].state == TCG_TEMP_HAS_COPY) { 63 for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) { 64 if (i >= nb_globals) { 65 temps[i].state = TCG_TEMP_HAS_COPY; 66 new_base = i; 67 break; 68 } 69 } 70 for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) { 71 if (new_base == (TCGArg)-1) { 72 temps[i].state = TCG_TEMP_ANY; 73 } else { 74 temps[i].val = new_base; 75 } 76 } 77 temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; 78 temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; 79 } else if (temps[temp].state == TCG_TEMP_COPY) { 80 temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; 81 temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; 82 new_base = temps[temp].val; 83 } 84 temps[temp].state = TCG_TEMP_ANY; 85 if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) { 86 temps[new_base].state = TCG_TEMP_ANY; 87 } 88 } 89 90 static int op_bits(TCGOpcode op) 91 { 92 const TCGOpDef *def = &tcg_op_defs[op]; 93 return def->flags & TCG_OPF_64BIT ? 64 : 32; 94 } 95 96 static TCGOpcode op_to_movi(TCGOpcode op) 97 { 98 switch (op_bits(op)) { 99 case 32: 100 return INDEX_op_movi_i32; 101 case 64: 102 return INDEX_op_movi_i64; 103 default: 104 fprintf(stderr, "op_to_movi: unexpected return value of " 105 "function op_bits.\n"); 106 tcg_abort(); 107 } 108 } 109 110 static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst, 111 TCGArg src, int nb_temps, int nb_globals) 112 { 113 reset_temp(dst, nb_temps, nb_globals); 114 assert(temps[src].state != TCG_TEMP_COPY); 115 /* Don't try to copy if one of temps is a global or either one 116 is local and another is register */ 117 if (src >= nb_globals && dst >= nb_globals && 118 tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) { 119 assert(temps[src].state != TCG_TEMP_CONST); 120 if (temps[src].state != TCG_TEMP_HAS_COPY) { 121 temps[src].state = TCG_TEMP_HAS_COPY; 122 temps[src].next_copy = src; 123 temps[src].prev_copy = src; 124 } 125 temps[dst].state = TCG_TEMP_COPY; 126 temps[dst].val = src; 127 temps[dst].next_copy = temps[src].next_copy; 128 temps[dst].prev_copy = src; 129 temps[temps[dst].next_copy].prev_copy = dst; 130 temps[src].next_copy = dst; 131 } 132 gen_args[0] = dst; 133 gen_args[1] = src; 134 } 135 136 static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val, 137 int nb_temps, int nb_globals) 138 { 139 reset_temp(dst, nb_temps, nb_globals); 140 temps[dst].state = TCG_TEMP_CONST; 141 temps[dst].val = val; 142 gen_args[0] = dst; 143 gen_args[1] = val; 144 } 145 146 static TCGOpcode op_to_mov(TCGOpcode op) 147 { 148 switch (op_bits(op)) { 149 case 32: 150 return INDEX_op_mov_i32; 151 case 64: 152 return INDEX_op_mov_i64; 153 default: 154 fprintf(stderr, "op_to_mov: unexpected return value of " 155 "function op_bits.\n"); 156 tcg_abort(); 157 } 158 } 159 160 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) 161 { 162 switch (op) { 163 CASE_OP_32_64(add): 164 return x + y; 165 166 CASE_OP_32_64(sub): 167 return x - y; 168 169 CASE_OP_32_64(mul): 170 return x * y; 171 172 CASE_OP_32_64(and): 173 return x & y; 174 175 CASE_OP_32_64(or): 176 return x | y; 177 178 CASE_OP_32_64(xor): 179 return x ^ y; 180 181 case INDEX_op_shl_i32: 182 return (uint32_t)x << (uint32_t)y; 183 184 case INDEX_op_shl_i64: 185 return (uint64_t)x << (uint64_t)y; 186 187 case INDEX_op_shr_i32: 188 return (uint32_t)x >> (uint32_t)y; 189 190 case INDEX_op_shr_i64: 191 return (uint64_t)x >> (uint64_t)y; 192 193 case INDEX_op_sar_i32: 194 return (int32_t)x >> (int32_t)y; 195 196 case INDEX_op_sar_i64: 197 return (int64_t)x >> (int64_t)y; 198 199 case INDEX_op_rotr_i32: 200 x = ((uint32_t)x << (32 - y)) | ((uint32_t)x >> y); 201 return x; 202 203 case INDEX_op_rotr_i64: 204 x = ((uint64_t)x << (64 - y)) | ((uint64_t)x >> y); 205 return x; 206 207 case INDEX_op_rotl_i32: 208 x = ((uint32_t)x << y) | ((uint32_t)x >> (32 - y)); 209 return x; 210 211 case INDEX_op_rotl_i64: 212 x = ((uint64_t)x << y) | ((uint64_t)x >> (64 - y)); 213 return x; 214 215 CASE_OP_32_64(not): 216 return ~x; 217 218 CASE_OP_32_64(neg): 219 return -x; 220 221 CASE_OP_32_64(andc): 222 return x & ~y; 223 224 CASE_OP_32_64(orc): 225 return x | ~y; 226 227 CASE_OP_32_64(eqv): 228 return ~(x ^ y); 229 230 CASE_OP_32_64(nand): 231 return ~(x & y); 232 233 CASE_OP_32_64(nor): 234 return ~(x | y); 235 236 CASE_OP_32_64(ext8s): 237 return (int8_t)x; 238 239 CASE_OP_32_64(ext16s): 240 return (int16_t)x; 241 242 CASE_OP_32_64(ext8u): 243 return (uint8_t)x; 244 245 CASE_OP_32_64(ext16u): 246 return (uint16_t)x; 247 248 case INDEX_op_ext32s_i64: 249 return (int32_t)x; 250 251 case INDEX_op_ext32u_i64: 252 return (uint32_t)x; 253 254 default: 255 fprintf(stderr, 256 "Unrecognized operation %d in do_constant_folding.\n", op); 257 tcg_abort(); 258 } 259 } 260 261 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y) 262 { 263 TCGArg res = do_constant_folding_2(op, x, y); 264 if (op_bits(op) == 32) { 265 res &= 0xffffffff; 266 } 267 return res; 268 } 269 270 /* Propagate constants and copies, fold constant expressions. */ 271 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, 272 TCGArg *args, TCGOpDef *tcg_op_defs) 273 { 274 int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args; 275 TCGOpcode op; 276 const TCGOpDef *def; 277 TCGArg *gen_args; 278 TCGArg tmp; 279 /* Array VALS has an element for each temp. 280 If this temp holds a constant then its value is kept in VALS' element. 281 If this temp is a copy of other ones then this equivalence class' 282 representative is kept in VALS' element. 283 If this temp is neither copy nor constant then corresponding VALS' 284 element is unused. */ 285 286 nb_temps = s->nb_temps; 287 nb_globals = s->nb_globals; 288 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); 289 290 nb_ops = tcg_opc_ptr - gen_opc_buf; 291 gen_args = args; 292 for (op_index = 0; op_index < nb_ops; op_index++) { 293 op = gen_opc_buf[op_index]; 294 def = &tcg_op_defs[op]; 295 /* Do copy propagation */ 296 if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) { 297 assert(op != INDEX_op_call); 298 for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) { 299 if (temps[args[i]].state == TCG_TEMP_COPY) { 300 args[i] = temps[args[i]].val; 301 } 302 } 303 } 304 305 /* For commutative operations make constant second argument */ 306 switch (op) { 307 CASE_OP_32_64(add): 308 CASE_OP_32_64(mul): 309 CASE_OP_32_64(and): 310 CASE_OP_32_64(or): 311 CASE_OP_32_64(xor): 312 CASE_OP_32_64(eqv): 313 CASE_OP_32_64(nand): 314 CASE_OP_32_64(nor): 315 if (temps[args[1]].state == TCG_TEMP_CONST) { 316 tmp = args[1]; 317 args[1] = args[2]; 318 args[2] = tmp; 319 } 320 break; 321 default: 322 break; 323 } 324 325 /* Simplify expression if possible. */ 326 switch (op) { 327 CASE_OP_32_64(add): 328 CASE_OP_32_64(sub): 329 CASE_OP_32_64(shl): 330 CASE_OP_32_64(shr): 331 CASE_OP_32_64(sar): 332 CASE_OP_32_64(rotl): 333 CASE_OP_32_64(rotr): 334 if (temps[args[1]].state == TCG_TEMP_CONST) { 335 /* Proceed with possible constant folding. */ 336 break; 337 } 338 if (temps[args[2]].state == TCG_TEMP_CONST 339 && temps[args[2]].val == 0) { 340 if ((temps[args[0]].state == TCG_TEMP_COPY 341 && temps[args[0]].val == args[1]) 342 || args[0] == args[1]) { 343 args += 3; 344 gen_opc_buf[op_index] = INDEX_op_nop; 345 } else { 346 gen_opc_buf[op_index] = op_to_mov(op); 347 tcg_opt_gen_mov(s, gen_args, args[0], args[1], 348 nb_temps, nb_globals); 349 gen_args += 2; 350 args += 3; 351 } 352 continue; 353 } 354 break; 355 CASE_OP_32_64(mul): 356 if ((temps[args[2]].state == TCG_TEMP_CONST 357 && temps[args[2]].val == 0)) { 358 gen_opc_buf[op_index] = op_to_movi(op); 359 tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals); 360 args += 3; 361 gen_args += 2; 362 continue; 363 } 364 break; 365 CASE_OP_32_64(or): 366 CASE_OP_32_64(and): 367 if (args[1] == args[2]) { 368 if (args[1] == args[0]) { 369 args += 3; 370 gen_opc_buf[op_index] = INDEX_op_nop; 371 } else { 372 gen_opc_buf[op_index] = op_to_mov(op); 373 tcg_opt_gen_mov(s, gen_args, args[0], args[1], nb_temps, 374 nb_globals); 375 gen_args += 2; 376 args += 3; 377 } 378 continue; 379 } 380 break; 381 default: 382 break; 383 } 384 385 /* Propagate constants through copy operations and do constant 386 folding. Constants will be substituted to arguments by register 387 allocator where needed and possible. Also detect copies. */ 388 switch (op) { 389 CASE_OP_32_64(mov): 390 if ((temps[args[1]].state == TCG_TEMP_COPY 391 && temps[args[1]].val == args[0]) 392 || args[0] == args[1]) { 393 args += 2; 394 gen_opc_buf[op_index] = INDEX_op_nop; 395 break; 396 } 397 if (temps[args[1]].state != TCG_TEMP_CONST) { 398 tcg_opt_gen_mov(s, gen_args, args[0], args[1], 399 nb_temps, nb_globals); 400 gen_args += 2; 401 args += 2; 402 break; 403 } 404 /* Source argument is constant. Rewrite the operation and 405 let movi case handle it. */ 406 op = op_to_movi(op); 407 gen_opc_buf[op_index] = op; 408 args[1] = temps[args[1]].val; 409 /* fallthrough */ 410 CASE_OP_32_64(movi): 411 tcg_opt_gen_movi(gen_args, args[0], args[1], nb_temps, nb_globals); 412 gen_args += 2; 413 args += 2; 414 break; 415 CASE_OP_32_64(not): 416 CASE_OP_32_64(neg): 417 CASE_OP_32_64(ext8s): 418 CASE_OP_32_64(ext8u): 419 CASE_OP_32_64(ext16s): 420 CASE_OP_32_64(ext16u): 421 case INDEX_op_ext32s_i64: 422 case INDEX_op_ext32u_i64: 423 if (temps[args[1]].state == TCG_TEMP_CONST) { 424 gen_opc_buf[op_index] = op_to_movi(op); 425 tmp = do_constant_folding(op, temps[args[1]].val, 0); 426 tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals); 427 gen_args += 2; 428 args += 2; 429 break; 430 } else { 431 reset_temp(args[0], nb_temps, nb_globals); 432 gen_args[0] = args[0]; 433 gen_args[1] = args[1]; 434 gen_args += 2; 435 args += 2; 436 break; 437 } 438 CASE_OP_32_64(add): 439 CASE_OP_32_64(sub): 440 CASE_OP_32_64(mul): 441 CASE_OP_32_64(or): 442 CASE_OP_32_64(and): 443 CASE_OP_32_64(xor): 444 CASE_OP_32_64(shl): 445 CASE_OP_32_64(shr): 446 CASE_OP_32_64(sar): 447 CASE_OP_32_64(rotl): 448 CASE_OP_32_64(rotr): 449 CASE_OP_32_64(andc): 450 CASE_OP_32_64(orc): 451 CASE_OP_32_64(eqv): 452 CASE_OP_32_64(nand): 453 CASE_OP_32_64(nor): 454 if (temps[args[1]].state == TCG_TEMP_CONST 455 && temps[args[2]].state == TCG_TEMP_CONST) { 456 gen_opc_buf[op_index] = op_to_movi(op); 457 tmp = do_constant_folding(op, temps[args[1]].val, 458 temps[args[2]].val); 459 tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals); 460 gen_args += 2; 461 args += 3; 462 break; 463 } else { 464 reset_temp(args[0], nb_temps, nb_globals); 465 gen_args[0] = args[0]; 466 gen_args[1] = args[1]; 467 gen_args[2] = args[2]; 468 gen_args += 3; 469 args += 3; 470 break; 471 } 472 case INDEX_op_call: 473 nb_call_args = (args[0] >> 16) + (args[0] & 0xffff); 474 if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) { 475 for (i = 0; i < nb_globals; i++) { 476 reset_temp(i, nb_temps, nb_globals); 477 } 478 } 479 for (i = 0; i < (args[0] >> 16); i++) { 480 reset_temp(args[i + 1], nb_temps, nb_globals); 481 } 482 i = nb_call_args + 3; 483 while (i) { 484 *gen_args = *args; 485 args++; 486 gen_args++; 487 i--; 488 } 489 break; 490 case INDEX_op_set_label: 491 case INDEX_op_jmp: 492 case INDEX_op_br: 493 CASE_OP_32_64(brcond): 494 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); 495 for (i = 0; i < def->nb_args; i++) { 496 *gen_args = *args; 497 args++; 498 gen_args++; 499 } 500 break; 501 default: 502 /* Default case: we do know nothing about operation so no 503 propagation is done. We only trash output args. */ 504 for (i = 0; i < def->nb_oargs; i++) { 505 reset_temp(args[i], nb_temps, nb_globals); 506 } 507 for (i = 0; i < def->nb_args; i++) { 508 gen_args[i] = args[i]; 509 } 510 args += def->nb_args; 511 gen_args += def->nb_args; 512 break; 513 } 514 } 515 516 return gen_args; 517 } 518 519 TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr, 520 TCGArg *args, TCGOpDef *tcg_op_defs) 521 { 522 TCGArg *res; 523 res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs); 524 return res; 525 } 526