1 /* 2 * Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 /* define it to use liveness analysis (better code) */ 26 #define USE_TCG_OPTIMIZATIONS 27 28 #include "qemu/osdep.h" 29 30 /* Define to jump the ELF file used to communicate with GDB. */ 31 #undef DEBUG_JIT 32 33 #include "qemu/cutils.h" 34 #include "qemu/host-utils.h" 35 #include "qemu/timer.h" 36 37 /* Note: the long term plan is to reduce the dependencies on the QEMU 38 CPU definitions. Currently they are used for qemu_ld/st 39 instructions */ 40 #define NO_CPU_IO_DEFS 41 #include "cpu.h" 42 43 #include "exec/cpu-common.h" 44 #include "exec/exec-all.h" 45 46 #include "tcg-op.h" 47 48 #if UINTPTR_MAX == UINT32_MAX 49 # define ELF_CLASS ELFCLASS32 50 #else 51 # define ELF_CLASS ELFCLASS64 52 #endif 53 #ifdef HOST_WORDS_BIGENDIAN 54 # define ELF_DATA ELFDATA2MSB 55 #else 56 # define ELF_DATA ELFDATA2LSB 57 #endif 58 59 #include "elf.h" 60 #include "exec/log.h" 61 62 /* Forward declarations for functions declared in tcg-target.inc.c and 63 used here. */ 64 static void tcg_target_init(TCGContext *s); 65 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode); 66 static void tcg_target_qemu_prologue(TCGContext *s); 67 static void patch_reloc(tcg_insn_unit *code_ptr, int type, 68 intptr_t value, intptr_t addend); 69 70 /* The CIE and FDE header definitions will be common to all hosts. */ 71 typedef struct { 72 uint32_t len __attribute__((aligned((sizeof(void *))))); 73 uint32_t id; 74 uint8_t version; 75 char augmentation[1]; 76 uint8_t code_align; 77 uint8_t data_align; 78 uint8_t return_column; 79 } DebugFrameCIE; 80 81 typedef struct QEMU_PACKED { 82 uint32_t len __attribute__((aligned((sizeof(void *))))); 83 uint32_t cie_offset; 84 uintptr_t func_start; 85 uintptr_t func_len; 86 } DebugFrameFDEHeader; 87 88 typedef struct QEMU_PACKED { 89 DebugFrameCIE cie; 90 DebugFrameFDEHeader fde; 91 } DebugFrameHeader; 92 93 static void tcg_register_jit_int(void *buf, size_t size, 94 const void *debug_frame, 95 size_t debug_frame_size) 96 __attribute__((unused)); 97 98 /* Forward declarations for functions declared and used in tcg-target.inc.c. */ 99 static const char *target_parse_constraint(TCGArgConstraint *ct, 100 const char *ct_str, TCGType type); 101 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, 102 intptr_t arg2); 103 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 104 static void tcg_out_movi(TCGContext *s, TCGType type, 105 TCGReg ret, tcg_target_long arg); 106 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, 107 const int *const_args); 108 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, 109 intptr_t arg2); 110 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 111 TCGReg base, intptr_t ofs); 112 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target); 113 static int tcg_target_const_match(tcg_target_long val, TCGType type, 114 const TCGArgConstraint *arg_ct); 115 #ifdef TCG_TARGET_NEED_LDST_LABELS 116 static bool tcg_out_ldst_finalize(TCGContext *s); 117 #endif 118 119 static TCGRegSet tcg_target_available_regs[2]; 120 static TCGRegSet tcg_target_call_clobber_regs; 121 122 #if TCG_TARGET_INSN_UNIT_SIZE == 1 123 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v) 124 { 125 *s->code_ptr++ = v; 126 } 127 128 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p, 129 uint8_t v) 130 { 131 *p = v; 132 } 133 #endif 134 135 #if TCG_TARGET_INSN_UNIT_SIZE <= 2 136 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v) 137 { 138 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 139 *s->code_ptr++ = v; 140 } else { 141 tcg_insn_unit *p = s->code_ptr; 142 memcpy(p, &v, sizeof(v)); 143 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE); 144 } 145 } 146 147 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p, 148 uint16_t v) 149 { 150 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 151 *p = v; 152 } else { 153 memcpy(p, &v, sizeof(v)); 154 } 155 } 156 #endif 157 158 #if TCG_TARGET_INSN_UNIT_SIZE <= 4 159 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v) 160 { 161 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 162 *s->code_ptr++ = v; 163 } else { 164 tcg_insn_unit *p = s->code_ptr; 165 memcpy(p, &v, sizeof(v)); 166 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE); 167 } 168 } 169 170 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p, 171 uint32_t v) 172 { 173 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 174 *p = v; 175 } else { 176 memcpy(p, &v, sizeof(v)); 177 } 178 } 179 #endif 180 181 #if TCG_TARGET_INSN_UNIT_SIZE <= 8 182 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v) 183 { 184 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 185 *s->code_ptr++ = v; 186 } else { 187 tcg_insn_unit *p = s->code_ptr; 188 memcpy(p, &v, sizeof(v)); 189 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE); 190 } 191 } 192 193 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p, 194 uint64_t v) 195 { 196 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 197 *p = v; 198 } else { 199 memcpy(p, &v, sizeof(v)); 200 } 201 } 202 #endif 203 204 /* label relocation processing */ 205 206 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, 207 TCGLabel *l, intptr_t addend) 208 { 209 TCGRelocation *r; 210 211 if (l->has_value) { 212 /* FIXME: This may break relocations on RISC targets that 213 modify instruction fields in place. The caller may not have 214 written the initial value. */ 215 patch_reloc(code_ptr, type, l->u.value, addend); 216 } else { 217 /* add a new relocation entry */ 218 r = tcg_malloc(sizeof(TCGRelocation)); 219 r->type = type; 220 r->ptr = code_ptr; 221 r->addend = addend; 222 r->next = l->u.first_reloc; 223 l->u.first_reloc = r; 224 } 225 } 226 227 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr) 228 { 229 intptr_t value = (intptr_t)ptr; 230 TCGRelocation *r; 231 232 tcg_debug_assert(!l->has_value); 233 234 for (r = l->u.first_reloc; r != NULL; r = r->next) { 235 patch_reloc(r->ptr, r->type, value, r->addend); 236 } 237 238 l->has_value = 1; 239 l->u.value_ptr = ptr; 240 } 241 242 TCGLabel *gen_new_label(void) 243 { 244 TCGContext *s = &tcg_ctx; 245 TCGLabel *l = tcg_malloc(sizeof(TCGLabel)); 246 247 *l = (TCGLabel){ 248 .id = s->nb_labels++ 249 }; 250 251 return l; 252 } 253 254 #include "tcg-target.inc.c" 255 256 /* pool based memory allocation */ 257 void *tcg_malloc_internal(TCGContext *s, int size) 258 { 259 TCGPool *p; 260 int pool_size; 261 262 if (size > TCG_POOL_CHUNK_SIZE) { 263 /* big malloc: insert a new pool (XXX: could optimize) */ 264 p = g_malloc(sizeof(TCGPool) + size); 265 p->size = size; 266 p->next = s->pool_first_large; 267 s->pool_first_large = p; 268 return p->data; 269 } else { 270 p = s->pool_current; 271 if (!p) { 272 p = s->pool_first; 273 if (!p) 274 goto new_pool; 275 } else { 276 if (!p->next) { 277 new_pool: 278 pool_size = TCG_POOL_CHUNK_SIZE; 279 p = g_malloc(sizeof(TCGPool) + pool_size); 280 p->size = pool_size; 281 p->next = NULL; 282 if (s->pool_current) 283 s->pool_current->next = p; 284 else 285 s->pool_first = p; 286 } else { 287 p = p->next; 288 } 289 } 290 } 291 s->pool_current = p; 292 s->pool_cur = p->data + size; 293 s->pool_end = p->data + p->size; 294 return p->data; 295 } 296 297 void tcg_pool_reset(TCGContext *s) 298 { 299 TCGPool *p, *t; 300 for (p = s->pool_first_large; p; p = t) { 301 t = p->next; 302 g_free(p); 303 } 304 s->pool_first_large = NULL; 305 s->pool_cur = s->pool_end = NULL; 306 s->pool_current = NULL; 307 } 308 309 typedef struct TCGHelperInfo { 310 void *func; 311 const char *name; 312 unsigned flags; 313 unsigned sizemask; 314 } TCGHelperInfo; 315 316 #include "exec/helper-proto.h" 317 318 static const TCGHelperInfo all_helpers[] = { 319 #include "exec/helper-tcg.h" 320 }; 321 322 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; 323 static void process_op_defs(TCGContext *s); 324 325 void tcg_context_init(TCGContext *s) 326 { 327 int op, total_args, n, i; 328 TCGOpDef *def; 329 TCGArgConstraint *args_ct; 330 int *sorted_args; 331 GHashTable *helper_table; 332 333 memset(s, 0, sizeof(*s)); 334 s->nb_globals = 0; 335 336 /* Count total number of arguments and allocate the corresponding 337 space */ 338 total_args = 0; 339 for(op = 0; op < NB_OPS; op++) { 340 def = &tcg_op_defs[op]; 341 n = def->nb_iargs + def->nb_oargs; 342 total_args += n; 343 } 344 345 args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args); 346 sorted_args = g_malloc(sizeof(int) * total_args); 347 348 for(op = 0; op < NB_OPS; op++) { 349 def = &tcg_op_defs[op]; 350 def->args_ct = args_ct; 351 def->sorted_args = sorted_args; 352 n = def->nb_iargs + def->nb_oargs; 353 sorted_args += n; 354 args_ct += n; 355 } 356 357 /* Register helpers. */ 358 /* Use g_direct_hash/equal for direct pointer comparisons on func. */ 359 s->helpers = helper_table = g_hash_table_new(NULL, NULL); 360 361 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { 362 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func, 363 (gpointer)&all_helpers[i]); 364 } 365 366 tcg_target_init(s); 367 process_op_defs(s); 368 369 /* Reverse the order of the saved registers, assuming they're all at 370 the start of tcg_target_reg_alloc_order. */ 371 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) { 372 int r = tcg_target_reg_alloc_order[n]; 373 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) { 374 break; 375 } 376 } 377 for (i = 0; i < n; ++i) { 378 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i]; 379 } 380 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) { 381 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; 382 } 383 } 384 385 /* 386 * Allocate TBs right before their corresponding translated code, making 387 * sure that TBs and code are on different cache lines. 388 */ 389 TranslationBlock *tcg_tb_alloc(TCGContext *s) 390 { 391 uintptr_t align = qemu_icache_linesize; 392 TranslationBlock *tb; 393 void *next; 394 395 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); 396 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); 397 398 if (unlikely(next > s->code_gen_highwater)) { 399 return NULL; 400 } 401 s->code_gen_ptr = next; 402 s->data_gen_ptr = NULL; 403 return tb; 404 } 405 406 void tcg_prologue_init(TCGContext *s) 407 { 408 size_t prologue_size, total_size; 409 void *buf0, *buf1; 410 411 /* Put the prologue at the beginning of code_gen_buffer. */ 412 buf0 = s->code_gen_buffer; 413 s->code_ptr = buf0; 414 s->code_buf = buf0; 415 s->code_gen_prologue = buf0; 416 417 /* Generate the prologue. */ 418 tcg_target_qemu_prologue(s); 419 buf1 = s->code_ptr; 420 flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1); 421 422 /* Deduct the prologue from the buffer. */ 423 prologue_size = tcg_current_code_size(s); 424 s->code_gen_ptr = buf1; 425 s->code_gen_buffer = buf1; 426 s->code_buf = buf1; 427 total_size = s->code_gen_buffer_size - prologue_size; 428 s->code_gen_buffer_size = total_size; 429 430 /* Compute a high-water mark, at which we voluntarily flush the buffer 431 and start over. The size here is arbitrary, significantly larger 432 than we expect the code generation for any one opcode to require. */ 433 s->code_gen_highwater = s->code_gen_buffer + (total_size - 1024); 434 435 tcg_register_jit(s->code_gen_buffer, total_size); 436 437 #ifdef DEBUG_DISAS 438 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 439 qemu_log_lock(); 440 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size); 441 log_disas(buf0, prologue_size); 442 qemu_log("\n"); 443 qemu_log_flush(); 444 qemu_log_unlock(); 445 } 446 #endif 447 448 /* Assert that goto_ptr is implemented completely. */ 449 if (TCG_TARGET_HAS_goto_ptr) { 450 tcg_debug_assert(s->code_gen_epilogue != NULL); 451 } 452 } 453 454 void tcg_func_start(TCGContext *s) 455 { 456 tcg_pool_reset(s); 457 s->nb_temps = s->nb_globals; 458 459 /* No temps have been previously allocated for size or locality. */ 460 memset(s->free_temps, 0, sizeof(s->free_temps)); 461 462 s->nb_labels = 0; 463 s->current_frame_offset = s->frame_start; 464 465 #ifdef CONFIG_DEBUG_TCG 466 s->goto_tb_issue_mask = 0; 467 #endif 468 469 s->gen_op_buf[0].next = 1; 470 s->gen_op_buf[0].prev = 0; 471 s->gen_next_op_idx = 1; 472 s->gen_next_parm_idx = 0; 473 } 474 475 static inline int temp_idx(TCGContext *s, TCGTemp *ts) 476 { 477 ptrdiff_t n = ts - s->temps; 478 tcg_debug_assert(n >= 0 && n < s->nb_temps); 479 return n; 480 } 481 482 static inline TCGTemp *tcg_temp_alloc(TCGContext *s) 483 { 484 int n = s->nb_temps++; 485 tcg_debug_assert(n < TCG_MAX_TEMPS); 486 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 487 } 488 489 static inline TCGTemp *tcg_global_alloc(TCGContext *s) 490 { 491 tcg_debug_assert(s->nb_globals == s->nb_temps); 492 s->nb_globals++; 493 return tcg_temp_alloc(s); 494 } 495 496 static int tcg_global_reg_new_internal(TCGContext *s, TCGType type, 497 TCGReg reg, const char *name) 498 { 499 TCGTemp *ts; 500 501 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) { 502 tcg_abort(); 503 } 504 505 ts = tcg_global_alloc(s); 506 ts->base_type = type; 507 ts->type = type; 508 ts->fixed_reg = 1; 509 ts->reg = reg; 510 ts->name = name; 511 tcg_regset_set_reg(s->reserved_regs, reg); 512 513 return temp_idx(s, ts); 514 } 515 516 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 517 { 518 int idx; 519 s->frame_start = start; 520 s->frame_end = start + size; 521 idx = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 522 s->frame_temp = &s->temps[idx]; 523 } 524 525 TCGv_i32 tcg_global_reg_new_i32(TCGReg reg, const char *name) 526 { 527 TCGContext *s = &tcg_ctx; 528 int idx; 529 530 if (tcg_regset_test_reg(s->reserved_regs, reg)) { 531 tcg_abort(); 532 } 533 idx = tcg_global_reg_new_internal(s, TCG_TYPE_I32, reg, name); 534 return MAKE_TCGV_I32(idx); 535 } 536 537 TCGv_i64 tcg_global_reg_new_i64(TCGReg reg, const char *name) 538 { 539 TCGContext *s = &tcg_ctx; 540 int idx; 541 542 if (tcg_regset_test_reg(s->reserved_regs, reg)) { 543 tcg_abort(); 544 } 545 idx = tcg_global_reg_new_internal(s, TCG_TYPE_I64, reg, name); 546 return MAKE_TCGV_I64(idx); 547 } 548 549 int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, 550 intptr_t offset, const char *name) 551 { 552 TCGContext *s = &tcg_ctx; 553 TCGTemp *base_ts = &s->temps[GET_TCGV_PTR(base)]; 554 TCGTemp *ts = tcg_global_alloc(s); 555 int indirect_reg = 0, bigendian = 0; 556 #ifdef HOST_WORDS_BIGENDIAN 557 bigendian = 1; 558 #endif 559 560 if (!base_ts->fixed_reg) { 561 /* We do not support double-indirect registers. */ 562 tcg_debug_assert(!base_ts->indirect_reg); 563 base_ts->indirect_base = 1; 564 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 565 ? 2 : 1); 566 indirect_reg = 1; 567 } 568 569 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 570 TCGTemp *ts2 = tcg_global_alloc(s); 571 char buf[64]; 572 573 ts->base_type = TCG_TYPE_I64; 574 ts->type = TCG_TYPE_I32; 575 ts->indirect_reg = indirect_reg; 576 ts->mem_allocated = 1; 577 ts->mem_base = base_ts; 578 ts->mem_offset = offset + bigendian * 4; 579 pstrcpy(buf, sizeof(buf), name); 580 pstrcat(buf, sizeof(buf), "_0"); 581 ts->name = strdup(buf); 582 583 tcg_debug_assert(ts2 == ts + 1); 584 ts2->base_type = TCG_TYPE_I64; 585 ts2->type = TCG_TYPE_I32; 586 ts2->indirect_reg = indirect_reg; 587 ts2->mem_allocated = 1; 588 ts2->mem_base = base_ts; 589 ts2->mem_offset = offset + (1 - bigendian) * 4; 590 pstrcpy(buf, sizeof(buf), name); 591 pstrcat(buf, sizeof(buf), "_1"); 592 ts2->name = strdup(buf); 593 } else { 594 ts->base_type = type; 595 ts->type = type; 596 ts->indirect_reg = indirect_reg; 597 ts->mem_allocated = 1; 598 ts->mem_base = base_ts; 599 ts->mem_offset = offset; 600 ts->name = name; 601 } 602 return temp_idx(s, ts); 603 } 604 605 static int tcg_temp_new_internal(TCGType type, int temp_local) 606 { 607 TCGContext *s = &tcg_ctx; 608 TCGTemp *ts; 609 int idx, k; 610 611 k = type + (temp_local ? TCG_TYPE_COUNT : 0); 612 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS); 613 if (idx < TCG_MAX_TEMPS) { 614 /* There is already an available temp with the right type. */ 615 clear_bit(idx, s->free_temps[k].l); 616 617 ts = &s->temps[idx]; 618 ts->temp_allocated = 1; 619 tcg_debug_assert(ts->base_type == type); 620 tcg_debug_assert(ts->temp_local == temp_local); 621 } else { 622 ts = tcg_temp_alloc(s); 623 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 624 TCGTemp *ts2 = tcg_temp_alloc(s); 625 626 ts->base_type = type; 627 ts->type = TCG_TYPE_I32; 628 ts->temp_allocated = 1; 629 ts->temp_local = temp_local; 630 631 tcg_debug_assert(ts2 == ts + 1); 632 ts2->base_type = TCG_TYPE_I64; 633 ts2->type = TCG_TYPE_I32; 634 ts2->temp_allocated = 1; 635 ts2->temp_local = temp_local; 636 } else { 637 ts->base_type = type; 638 ts->type = type; 639 ts->temp_allocated = 1; 640 ts->temp_local = temp_local; 641 } 642 idx = temp_idx(s, ts); 643 } 644 645 #if defined(CONFIG_DEBUG_TCG) 646 s->temps_in_use++; 647 #endif 648 return idx; 649 } 650 651 TCGv_i32 tcg_temp_new_internal_i32(int temp_local) 652 { 653 int idx; 654 655 idx = tcg_temp_new_internal(TCG_TYPE_I32, temp_local); 656 return MAKE_TCGV_I32(idx); 657 } 658 659 TCGv_i64 tcg_temp_new_internal_i64(int temp_local) 660 { 661 int idx; 662 663 idx = tcg_temp_new_internal(TCG_TYPE_I64, temp_local); 664 return MAKE_TCGV_I64(idx); 665 } 666 667 static void tcg_temp_free_internal(int idx) 668 { 669 TCGContext *s = &tcg_ctx; 670 TCGTemp *ts; 671 int k; 672 673 #if defined(CONFIG_DEBUG_TCG) 674 s->temps_in_use--; 675 if (s->temps_in_use < 0) { 676 fprintf(stderr, "More temporaries freed than allocated!\n"); 677 } 678 #endif 679 680 tcg_debug_assert(idx >= s->nb_globals && idx < s->nb_temps); 681 ts = &s->temps[idx]; 682 tcg_debug_assert(ts->temp_allocated != 0); 683 ts->temp_allocated = 0; 684 685 k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0); 686 set_bit(idx, s->free_temps[k].l); 687 } 688 689 void tcg_temp_free_i32(TCGv_i32 arg) 690 { 691 tcg_temp_free_internal(GET_TCGV_I32(arg)); 692 } 693 694 void tcg_temp_free_i64(TCGv_i64 arg) 695 { 696 tcg_temp_free_internal(GET_TCGV_I64(arg)); 697 } 698 699 TCGv_i32 tcg_const_i32(int32_t val) 700 { 701 TCGv_i32 t0; 702 t0 = tcg_temp_new_i32(); 703 tcg_gen_movi_i32(t0, val); 704 return t0; 705 } 706 707 TCGv_i64 tcg_const_i64(int64_t val) 708 { 709 TCGv_i64 t0; 710 t0 = tcg_temp_new_i64(); 711 tcg_gen_movi_i64(t0, val); 712 return t0; 713 } 714 715 TCGv_i32 tcg_const_local_i32(int32_t val) 716 { 717 TCGv_i32 t0; 718 t0 = tcg_temp_local_new_i32(); 719 tcg_gen_movi_i32(t0, val); 720 return t0; 721 } 722 723 TCGv_i64 tcg_const_local_i64(int64_t val) 724 { 725 TCGv_i64 t0; 726 t0 = tcg_temp_local_new_i64(); 727 tcg_gen_movi_i64(t0, val); 728 return t0; 729 } 730 731 #if defined(CONFIG_DEBUG_TCG) 732 void tcg_clear_temp_count(void) 733 { 734 TCGContext *s = &tcg_ctx; 735 s->temps_in_use = 0; 736 } 737 738 int tcg_check_temp_count(void) 739 { 740 TCGContext *s = &tcg_ctx; 741 if (s->temps_in_use) { 742 /* Clear the count so that we don't give another 743 * warning immediately next time around. 744 */ 745 s->temps_in_use = 0; 746 return 1; 747 } 748 return 0; 749 } 750 #endif 751 752 /* Note: we convert the 64 bit args to 32 bit and do some alignment 753 and endian swap. Maybe it would be better to do the alignment 754 and endian swap in tcg_reg_alloc_call(). */ 755 void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret, 756 int nargs, TCGArg *args) 757 { 758 int i, real_args, nb_rets, pi, pi_first; 759 unsigned sizemask, flags; 760 TCGHelperInfo *info; 761 762 info = g_hash_table_lookup(s->helpers, (gpointer)func); 763 flags = info->flags; 764 sizemask = info->sizemask; 765 766 #if defined(__sparc__) && !defined(__arch64__) \ 767 && !defined(CONFIG_TCG_INTERPRETER) 768 /* We have 64-bit values in one register, but need to pass as two 769 separate parameters. Split them. */ 770 int orig_sizemask = sizemask; 771 int orig_nargs = nargs; 772 TCGv_i64 retl, reth; 773 774 TCGV_UNUSED_I64(retl); 775 TCGV_UNUSED_I64(reth); 776 if (sizemask != 0) { 777 TCGArg *split_args = __builtin_alloca(sizeof(TCGArg) * nargs * 2); 778 for (i = real_args = 0; i < nargs; ++i) { 779 int is_64bit = sizemask & (1 << (i+1)*2); 780 if (is_64bit) { 781 TCGv_i64 orig = MAKE_TCGV_I64(args[i]); 782 TCGv_i32 h = tcg_temp_new_i32(); 783 TCGv_i32 l = tcg_temp_new_i32(); 784 tcg_gen_extr_i64_i32(l, h, orig); 785 split_args[real_args++] = GET_TCGV_I32(h); 786 split_args[real_args++] = GET_TCGV_I32(l); 787 } else { 788 split_args[real_args++] = args[i]; 789 } 790 } 791 nargs = real_args; 792 args = split_args; 793 sizemask = 0; 794 } 795 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 796 for (i = 0; i < nargs; ++i) { 797 int is_64bit = sizemask & (1 << (i+1)*2); 798 int is_signed = sizemask & (2 << (i+1)*2); 799 if (!is_64bit) { 800 TCGv_i64 temp = tcg_temp_new_i64(); 801 TCGv_i64 orig = MAKE_TCGV_I64(args[i]); 802 if (is_signed) { 803 tcg_gen_ext32s_i64(temp, orig); 804 } else { 805 tcg_gen_ext32u_i64(temp, orig); 806 } 807 args[i] = GET_TCGV_I64(temp); 808 } 809 } 810 #endif /* TCG_TARGET_EXTEND_ARGS */ 811 812 pi_first = pi = s->gen_next_parm_idx; 813 if (ret != TCG_CALL_DUMMY_ARG) { 814 #if defined(__sparc__) && !defined(__arch64__) \ 815 && !defined(CONFIG_TCG_INTERPRETER) 816 if (orig_sizemask & 1) { 817 /* The 32-bit ABI is going to return the 64-bit value in 818 the %o0/%o1 register pair. Prepare for this by using 819 two return temporaries, and reassemble below. */ 820 retl = tcg_temp_new_i64(); 821 reth = tcg_temp_new_i64(); 822 s->gen_opparam_buf[pi++] = GET_TCGV_I64(reth); 823 s->gen_opparam_buf[pi++] = GET_TCGV_I64(retl); 824 nb_rets = 2; 825 } else { 826 s->gen_opparam_buf[pi++] = ret; 827 nb_rets = 1; 828 } 829 #else 830 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) { 831 #ifdef HOST_WORDS_BIGENDIAN 832 s->gen_opparam_buf[pi++] = ret + 1; 833 s->gen_opparam_buf[pi++] = ret; 834 #else 835 s->gen_opparam_buf[pi++] = ret; 836 s->gen_opparam_buf[pi++] = ret + 1; 837 #endif 838 nb_rets = 2; 839 } else { 840 s->gen_opparam_buf[pi++] = ret; 841 nb_rets = 1; 842 } 843 #endif 844 } else { 845 nb_rets = 0; 846 } 847 real_args = 0; 848 for (i = 0; i < nargs; i++) { 849 int is_64bit = sizemask & (1 << (i+1)*2); 850 if (TCG_TARGET_REG_BITS < 64 && is_64bit) { 851 #ifdef TCG_TARGET_CALL_ALIGN_ARGS 852 /* some targets want aligned 64 bit args */ 853 if (real_args & 1) { 854 s->gen_opparam_buf[pi++] = TCG_CALL_DUMMY_ARG; 855 real_args++; 856 } 857 #endif 858 /* If stack grows up, then we will be placing successive 859 arguments at lower addresses, which means we need to 860 reverse the order compared to how we would normally 861 treat either big or little-endian. For those arguments 862 that will wind up in registers, this still works for 863 HPPA (the only current STACK_GROWSUP target) since the 864 argument registers are *also* allocated in decreasing 865 order. If another such target is added, this logic may 866 have to get more complicated to differentiate between 867 stack arguments and register arguments. */ 868 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) 869 s->gen_opparam_buf[pi++] = args[i] + 1; 870 s->gen_opparam_buf[pi++] = args[i]; 871 #else 872 s->gen_opparam_buf[pi++] = args[i]; 873 s->gen_opparam_buf[pi++] = args[i] + 1; 874 #endif 875 real_args += 2; 876 continue; 877 } 878 879 s->gen_opparam_buf[pi++] = args[i]; 880 real_args++; 881 } 882 s->gen_opparam_buf[pi++] = (uintptr_t)func; 883 s->gen_opparam_buf[pi++] = flags; 884 885 i = s->gen_next_op_idx; 886 tcg_debug_assert(i < OPC_BUF_SIZE); 887 tcg_debug_assert(pi <= OPPARAM_BUF_SIZE); 888 889 /* Set links for sequential allocation during translation. */ 890 s->gen_op_buf[i] = (TCGOp){ 891 .opc = INDEX_op_call, 892 .callo = nb_rets, 893 .calli = real_args, 894 .args = pi_first, 895 .prev = i - 1, 896 .next = i + 1 897 }; 898 899 /* Make sure the calli field didn't overflow. */ 900 tcg_debug_assert(s->gen_op_buf[i].calli == real_args); 901 902 s->gen_op_buf[0].prev = i; 903 s->gen_next_op_idx = i + 1; 904 s->gen_next_parm_idx = pi; 905 906 #if defined(__sparc__) && !defined(__arch64__) \ 907 && !defined(CONFIG_TCG_INTERPRETER) 908 /* Free all of the parts we allocated above. */ 909 for (i = real_args = 0; i < orig_nargs; ++i) { 910 int is_64bit = orig_sizemask & (1 << (i+1)*2); 911 if (is_64bit) { 912 TCGv_i32 h = MAKE_TCGV_I32(args[real_args++]); 913 TCGv_i32 l = MAKE_TCGV_I32(args[real_args++]); 914 tcg_temp_free_i32(h); 915 tcg_temp_free_i32(l); 916 } else { 917 real_args++; 918 } 919 } 920 if (orig_sizemask & 1) { 921 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them. 922 Note that describing these as TCGv_i64 eliminates an unnecessary 923 zero-extension that tcg_gen_concat_i32_i64 would create. */ 924 tcg_gen_concat32_i64(MAKE_TCGV_I64(ret), retl, reth); 925 tcg_temp_free_i64(retl); 926 tcg_temp_free_i64(reth); 927 } 928 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 929 for (i = 0; i < nargs; ++i) { 930 int is_64bit = sizemask & (1 << (i+1)*2); 931 if (!is_64bit) { 932 TCGv_i64 temp = MAKE_TCGV_I64(args[i]); 933 tcg_temp_free_i64(temp); 934 } 935 } 936 #endif /* TCG_TARGET_EXTEND_ARGS */ 937 } 938 939 static void tcg_reg_alloc_start(TCGContext *s) 940 { 941 int i; 942 TCGTemp *ts; 943 for(i = 0; i < s->nb_globals; i++) { 944 ts = &s->temps[i]; 945 if (ts->fixed_reg) { 946 ts->val_type = TEMP_VAL_REG; 947 } else { 948 ts->val_type = TEMP_VAL_MEM; 949 } 950 } 951 for(i = s->nb_globals; i < s->nb_temps; i++) { 952 ts = &s->temps[i]; 953 if (ts->temp_local) { 954 ts->val_type = TEMP_VAL_MEM; 955 } else { 956 ts->val_type = TEMP_VAL_DEAD; 957 } 958 ts->mem_allocated = 0; 959 ts->fixed_reg = 0; 960 } 961 962 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 963 } 964 965 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 966 TCGTemp *ts) 967 { 968 int idx = temp_idx(s, ts); 969 970 if (idx < s->nb_globals) { 971 pstrcpy(buf, buf_size, ts->name); 972 } else if (ts->temp_local) { 973 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 974 } else { 975 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 976 } 977 return buf; 978 } 979 980 static char *tcg_get_arg_str_idx(TCGContext *s, char *buf, 981 int buf_size, int idx) 982 { 983 tcg_debug_assert(idx >= 0 && idx < s->nb_temps); 984 return tcg_get_arg_str_ptr(s, buf, buf_size, &s->temps[idx]); 985 } 986 987 /* Find helper name. */ 988 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val) 989 { 990 const char *ret = NULL; 991 if (s->helpers) { 992 TCGHelperInfo *info = g_hash_table_lookup(s->helpers, (gpointer)val); 993 if (info) { 994 ret = info->name; 995 } 996 } 997 return ret; 998 } 999 1000 static const char * const cond_name[] = 1001 { 1002 [TCG_COND_NEVER] = "never", 1003 [TCG_COND_ALWAYS] = "always", 1004 [TCG_COND_EQ] = "eq", 1005 [TCG_COND_NE] = "ne", 1006 [TCG_COND_LT] = "lt", 1007 [TCG_COND_GE] = "ge", 1008 [TCG_COND_LE] = "le", 1009 [TCG_COND_GT] = "gt", 1010 [TCG_COND_LTU] = "ltu", 1011 [TCG_COND_GEU] = "geu", 1012 [TCG_COND_LEU] = "leu", 1013 [TCG_COND_GTU] = "gtu" 1014 }; 1015 1016 static const char * const ldst_name[] = 1017 { 1018 [MO_UB] = "ub", 1019 [MO_SB] = "sb", 1020 [MO_LEUW] = "leuw", 1021 [MO_LESW] = "lesw", 1022 [MO_LEUL] = "leul", 1023 [MO_LESL] = "lesl", 1024 [MO_LEQ] = "leq", 1025 [MO_BEUW] = "beuw", 1026 [MO_BESW] = "besw", 1027 [MO_BEUL] = "beul", 1028 [MO_BESL] = "besl", 1029 [MO_BEQ] = "beq", 1030 }; 1031 1032 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 1033 #ifdef ALIGNED_ONLY 1034 [MO_UNALN >> MO_ASHIFT] = "un+", 1035 [MO_ALIGN >> MO_ASHIFT] = "", 1036 #else 1037 [MO_UNALN >> MO_ASHIFT] = "", 1038 [MO_ALIGN >> MO_ASHIFT] = "al+", 1039 #endif 1040 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 1041 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 1042 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 1043 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 1044 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 1045 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 1046 }; 1047 1048 void tcg_dump_ops(TCGContext *s) 1049 { 1050 char buf[128]; 1051 TCGOp *op; 1052 int oi; 1053 1054 for (oi = s->gen_op_buf[0].next; oi != 0; oi = op->next) { 1055 int i, k, nb_oargs, nb_iargs, nb_cargs; 1056 const TCGOpDef *def; 1057 const TCGArg *args; 1058 TCGOpcode c; 1059 int col = 0; 1060 1061 op = &s->gen_op_buf[oi]; 1062 c = op->opc; 1063 def = &tcg_op_defs[c]; 1064 args = &s->gen_opparam_buf[op->args]; 1065 1066 if (c == INDEX_op_insn_start) { 1067 col += qemu_log("%s ----", oi != s->gen_op_buf[0].next ? "\n" : ""); 1068 1069 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 1070 target_ulong a; 1071 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 1072 a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2]; 1073 #else 1074 a = args[i]; 1075 #endif 1076 col += qemu_log(" " TARGET_FMT_lx, a); 1077 } 1078 } else if (c == INDEX_op_call) { 1079 /* variable number of arguments */ 1080 nb_oargs = op->callo; 1081 nb_iargs = op->calli; 1082 nb_cargs = def->nb_cargs; 1083 1084 /* function name, flags, out args */ 1085 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name, 1086 tcg_find_helper(s, args[nb_oargs + nb_iargs]), 1087 args[nb_oargs + nb_iargs + 1], nb_oargs); 1088 for (i = 0; i < nb_oargs; i++) { 1089 col += qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), 1090 args[i])); 1091 } 1092 for (i = 0; i < nb_iargs; i++) { 1093 TCGArg arg = args[nb_oargs + i]; 1094 const char *t = "<dummy>"; 1095 if (arg != TCG_CALL_DUMMY_ARG) { 1096 t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg); 1097 } 1098 col += qemu_log(",%s", t); 1099 } 1100 } else { 1101 col += qemu_log(" %s ", def->name); 1102 1103 nb_oargs = def->nb_oargs; 1104 nb_iargs = def->nb_iargs; 1105 nb_cargs = def->nb_cargs; 1106 1107 k = 0; 1108 for (i = 0; i < nb_oargs; i++) { 1109 if (k != 0) { 1110 col += qemu_log(","); 1111 } 1112 col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), 1113 args[k++])); 1114 } 1115 for (i = 0; i < nb_iargs; i++) { 1116 if (k != 0) { 1117 col += qemu_log(","); 1118 } 1119 col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), 1120 args[k++])); 1121 } 1122 switch (c) { 1123 case INDEX_op_brcond_i32: 1124 case INDEX_op_setcond_i32: 1125 case INDEX_op_movcond_i32: 1126 case INDEX_op_brcond2_i32: 1127 case INDEX_op_setcond2_i32: 1128 case INDEX_op_brcond_i64: 1129 case INDEX_op_setcond_i64: 1130 case INDEX_op_movcond_i64: 1131 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) { 1132 col += qemu_log(",%s", cond_name[args[k++]]); 1133 } else { 1134 col += qemu_log(",$0x%" TCG_PRIlx, args[k++]); 1135 } 1136 i = 1; 1137 break; 1138 case INDEX_op_qemu_ld_i32: 1139 case INDEX_op_qemu_st_i32: 1140 case INDEX_op_qemu_ld_i64: 1141 case INDEX_op_qemu_st_i64: 1142 { 1143 TCGMemOpIdx oi = args[k++]; 1144 TCGMemOp op = get_memop(oi); 1145 unsigned ix = get_mmuidx(oi); 1146 1147 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 1148 col += qemu_log(",$0x%x,%u", op, ix); 1149 } else { 1150 const char *s_al, *s_op; 1151 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 1152 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 1153 col += qemu_log(",%s%s,%u", s_al, s_op, ix); 1154 } 1155 i = 1; 1156 } 1157 break; 1158 default: 1159 i = 0; 1160 break; 1161 } 1162 switch (c) { 1163 case INDEX_op_set_label: 1164 case INDEX_op_br: 1165 case INDEX_op_brcond_i32: 1166 case INDEX_op_brcond_i64: 1167 case INDEX_op_brcond2_i32: 1168 col += qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id); 1169 i++, k++; 1170 break; 1171 default: 1172 break; 1173 } 1174 for (; i < nb_cargs; i++, k++) { 1175 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]); 1176 } 1177 } 1178 if (op->life) { 1179 unsigned life = op->life; 1180 1181 for (; col < 48; ++col) { 1182 putc(' ', qemu_logfile); 1183 } 1184 1185 if (life & (SYNC_ARG * 3)) { 1186 qemu_log(" sync:"); 1187 for (i = 0; i < 2; ++i) { 1188 if (life & (SYNC_ARG << i)) { 1189 qemu_log(" %d", i); 1190 } 1191 } 1192 } 1193 life /= DEAD_ARG; 1194 if (life) { 1195 qemu_log(" dead:"); 1196 for (i = 0; life; ++i, life >>= 1) { 1197 if (life & 1) { 1198 qemu_log(" %d", i); 1199 } 1200 } 1201 } 1202 } 1203 qemu_log("\n"); 1204 } 1205 } 1206 1207 /* we give more priority to constraints with less registers */ 1208 static int get_constraint_priority(const TCGOpDef *def, int k) 1209 { 1210 const TCGArgConstraint *arg_ct; 1211 1212 int i, n; 1213 arg_ct = &def->args_ct[k]; 1214 if (arg_ct->ct & TCG_CT_ALIAS) { 1215 /* an alias is equivalent to a single register */ 1216 n = 1; 1217 } else { 1218 if (!(arg_ct->ct & TCG_CT_REG)) 1219 return 0; 1220 n = 0; 1221 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 1222 if (tcg_regset_test_reg(arg_ct->u.regs, i)) 1223 n++; 1224 } 1225 } 1226 return TCG_TARGET_NB_REGS - n + 1; 1227 } 1228 1229 /* sort from highest priority to lowest */ 1230 static void sort_constraints(TCGOpDef *def, int start, int n) 1231 { 1232 int i, j, p1, p2, tmp; 1233 1234 for(i = 0; i < n; i++) 1235 def->sorted_args[start + i] = start + i; 1236 if (n <= 1) 1237 return; 1238 for(i = 0; i < n - 1; i++) { 1239 for(j = i + 1; j < n; j++) { 1240 p1 = get_constraint_priority(def, def->sorted_args[start + i]); 1241 p2 = get_constraint_priority(def, def->sorted_args[start + j]); 1242 if (p1 < p2) { 1243 tmp = def->sorted_args[start + i]; 1244 def->sorted_args[start + i] = def->sorted_args[start + j]; 1245 def->sorted_args[start + j] = tmp; 1246 } 1247 } 1248 } 1249 } 1250 1251 static void process_op_defs(TCGContext *s) 1252 { 1253 TCGOpcode op; 1254 1255 for (op = 0; op < NB_OPS; op++) { 1256 TCGOpDef *def = &tcg_op_defs[op]; 1257 const TCGTargetOpDef *tdefs; 1258 TCGType type; 1259 int i, nb_args; 1260 1261 if (def->flags & TCG_OPF_NOT_PRESENT) { 1262 continue; 1263 } 1264 1265 nb_args = def->nb_iargs + def->nb_oargs; 1266 if (nb_args == 0) { 1267 continue; 1268 } 1269 1270 tdefs = tcg_target_op_def(op); 1271 /* Missing TCGTargetOpDef entry. */ 1272 tcg_debug_assert(tdefs != NULL); 1273 1274 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32); 1275 for (i = 0; i < nb_args; i++) { 1276 const char *ct_str = tdefs->args_ct_str[i]; 1277 /* Incomplete TCGTargetOpDef entry. */ 1278 tcg_debug_assert(ct_str != NULL); 1279 1280 tcg_regset_clear(def->args_ct[i].u.regs); 1281 def->args_ct[i].ct = 0; 1282 while (*ct_str != '\0') { 1283 switch(*ct_str) { 1284 case '0' ... '9': 1285 { 1286 int oarg = *ct_str - '0'; 1287 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); 1288 tcg_debug_assert(oarg < def->nb_oargs); 1289 tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG); 1290 /* TCG_CT_ALIAS is for the output arguments. 1291 The input is tagged with TCG_CT_IALIAS. */ 1292 def->args_ct[i] = def->args_ct[oarg]; 1293 def->args_ct[oarg].ct |= TCG_CT_ALIAS; 1294 def->args_ct[oarg].alias_index = i; 1295 def->args_ct[i].ct |= TCG_CT_IALIAS; 1296 def->args_ct[i].alias_index = oarg; 1297 } 1298 ct_str++; 1299 break; 1300 case '&': 1301 def->args_ct[i].ct |= TCG_CT_NEWREG; 1302 ct_str++; 1303 break; 1304 case 'i': 1305 def->args_ct[i].ct |= TCG_CT_CONST; 1306 ct_str++; 1307 break; 1308 default: 1309 ct_str = target_parse_constraint(&def->args_ct[i], 1310 ct_str, type); 1311 /* Typo in TCGTargetOpDef constraint. */ 1312 tcg_debug_assert(ct_str != NULL); 1313 } 1314 } 1315 } 1316 1317 /* TCGTargetOpDef entry with too much information? */ 1318 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 1319 1320 /* sort the constraints (XXX: this is just an heuristic) */ 1321 sort_constraints(def, 0, def->nb_oargs); 1322 sort_constraints(def, def->nb_oargs, def->nb_iargs); 1323 } 1324 } 1325 1326 void tcg_op_remove(TCGContext *s, TCGOp *op) 1327 { 1328 int next = op->next; 1329 int prev = op->prev; 1330 1331 /* We should never attempt to remove the list terminator. */ 1332 tcg_debug_assert(op != &s->gen_op_buf[0]); 1333 1334 s->gen_op_buf[next].prev = prev; 1335 s->gen_op_buf[prev].next = next; 1336 1337 memset(op, 0, sizeof(*op)); 1338 1339 #ifdef CONFIG_PROFILER 1340 s->del_op_count++; 1341 #endif 1342 } 1343 1344 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 1345 TCGOpcode opc, int nargs) 1346 { 1347 int oi = s->gen_next_op_idx; 1348 int pi = s->gen_next_parm_idx; 1349 int prev = old_op->prev; 1350 int next = old_op - s->gen_op_buf; 1351 TCGOp *new_op; 1352 1353 tcg_debug_assert(oi < OPC_BUF_SIZE); 1354 tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE); 1355 s->gen_next_op_idx = oi + 1; 1356 s->gen_next_parm_idx = pi + nargs; 1357 1358 new_op = &s->gen_op_buf[oi]; 1359 *new_op = (TCGOp){ 1360 .opc = opc, 1361 .args = pi, 1362 .prev = prev, 1363 .next = next 1364 }; 1365 s->gen_op_buf[prev].next = oi; 1366 old_op->prev = oi; 1367 1368 return new_op; 1369 } 1370 1371 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 1372 TCGOpcode opc, int nargs) 1373 { 1374 int oi = s->gen_next_op_idx; 1375 int pi = s->gen_next_parm_idx; 1376 int prev = old_op - s->gen_op_buf; 1377 int next = old_op->next; 1378 TCGOp *new_op; 1379 1380 tcg_debug_assert(oi < OPC_BUF_SIZE); 1381 tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE); 1382 s->gen_next_op_idx = oi + 1; 1383 s->gen_next_parm_idx = pi + nargs; 1384 1385 new_op = &s->gen_op_buf[oi]; 1386 *new_op = (TCGOp){ 1387 .opc = opc, 1388 .args = pi, 1389 .prev = prev, 1390 .next = next 1391 }; 1392 s->gen_op_buf[next].prev = oi; 1393 old_op->next = oi; 1394 1395 return new_op; 1396 } 1397 1398 #define TS_DEAD 1 1399 #define TS_MEM 2 1400 1401 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 1402 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 1403 1404 /* liveness analysis: end of function: all temps are dead, and globals 1405 should be in memory. */ 1406 static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state) 1407 { 1408 memset(temp_state, TS_DEAD | TS_MEM, s->nb_globals); 1409 memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals); 1410 } 1411 1412 /* liveness analysis: end of basic block: all temps are dead, globals 1413 and local temps should be in memory. */ 1414 static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state) 1415 { 1416 int i, n; 1417 1418 tcg_la_func_end(s, temp_state); 1419 for (i = s->nb_globals, n = s->nb_temps; i < n; i++) { 1420 if (s->temps[i].temp_local) { 1421 temp_state[i] |= TS_MEM; 1422 } 1423 } 1424 } 1425 1426 /* Liveness analysis : update the opc_arg_life array to tell if a 1427 given input arguments is dead. Instructions updating dead 1428 temporaries are removed. */ 1429 static void liveness_pass_1(TCGContext *s, uint8_t *temp_state) 1430 { 1431 int nb_globals = s->nb_globals; 1432 int oi, oi_prev; 1433 1434 tcg_la_func_end(s, temp_state); 1435 1436 for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) { 1437 int i, nb_iargs, nb_oargs; 1438 TCGOpcode opc_new, opc_new2; 1439 bool have_opc_new2; 1440 TCGLifeData arg_life = 0; 1441 TCGArg arg; 1442 1443 TCGOp * const op = &s->gen_op_buf[oi]; 1444 TCGArg * const args = &s->gen_opparam_buf[op->args]; 1445 TCGOpcode opc = op->opc; 1446 const TCGOpDef *def = &tcg_op_defs[opc]; 1447 1448 oi_prev = op->prev; 1449 1450 switch (opc) { 1451 case INDEX_op_call: 1452 { 1453 int call_flags; 1454 1455 nb_oargs = op->callo; 1456 nb_iargs = op->calli; 1457 call_flags = args[nb_oargs + nb_iargs + 1]; 1458 1459 /* pure functions can be removed if their result is unused */ 1460 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 1461 for (i = 0; i < nb_oargs; i++) { 1462 arg = args[i]; 1463 if (temp_state[arg] != TS_DEAD) { 1464 goto do_not_remove_call; 1465 } 1466 } 1467 goto do_remove; 1468 } else { 1469 do_not_remove_call: 1470 1471 /* output args are dead */ 1472 for (i = 0; i < nb_oargs; i++) { 1473 arg = args[i]; 1474 if (temp_state[arg] & TS_DEAD) { 1475 arg_life |= DEAD_ARG << i; 1476 } 1477 if (temp_state[arg] & TS_MEM) { 1478 arg_life |= SYNC_ARG << i; 1479 } 1480 temp_state[arg] = TS_DEAD; 1481 } 1482 1483 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 1484 TCG_CALL_NO_READ_GLOBALS))) { 1485 /* globals should go back to memory */ 1486 memset(temp_state, TS_DEAD | TS_MEM, nb_globals); 1487 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 1488 /* globals should be synced to memory */ 1489 for (i = 0; i < nb_globals; i++) { 1490 temp_state[i] |= TS_MEM; 1491 } 1492 } 1493 1494 /* record arguments that die in this helper */ 1495 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 1496 arg = args[i]; 1497 if (arg != TCG_CALL_DUMMY_ARG) { 1498 if (temp_state[arg] & TS_DEAD) { 1499 arg_life |= DEAD_ARG << i; 1500 } 1501 } 1502 } 1503 /* input arguments are live for preceding opcodes */ 1504 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 1505 arg = args[i]; 1506 if (arg != TCG_CALL_DUMMY_ARG) { 1507 temp_state[arg] &= ~TS_DEAD; 1508 } 1509 } 1510 } 1511 } 1512 break; 1513 case INDEX_op_insn_start: 1514 break; 1515 case INDEX_op_discard: 1516 /* mark the temporary as dead */ 1517 temp_state[args[0]] = TS_DEAD; 1518 break; 1519 1520 case INDEX_op_add2_i32: 1521 opc_new = INDEX_op_add_i32; 1522 goto do_addsub2; 1523 case INDEX_op_sub2_i32: 1524 opc_new = INDEX_op_sub_i32; 1525 goto do_addsub2; 1526 case INDEX_op_add2_i64: 1527 opc_new = INDEX_op_add_i64; 1528 goto do_addsub2; 1529 case INDEX_op_sub2_i64: 1530 opc_new = INDEX_op_sub_i64; 1531 do_addsub2: 1532 nb_iargs = 4; 1533 nb_oargs = 2; 1534 /* Test if the high part of the operation is dead, but not 1535 the low part. The result can be optimized to a simple 1536 add or sub. This happens often for x86_64 guest when the 1537 cpu mode is set to 32 bit. */ 1538 if (temp_state[args[1]] == TS_DEAD) { 1539 if (temp_state[args[0]] == TS_DEAD) { 1540 goto do_remove; 1541 } 1542 /* Replace the opcode and adjust the args in place, 1543 leaving 3 unused args at the end. */ 1544 op->opc = opc = opc_new; 1545 args[1] = args[2]; 1546 args[2] = args[4]; 1547 /* Fall through and mark the single-word operation live. */ 1548 nb_iargs = 2; 1549 nb_oargs = 1; 1550 } 1551 goto do_not_remove; 1552 1553 case INDEX_op_mulu2_i32: 1554 opc_new = INDEX_op_mul_i32; 1555 opc_new2 = INDEX_op_muluh_i32; 1556 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 1557 goto do_mul2; 1558 case INDEX_op_muls2_i32: 1559 opc_new = INDEX_op_mul_i32; 1560 opc_new2 = INDEX_op_mulsh_i32; 1561 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 1562 goto do_mul2; 1563 case INDEX_op_mulu2_i64: 1564 opc_new = INDEX_op_mul_i64; 1565 opc_new2 = INDEX_op_muluh_i64; 1566 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 1567 goto do_mul2; 1568 case INDEX_op_muls2_i64: 1569 opc_new = INDEX_op_mul_i64; 1570 opc_new2 = INDEX_op_mulsh_i64; 1571 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 1572 goto do_mul2; 1573 do_mul2: 1574 nb_iargs = 2; 1575 nb_oargs = 2; 1576 if (temp_state[args[1]] == TS_DEAD) { 1577 if (temp_state[args[0]] == TS_DEAD) { 1578 /* Both parts of the operation are dead. */ 1579 goto do_remove; 1580 } 1581 /* The high part of the operation is dead; generate the low. */ 1582 op->opc = opc = opc_new; 1583 args[1] = args[2]; 1584 args[2] = args[3]; 1585 } else if (temp_state[args[0]] == TS_DEAD && have_opc_new2) { 1586 /* The low part of the operation is dead; generate the high. */ 1587 op->opc = opc = opc_new2; 1588 args[0] = args[1]; 1589 args[1] = args[2]; 1590 args[2] = args[3]; 1591 } else { 1592 goto do_not_remove; 1593 } 1594 /* Mark the single-word operation live. */ 1595 nb_oargs = 1; 1596 goto do_not_remove; 1597 1598 default: 1599 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 1600 nb_iargs = def->nb_iargs; 1601 nb_oargs = def->nb_oargs; 1602 1603 /* Test if the operation can be removed because all 1604 its outputs are dead. We assume that nb_oargs == 0 1605 implies side effects */ 1606 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 1607 for (i = 0; i < nb_oargs; i++) { 1608 if (temp_state[args[i]] != TS_DEAD) { 1609 goto do_not_remove; 1610 } 1611 } 1612 do_remove: 1613 tcg_op_remove(s, op); 1614 } else { 1615 do_not_remove: 1616 /* output args are dead */ 1617 for (i = 0; i < nb_oargs; i++) { 1618 arg = args[i]; 1619 if (temp_state[arg] & TS_DEAD) { 1620 arg_life |= DEAD_ARG << i; 1621 } 1622 if (temp_state[arg] & TS_MEM) { 1623 arg_life |= SYNC_ARG << i; 1624 } 1625 temp_state[arg] = TS_DEAD; 1626 } 1627 1628 /* if end of basic block, update */ 1629 if (def->flags & TCG_OPF_BB_END) { 1630 tcg_la_bb_end(s, temp_state); 1631 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 1632 /* globals should be synced to memory */ 1633 for (i = 0; i < nb_globals; i++) { 1634 temp_state[i] |= TS_MEM; 1635 } 1636 } 1637 1638 /* record arguments that die in this opcode */ 1639 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 1640 arg = args[i]; 1641 if (temp_state[arg] & TS_DEAD) { 1642 arg_life |= DEAD_ARG << i; 1643 } 1644 } 1645 /* input arguments are live for preceding opcodes */ 1646 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 1647 temp_state[args[i]] &= ~TS_DEAD; 1648 } 1649 } 1650 break; 1651 } 1652 op->life = arg_life; 1653 } 1654 } 1655 1656 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 1657 static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state) 1658 { 1659 int nb_globals = s->nb_globals; 1660 int16_t *dir_temps; 1661 int i, oi, oi_next; 1662 bool changes = false; 1663 1664 dir_temps = tcg_malloc(nb_globals * sizeof(int16_t)); 1665 memset(dir_temps, 0, nb_globals * sizeof(int16_t)); 1666 1667 /* Create a temporary for each indirect global. */ 1668 for (i = 0; i < nb_globals; ++i) { 1669 TCGTemp *its = &s->temps[i]; 1670 if (its->indirect_reg) { 1671 TCGTemp *dts = tcg_temp_alloc(s); 1672 dts->type = its->type; 1673 dts->base_type = its->base_type; 1674 dir_temps[i] = temp_idx(s, dts); 1675 } 1676 } 1677 1678 memset(temp_state, TS_DEAD, nb_globals); 1679 1680 for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) { 1681 TCGOp *op = &s->gen_op_buf[oi]; 1682 TCGArg *args = &s->gen_opparam_buf[op->args]; 1683 TCGOpcode opc = op->opc; 1684 const TCGOpDef *def = &tcg_op_defs[opc]; 1685 TCGLifeData arg_life = op->life; 1686 int nb_iargs, nb_oargs, call_flags; 1687 TCGArg arg, dir; 1688 1689 oi_next = op->next; 1690 1691 if (opc == INDEX_op_call) { 1692 nb_oargs = op->callo; 1693 nb_iargs = op->calli; 1694 call_flags = args[nb_oargs + nb_iargs + 1]; 1695 } else { 1696 nb_iargs = def->nb_iargs; 1697 nb_oargs = def->nb_oargs; 1698 1699 /* Set flags similar to how calls require. */ 1700 if (def->flags & TCG_OPF_BB_END) { 1701 /* Like writing globals: save_globals */ 1702 call_flags = 0; 1703 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 1704 /* Like reading globals: sync_globals */ 1705 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 1706 } else { 1707 /* No effect on globals. */ 1708 call_flags = (TCG_CALL_NO_READ_GLOBALS | 1709 TCG_CALL_NO_WRITE_GLOBALS); 1710 } 1711 } 1712 1713 /* Make sure that input arguments are available. */ 1714 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 1715 arg = args[i]; 1716 /* Note this unsigned test catches TCG_CALL_ARG_DUMMY too. */ 1717 if (arg < nb_globals) { 1718 dir = dir_temps[arg]; 1719 if (dir != 0 && temp_state[arg] == TS_DEAD) { 1720 TCGTemp *its = &s->temps[arg]; 1721 TCGOpcode lopc = (its->type == TCG_TYPE_I32 1722 ? INDEX_op_ld_i32 1723 : INDEX_op_ld_i64); 1724 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); 1725 TCGArg *largs = &s->gen_opparam_buf[lop->args]; 1726 1727 largs[0] = dir; 1728 largs[1] = temp_idx(s, its->mem_base); 1729 largs[2] = its->mem_offset; 1730 1731 /* Loaded, but synced with memory. */ 1732 temp_state[arg] = TS_MEM; 1733 } 1734 } 1735 } 1736 1737 /* Perform input replacement, and mark inputs that became dead. 1738 No action is required except keeping temp_state up to date 1739 so that we reload when needed. */ 1740 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 1741 arg = args[i]; 1742 if (arg < nb_globals) { 1743 dir = dir_temps[arg]; 1744 if (dir != 0) { 1745 args[i] = dir; 1746 changes = true; 1747 if (IS_DEAD_ARG(i)) { 1748 temp_state[arg] = TS_DEAD; 1749 } 1750 } 1751 } 1752 } 1753 1754 /* Liveness analysis should ensure that the following are 1755 all correct, for call sites and basic block end points. */ 1756 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 1757 /* Nothing to do */ 1758 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 1759 for (i = 0; i < nb_globals; ++i) { 1760 /* Liveness should see that globals are synced back, 1761 that is, either TS_DEAD or TS_MEM. */ 1762 tcg_debug_assert(dir_temps[i] == 0 1763 || temp_state[i] != 0); 1764 } 1765 } else { 1766 for (i = 0; i < nb_globals; ++i) { 1767 /* Liveness should see that globals are saved back, 1768 that is, TS_DEAD, waiting to be reloaded. */ 1769 tcg_debug_assert(dir_temps[i] == 0 1770 || temp_state[i] == TS_DEAD); 1771 } 1772 } 1773 1774 /* Outputs become available. */ 1775 for (i = 0; i < nb_oargs; i++) { 1776 arg = args[i]; 1777 if (arg >= nb_globals) { 1778 continue; 1779 } 1780 dir = dir_temps[arg]; 1781 if (dir == 0) { 1782 continue; 1783 } 1784 args[i] = dir; 1785 changes = true; 1786 1787 /* The output is now live and modified. */ 1788 temp_state[arg] = 0; 1789 1790 /* Sync outputs upon their last write. */ 1791 if (NEED_SYNC_ARG(i)) { 1792 TCGTemp *its = &s->temps[arg]; 1793 TCGOpcode sopc = (its->type == TCG_TYPE_I32 1794 ? INDEX_op_st_i32 1795 : INDEX_op_st_i64); 1796 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 1797 TCGArg *sargs = &s->gen_opparam_buf[sop->args]; 1798 1799 sargs[0] = dir; 1800 sargs[1] = temp_idx(s, its->mem_base); 1801 sargs[2] = its->mem_offset; 1802 1803 temp_state[arg] = TS_MEM; 1804 } 1805 /* Drop outputs that are dead. */ 1806 if (IS_DEAD_ARG(i)) { 1807 temp_state[arg] = TS_DEAD; 1808 } 1809 } 1810 } 1811 1812 return changes; 1813 } 1814 1815 #ifdef CONFIG_DEBUG_TCG 1816 static void dump_regs(TCGContext *s) 1817 { 1818 TCGTemp *ts; 1819 int i; 1820 char buf[64]; 1821 1822 for(i = 0; i < s->nb_temps; i++) { 1823 ts = &s->temps[i]; 1824 printf(" %10s: ", tcg_get_arg_str_idx(s, buf, sizeof(buf), i)); 1825 switch(ts->val_type) { 1826 case TEMP_VAL_REG: 1827 printf("%s", tcg_target_reg_names[ts->reg]); 1828 break; 1829 case TEMP_VAL_MEM: 1830 printf("%d(%s)", (int)ts->mem_offset, 1831 tcg_target_reg_names[ts->mem_base->reg]); 1832 break; 1833 case TEMP_VAL_CONST: 1834 printf("$0x%" TCG_PRIlx, ts->val); 1835 break; 1836 case TEMP_VAL_DEAD: 1837 printf("D"); 1838 break; 1839 default: 1840 printf("???"); 1841 break; 1842 } 1843 printf("\n"); 1844 } 1845 1846 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 1847 if (s->reg_to_temp[i] != NULL) { 1848 printf("%s: %s\n", 1849 tcg_target_reg_names[i], 1850 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); 1851 } 1852 } 1853 } 1854 1855 static void check_regs(TCGContext *s) 1856 { 1857 int reg; 1858 int k; 1859 TCGTemp *ts; 1860 char buf[64]; 1861 1862 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { 1863 ts = s->reg_to_temp[reg]; 1864 if (ts != NULL) { 1865 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { 1866 printf("Inconsistency for register %s:\n", 1867 tcg_target_reg_names[reg]); 1868 goto fail; 1869 } 1870 } 1871 } 1872 for (k = 0; k < s->nb_temps; k++) { 1873 ts = &s->temps[k]; 1874 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg 1875 && s->reg_to_temp[ts->reg] != ts) { 1876 printf("Inconsistency for temp %s:\n", 1877 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 1878 fail: 1879 printf("reg state:\n"); 1880 dump_regs(s); 1881 tcg_abort(); 1882 } 1883 } 1884 } 1885 #endif 1886 1887 static void temp_allocate_frame(TCGContext *s, int temp) 1888 { 1889 TCGTemp *ts; 1890 ts = &s->temps[temp]; 1891 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64) 1892 /* Sparc64 stack is accessed with offset of 2047 */ 1893 s->current_frame_offset = (s->current_frame_offset + 1894 (tcg_target_long)sizeof(tcg_target_long) - 1) & 1895 ~(sizeof(tcg_target_long) - 1); 1896 #endif 1897 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) > 1898 s->frame_end) { 1899 tcg_abort(); 1900 } 1901 ts->mem_offset = s->current_frame_offset; 1902 ts->mem_base = s->frame_temp; 1903 ts->mem_allocated = 1; 1904 s->current_frame_offset += sizeof(tcg_target_long); 1905 } 1906 1907 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet); 1908 1909 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 1910 mark it free; otherwise mark it dead. */ 1911 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 1912 { 1913 if (ts->fixed_reg) { 1914 return; 1915 } 1916 if (ts->val_type == TEMP_VAL_REG) { 1917 s->reg_to_temp[ts->reg] = NULL; 1918 } 1919 ts->val_type = (free_or_dead < 0 1920 || ts->temp_local 1921 || temp_idx(s, ts) < s->nb_globals 1922 ? TEMP_VAL_MEM : TEMP_VAL_DEAD); 1923 } 1924 1925 /* Mark a temporary as dead. */ 1926 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 1927 { 1928 temp_free_or_dead(s, ts, 1); 1929 } 1930 1931 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 1932 registers needs to be allocated to store a constant. If 'free_or_dead' 1933 is non-zero, subsequently release the temporary; if it is positive, the 1934 temp is dead; if it is negative, the temp is free. */ 1935 static void temp_sync(TCGContext *s, TCGTemp *ts, 1936 TCGRegSet allocated_regs, int free_or_dead) 1937 { 1938 if (ts->fixed_reg) { 1939 return; 1940 } 1941 if (!ts->mem_coherent) { 1942 if (!ts->mem_allocated) { 1943 temp_allocate_frame(s, temp_idx(s, ts)); 1944 } 1945 switch (ts->val_type) { 1946 case TEMP_VAL_CONST: 1947 /* If we're going to free the temp immediately, then we won't 1948 require it later in a register, so attempt to store the 1949 constant to memory directly. */ 1950 if (free_or_dead 1951 && tcg_out_sti(s, ts->type, ts->val, 1952 ts->mem_base->reg, ts->mem_offset)) { 1953 break; 1954 } 1955 temp_load(s, ts, tcg_target_available_regs[ts->type], 1956 allocated_regs); 1957 /* fallthrough */ 1958 1959 case TEMP_VAL_REG: 1960 tcg_out_st(s, ts->type, ts->reg, 1961 ts->mem_base->reg, ts->mem_offset); 1962 break; 1963 1964 case TEMP_VAL_MEM: 1965 break; 1966 1967 case TEMP_VAL_DEAD: 1968 default: 1969 tcg_abort(); 1970 } 1971 ts->mem_coherent = 1; 1972 } 1973 if (free_or_dead) { 1974 temp_free_or_dead(s, ts, free_or_dead); 1975 } 1976 } 1977 1978 /* free register 'reg' by spilling the corresponding temporary if necessary */ 1979 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 1980 { 1981 TCGTemp *ts = s->reg_to_temp[reg]; 1982 if (ts != NULL) { 1983 temp_sync(s, ts, allocated_regs, -1); 1984 } 1985 } 1986 1987 /* Allocate a register belonging to reg1 & ~reg2 */ 1988 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs, 1989 TCGRegSet allocated_regs, bool rev) 1990 { 1991 int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 1992 const int *order; 1993 TCGReg reg; 1994 TCGRegSet reg_ct; 1995 1996 tcg_regset_andnot(reg_ct, desired_regs, allocated_regs); 1997 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 1998 1999 /* first try free registers */ 2000 for(i = 0; i < n; i++) { 2001 reg = order[i]; 2002 if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL) 2003 return reg; 2004 } 2005 2006 /* XXX: do better spill choice */ 2007 for(i = 0; i < n; i++) { 2008 reg = order[i]; 2009 if (tcg_regset_test_reg(reg_ct, reg)) { 2010 tcg_reg_free(s, reg, allocated_regs); 2011 return reg; 2012 } 2013 } 2014 2015 tcg_abort(); 2016 } 2017 2018 /* Make sure the temporary is in a register. If needed, allocate the register 2019 from DESIRED while avoiding ALLOCATED. */ 2020 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 2021 TCGRegSet allocated_regs) 2022 { 2023 TCGReg reg; 2024 2025 switch (ts->val_type) { 2026 case TEMP_VAL_REG: 2027 return; 2028 case TEMP_VAL_CONST: 2029 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base); 2030 tcg_out_movi(s, ts->type, reg, ts->val); 2031 ts->mem_coherent = 0; 2032 break; 2033 case TEMP_VAL_MEM: 2034 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base); 2035 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 2036 ts->mem_coherent = 1; 2037 break; 2038 case TEMP_VAL_DEAD: 2039 default: 2040 tcg_abort(); 2041 } 2042 ts->reg = reg; 2043 ts->val_type = TEMP_VAL_REG; 2044 s->reg_to_temp[reg] = ts; 2045 } 2046 2047 /* Save a temporary to memory. 'allocated_regs' is used in case a 2048 temporary registers needs to be allocated to store a constant. */ 2049 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 2050 { 2051 /* The liveness analysis already ensures that globals are back 2052 in memory. Keep an tcg_debug_assert for safety. */ 2053 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg); 2054 } 2055 2056 /* save globals to their canonical location and assume they can be 2057 modified be the following code. 'allocated_regs' is used in case a 2058 temporary registers needs to be allocated to store a constant. */ 2059 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 2060 { 2061 int i; 2062 2063 for (i = 0; i < s->nb_globals; i++) { 2064 temp_save(s, &s->temps[i], allocated_regs); 2065 } 2066 } 2067 2068 /* sync globals to their canonical location and assume they can be 2069 read by the following code. 'allocated_regs' is used in case a 2070 temporary registers needs to be allocated to store a constant. */ 2071 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 2072 { 2073 int i; 2074 2075 for (i = 0; i < s->nb_globals; i++) { 2076 TCGTemp *ts = &s->temps[i]; 2077 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 2078 || ts->fixed_reg 2079 || ts->mem_coherent); 2080 } 2081 } 2082 2083 /* at the end of a basic block, we assume all temporaries are dead and 2084 all globals are stored at their canonical location. */ 2085 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 2086 { 2087 int i; 2088 2089 for (i = s->nb_globals; i < s->nb_temps; i++) { 2090 TCGTemp *ts = &s->temps[i]; 2091 if (ts->temp_local) { 2092 temp_save(s, ts, allocated_regs); 2093 } else { 2094 /* The liveness analysis already ensures that temps are dead. 2095 Keep an tcg_debug_assert for safety. */ 2096 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 2097 } 2098 } 2099 2100 save_globals(s, allocated_regs); 2101 } 2102 2103 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 2104 tcg_target_ulong val, TCGLifeData arg_life) 2105 { 2106 if (ots->fixed_reg) { 2107 /* For fixed registers, we do not do any constant propagation. */ 2108 tcg_out_movi(s, ots->type, ots->reg, val); 2109 return; 2110 } 2111 2112 /* The movi is not explicitly generated here. */ 2113 if (ots->val_type == TEMP_VAL_REG) { 2114 s->reg_to_temp[ots->reg] = NULL; 2115 } 2116 ots->val_type = TEMP_VAL_CONST; 2117 ots->val = val; 2118 ots->mem_coherent = 0; 2119 if (NEED_SYNC_ARG(0)) { 2120 temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0)); 2121 } else if (IS_DEAD_ARG(0)) { 2122 temp_dead(s, ots); 2123 } 2124 } 2125 2126 static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args, 2127 TCGLifeData arg_life) 2128 { 2129 TCGTemp *ots = &s->temps[args[0]]; 2130 tcg_target_ulong val = args[1]; 2131 2132 tcg_reg_alloc_do_movi(s, ots, val, arg_life); 2133 } 2134 2135 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, 2136 const TCGArg *args, TCGLifeData arg_life) 2137 { 2138 TCGRegSet allocated_regs; 2139 TCGTemp *ts, *ots; 2140 TCGType otype, itype; 2141 2142 tcg_regset_set(allocated_regs, s->reserved_regs); 2143 ots = &s->temps[args[0]]; 2144 ts = &s->temps[args[1]]; 2145 2146 /* Note that otype != itype for no-op truncation. */ 2147 otype = ots->type; 2148 itype = ts->type; 2149 2150 if (ts->val_type == TEMP_VAL_CONST) { 2151 /* propagate constant or generate sti */ 2152 tcg_target_ulong val = ts->val; 2153 if (IS_DEAD_ARG(1)) { 2154 temp_dead(s, ts); 2155 } 2156 tcg_reg_alloc_do_movi(s, ots, val, arg_life); 2157 return; 2158 } 2159 2160 /* If the source value is in memory we're going to be forced 2161 to have it in a register in order to perform the copy. Copy 2162 the SOURCE value into its own register first, that way we 2163 don't have to reload SOURCE the next time it is used. */ 2164 if (ts->val_type == TEMP_VAL_MEM) { 2165 temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs); 2166 } 2167 2168 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 2169 if (IS_DEAD_ARG(0) && !ots->fixed_reg) { 2170 /* mov to a non-saved dead register makes no sense (even with 2171 liveness analysis disabled). */ 2172 tcg_debug_assert(NEED_SYNC_ARG(0)); 2173 if (!ots->mem_allocated) { 2174 temp_allocate_frame(s, args[0]); 2175 } 2176 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); 2177 if (IS_DEAD_ARG(1)) { 2178 temp_dead(s, ts); 2179 } 2180 temp_dead(s, ots); 2181 } else { 2182 if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) { 2183 /* the mov can be suppressed */ 2184 if (ots->val_type == TEMP_VAL_REG) { 2185 s->reg_to_temp[ots->reg] = NULL; 2186 } 2187 ots->reg = ts->reg; 2188 temp_dead(s, ts); 2189 } else { 2190 if (ots->val_type != TEMP_VAL_REG) { 2191 /* When allocating a new register, make sure to not spill the 2192 input one. */ 2193 tcg_regset_set_reg(allocated_regs, ts->reg); 2194 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 2195 allocated_regs, ots->indirect_base); 2196 } 2197 tcg_out_mov(s, otype, ots->reg, ts->reg); 2198 } 2199 ots->val_type = TEMP_VAL_REG; 2200 ots->mem_coherent = 0; 2201 s->reg_to_temp[ots->reg] = ots; 2202 if (NEED_SYNC_ARG(0)) { 2203 temp_sync(s, ots, allocated_regs, 0); 2204 } 2205 } 2206 } 2207 2208 static void tcg_reg_alloc_op(TCGContext *s, 2209 const TCGOpDef *def, TCGOpcode opc, 2210 const TCGArg *args, TCGLifeData arg_life) 2211 { 2212 TCGRegSet i_allocated_regs; 2213 TCGRegSet o_allocated_regs; 2214 int i, k, nb_iargs, nb_oargs; 2215 TCGReg reg; 2216 TCGArg arg; 2217 const TCGArgConstraint *arg_ct; 2218 TCGTemp *ts; 2219 TCGArg new_args[TCG_MAX_OP_ARGS]; 2220 int const_args[TCG_MAX_OP_ARGS]; 2221 2222 nb_oargs = def->nb_oargs; 2223 nb_iargs = def->nb_iargs; 2224 2225 /* copy constants */ 2226 memcpy(new_args + nb_oargs + nb_iargs, 2227 args + nb_oargs + nb_iargs, 2228 sizeof(TCGArg) * def->nb_cargs); 2229 2230 tcg_regset_set(i_allocated_regs, s->reserved_regs); 2231 tcg_regset_set(o_allocated_regs, s->reserved_regs); 2232 2233 /* satisfy input constraints */ 2234 for(k = 0; k < nb_iargs; k++) { 2235 i = def->sorted_args[nb_oargs + k]; 2236 arg = args[i]; 2237 arg_ct = &def->args_ct[i]; 2238 ts = &s->temps[arg]; 2239 2240 if (ts->val_type == TEMP_VAL_CONST 2241 && tcg_target_const_match(ts->val, ts->type, arg_ct)) { 2242 /* constant is OK for instruction */ 2243 const_args[i] = 1; 2244 new_args[i] = ts->val; 2245 goto iarg_end; 2246 } 2247 2248 temp_load(s, ts, arg_ct->u.regs, i_allocated_regs); 2249 2250 if (arg_ct->ct & TCG_CT_IALIAS) { 2251 if (ts->fixed_reg) { 2252 /* if fixed register, we must allocate a new register 2253 if the alias is not the same register */ 2254 if (arg != args[arg_ct->alias_index]) 2255 goto allocate_in_reg; 2256 } else { 2257 /* if the input is aliased to an output and if it is 2258 not dead after the instruction, we must allocate 2259 a new register and move it */ 2260 if (!IS_DEAD_ARG(i)) { 2261 goto allocate_in_reg; 2262 } 2263 /* check if the current register has already been allocated 2264 for another input aliased to an output */ 2265 int k2, i2; 2266 for (k2 = 0 ; k2 < k ; k2++) { 2267 i2 = def->sorted_args[nb_oargs + k2]; 2268 if ((def->args_ct[i2].ct & TCG_CT_IALIAS) && 2269 (new_args[i2] == ts->reg)) { 2270 goto allocate_in_reg; 2271 } 2272 } 2273 } 2274 } 2275 reg = ts->reg; 2276 if (tcg_regset_test_reg(arg_ct->u.regs, reg)) { 2277 /* nothing to do : the constraint is satisfied */ 2278 } else { 2279 allocate_in_reg: 2280 /* allocate a new register matching the constraint 2281 and move the temporary register into it */ 2282 reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs, 2283 ts->indirect_base); 2284 tcg_out_mov(s, ts->type, reg, ts->reg); 2285 } 2286 new_args[i] = reg; 2287 const_args[i] = 0; 2288 tcg_regset_set_reg(i_allocated_regs, reg); 2289 iarg_end: ; 2290 } 2291 2292 /* mark dead temporaries and free the associated registers */ 2293 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2294 if (IS_DEAD_ARG(i)) { 2295 temp_dead(s, &s->temps[args[i]]); 2296 } 2297 } 2298 2299 if (def->flags & TCG_OPF_BB_END) { 2300 tcg_reg_alloc_bb_end(s, i_allocated_regs); 2301 } else { 2302 if (def->flags & TCG_OPF_CALL_CLOBBER) { 2303 /* XXX: permit generic clobber register list ? */ 2304 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 2305 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 2306 tcg_reg_free(s, i, i_allocated_regs); 2307 } 2308 } 2309 } 2310 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2311 /* sync globals if the op has side effects and might trigger 2312 an exception. */ 2313 sync_globals(s, i_allocated_regs); 2314 } 2315 2316 /* satisfy the output constraints */ 2317 for(k = 0; k < nb_oargs; k++) { 2318 i = def->sorted_args[k]; 2319 arg = args[i]; 2320 arg_ct = &def->args_ct[i]; 2321 ts = &s->temps[arg]; 2322 if ((arg_ct->ct & TCG_CT_ALIAS) 2323 && !const_args[arg_ct->alias_index]) { 2324 reg = new_args[arg_ct->alias_index]; 2325 } else if (arg_ct->ct & TCG_CT_NEWREG) { 2326 reg = tcg_reg_alloc(s, arg_ct->u.regs, 2327 i_allocated_regs | o_allocated_regs, 2328 ts->indirect_base); 2329 } else { 2330 /* if fixed register, we try to use it */ 2331 reg = ts->reg; 2332 if (ts->fixed_reg && 2333 tcg_regset_test_reg(arg_ct->u.regs, reg)) { 2334 goto oarg_end; 2335 } 2336 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs, 2337 ts->indirect_base); 2338 } 2339 tcg_regset_set_reg(o_allocated_regs, reg); 2340 /* if a fixed register is used, then a move will be done afterwards */ 2341 if (!ts->fixed_reg) { 2342 if (ts->val_type == TEMP_VAL_REG) { 2343 s->reg_to_temp[ts->reg] = NULL; 2344 } 2345 ts->val_type = TEMP_VAL_REG; 2346 ts->reg = reg; 2347 /* temp value is modified, so the value kept in memory is 2348 potentially not the same */ 2349 ts->mem_coherent = 0; 2350 s->reg_to_temp[reg] = ts; 2351 } 2352 oarg_end: 2353 new_args[i] = reg; 2354 } 2355 } 2356 2357 /* emit instruction */ 2358 tcg_out_op(s, opc, new_args, const_args); 2359 2360 /* move the outputs in the correct register if needed */ 2361 for(i = 0; i < nb_oargs; i++) { 2362 ts = &s->temps[args[i]]; 2363 reg = new_args[i]; 2364 if (ts->fixed_reg && ts->reg != reg) { 2365 tcg_out_mov(s, ts->type, ts->reg, reg); 2366 } 2367 if (NEED_SYNC_ARG(i)) { 2368 temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i)); 2369 } else if (IS_DEAD_ARG(i)) { 2370 temp_dead(s, ts); 2371 } 2372 } 2373 } 2374 2375 #ifdef TCG_TARGET_STACK_GROWSUP 2376 #define STACK_DIR(x) (-(x)) 2377 #else 2378 #define STACK_DIR(x) (x) 2379 #endif 2380 2381 static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs, 2382 const TCGArg * const args, TCGLifeData arg_life) 2383 { 2384 int flags, nb_regs, i; 2385 TCGReg reg; 2386 TCGArg arg; 2387 TCGTemp *ts; 2388 intptr_t stack_offset; 2389 size_t call_stack_size; 2390 tcg_insn_unit *func_addr; 2391 int allocate_args; 2392 TCGRegSet allocated_regs; 2393 2394 func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs]; 2395 flags = args[nb_oargs + nb_iargs + 1]; 2396 2397 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 2398 if (nb_regs > nb_iargs) { 2399 nb_regs = nb_iargs; 2400 } 2401 2402 /* assign stack slots first */ 2403 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); 2404 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 2405 ~(TCG_TARGET_STACK_ALIGN - 1); 2406 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); 2407 if (allocate_args) { 2408 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed, 2409 preallocate call stack */ 2410 tcg_abort(); 2411 } 2412 2413 stack_offset = TCG_TARGET_CALL_STACK_OFFSET; 2414 for(i = nb_regs; i < nb_iargs; i++) { 2415 arg = args[nb_oargs + i]; 2416 #ifdef TCG_TARGET_STACK_GROWSUP 2417 stack_offset -= sizeof(tcg_target_long); 2418 #endif 2419 if (arg != TCG_CALL_DUMMY_ARG) { 2420 ts = &s->temps[arg]; 2421 temp_load(s, ts, tcg_target_available_regs[ts->type], 2422 s->reserved_regs); 2423 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); 2424 } 2425 #ifndef TCG_TARGET_STACK_GROWSUP 2426 stack_offset += sizeof(tcg_target_long); 2427 #endif 2428 } 2429 2430 /* assign input registers */ 2431 tcg_regset_set(allocated_regs, s->reserved_regs); 2432 for(i = 0; i < nb_regs; i++) { 2433 arg = args[nb_oargs + i]; 2434 if (arg != TCG_CALL_DUMMY_ARG) { 2435 ts = &s->temps[arg]; 2436 reg = tcg_target_call_iarg_regs[i]; 2437 tcg_reg_free(s, reg, allocated_regs); 2438 2439 if (ts->val_type == TEMP_VAL_REG) { 2440 if (ts->reg != reg) { 2441 tcg_out_mov(s, ts->type, reg, ts->reg); 2442 } 2443 } else { 2444 TCGRegSet arg_set; 2445 2446 tcg_regset_clear(arg_set); 2447 tcg_regset_set_reg(arg_set, reg); 2448 temp_load(s, ts, arg_set, allocated_regs); 2449 } 2450 2451 tcg_regset_set_reg(allocated_regs, reg); 2452 } 2453 } 2454 2455 /* mark dead temporaries and free the associated registers */ 2456 for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2457 if (IS_DEAD_ARG(i)) { 2458 temp_dead(s, &s->temps[args[i]]); 2459 } 2460 } 2461 2462 /* clobber call registers */ 2463 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 2464 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 2465 tcg_reg_free(s, i, allocated_regs); 2466 } 2467 } 2468 2469 /* Save globals if they might be written by the helper, sync them if 2470 they might be read. */ 2471 if (flags & TCG_CALL_NO_READ_GLOBALS) { 2472 /* Nothing to do */ 2473 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) { 2474 sync_globals(s, allocated_regs); 2475 } else { 2476 save_globals(s, allocated_regs); 2477 } 2478 2479 tcg_out_call(s, func_addr); 2480 2481 /* assign output registers and emit moves if needed */ 2482 for(i = 0; i < nb_oargs; i++) { 2483 arg = args[i]; 2484 ts = &s->temps[arg]; 2485 reg = tcg_target_call_oarg_regs[i]; 2486 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 2487 2488 if (ts->fixed_reg) { 2489 if (ts->reg != reg) { 2490 tcg_out_mov(s, ts->type, ts->reg, reg); 2491 } 2492 } else { 2493 if (ts->val_type == TEMP_VAL_REG) { 2494 s->reg_to_temp[ts->reg] = NULL; 2495 } 2496 ts->val_type = TEMP_VAL_REG; 2497 ts->reg = reg; 2498 ts->mem_coherent = 0; 2499 s->reg_to_temp[reg] = ts; 2500 if (NEED_SYNC_ARG(i)) { 2501 temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i)); 2502 } else if (IS_DEAD_ARG(i)) { 2503 temp_dead(s, ts); 2504 } 2505 } 2506 } 2507 } 2508 2509 #ifdef CONFIG_PROFILER 2510 2511 static int64_t tcg_table_op_count[NB_OPS]; 2512 2513 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) 2514 { 2515 int i; 2516 2517 for (i = 0; i < NB_OPS; i++) { 2518 cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name, 2519 tcg_table_op_count[i]); 2520 } 2521 } 2522 #else 2523 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) 2524 { 2525 cpu_fprintf(f, "[TCG profiler not compiled]\n"); 2526 } 2527 #endif 2528 2529 2530 int tcg_gen_code(TCGContext *s, TranslationBlock *tb) 2531 { 2532 int i, oi, oi_next, num_insns; 2533 2534 #ifdef CONFIG_PROFILER 2535 { 2536 int n; 2537 2538 n = s->gen_op_buf[0].prev + 1; 2539 s->op_count += n; 2540 if (n > s->op_count_max) { 2541 s->op_count_max = n; 2542 } 2543 2544 n = s->nb_temps; 2545 s->temp_count += n; 2546 if (n > s->temp_count_max) { 2547 s->temp_count_max = n; 2548 } 2549 } 2550 #endif 2551 2552 #ifdef DEBUG_DISAS 2553 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 2554 && qemu_log_in_addr_range(tb->pc))) { 2555 qemu_log_lock(); 2556 qemu_log("OP:\n"); 2557 tcg_dump_ops(s); 2558 qemu_log("\n"); 2559 qemu_log_unlock(); 2560 } 2561 #endif 2562 2563 #ifdef CONFIG_PROFILER 2564 s->opt_time -= profile_getclock(); 2565 #endif 2566 2567 #ifdef USE_TCG_OPTIMIZATIONS 2568 tcg_optimize(s); 2569 #endif 2570 2571 #ifdef CONFIG_PROFILER 2572 s->opt_time += profile_getclock(); 2573 s->la_time -= profile_getclock(); 2574 #endif 2575 2576 { 2577 uint8_t *temp_state = tcg_malloc(s->nb_temps + s->nb_indirects); 2578 2579 liveness_pass_1(s, temp_state); 2580 2581 if (s->nb_indirects > 0) { 2582 #ifdef DEBUG_DISAS 2583 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 2584 && qemu_log_in_addr_range(tb->pc))) { 2585 qemu_log_lock(); 2586 qemu_log("OP before indirect lowering:\n"); 2587 tcg_dump_ops(s); 2588 qemu_log("\n"); 2589 qemu_log_unlock(); 2590 } 2591 #endif 2592 /* Replace indirect temps with direct temps. */ 2593 if (liveness_pass_2(s, temp_state)) { 2594 /* If changes were made, re-run liveness. */ 2595 liveness_pass_1(s, temp_state); 2596 } 2597 } 2598 } 2599 2600 #ifdef CONFIG_PROFILER 2601 s->la_time += profile_getclock(); 2602 #endif 2603 2604 #ifdef DEBUG_DISAS 2605 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 2606 && qemu_log_in_addr_range(tb->pc))) { 2607 qemu_log_lock(); 2608 qemu_log("OP after optimization and liveness analysis:\n"); 2609 tcg_dump_ops(s); 2610 qemu_log("\n"); 2611 qemu_log_unlock(); 2612 } 2613 #endif 2614 2615 tcg_reg_alloc_start(s); 2616 2617 s->code_buf = tb->tc_ptr; 2618 s->code_ptr = tb->tc_ptr; 2619 2620 #ifdef TCG_TARGET_NEED_LDST_LABELS 2621 s->ldst_labels = NULL; 2622 #endif 2623 #ifdef TCG_TARGET_NEED_POOL_LABELS 2624 s->pool_labels = NULL; 2625 #endif 2626 2627 num_insns = -1; 2628 for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) { 2629 TCGOp * const op = &s->gen_op_buf[oi]; 2630 TCGArg * const args = &s->gen_opparam_buf[op->args]; 2631 TCGOpcode opc = op->opc; 2632 const TCGOpDef *def = &tcg_op_defs[opc]; 2633 TCGLifeData arg_life = op->life; 2634 2635 oi_next = op->next; 2636 #ifdef CONFIG_PROFILER 2637 tcg_table_op_count[opc]++; 2638 #endif 2639 2640 switch (opc) { 2641 case INDEX_op_mov_i32: 2642 case INDEX_op_mov_i64: 2643 tcg_reg_alloc_mov(s, def, args, arg_life); 2644 break; 2645 case INDEX_op_movi_i32: 2646 case INDEX_op_movi_i64: 2647 tcg_reg_alloc_movi(s, args, arg_life); 2648 break; 2649 case INDEX_op_insn_start: 2650 if (num_insns >= 0) { 2651 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 2652 } 2653 num_insns++; 2654 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 2655 target_ulong a; 2656 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 2657 a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2]; 2658 #else 2659 a = args[i]; 2660 #endif 2661 s->gen_insn_data[num_insns][i] = a; 2662 } 2663 break; 2664 case INDEX_op_discard: 2665 temp_dead(s, &s->temps[args[0]]); 2666 break; 2667 case INDEX_op_set_label: 2668 tcg_reg_alloc_bb_end(s, s->reserved_regs); 2669 tcg_out_label(s, arg_label(args[0]), s->code_ptr); 2670 break; 2671 case INDEX_op_call: 2672 tcg_reg_alloc_call(s, op->callo, op->calli, args, arg_life); 2673 break; 2674 default: 2675 /* Sanity check that we've not introduced any unhandled opcodes. */ 2676 if (def->flags & TCG_OPF_NOT_PRESENT) { 2677 tcg_abort(); 2678 } 2679 /* Note: in order to speed up the code, it would be much 2680 faster to have specialized register allocator functions for 2681 some common argument patterns */ 2682 tcg_reg_alloc_op(s, def, opc, args, arg_life); 2683 break; 2684 } 2685 #ifdef CONFIG_DEBUG_TCG 2686 check_regs(s); 2687 #endif 2688 /* Test for (pending) buffer overflow. The assumption is that any 2689 one operation beginning below the high water mark cannot overrun 2690 the buffer completely. Thus we can test for overflow after 2691 generating code without having to check during generation. */ 2692 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 2693 return -1; 2694 } 2695 } 2696 tcg_debug_assert(num_insns >= 0); 2697 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 2698 2699 /* Generate TB finalization at the end of block */ 2700 #ifdef TCG_TARGET_NEED_LDST_LABELS 2701 if (!tcg_out_ldst_finalize(s)) { 2702 return -1; 2703 } 2704 #endif 2705 #ifdef TCG_TARGET_NEED_POOL_LABELS 2706 if (!tcg_out_pool_finalize(s)) { 2707 return -1; 2708 } 2709 #endif 2710 2711 /* flush instruction cache */ 2712 flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); 2713 2714 return tcg_current_code_size(s); 2715 } 2716 2717 #ifdef CONFIG_PROFILER 2718 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) 2719 { 2720 TCGContext *s = &tcg_ctx; 2721 int64_t tb_count = s->tb_count; 2722 int64_t tb_div_count = tb_count ? tb_count : 1; 2723 int64_t tot = s->interm_time + s->code_time; 2724 2725 cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n", 2726 tot, tot / 2.4e9); 2727 cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n", 2728 tb_count, s->tb_count1 - tb_count, 2729 (double)(s->tb_count1 - s->tb_count) 2730 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0); 2731 cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n", 2732 (double)s->op_count / tb_div_count, s->op_count_max); 2733 cpu_fprintf(f, "deleted ops/TB %0.2f\n", 2734 (double)s->del_op_count / tb_div_count); 2735 cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n", 2736 (double)s->temp_count / tb_div_count, s->temp_count_max); 2737 cpu_fprintf(f, "avg host code/TB %0.1f\n", 2738 (double)s->code_out_len / tb_div_count); 2739 cpu_fprintf(f, "avg search data/TB %0.1f\n", 2740 (double)s->search_out_len / tb_div_count); 2741 2742 cpu_fprintf(f, "cycles/op %0.1f\n", 2743 s->op_count ? (double)tot / s->op_count : 0); 2744 cpu_fprintf(f, "cycles/in byte %0.1f\n", 2745 s->code_in_len ? (double)tot / s->code_in_len : 0); 2746 cpu_fprintf(f, "cycles/out byte %0.1f\n", 2747 s->code_out_len ? (double)tot / s->code_out_len : 0); 2748 cpu_fprintf(f, "cycles/search byte %0.1f\n", 2749 s->search_out_len ? (double)tot / s->search_out_len : 0); 2750 if (tot == 0) { 2751 tot = 1; 2752 } 2753 cpu_fprintf(f, " gen_interm time %0.1f%%\n", 2754 (double)s->interm_time / tot * 100.0); 2755 cpu_fprintf(f, " gen_code time %0.1f%%\n", 2756 (double)s->code_time / tot * 100.0); 2757 cpu_fprintf(f, "optim./code time %0.1f%%\n", 2758 (double)s->opt_time / (s->code_time ? s->code_time : 1) 2759 * 100.0); 2760 cpu_fprintf(f, "liveness/code time %0.1f%%\n", 2761 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0); 2762 cpu_fprintf(f, "cpu_restore count %" PRId64 "\n", 2763 s->restore_count); 2764 cpu_fprintf(f, " avg cycles %0.1f\n", 2765 s->restore_count ? (double)s->restore_time / s->restore_count : 0); 2766 } 2767 #else 2768 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) 2769 { 2770 cpu_fprintf(f, "[TCG profiler not compiled]\n"); 2771 } 2772 #endif 2773 2774 #ifdef ELF_HOST_MACHINE 2775 /* In order to use this feature, the backend needs to do three things: 2776 2777 (1) Define ELF_HOST_MACHINE to indicate both what value to 2778 put into the ELF image and to indicate support for the feature. 2779 2780 (2) Define tcg_register_jit. This should create a buffer containing 2781 the contents of a .debug_frame section that describes the post- 2782 prologue unwind info for the tcg machine. 2783 2784 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 2785 */ 2786 2787 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */ 2788 typedef enum { 2789 JIT_NOACTION = 0, 2790 JIT_REGISTER_FN, 2791 JIT_UNREGISTER_FN 2792 } jit_actions_t; 2793 2794 struct jit_code_entry { 2795 struct jit_code_entry *next_entry; 2796 struct jit_code_entry *prev_entry; 2797 const void *symfile_addr; 2798 uint64_t symfile_size; 2799 }; 2800 2801 struct jit_descriptor { 2802 uint32_t version; 2803 uint32_t action_flag; 2804 struct jit_code_entry *relevant_entry; 2805 struct jit_code_entry *first_entry; 2806 }; 2807 2808 void __jit_debug_register_code(void) __attribute__((noinline)); 2809 void __jit_debug_register_code(void) 2810 { 2811 asm(""); 2812 } 2813 2814 /* Must statically initialize the version, because GDB may check 2815 the version before we can set it. */ 2816 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 2817 2818 /* End GDB interface. */ 2819 2820 static int find_string(const char *strtab, const char *str) 2821 { 2822 const char *p = strtab + 1; 2823 2824 while (1) { 2825 if (strcmp(p, str) == 0) { 2826 return p - strtab; 2827 } 2828 p += strlen(p) + 1; 2829 } 2830 } 2831 2832 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size, 2833 const void *debug_frame, 2834 size_t debug_frame_size) 2835 { 2836 struct __attribute__((packed)) DebugInfo { 2837 uint32_t len; 2838 uint16_t version; 2839 uint32_t abbrev; 2840 uint8_t ptr_size; 2841 uint8_t cu_die; 2842 uint16_t cu_lang; 2843 uintptr_t cu_low_pc; 2844 uintptr_t cu_high_pc; 2845 uint8_t fn_die; 2846 char fn_name[16]; 2847 uintptr_t fn_low_pc; 2848 uintptr_t fn_high_pc; 2849 uint8_t cu_eoc; 2850 }; 2851 2852 struct ElfImage { 2853 ElfW(Ehdr) ehdr; 2854 ElfW(Phdr) phdr; 2855 ElfW(Shdr) shdr[7]; 2856 ElfW(Sym) sym[2]; 2857 struct DebugInfo di; 2858 uint8_t da[24]; 2859 char str[80]; 2860 }; 2861 2862 struct ElfImage *img; 2863 2864 static const struct ElfImage img_template = { 2865 .ehdr = { 2866 .e_ident[EI_MAG0] = ELFMAG0, 2867 .e_ident[EI_MAG1] = ELFMAG1, 2868 .e_ident[EI_MAG2] = ELFMAG2, 2869 .e_ident[EI_MAG3] = ELFMAG3, 2870 .e_ident[EI_CLASS] = ELF_CLASS, 2871 .e_ident[EI_DATA] = ELF_DATA, 2872 .e_ident[EI_VERSION] = EV_CURRENT, 2873 .e_type = ET_EXEC, 2874 .e_machine = ELF_HOST_MACHINE, 2875 .e_version = EV_CURRENT, 2876 .e_phoff = offsetof(struct ElfImage, phdr), 2877 .e_shoff = offsetof(struct ElfImage, shdr), 2878 .e_ehsize = sizeof(ElfW(Shdr)), 2879 .e_phentsize = sizeof(ElfW(Phdr)), 2880 .e_phnum = 1, 2881 .e_shentsize = sizeof(ElfW(Shdr)), 2882 .e_shnum = ARRAY_SIZE(img->shdr), 2883 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 2884 #ifdef ELF_HOST_FLAGS 2885 .e_flags = ELF_HOST_FLAGS, 2886 #endif 2887 #ifdef ELF_OSABI 2888 .e_ident[EI_OSABI] = ELF_OSABI, 2889 #endif 2890 }, 2891 .phdr = { 2892 .p_type = PT_LOAD, 2893 .p_flags = PF_X, 2894 }, 2895 .shdr = { 2896 [0] = { .sh_type = SHT_NULL }, 2897 /* Trick: The contents of code_gen_buffer are not present in 2898 this fake ELF file; that got allocated elsewhere. Therefore 2899 we mark .text as SHT_NOBITS (similar to .bss) so that readers 2900 will not look for contents. We can record any address. */ 2901 [1] = { /* .text */ 2902 .sh_type = SHT_NOBITS, 2903 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 2904 }, 2905 [2] = { /* .debug_info */ 2906 .sh_type = SHT_PROGBITS, 2907 .sh_offset = offsetof(struct ElfImage, di), 2908 .sh_size = sizeof(struct DebugInfo), 2909 }, 2910 [3] = { /* .debug_abbrev */ 2911 .sh_type = SHT_PROGBITS, 2912 .sh_offset = offsetof(struct ElfImage, da), 2913 .sh_size = sizeof(img->da), 2914 }, 2915 [4] = { /* .debug_frame */ 2916 .sh_type = SHT_PROGBITS, 2917 .sh_offset = sizeof(struct ElfImage), 2918 }, 2919 [5] = { /* .symtab */ 2920 .sh_type = SHT_SYMTAB, 2921 .sh_offset = offsetof(struct ElfImage, sym), 2922 .sh_size = sizeof(img->sym), 2923 .sh_info = 1, 2924 .sh_link = ARRAY_SIZE(img->shdr) - 1, 2925 .sh_entsize = sizeof(ElfW(Sym)), 2926 }, 2927 [6] = { /* .strtab */ 2928 .sh_type = SHT_STRTAB, 2929 .sh_offset = offsetof(struct ElfImage, str), 2930 .sh_size = sizeof(img->str), 2931 } 2932 }, 2933 .sym = { 2934 [1] = { /* code_gen_buffer */ 2935 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 2936 .st_shndx = 1, 2937 } 2938 }, 2939 .di = { 2940 .len = sizeof(struct DebugInfo) - 4, 2941 .version = 2, 2942 .ptr_size = sizeof(void *), 2943 .cu_die = 1, 2944 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 2945 .fn_die = 2, 2946 .fn_name = "code_gen_buffer" 2947 }, 2948 .da = { 2949 1, /* abbrev number (the cu) */ 2950 0x11, 1, /* DW_TAG_compile_unit, has children */ 2951 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 2952 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 2953 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 2954 0, 0, /* end of abbrev */ 2955 2, /* abbrev number (the fn) */ 2956 0x2e, 0, /* DW_TAG_subprogram, no children */ 2957 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 2958 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 2959 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 2960 0, 0, /* end of abbrev */ 2961 0 /* no more abbrev */ 2962 }, 2963 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 2964 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 2965 }; 2966 2967 /* We only need a single jit entry; statically allocate it. */ 2968 static struct jit_code_entry one_entry; 2969 2970 uintptr_t buf = (uintptr_t)buf_ptr; 2971 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 2972 DebugFrameHeader *dfh; 2973 2974 img = g_malloc(img_size); 2975 *img = img_template; 2976 2977 img->phdr.p_vaddr = buf; 2978 img->phdr.p_paddr = buf; 2979 img->phdr.p_memsz = buf_size; 2980 2981 img->shdr[1].sh_name = find_string(img->str, ".text"); 2982 img->shdr[1].sh_addr = buf; 2983 img->shdr[1].sh_size = buf_size; 2984 2985 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 2986 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 2987 2988 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 2989 img->shdr[4].sh_size = debug_frame_size; 2990 2991 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 2992 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 2993 2994 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 2995 img->sym[1].st_value = buf; 2996 img->sym[1].st_size = buf_size; 2997 2998 img->di.cu_low_pc = buf; 2999 img->di.cu_high_pc = buf + buf_size; 3000 img->di.fn_low_pc = buf; 3001 img->di.fn_high_pc = buf + buf_size; 3002 3003 dfh = (DebugFrameHeader *)(img + 1); 3004 memcpy(dfh, debug_frame, debug_frame_size); 3005 dfh->fde.func_start = buf; 3006 dfh->fde.func_len = buf_size; 3007 3008 #ifdef DEBUG_JIT 3009 /* Enable this block to be able to debug the ELF image file creation. 3010 One can use readelf, objdump, or other inspection utilities. */ 3011 { 3012 FILE *f = fopen("/tmp/qemu.jit", "w+b"); 3013 if (f) { 3014 if (fwrite(img, img_size, 1, f) != img_size) { 3015 /* Avoid stupid unused return value warning for fwrite. */ 3016 } 3017 fclose(f); 3018 } 3019 } 3020 #endif 3021 3022 one_entry.symfile_addr = img; 3023 one_entry.symfile_size = img_size; 3024 3025 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 3026 __jit_debug_descriptor.relevant_entry = &one_entry; 3027 __jit_debug_descriptor.first_entry = &one_entry; 3028 __jit_debug_register_code(); 3029 } 3030 #else 3031 /* No support for the feature. Provide the entry point expected by exec.c, 3032 and implement the internal function we declared earlier. */ 3033 3034 static void tcg_register_jit_int(void *buf, size_t size, 3035 const void *debug_frame, 3036 size_t debug_frame_size) 3037 { 3038 } 3039 3040 void tcg_register_jit(void *buf, size_t buf_size) 3041 { 3042 } 3043 #endif /* ELF_HOST_MACHINE */ 3044