1 /* 2 * Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 /* define it to use liveness analysis (better code) */ 26 #define USE_TCG_OPTIMIZATIONS 27 28 #include "qemu/osdep.h" 29 30 /* Define to jump the ELF file used to communicate with GDB. */ 31 #undef DEBUG_JIT 32 33 #include "qemu/cutils.h" 34 #include "qemu/host-utils.h" 35 #include "qemu/timer.h" 36 37 /* Note: the long term plan is to reduce the dependencies on the QEMU 38 CPU definitions. Currently they are used for qemu_ld/st 39 instructions */ 40 #define NO_CPU_IO_DEFS 41 #include "cpu.h" 42 43 #include "exec/cpu-common.h" 44 #include "exec/exec-all.h" 45 46 #include "tcg-op.h" 47 48 #if UINTPTR_MAX == UINT32_MAX 49 # define ELF_CLASS ELFCLASS32 50 #else 51 # define ELF_CLASS ELFCLASS64 52 #endif 53 #ifdef HOST_WORDS_BIGENDIAN 54 # define ELF_DATA ELFDATA2MSB 55 #else 56 # define ELF_DATA ELFDATA2LSB 57 #endif 58 59 #include "elf.h" 60 #include "exec/log.h" 61 62 /* Forward declarations for functions declared in tcg-target.inc.c and 63 used here. */ 64 static void tcg_target_init(TCGContext *s); 65 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode); 66 static void tcg_target_qemu_prologue(TCGContext *s); 67 static void patch_reloc(tcg_insn_unit *code_ptr, int type, 68 intptr_t value, intptr_t addend); 69 70 /* The CIE and FDE header definitions will be common to all hosts. */ 71 typedef struct { 72 uint32_t len __attribute__((aligned((sizeof(void *))))); 73 uint32_t id; 74 uint8_t version; 75 char augmentation[1]; 76 uint8_t code_align; 77 uint8_t data_align; 78 uint8_t return_column; 79 } DebugFrameCIE; 80 81 typedef struct QEMU_PACKED { 82 uint32_t len __attribute__((aligned((sizeof(void *))))); 83 uint32_t cie_offset; 84 uintptr_t func_start; 85 uintptr_t func_len; 86 } DebugFrameFDEHeader; 87 88 typedef struct QEMU_PACKED { 89 DebugFrameCIE cie; 90 DebugFrameFDEHeader fde; 91 } DebugFrameHeader; 92 93 static void tcg_register_jit_int(void *buf, size_t size, 94 const void *debug_frame, 95 size_t debug_frame_size) 96 __attribute__((unused)); 97 98 /* Forward declarations for functions declared and used in tcg-target.inc.c. */ 99 static const char *target_parse_constraint(TCGArgConstraint *ct, 100 const char *ct_str, TCGType type); 101 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, 102 intptr_t arg2); 103 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 104 static void tcg_out_movi(TCGContext *s, TCGType type, 105 TCGReg ret, tcg_target_long arg); 106 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, 107 const int *const_args); 108 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, 109 intptr_t arg2); 110 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 111 TCGReg base, intptr_t ofs); 112 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target); 113 static int tcg_target_const_match(tcg_target_long val, TCGType type, 114 const TCGArgConstraint *arg_ct); 115 static void tcg_out_tb_init(TCGContext *s); 116 static bool tcg_out_tb_finalize(TCGContext *s); 117 118 119 120 static TCGRegSet tcg_target_available_regs[2]; 121 static TCGRegSet tcg_target_call_clobber_regs; 122 123 #if TCG_TARGET_INSN_UNIT_SIZE == 1 124 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v) 125 { 126 *s->code_ptr++ = v; 127 } 128 129 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p, 130 uint8_t v) 131 { 132 *p = v; 133 } 134 #endif 135 136 #if TCG_TARGET_INSN_UNIT_SIZE <= 2 137 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v) 138 { 139 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 140 *s->code_ptr++ = v; 141 } else { 142 tcg_insn_unit *p = s->code_ptr; 143 memcpy(p, &v, sizeof(v)); 144 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE); 145 } 146 } 147 148 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p, 149 uint16_t v) 150 { 151 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 152 *p = v; 153 } else { 154 memcpy(p, &v, sizeof(v)); 155 } 156 } 157 #endif 158 159 #if TCG_TARGET_INSN_UNIT_SIZE <= 4 160 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v) 161 { 162 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 163 *s->code_ptr++ = v; 164 } else { 165 tcg_insn_unit *p = s->code_ptr; 166 memcpy(p, &v, sizeof(v)); 167 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE); 168 } 169 } 170 171 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p, 172 uint32_t v) 173 { 174 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 175 *p = v; 176 } else { 177 memcpy(p, &v, sizeof(v)); 178 } 179 } 180 #endif 181 182 #if TCG_TARGET_INSN_UNIT_SIZE <= 8 183 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v) 184 { 185 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 186 *s->code_ptr++ = v; 187 } else { 188 tcg_insn_unit *p = s->code_ptr; 189 memcpy(p, &v, sizeof(v)); 190 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE); 191 } 192 } 193 194 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p, 195 uint64_t v) 196 { 197 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 198 *p = v; 199 } else { 200 memcpy(p, &v, sizeof(v)); 201 } 202 } 203 #endif 204 205 /* label relocation processing */ 206 207 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, 208 TCGLabel *l, intptr_t addend) 209 { 210 TCGRelocation *r; 211 212 if (l->has_value) { 213 /* FIXME: This may break relocations on RISC targets that 214 modify instruction fields in place. The caller may not have 215 written the initial value. */ 216 patch_reloc(code_ptr, type, l->u.value, addend); 217 } else { 218 /* add a new relocation entry */ 219 r = tcg_malloc(sizeof(TCGRelocation)); 220 r->type = type; 221 r->ptr = code_ptr; 222 r->addend = addend; 223 r->next = l->u.first_reloc; 224 l->u.first_reloc = r; 225 } 226 } 227 228 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr) 229 { 230 intptr_t value = (intptr_t)ptr; 231 TCGRelocation *r; 232 233 tcg_debug_assert(!l->has_value); 234 235 for (r = l->u.first_reloc; r != NULL; r = r->next) { 236 patch_reloc(r->ptr, r->type, value, r->addend); 237 } 238 239 l->has_value = 1; 240 l->u.value_ptr = ptr; 241 } 242 243 TCGLabel *gen_new_label(void) 244 { 245 TCGContext *s = &tcg_ctx; 246 TCGLabel *l = tcg_malloc(sizeof(TCGLabel)); 247 248 *l = (TCGLabel){ 249 .id = s->nb_labels++ 250 }; 251 252 return l; 253 } 254 255 #include "tcg-target.inc.c" 256 257 /* pool based memory allocation */ 258 void *tcg_malloc_internal(TCGContext *s, int size) 259 { 260 TCGPool *p; 261 int pool_size; 262 263 if (size > TCG_POOL_CHUNK_SIZE) { 264 /* big malloc: insert a new pool (XXX: could optimize) */ 265 p = g_malloc(sizeof(TCGPool) + size); 266 p->size = size; 267 p->next = s->pool_first_large; 268 s->pool_first_large = p; 269 return p->data; 270 } else { 271 p = s->pool_current; 272 if (!p) { 273 p = s->pool_first; 274 if (!p) 275 goto new_pool; 276 } else { 277 if (!p->next) { 278 new_pool: 279 pool_size = TCG_POOL_CHUNK_SIZE; 280 p = g_malloc(sizeof(TCGPool) + pool_size); 281 p->size = pool_size; 282 p->next = NULL; 283 if (s->pool_current) 284 s->pool_current->next = p; 285 else 286 s->pool_first = p; 287 } else { 288 p = p->next; 289 } 290 } 291 } 292 s->pool_current = p; 293 s->pool_cur = p->data + size; 294 s->pool_end = p->data + p->size; 295 return p->data; 296 } 297 298 void tcg_pool_reset(TCGContext *s) 299 { 300 TCGPool *p, *t; 301 for (p = s->pool_first_large; p; p = t) { 302 t = p->next; 303 g_free(p); 304 } 305 s->pool_first_large = NULL; 306 s->pool_cur = s->pool_end = NULL; 307 s->pool_current = NULL; 308 } 309 310 typedef struct TCGHelperInfo { 311 void *func; 312 const char *name; 313 unsigned flags; 314 unsigned sizemask; 315 } TCGHelperInfo; 316 317 #include "exec/helper-proto.h" 318 319 static const TCGHelperInfo all_helpers[] = { 320 #include "exec/helper-tcg.h" 321 }; 322 323 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; 324 static void process_op_defs(TCGContext *s); 325 326 void tcg_context_init(TCGContext *s) 327 { 328 int op, total_args, n, i; 329 TCGOpDef *def; 330 TCGArgConstraint *args_ct; 331 int *sorted_args; 332 GHashTable *helper_table; 333 334 memset(s, 0, sizeof(*s)); 335 s->nb_globals = 0; 336 337 /* Count total number of arguments and allocate the corresponding 338 space */ 339 total_args = 0; 340 for(op = 0; op < NB_OPS; op++) { 341 def = &tcg_op_defs[op]; 342 n = def->nb_iargs + def->nb_oargs; 343 total_args += n; 344 } 345 346 args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args); 347 sorted_args = g_malloc(sizeof(int) * total_args); 348 349 for(op = 0; op < NB_OPS; op++) { 350 def = &tcg_op_defs[op]; 351 def->args_ct = args_ct; 352 def->sorted_args = sorted_args; 353 n = def->nb_iargs + def->nb_oargs; 354 sorted_args += n; 355 args_ct += n; 356 } 357 358 /* Register helpers. */ 359 /* Use g_direct_hash/equal for direct pointer comparisons on func. */ 360 s->helpers = helper_table = g_hash_table_new(NULL, NULL); 361 362 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { 363 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func, 364 (gpointer)&all_helpers[i]); 365 } 366 367 tcg_target_init(s); 368 process_op_defs(s); 369 370 /* Reverse the order of the saved registers, assuming they're all at 371 the start of tcg_target_reg_alloc_order. */ 372 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) { 373 int r = tcg_target_reg_alloc_order[n]; 374 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) { 375 break; 376 } 377 } 378 for (i = 0; i < n; ++i) { 379 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i]; 380 } 381 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) { 382 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; 383 } 384 } 385 386 /* 387 * Allocate TBs right before their corresponding translated code, making 388 * sure that TBs and code are on different cache lines. 389 */ 390 TranslationBlock *tcg_tb_alloc(TCGContext *s) 391 { 392 uintptr_t align = qemu_icache_linesize; 393 TranslationBlock *tb; 394 void *next; 395 396 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); 397 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); 398 399 if (unlikely(next > s->code_gen_highwater)) { 400 return NULL; 401 } 402 s->code_gen_ptr = next; 403 return tb; 404 } 405 406 void tcg_prologue_init(TCGContext *s) 407 { 408 size_t prologue_size, total_size; 409 void *buf0, *buf1; 410 411 /* Put the prologue at the beginning of code_gen_buffer. */ 412 buf0 = s->code_gen_buffer; 413 s->code_ptr = buf0; 414 s->code_buf = buf0; 415 s->code_gen_prologue = buf0; 416 417 /* Generate the prologue. */ 418 tcg_target_qemu_prologue(s); 419 buf1 = s->code_ptr; 420 flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1); 421 422 /* Deduct the prologue from the buffer. */ 423 prologue_size = tcg_current_code_size(s); 424 s->code_gen_ptr = buf1; 425 s->code_gen_buffer = buf1; 426 s->code_buf = buf1; 427 total_size = s->code_gen_buffer_size - prologue_size; 428 s->code_gen_buffer_size = total_size; 429 430 /* Compute a high-water mark, at which we voluntarily flush the buffer 431 and start over. The size here is arbitrary, significantly larger 432 than we expect the code generation for any one opcode to require. */ 433 s->code_gen_highwater = s->code_gen_buffer + (total_size - 1024); 434 435 tcg_register_jit(s->code_gen_buffer, total_size); 436 437 #ifdef DEBUG_DISAS 438 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 439 qemu_log_lock(); 440 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size); 441 log_disas(buf0, prologue_size); 442 qemu_log("\n"); 443 qemu_log_flush(); 444 qemu_log_unlock(); 445 } 446 #endif 447 448 /* Assert that goto_ptr is implemented completely. */ 449 if (TCG_TARGET_HAS_goto_ptr) { 450 tcg_debug_assert(s->code_gen_epilogue != NULL); 451 } 452 } 453 454 void tcg_func_start(TCGContext *s) 455 { 456 tcg_pool_reset(s); 457 s->nb_temps = s->nb_globals; 458 459 /* No temps have been previously allocated for size or locality. */ 460 memset(s->free_temps, 0, sizeof(s->free_temps)); 461 462 s->nb_labels = 0; 463 s->current_frame_offset = s->frame_start; 464 465 #ifdef CONFIG_DEBUG_TCG 466 s->goto_tb_issue_mask = 0; 467 #endif 468 469 s->gen_op_buf[0].next = 1; 470 s->gen_op_buf[0].prev = 0; 471 s->gen_next_op_idx = 1; 472 s->gen_next_parm_idx = 0; 473 474 s->be = tcg_malloc(sizeof(TCGBackendData)); 475 } 476 477 static inline int temp_idx(TCGContext *s, TCGTemp *ts) 478 { 479 ptrdiff_t n = ts - s->temps; 480 tcg_debug_assert(n >= 0 && n < s->nb_temps); 481 return n; 482 } 483 484 static inline TCGTemp *tcg_temp_alloc(TCGContext *s) 485 { 486 int n = s->nb_temps++; 487 tcg_debug_assert(n < TCG_MAX_TEMPS); 488 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 489 } 490 491 static inline TCGTemp *tcg_global_alloc(TCGContext *s) 492 { 493 tcg_debug_assert(s->nb_globals == s->nb_temps); 494 s->nb_globals++; 495 return tcg_temp_alloc(s); 496 } 497 498 static int tcg_global_reg_new_internal(TCGContext *s, TCGType type, 499 TCGReg reg, const char *name) 500 { 501 TCGTemp *ts; 502 503 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) { 504 tcg_abort(); 505 } 506 507 ts = tcg_global_alloc(s); 508 ts->base_type = type; 509 ts->type = type; 510 ts->fixed_reg = 1; 511 ts->reg = reg; 512 ts->name = name; 513 tcg_regset_set_reg(s->reserved_regs, reg); 514 515 return temp_idx(s, ts); 516 } 517 518 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 519 { 520 int idx; 521 s->frame_start = start; 522 s->frame_end = start + size; 523 idx = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 524 s->frame_temp = &s->temps[idx]; 525 } 526 527 TCGv_i32 tcg_global_reg_new_i32(TCGReg reg, const char *name) 528 { 529 TCGContext *s = &tcg_ctx; 530 int idx; 531 532 if (tcg_regset_test_reg(s->reserved_regs, reg)) { 533 tcg_abort(); 534 } 535 idx = tcg_global_reg_new_internal(s, TCG_TYPE_I32, reg, name); 536 return MAKE_TCGV_I32(idx); 537 } 538 539 TCGv_i64 tcg_global_reg_new_i64(TCGReg reg, const char *name) 540 { 541 TCGContext *s = &tcg_ctx; 542 int idx; 543 544 if (tcg_regset_test_reg(s->reserved_regs, reg)) { 545 tcg_abort(); 546 } 547 idx = tcg_global_reg_new_internal(s, TCG_TYPE_I64, reg, name); 548 return MAKE_TCGV_I64(idx); 549 } 550 551 int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, 552 intptr_t offset, const char *name) 553 { 554 TCGContext *s = &tcg_ctx; 555 TCGTemp *base_ts = &s->temps[GET_TCGV_PTR(base)]; 556 TCGTemp *ts = tcg_global_alloc(s); 557 int indirect_reg = 0, bigendian = 0; 558 #ifdef HOST_WORDS_BIGENDIAN 559 bigendian = 1; 560 #endif 561 562 if (!base_ts->fixed_reg) { 563 /* We do not support double-indirect registers. */ 564 tcg_debug_assert(!base_ts->indirect_reg); 565 base_ts->indirect_base = 1; 566 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 567 ? 2 : 1); 568 indirect_reg = 1; 569 } 570 571 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 572 TCGTemp *ts2 = tcg_global_alloc(s); 573 char buf[64]; 574 575 ts->base_type = TCG_TYPE_I64; 576 ts->type = TCG_TYPE_I32; 577 ts->indirect_reg = indirect_reg; 578 ts->mem_allocated = 1; 579 ts->mem_base = base_ts; 580 ts->mem_offset = offset + bigendian * 4; 581 pstrcpy(buf, sizeof(buf), name); 582 pstrcat(buf, sizeof(buf), "_0"); 583 ts->name = strdup(buf); 584 585 tcg_debug_assert(ts2 == ts + 1); 586 ts2->base_type = TCG_TYPE_I64; 587 ts2->type = TCG_TYPE_I32; 588 ts2->indirect_reg = indirect_reg; 589 ts2->mem_allocated = 1; 590 ts2->mem_base = base_ts; 591 ts2->mem_offset = offset + (1 - bigendian) * 4; 592 pstrcpy(buf, sizeof(buf), name); 593 pstrcat(buf, sizeof(buf), "_1"); 594 ts2->name = strdup(buf); 595 } else { 596 ts->base_type = type; 597 ts->type = type; 598 ts->indirect_reg = indirect_reg; 599 ts->mem_allocated = 1; 600 ts->mem_base = base_ts; 601 ts->mem_offset = offset; 602 ts->name = name; 603 } 604 return temp_idx(s, ts); 605 } 606 607 static int tcg_temp_new_internal(TCGType type, int temp_local) 608 { 609 TCGContext *s = &tcg_ctx; 610 TCGTemp *ts; 611 int idx, k; 612 613 k = type + (temp_local ? TCG_TYPE_COUNT : 0); 614 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS); 615 if (idx < TCG_MAX_TEMPS) { 616 /* There is already an available temp with the right type. */ 617 clear_bit(idx, s->free_temps[k].l); 618 619 ts = &s->temps[idx]; 620 ts->temp_allocated = 1; 621 tcg_debug_assert(ts->base_type == type); 622 tcg_debug_assert(ts->temp_local == temp_local); 623 } else { 624 ts = tcg_temp_alloc(s); 625 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 626 TCGTemp *ts2 = tcg_temp_alloc(s); 627 628 ts->base_type = type; 629 ts->type = TCG_TYPE_I32; 630 ts->temp_allocated = 1; 631 ts->temp_local = temp_local; 632 633 tcg_debug_assert(ts2 == ts + 1); 634 ts2->base_type = TCG_TYPE_I64; 635 ts2->type = TCG_TYPE_I32; 636 ts2->temp_allocated = 1; 637 ts2->temp_local = temp_local; 638 } else { 639 ts->base_type = type; 640 ts->type = type; 641 ts->temp_allocated = 1; 642 ts->temp_local = temp_local; 643 } 644 idx = temp_idx(s, ts); 645 } 646 647 #if defined(CONFIG_DEBUG_TCG) 648 s->temps_in_use++; 649 #endif 650 return idx; 651 } 652 653 TCGv_i32 tcg_temp_new_internal_i32(int temp_local) 654 { 655 int idx; 656 657 idx = tcg_temp_new_internal(TCG_TYPE_I32, temp_local); 658 return MAKE_TCGV_I32(idx); 659 } 660 661 TCGv_i64 tcg_temp_new_internal_i64(int temp_local) 662 { 663 int idx; 664 665 idx = tcg_temp_new_internal(TCG_TYPE_I64, temp_local); 666 return MAKE_TCGV_I64(idx); 667 } 668 669 static void tcg_temp_free_internal(int idx) 670 { 671 TCGContext *s = &tcg_ctx; 672 TCGTemp *ts; 673 int k; 674 675 #if defined(CONFIG_DEBUG_TCG) 676 s->temps_in_use--; 677 if (s->temps_in_use < 0) { 678 fprintf(stderr, "More temporaries freed than allocated!\n"); 679 } 680 #endif 681 682 tcg_debug_assert(idx >= s->nb_globals && idx < s->nb_temps); 683 ts = &s->temps[idx]; 684 tcg_debug_assert(ts->temp_allocated != 0); 685 ts->temp_allocated = 0; 686 687 k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0); 688 set_bit(idx, s->free_temps[k].l); 689 } 690 691 void tcg_temp_free_i32(TCGv_i32 arg) 692 { 693 tcg_temp_free_internal(GET_TCGV_I32(arg)); 694 } 695 696 void tcg_temp_free_i64(TCGv_i64 arg) 697 { 698 tcg_temp_free_internal(GET_TCGV_I64(arg)); 699 } 700 701 TCGv_i32 tcg_const_i32(int32_t val) 702 { 703 TCGv_i32 t0; 704 t0 = tcg_temp_new_i32(); 705 tcg_gen_movi_i32(t0, val); 706 return t0; 707 } 708 709 TCGv_i64 tcg_const_i64(int64_t val) 710 { 711 TCGv_i64 t0; 712 t0 = tcg_temp_new_i64(); 713 tcg_gen_movi_i64(t0, val); 714 return t0; 715 } 716 717 TCGv_i32 tcg_const_local_i32(int32_t val) 718 { 719 TCGv_i32 t0; 720 t0 = tcg_temp_local_new_i32(); 721 tcg_gen_movi_i32(t0, val); 722 return t0; 723 } 724 725 TCGv_i64 tcg_const_local_i64(int64_t val) 726 { 727 TCGv_i64 t0; 728 t0 = tcg_temp_local_new_i64(); 729 tcg_gen_movi_i64(t0, val); 730 return t0; 731 } 732 733 #if defined(CONFIG_DEBUG_TCG) 734 void tcg_clear_temp_count(void) 735 { 736 TCGContext *s = &tcg_ctx; 737 s->temps_in_use = 0; 738 } 739 740 int tcg_check_temp_count(void) 741 { 742 TCGContext *s = &tcg_ctx; 743 if (s->temps_in_use) { 744 /* Clear the count so that we don't give another 745 * warning immediately next time around. 746 */ 747 s->temps_in_use = 0; 748 return 1; 749 } 750 return 0; 751 } 752 #endif 753 754 /* Note: we convert the 64 bit args to 32 bit and do some alignment 755 and endian swap. Maybe it would be better to do the alignment 756 and endian swap in tcg_reg_alloc_call(). */ 757 void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret, 758 int nargs, TCGArg *args) 759 { 760 int i, real_args, nb_rets, pi, pi_first; 761 unsigned sizemask, flags; 762 TCGHelperInfo *info; 763 764 info = g_hash_table_lookup(s->helpers, (gpointer)func); 765 flags = info->flags; 766 sizemask = info->sizemask; 767 768 #if defined(__sparc__) && !defined(__arch64__) \ 769 && !defined(CONFIG_TCG_INTERPRETER) 770 /* We have 64-bit values in one register, but need to pass as two 771 separate parameters. Split them. */ 772 int orig_sizemask = sizemask; 773 int orig_nargs = nargs; 774 TCGv_i64 retl, reth; 775 776 TCGV_UNUSED_I64(retl); 777 TCGV_UNUSED_I64(reth); 778 if (sizemask != 0) { 779 TCGArg *split_args = __builtin_alloca(sizeof(TCGArg) * nargs * 2); 780 for (i = real_args = 0; i < nargs; ++i) { 781 int is_64bit = sizemask & (1 << (i+1)*2); 782 if (is_64bit) { 783 TCGv_i64 orig = MAKE_TCGV_I64(args[i]); 784 TCGv_i32 h = tcg_temp_new_i32(); 785 TCGv_i32 l = tcg_temp_new_i32(); 786 tcg_gen_extr_i64_i32(l, h, orig); 787 split_args[real_args++] = GET_TCGV_I32(h); 788 split_args[real_args++] = GET_TCGV_I32(l); 789 } else { 790 split_args[real_args++] = args[i]; 791 } 792 } 793 nargs = real_args; 794 args = split_args; 795 sizemask = 0; 796 } 797 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 798 for (i = 0; i < nargs; ++i) { 799 int is_64bit = sizemask & (1 << (i+1)*2); 800 int is_signed = sizemask & (2 << (i+1)*2); 801 if (!is_64bit) { 802 TCGv_i64 temp = tcg_temp_new_i64(); 803 TCGv_i64 orig = MAKE_TCGV_I64(args[i]); 804 if (is_signed) { 805 tcg_gen_ext32s_i64(temp, orig); 806 } else { 807 tcg_gen_ext32u_i64(temp, orig); 808 } 809 args[i] = GET_TCGV_I64(temp); 810 } 811 } 812 #endif /* TCG_TARGET_EXTEND_ARGS */ 813 814 pi_first = pi = s->gen_next_parm_idx; 815 if (ret != TCG_CALL_DUMMY_ARG) { 816 #if defined(__sparc__) && !defined(__arch64__) \ 817 && !defined(CONFIG_TCG_INTERPRETER) 818 if (orig_sizemask & 1) { 819 /* The 32-bit ABI is going to return the 64-bit value in 820 the %o0/%o1 register pair. Prepare for this by using 821 two return temporaries, and reassemble below. */ 822 retl = tcg_temp_new_i64(); 823 reth = tcg_temp_new_i64(); 824 s->gen_opparam_buf[pi++] = GET_TCGV_I64(reth); 825 s->gen_opparam_buf[pi++] = GET_TCGV_I64(retl); 826 nb_rets = 2; 827 } else { 828 s->gen_opparam_buf[pi++] = ret; 829 nb_rets = 1; 830 } 831 #else 832 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) { 833 #ifdef HOST_WORDS_BIGENDIAN 834 s->gen_opparam_buf[pi++] = ret + 1; 835 s->gen_opparam_buf[pi++] = ret; 836 #else 837 s->gen_opparam_buf[pi++] = ret; 838 s->gen_opparam_buf[pi++] = ret + 1; 839 #endif 840 nb_rets = 2; 841 } else { 842 s->gen_opparam_buf[pi++] = ret; 843 nb_rets = 1; 844 } 845 #endif 846 } else { 847 nb_rets = 0; 848 } 849 real_args = 0; 850 for (i = 0; i < nargs; i++) { 851 int is_64bit = sizemask & (1 << (i+1)*2); 852 if (TCG_TARGET_REG_BITS < 64 && is_64bit) { 853 #ifdef TCG_TARGET_CALL_ALIGN_ARGS 854 /* some targets want aligned 64 bit args */ 855 if (real_args & 1) { 856 s->gen_opparam_buf[pi++] = TCG_CALL_DUMMY_ARG; 857 real_args++; 858 } 859 #endif 860 /* If stack grows up, then we will be placing successive 861 arguments at lower addresses, which means we need to 862 reverse the order compared to how we would normally 863 treat either big or little-endian. For those arguments 864 that will wind up in registers, this still works for 865 HPPA (the only current STACK_GROWSUP target) since the 866 argument registers are *also* allocated in decreasing 867 order. If another such target is added, this logic may 868 have to get more complicated to differentiate between 869 stack arguments and register arguments. */ 870 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) 871 s->gen_opparam_buf[pi++] = args[i] + 1; 872 s->gen_opparam_buf[pi++] = args[i]; 873 #else 874 s->gen_opparam_buf[pi++] = args[i]; 875 s->gen_opparam_buf[pi++] = args[i] + 1; 876 #endif 877 real_args += 2; 878 continue; 879 } 880 881 s->gen_opparam_buf[pi++] = args[i]; 882 real_args++; 883 } 884 s->gen_opparam_buf[pi++] = (uintptr_t)func; 885 s->gen_opparam_buf[pi++] = flags; 886 887 i = s->gen_next_op_idx; 888 tcg_debug_assert(i < OPC_BUF_SIZE); 889 tcg_debug_assert(pi <= OPPARAM_BUF_SIZE); 890 891 /* Set links for sequential allocation during translation. */ 892 s->gen_op_buf[i] = (TCGOp){ 893 .opc = INDEX_op_call, 894 .callo = nb_rets, 895 .calli = real_args, 896 .args = pi_first, 897 .prev = i - 1, 898 .next = i + 1 899 }; 900 901 /* Make sure the calli field didn't overflow. */ 902 tcg_debug_assert(s->gen_op_buf[i].calli == real_args); 903 904 s->gen_op_buf[0].prev = i; 905 s->gen_next_op_idx = i + 1; 906 s->gen_next_parm_idx = pi; 907 908 #if defined(__sparc__) && !defined(__arch64__) \ 909 && !defined(CONFIG_TCG_INTERPRETER) 910 /* Free all of the parts we allocated above. */ 911 for (i = real_args = 0; i < orig_nargs; ++i) { 912 int is_64bit = orig_sizemask & (1 << (i+1)*2); 913 if (is_64bit) { 914 TCGv_i32 h = MAKE_TCGV_I32(args[real_args++]); 915 TCGv_i32 l = MAKE_TCGV_I32(args[real_args++]); 916 tcg_temp_free_i32(h); 917 tcg_temp_free_i32(l); 918 } else { 919 real_args++; 920 } 921 } 922 if (orig_sizemask & 1) { 923 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them. 924 Note that describing these as TCGv_i64 eliminates an unnecessary 925 zero-extension that tcg_gen_concat_i32_i64 would create. */ 926 tcg_gen_concat32_i64(MAKE_TCGV_I64(ret), retl, reth); 927 tcg_temp_free_i64(retl); 928 tcg_temp_free_i64(reth); 929 } 930 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 931 for (i = 0; i < nargs; ++i) { 932 int is_64bit = sizemask & (1 << (i+1)*2); 933 if (!is_64bit) { 934 TCGv_i64 temp = MAKE_TCGV_I64(args[i]); 935 tcg_temp_free_i64(temp); 936 } 937 } 938 #endif /* TCG_TARGET_EXTEND_ARGS */ 939 } 940 941 static void tcg_reg_alloc_start(TCGContext *s) 942 { 943 int i; 944 TCGTemp *ts; 945 for(i = 0; i < s->nb_globals; i++) { 946 ts = &s->temps[i]; 947 if (ts->fixed_reg) { 948 ts->val_type = TEMP_VAL_REG; 949 } else { 950 ts->val_type = TEMP_VAL_MEM; 951 } 952 } 953 for(i = s->nb_globals; i < s->nb_temps; i++) { 954 ts = &s->temps[i]; 955 if (ts->temp_local) { 956 ts->val_type = TEMP_VAL_MEM; 957 } else { 958 ts->val_type = TEMP_VAL_DEAD; 959 } 960 ts->mem_allocated = 0; 961 ts->fixed_reg = 0; 962 } 963 964 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 965 } 966 967 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 968 TCGTemp *ts) 969 { 970 int idx = temp_idx(s, ts); 971 972 if (idx < s->nb_globals) { 973 pstrcpy(buf, buf_size, ts->name); 974 } else if (ts->temp_local) { 975 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 976 } else { 977 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 978 } 979 return buf; 980 } 981 982 static char *tcg_get_arg_str_idx(TCGContext *s, char *buf, 983 int buf_size, int idx) 984 { 985 tcg_debug_assert(idx >= 0 && idx < s->nb_temps); 986 return tcg_get_arg_str_ptr(s, buf, buf_size, &s->temps[idx]); 987 } 988 989 /* Find helper name. */ 990 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val) 991 { 992 const char *ret = NULL; 993 if (s->helpers) { 994 TCGHelperInfo *info = g_hash_table_lookup(s->helpers, (gpointer)val); 995 if (info) { 996 ret = info->name; 997 } 998 } 999 return ret; 1000 } 1001 1002 static const char * const cond_name[] = 1003 { 1004 [TCG_COND_NEVER] = "never", 1005 [TCG_COND_ALWAYS] = "always", 1006 [TCG_COND_EQ] = "eq", 1007 [TCG_COND_NE] = "ne", 1008 [TCG_COND_LT] = "lt", 1009 [TCG_COND_GE] = "ge", 1010 [TCG_COND_LE] = "le", 1011 [TCG_COND_GT] = "gt", 1012 [TCG_COND_LTU] = "ltu", 1013 [TCG_COND_GEU] = "geu", 1014 [TCG_COND_LEU] = "leu", 1015 [TCG_COND_GTU] = "gtu" 1016 }; 1017 1018 static const char * const ldst_name[] = 1019 { 1020 [MO_UB] = "ub", 1021 [MO_SB] = "sb", 1022 [MO_LEUW] = "leuw", 1023 [MO_LESW] = "lesw", 1024 [MO_LEUL] = "leul", 1025 [MO_LESL] = "lesl", 1026 [MO_LEQ] = "leq", 1027 [MO_BEUW] = "beuw", 1028 [MO_BESW] = "besw", 1029 [MO_BEUL] = "beul", 1030 [MO_BESL] = "besl", 1031 [MO_BEQ] = "beq", 1032 }; 1033 1034 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 1035 #ifdef ALIGNED_ONLY 1036 [MO_UNALN >> MO_ASHIFT] = "un+", 1037 [MO_ALIGN >> MO_ASHIFT] = "", 1038 #else 1039 [MO_UNALN >> MO_ASHIFT] = "", 1040 [MO_ALIGN >> MO_ASHIFT] = "al+", 1041 #endif 1042 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 1043 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 1044 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 1045 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 1046 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 1047 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 1048 }; 1049 1050 void tcg_dump_ops(TCGContext *s) 1051 { 1052 char buf[128]; 1053 TCGOp *op; 1054 int oi; 1055 1056 for (oi = s->gen_op_buf[0].next; oi != 0; oi = op->next) { 1057 int i, k, nb_oargs, nb_iargs, nb_cargs; 1058 const TCGOpDef *def; 1059 const TCGArg *args; 1060 TCGOpcode c; 1061 int col = 0; 1062 1063 op = &s->gen_op_buf[oi]; 1064 c = op->opc; 1065 def = &tcg_op_defs[c]; 1066 args = &s->gen_opparam_buf[op->args]; 1067 1068 if (c == INDEX_op_insn_start) { 1069 col += qemu_log("%s ----", oi != s->gen_op_buf[0].next ? "\n" : ""); 1070 1071 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 1072 target_ulong a; 1073 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 1074 a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2]; 1075 #else 1076 a = args[i]; 1077 #endif 1078 col += qemu_log(" " TARGET_FMT_lx, a); 1079 } 1080 } else if (c == INDEX_op_call) { 1081 /* variable number of arguments */ 1082 nb_oargs = op->callo; 1083 nb_iargs = op->calli; 1084 nb_cargs = def->nb_cargs; 1085 1086 /* function name, flags, out args */ 1087 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name, 1088 tcg_find_helper(s, args[nb_oargs + nb_iargs]), 1089 args[nb_oargs + nb_iargs + 1], nb_oargs); 1090 for (i = 0; i < nb_oargs; i++) { 1091 col += qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), 1092 args[i])); 1093 } 1094 for (i = 0; i < nb_iargs; i++) { 1095 TCGArg arg = args[nb_oargs + i]; 1096 const char *t = "<dummy>"; 1097 if (arg != TCG_CALL_DUMMY_ARG) { 1098 t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg); 1099 } 1100 col += qemu_log(",%s", t); 1101 } 1102 } else { 1103 col += qemu_log(" %s ", def->name); 1104 1105 nb_oargs = def->nb_oargs; 1106 nb_iargs = def->nb_iargs; 1107 nb_cargs = def->nb_cargs; 1108 1109 k = 0; 1110 for (i = 0; i < nb_oargs; i++) { 1111 if (k != 0) { 1112 col += qemu_log(","); 1113 } 1114 col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), 1115 args[k++])); 1116 } 1117 for (i = 0; i < nb_iargs; i++) { 1118 if (k != 0) { 1119 col += qemu_log(","); 1120 } 1121 col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), 1122 args[k++])); 1123 } 1124 switch (c) { 1125 case INDEX_op_brcond_i32: 1126 case INDEX_op_setcond_i32: 1127 case INDEX_op_movcond_i32: 1128 case INDEX_op_brcond2_i32: 1129 case INDEX_op_setcond2_i32: 1130 case INDEX_op_brcond_i64: 1131 case INDEX_op_setcond_i64: 1132 case INDEX_op_movcond_i64: 1133 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) { 1134 col += qemu_log(",%s", cond_name[args[k++]]); 1135 } else { 1136 col += qemu_log(",$0x%" TCG_PRIlx, args[k++]); 1137 } 1138 i = 1; 1139 break; 1140 case INDEX_op_qemu_ld_i32: 1141 case INDEX_op_qemu_st_i32: 1142 case INDEX_op_qemu_ld_i64: 1143 case INDEX_op_qemu_st_i64: 1144 { 1145 TCGMemOpIdx oi = args[k++]; 1146 TCGMemOp op = get_memop(oi); 1147 unsigned ix = get_mmuidx(oi); 1148 1149 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 1150 col += qemu_log(",$0x%x,%u", op, ix); 1151 } else { 1152 const char *s_al, *s_op; 1153 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 1154 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 1155 col += qemu_log(",%s%s,%u", s_al, s_op, ix); 1156 } 1157 i = 1; 1158 } 1159 break; 1160 default: 1161 i = 0; 1162 break; 1163 } 1164 switch (c) { 1165 case INDEX_op_set_label: 1166 case INDEX_op_br: 1167 case INDEX_op_brcond_i32: 1168 case INDEX_op_brcond_i64: 1169 case INDEX_op_brcond2_i32: 1170 col += qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id); 1171 i++, k++; 1172 break; 1173 default: 1174 break; 1175 } 1176 for (; i < nb_cargs; i++, k++) { 1177 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]); 1178 } 1179 } 1180 if (op->life) { 1181 unsigned life = op->life; 1182 1183 for (; col < 48; ++col) { 1184 putc(' ', qemu_logfile); 1185 } 1186 1187 if (life & (SYNC_ARG * 3)) { 1188 qemu_log(" sync:"); 1189 for (i = 0; i < 2; ++i) { 1190 if (life & (SYNC_ARG << i)) { 1191 qemu_log(" %d", i); 1192 } 1193 } 1194 } 1195 life /= DEAD_ARG; 1196 if (life) { 1197 qemu_log(" dead:"); 1198 for (i = 0; life; ++i, life >>= 1) { 1199 if (life & 1) { 1200 qemu_log(" %d", i); 1201 } 1202 } 1203 } 1204 } 1205 qemu_log("\n"); 1206 } 1207 } 1208 1209 /* we give more priority to constraints with less registers */ 1210 static int get_constraint_priority(const TCGOpDef *def, int k) 1211 { 1212 const TCGArgConstraint *arg_ct; 1213 1214 int i, n; 1215 arg_ct = &def->args_ct[k]; 1216 if (arg_ct->ct & TCG_CT_ALIAS) { 1217 /* an alias is equivalent to a single register */ 1218 n = 1; 1219 } else { 1220 if (!(arg_ct->ct & TCG_CT_REG)) 1221 return 0; 1222 n = 0; 1223 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 1224 if (tcg_regset_test_reg(arg_ct->u.regs, i)) 1225 n++; 1226 } 1227 } 1228 return TCG_TARGET_NB_REGS - n + 1; 1229 } 1230 1231 /* sort from highest priority to lowest */ 1232 static void sort_constraints(TCGOpDef *def, int start, int n) 1233 { 1234 int i, j, p1, p2, tmp; 1235 1236 for(i = 0; i < n; i++) 1237 def->sorted_args[start + i] = start + i; 1238 if (n <= 1) 1239 return; 1240 for(i = 0; i < n - 1; i++) { 1241 for(j = i + 1; j < n; j++) { 1242 p1 = get_constraint_priority(def, def->sorted_args[start + i]); 1243 p2 = get_constraint_priority(def, def->sorted_args[start + j]); 1244 if (p1 < p2) { 1245 tmp = def->sorted_args[start + i]; 1246 def->sorted_args[start + i] = def->sorted_args[start + j]; 1247 def->sorted_args[start + j] = tmp; 1248 } 1249 } 1250 } 1251 } 1252 1253 static void process_op_defs(TCGContext *s) 1254 { 1255 TCGOpcode op; 1256 1257 for (op = 0; op < NB_OPS; op++) { 1258 TCGOpDef *def = &tcg_op_defs[op]; 1259 const TCGTargetOpDef *tdefs; 1260 TCGType type; 1261 int i, nb_args; 1262 1263 if (def->flags & TCG_OPF_NOT_PRESENT) { 1264 continue; 1265 } 1266 1267 nb_args = def->nb_iargs + def->nb_oargs; 1268 if (nb_args == 0) { 1269 continue; 1270 } 1271 1272 tdefs = tcg_target_op_def(op); 1273 /* Missing TCGTargetOpDef entry. */ 1274 tcg_debug_assert(tdefs != NULL); 1275 1276 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32); 1277 for (i = 0; i < nb_args; i++) { 1278 const char *ct_str = tdefs->args_ct_str[i]; 1279 /* Incomplete TCGTargetOpDef entry. */ 1280 tcg_debug_assert(ct_str != NULL); 1281 1282 tcg_regset_clear(def->args_ct[i].u.regs); 1283 def->args_ct[i].ct = 0; 1284 while (*ct_str != '\0') { 1285 switch(*ct_str) { 1286 case '0' ... '9': 1287 { 1288 int oarg = *ct_str - '0'; 1289 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); 1290 tcg_debug_assert(oarg < def->nb_oargs); 1291 tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG); 1292 /* TCG_CT_ALIAS is for the output arguments. 1293 The input is tagged with TCG_CT_IALIAS. */ 1294 def->args_ct[i] = def->args_ct[oarg]; 1295 def->args_ct[oarg].ct |= TCG_CT_ALIAS; 1296 def->args_ct[oarg].alias_index = i; 1297 def->args_ct[i].ct |= TCG_CT_IALIAS; 1298 def->args_ct[i].alias_index = oarg; 1299 } 1300 ct_str++; 1301 break; 1302 case '&': 1303 def->args_ct[i].ct |= TCG_CT_NEWREG; 1304 ct_str++; 1305 break; 1306 case 'i': 1307 def->args_ct[i].ct |= TCG_CT_CONST; 1308 ct_str++; 1309 break; 1310 default: 1311 ct_str = target_parse_constraint(&def->args_ct[i], 1312 ct_str, type); 1313 /* Typo in TCGTargetOpDef constraint. */ 1314 tcg_debug_assert(ct_str != NULL); 1315 } 1316 } 1317 } 1318 1319 /* TCGTargetOpDef entry with too much information? */ 1320 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 1321 1322 /* sort the constraints (XXX: this is just an heuristic) */ 1323 sort_constraints(def, 0, def->nb_oargs); 1324 sort_constraints(def, def->nb_oargs, def->nb_iargs); 1325 } 1326 } 1327 1328 void tcg_op_remove(TCGContext *s, TCGOp *op) 1329 { 1330 int next = op->next; 1331 int prev = op->prev; 1332 1333 /* We should never attempt to remove the list terminator. */ 1334 tcg_debug_assert(op != &s->gen_op_buf[0]); 1335 1336 s->gen_op_buf[next].prev = prev; 1337 s->gen_op_buf[prev].next = next; 1338 1339 memset(op, 0, sizeof(*op)); 1340 1341 #ifdef CONFIG_PROFILER 1342 s->del_op_count++; 1343 #endif 1344 } 1345 1346 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 1347 TCGOpcode opc, int nargs) 1348 { 1349 int oi = s->gen_next_op_idx; 1350 int pi = s->gen_next_parm_idx; 1351 int prev = old_op->prev; 1352 int next = old_op - s->gen_op_buf; 1353 TCGOp *new_op; 1354 1355 tcg_debug_assert(oi < OPC_BUF_SIZE); 1356 tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE); 1357 s->gen_next_op_idx = oi + 1; 1358 s->gen_next_parm_idx = pi + nargs; 1359 1360 new_op = &s->gen_op_buf[oi]; 1361 *new_op = (TCGOp){ 1362 .opc = opc, 1363 .args = pi, 1364 .prev = prev, 1365 .next = next 1366 }; 1367 s->gen_op_buf[prev].next = oi; 1368 old_op->prev = oi; 1369 1370 return new_op; 1371 } 1372 1373 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 1374 TCGOpcode opc, int nargs) 1375 { 1376 int oi = s->gen_next_op_idx; 1377 int pi = s->gen_next_parm_idx; 1378 int prev = old_op - s->gen_op_buf; 1379 int next = old_op->next; 1380 TCGOp *new_op; 1381 1382 tcg_debug_assert(oi < OPC_BUF_SIZE); 1383 tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE); 1384 s->gen_next_op_idx = oi + 1; 1385 s->gen_next_parm_idx = pi + nargs; 1386 1387 new_op = &s->gen_op_buf[oi]; 1388 *new_op = (TCGOp){ 1389 .opc = opc, 1390 .args = pi, 1391 .prev = prev, 1392 .next = next 1393 }; 1394 s->gen_op_buf[next].prev = oi; 1395 old_op->next = oi; 1396 1397 return new_op; 1398 } 1399 1400 #define TS_DEAD 1 1401 #define TS_MEM 2 1402 1403 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 1404 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 1405 1406 /* liveness analysis: end of function: all temps are dead, and globals 1407 should be in memory. */ 1408 static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state) 1409 { 1410 memset(temp_state, TS_DEAD | TS_MEM, s->nb_globals); 1411 memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals); 1412 } 1413 1414 /* liveness analysis: end of basic block: all temps are dead, globals 1415 and local temps should be in memory. */ 1416 static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state) 1417 { 1418 int i, n; 1419 1420 tcg_la_func_end(s, temp_state); 1421 for (i = s->nb_globals, n = s->nb_temps; i < n; i++) { 1422 if (s->temps[i].temp_local) { 1423 temp_state[i] |= TS_MEM; 1424 } 1425 } 1426 } 1427 1428 /* Liveness analysis : update the opc_arg_life array to tell if a 1429 given input arguments is dead. Instructions updating dead 1430 temporaries are removed. */ 1431 static void liveness_pass_1(TCGContext *s, uint8_t *temp_state) 1432 { 1433 int nb_globals = s->nb_globals; 1434 int oi, oi_prev; 1435 1436 tcg_la_func_end(s, temp_state); 1437 1438 for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) { 1439 int i, nb_iargs, nb_oargs; 1440 TCGOpcode opc_new, opc_new2; 1441 bool have_opc_new2; 1442 TCGLifeData arg_life = 0; 1443 TCGArg arg; 1444 1445 TCGOp * const op = &s->gen_op_buf[oi]; 1446 TCGArg * const args = &s->gen_opparam_buf[op->args]; 1447 TCGOpcode opc = op->opc; 1448 const TCGOpDef *def = &tcg_op_defs[opc]; 1449 1450 oi_prev = op->prev; 1451 1452 switch (opc) { 1453 case INDEX_op_call: 1454 { 1455 int call_flags; 1456 1457 nb_oargs = op->callo; 1458 nb_iargs = op->calli; 1459 call_flags = args[nb_oargs + nb_iargs + 1]; 1460 1461 /* pure functions can be removed if their result is unused */ 1462 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 1463 for (i = 0; i < nb_oargs; i++) { 1464 arg = args[i]; 1465 if (temp_state[arg] != TS_DEAD) { 1466 goto do_not_remove_call; 1467 } 1468 } 1469 goto do_remove; 1470 } else { 1471 do_not_remove_call: 1472 1473 /* output args are dead */ 1474 for (i = 0; i < nb_oargs; i++) { 1475 arg = args[i]; 1476 if (temp_state[arg] & TS_DEAD) { 1477 arg_life |= DEAD_ARG << i; 1478 } 1479 if (temp_state[arg] & TS_MEM) { 1480 arg_life |= SYNC_ARG << i; 1481 } 1482 temp_state[arg] = TS_DEAD; 1483 } 1484 1485 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 1486 TCG_CALL_NO_READ_GLOBALS))) { 1487 /* globals should go back to memory */ 1488 memset(temp_state, TS_DEAD | TS_MEM, nb_globals); 1489 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 1490 /* globals should be synced to memory */ 1491 for (i = 0; i < nb_globals; i++) { 1492 temp_state[i] |= TS_MEM; 1493 } 1494 } 1495 1496 /* record arguments that die in this helper */ 1497 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 1498 arg = args[i]; 1499 if (arg != TCG_CALL_DUMMY_ARG) { 1500 if (temp_state[arg] & TS_DEAD) { 1501 arg_life |= DEAD_ARG << i; 1502 } 1503 } 1504 } 1505 /* input arguments are live for preceding opcodes */ 1506 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 1507 arg = args[i]; 1508 if (arg != TCG_CALL_DUMMY_ARG) { 1509 temp_state[arg] &= ~TS_DEAD; 1510 } 1511 } 1512 } 1513 } 1514 break; 1515 case INDEX_op_insn_start: 1516 break; 1517 case INDEX_op_discard: 1518 /* mark the temporary as dead */ 1519 temp_state[args[0]] = TS_DEAD; 1520 break; 1521 1522 case INDEX_op_add2_i32: 1523 opc_new = INDEX_op_add_i32; 1524 goto do_addsub2; 1525 case INDEX_op_sub2_i32: 1526 opc_new = INDEX_op_sub_i32; 1527 goto do_addsub2; 1528 case INDEX_op_add2_i64: 1529 opc_new = INDEX_op_add_i64; 1530 goto do_addsub2; 1531 case INDEX_op_sub2_i64: 1532 opc_new = INDEX_op_sub_i64; 1533 do_addsub2: 1534 nb_iargs = 4; 1535 nb_oargs = 2; 1536 /* Test if the high part of the operation is dead, but not 1537 the low part. The result can be optimized to a simple 1538 add or sub. This happens often for x86_64 guest when the 1539 cpu mode is set to 32 bit. */ 1540 if (temp_state[args[1]] == TS_DEAD) { 1541 if (temp_state[args[0]] == TS_DEAD) { 1542 goto do_remove; 1543 } 1544 /* Replace the opcode and adjust the args in place, 1545 leaving 3 unused args at the end. */ 1546 op->opc = opc = opc_new; 1547 args[1] = args[2]; 1548 args[2] = args[4]; 1549 /* Fall through and mark the single-word operation live. */ 1550 nb_iargs = 2; 1551 nb_oargs = 1; 1552 } 1553 goto do_not_remove; 1554 1555 case INDEX_op_mulu2_i32: 1556 opc_new = INDEX_op_mul_i32; 1557 opc_new2 = INDEX_op_muluh_i32; 1558 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 1559 goto do_mul2; 1560 case INDEX_op_muls2_i32: 1561 opc_new = INDEX_op_mul_i32; 1562 opc_new2 = INDEX_op_mulsh_i32; 1563 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 1564 goto do_mul2; 1565 case INDEX_op_mulu2_i64: 1566 opc_new = INDEX_op_mul_i64; 1567 opc_new2 = INDEX_op_muluh_i64; 1568 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 1569 goto do_mul2; 1570 case INDEX_op_muls2_i64: 1571 opc_new = INDEX_op_mul_i64; 1572 opc_new2 = INDEX_op_mulsh_i64; 1573 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 1574 goto do_mul2; 1575 do_mul2: 1576 nb_iargs = 2; 1577 nb_oargs = 2; 1578 if (temp_state[args[1]] == TS_DEAD) { 1579 if (temp_state[args[0]] == TS_DEAD) { 1580 /* Both parts of the operation are dead. */ 1581 goto do_remove; 1582 } 1583 /* The high part of the operation is dead; generate the low. */ 1584 op->opc = opc = opc_new; 1585 args[1] = args[2]; 1586 args[2] = args[3]; 1587 } else if (temp_state[args[0]] == TS_DEAD && have_opc_new2) { 1588 /* The low part of the operation is dead; generate the high. */ 1589 op->opc = opc = opc_new2; 1590 args[0] = args[1]; 1591 args[1] = args[2]; 1592 args[2] = args[3]; 1593 } else { 1594 goto do_not_remove; 1595 } 1596 /* Mark the single-word operation live. */ 1597 nb_oargs = 1; 1598 goto do_not_remove; 1599 1600 default: 1601 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 1602 nb_iargs = def->nb_iargs; 1603 nb_oargs = def->nb_oargs; 1604 1605 /* Test if the operation can be removed because all 1606 its outputs are dead. We assume that nb_oargs == 0 1607 implies side effects */ 1608 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 1609 for (i = 0; i < nb_oargs; i++) { 1610 if (temp_state[args[i]] != TS_DEAD) { 1611 goto do_not_remove; 1612 } 1613 } 1614 do_remove: 1615 tcg_op_remove(s, op); 1616 } else { 1617 do_not_remove: 1618 /* output args are dead */ 1619 for (i = 0; i < nb_oargs; i++) { 1620 arg = args[i]; 1621 if (temp_state[arg] & TS_DEAD) { 1622 arg_life |= DEAD_ARG << i; 1623 } 1624 if (temp_state[arg] & TS_MEM) { 1625 arg_life |= SYNC_ARG << i; 1626 } 1627 temp_state[arg] = TS_DEAD; 1628 } 1629 1630 /* if end of basic block, update */ 1631 if (def->flags & TCG_OPF_BB_END) { 1632 tcg_la_bb_end(s, temp_state); 1633 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 1634 /* globals should be synced to memory */ 1635 for (i = 0; i < nb_globals; i++) { 1636 temp_state[i] |= TS_MEM; 1637 } 1638 } 1639 1640 /* record arguments that die in this opcode */ 1641 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 1642 arg = args[i]; 1643 if (temp_state[arg] & TS_DEAD) { 1644 arg_life |= DEAD_ARG << i; 1645 } 1646 } 1647 /* input arguments are live for preceding opcodes */ 1648 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 1649 temp_state[args[i]] &= ~TS_DEAD; 1650 } 1651 } 1652 break; 1653 } 1654 op->life = arg_life; 1655 } 1656 } 1657 1658 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 1659 static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state) 1660 { 1661 int nb_globals = s->nb_globals; 1662 int16_t *dir_temps; 1663 int i, oi, oi_next; 1664 bool changes = false; 1665 1666 dir_temps = tcg_malloc(nb_globals * sizeof(int16_t)); 1667 memset(dir_temps, 0, nb_globals * sizeof(int16_t)); 1668 1669 /* Create a temporary for each indirect global. */ 1670 for (i = 0; i < nb_globals; ++i) { 1671 TCGTemp *its = &s->temps[i]; 1672 if (its->indirect_reg) { 1673 TCGTemp *dts = tcg_temp_alloc(s); 1674 dts->type = its->type; 1675 dts->base_type = its->base_type; 1676 dir_temps[i] = temp_idx(s, dts); 1677 } 1678 } 1679 1680 memset(temp_state, TS_DEAD, nb_globals); 1681 1682 for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) { 1683 TCGOp *op = &s->gen_op_buf[oi]; 1684 TCGArg *args = &s->gen_opparam_buf[op->args]; 1685 TCGOpcode opc = op->opc; 1686 const TCGOpDef *def = &tcg_op_defs[opc]; 1687 TCGLifeData arg_life = op->life; 1688 int nb_iargs, nb_oargs, call_flags; 1689 TCGArg arg, dir; 1690 1691 oi_next = op->next; 1692 1693 if (opc == INDEX_op_call) { 1694 nb_oargs = op->callo; 1695 nb_iargs = op->calli; 1696 call_flags = args[nb_oargs + nb_iargs + 1]; 1697 } else { 1698 nb_iargs = def->nb_iargs; 1699 nb_oargs = def->nb_oargs; 1700 1701 /* Set flags similar to how calls require. */ 1702 if (def->flags & TCG_OPF_BB_END) { 1703 /* Like writing globals: save_globals */ 1704 call_flags = 0; 1705 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 1706 /* Like reading globals: sync_globals */ 1707 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 1708 } else { 1709 /* No effect on globals. */ 1710 call_flags = (TCG_CALL_NO_READ_GLOBALS | 1711 TCG_CALL_NO_WRITE_GLOBALS); 1712 } 1713 } 1714 1715 /* Make sure that input arguments are available. */ 1716 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 1717 arg = args[i]; 1718 /* Note this unsigned test catches TCG_CALL_ARG_DUMMY too. */ 1719 if (arg < nb_globals) { 1720 dir = dir_temps[arg]; 1721 if (dir != 0 && temp_state[arg] == TS_DEAD) { 1722 TCGTemp *its = &s->temps[arg]; 1723 TCGOpcode lopc = (its->type == TCG_TYPE_I32 1724 ? INDEX_op_ld_i32 1725 : INDEX_op_ld_i64); 1726 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); 1727 TCGArg *largs = &s->gen_opparam_buf[lop->args]; 1728 1729 largs[0] = dir; 1730 largs[1] = temp_idx(s, its->mem_base); 1731 largs[2] = its->mem_offset; 1732 1733 /* Loaded, but synced with memory. */ 1734 temp_state[arg] = TS_MEM; 1735 } 1736 } 1737 } 1738 1739 /* Perform input replacement, and mark inputs that became dead. 1740 No action is required except keeping temp_state up to date 1741 so that we reload when needed. */ 1742 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 1743 arg = args[i]; 1744 if (arg < nb_globals) { 1745 dir = dir_temps[arg]; 1746 if (dir != 0) { 1747 args[i] = dir; 1748 changes = true; 1749 if (IS_DEAD_ARG(i)) { 1750 temp_state[arg] = TS_DEAD; 1751 } 1752 } 1753 } 1754 } 1755 1756 /* Liveness analysis should ensure that the following are 1757 all correct, for call sites and basic block end points. */ 1758 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 1759 /* Nothing to do */ 1760 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 1761 for (i = 0; i < nb_globals; ++i) { 1762 /* Liveness should see that globals are synced back, 1763 that is, either TS_DEAD or TS_MEM. */ 1764 tcg_debug_assert(dir_temps[i] == 0 1765 || temp_state[i] != 0); 1766 } 1767 } else { 1768 for (i = 0; i < nb_globals; ++i) { 1769 /* Liveness should see that globals are saved back, 1770 that is, TS_DEAD, waiting to be reloaded. */ 1771 tcg_debug_assert(dir_temps[i] == 0 1772 || temp_state[i] == TS_DEAD); 1773 } 1774 } 1775 1776 /* Outputs become available. */ 1777 for (i = 0; i < nb_oargs; i++) { 1778 arg = args[i]; 1779 if (arg >= nb_globals) { 1780 continue; 1781 } 1782 dir = dir_temps[arg]; 1783 if (dir == 0) { 1784 continue; 1785 } 1786 args[i] = dir; 1787 changes = true; 1788 1789 /* The output is now live and modified. */ 1790 temp_state[arg] = 0; 1791 1792 /* Sync outputs upon their last write. */ 1793 if (NEED_SYNC_ARG(i)) { 1794 TCGTemp *its = &s->temps[arg]; 1795 TCGOpcode sopc = (its->type == TCG_TYPE_I32 1796 ? INDEX_op_st_i32 1797 : INDEX_op_st_i64); 1798 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 1799 TCGArg *sargs = &s->gen_opparam_buf[sop->args]; 1800 1801 sargs[0] = dir; 1802 sargs[1] = temp_idx(s, its->mem_base); 1803 sargs[2] = its->mem_offset; 1804 1805 temp_state[arg] = TS_MEM; 1806 } 1807 /* Drop outputs that are dead. */ 1808 if (IS_DEAD_ARG(i)) { 1809 temp_state[arg] = TS_DEAD; 1810 } 1811 } 1812 } 1813 1814 return changes; 1815 } 1816 1817 #ifdef CONFIG_DEBUG_TCG 1818 static void dump_regs(TCGContext *s) 1819 { 1820 TCGTemp *ts; 1821 int i; 1822 char buf[64]; 1823 1824 for(i = 0; i < s->nb_temps; i++) { 1825 ts = &s->temps[i]; 1826 printf(" %10s: ", tcg_get_arg_str_idx(s, buf, sizeof(buf), i)); 1827 switch(ts->val_type) { 1828 case TEMP_VAL_REG: 1829 printf("%s", tcg_target_reg_names[ts->reg]); 1830 break; 1831 case TEMP_VAL_MEM: 1832 printf("%d(%s)", (int)ts->mem_offset, 1833 tcg_target_reg_names[ts->mem_base->reg]); 1834 break; 1835 case TEMP_VAL_CONST: 1836 printf("$0x%" TCG_PRIlx, ts->val); 1837 break; 1838 case TEMP_VAL_DEAD: 1839 printf("D"); 1840 break; 1841 default: 1842 printf("???"); 1843 break; 1844 } 1845 printf("\n"); 1846 } 1847 1848 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 1849 if (s->reg_to_temp[i] != NULL) { 1850 printf("%s: %s\n", 1851 tcg_target_reg_names[i], 1852 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); 1853 } 1854 } 1855 } 1856 1857 static void check_regs(TCGContext *s) 1858 { 1859 int reg; 1860 int k; 1861 TCGTemp *ts; 1862 char buf[64]; 1863 1864 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { 1865 ts = s->reg_to_temp[reg]; 1866 if (ts != NULL) { 1867 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { 1868 printf("Inconsistency for register %s:\n", 1869 tcg_target_reg_names[reg]); 1870 goto fail; 1871 } 1872 } 1873 } 1874 for (k = 0; k < s->nb_temps; k++) { 1875 ts = &s->temps[k]; 1876 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg 1877 && s->reg_to_temp[ts->reg] != ts) { 1878 printf("Inconsistency for temp %s:\n", 1879 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 1880 fail: 1881 printf("reg state:\n"); 1882 dump_regs(s); 1883 tcg_abort(); 1884 } 1885 } 1886 } 1887 #endif 1888 1889 static void temp_allocate_frame(TCGContext *s, int temp) 1890 { 1891 TCGTemp *ts; 1892 ts = &s->temps[temp]; 1893 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64) 1894 /* Sparc64 stack is accessed with offset of 2047 */ 1895 s->current_frame_offset = (s->current_frame_offset + 1896 (tcg_target_long)sizeof(tcg_target_long) - 1) & 1897 ~(sizeof(tcg_target_long) - 1); 1898 #endif 1899 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) > 1900 s->frame_end) { 1901 tcg_abort(); 1902 } 1903 ts->mem_offset = s->current_frame_offset; 1904 ts->mem_base = s->frame_temp; 1905 ts->mem_allocated = 1; 1906 s->current_frame_offset += sizeof(tcg_target_long); 1907 } 1908 1909 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet); 1910 1911 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 1912 mark it free; otherwise mark it dead. */ 1913 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 1914 { 1915 if (ts->fixed_reg) { 1916 return; 1917 } 1918 if (ts->val_type == TEMP_VAL_REG) { 1919 s->reg_to_temp[ts->reg] = NULL; 1920 } 1921 ts->val_type = (free_or_dead < 0 1922 || ts->temp_local 1923 || temp_idx(s, ts) < s->nb_globals 1924 ? TEMP_VAL_MEM : TEMP_VAL_DEAD); 1925 } 1926 1927 /* Mark a temporary as dead. */ 1928 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 1929 { 1930 temp_free_or_dead(s, ts, 1); 1931 } 1932 1933 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 1934 registers needs to be allocated to store a constant. If 'free_or_dead' 1935 is non-zero, subsequently release the temporary; if it is positive, the 1936 temp is dead; if it is negative, the temp is free. */ 1937 static void temp_sync(TCGContext *s, TCGTemp *ts, 1938 TCGRegSet allocated_regs, int free_or_dead) 1939 { 1940 if (ts->fixed_reg) { 1941 return; 1942 } 1943 if (!ts->mem_coherent) { 1944 if (!ts->mem_allocated) { 1945 temp_allocate_frame(s, temp_idx(s, ts)); 1946 } 1947 switch (ts->val_type) { 1948 case TEMP_VAL_CONST: 1949 /* If we're going to free the temp immediately, then we won't 1950 require it later in a register, so attempt to store the 1951 constant to memory directly. */ 1952 if (free_or_dead 1953 && tcg_out_sti(s, ts->type, ts->val, 1954 ts->mem_base->reg, ts->mem_offset)) { 1955 break; 1956 } 1957 temp_load(s, ts, tcg_target_available_regs[ts->type], 1958 allocated_regs); 1959 /* fallthrough */ 1960 1961 case TEMP_VAL_REG: 1962 tcg_out_st(s, ts->type, ts->reg, 1963 ts->mem_base->reg, ts->mem_offset); 1964 break; 1965 1966 case TEMP_VAL_MEM: 1967 break; 1968 1969 case TEMP_VAL_DEAD: 1970 default: 1971 tcg_abort(); 1972 } 1973 ts->mem_coherent = 1; 1974 } 1975 if (free_or_dead) { 1976 temp_free_or_dead(s, ts, free_or_dead); 1977 } 1978 } 1979 1980 /* free register 'reg' by spilling the corresponding temporary if necessary */ 1981 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 1982 { 1983 TCGTemp *ts = s->reg_to_temp[reg]; 1984 if (ts != NULL) { 1985 temp_sync(s, ts, allocated_regs, -1); 1986 } 1987 } 1988 1989 /* Allocate a register belonging to reg1 & ~reg2 */ 1990 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs, 1991 TCGRegSet allocated_regs, bool rev) 1992 { 1993 int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 1994 const int *order; 1995 TCGReg reg; 1996 TCGRegSet reg_ct; 1997 1998 tcg_regset_andnot(reg_ct, desired_regs, allocated_regs); 1999 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 2000 2001 /* first try free registers */ 2002 for(i = 0; i < n; i++) { 2003 reg = order[i]; 2004 if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL) 2005 return reg; 2006 } 2007 2008 /* XXX: do better spill choice */ 2009 for(i = 0; i < n; i++) { 2010 reg = order[i]; 2011 if (tcg_regset_test_reg(reg_ct, reg)) { 2012 tcg_reg_free(s, reg, allocated_regs); 2013 return reg; 2014 } 2015 } 2016 2017 tcg_abort(); 2018 } 2019 2020 /* Make sure the temporary is in a register. If needed, allocate the register 2021 from DESIRED while avoiding ALLOCATED. */ 2022 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 2023 TCGRegSet allocated_regs) 2024 { 2025 TCGReg reg; 2026 2027 switch (ts->val_type) { 2028 case TEMP_VAL_REG: 2029 return; 2030 case TEMP_VAL_CONST: 2031 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base); 2032 tcg_out_movi(s, ts->type, reg, ts->val); 2033 ts->mem_coherent = 0; 2034 break; 2035 case TEMP_VAL_MEM: 2036 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base); 2037 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 2038 ts->mem_coherent = 1; 2039 break; 2040 case TEMP_VAL_DEAD: 2041 default: 2042 tcg_abort(); 2043 } 2044 ts->reg = reg; 2045 ts->val_type = TEMP_VAL_REG; 2046 s->reg_to_temp[reg] = ts; 2047 } 2048 2049 /* Save a temporary to memory. 'allocated_regs' is used in case a 2050 temporary registers needs to be allocated to store a constant. */ 2051 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 2052 { 2053 /* The liveness analysis already ensures that globals are back 2054 in memory. Keep an tcg_debug_assert for safety. */ 2055 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg); 2056 } 2057 2058 /* save globals to their canonical location and assume they can be 2059 modified be the following code. 'allocated_regs' is used in case a 2060 temporary registers needs to be allocated to store a constant. */ 2061 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 2062 { 2063 int i; 2064 2065 for (i = 0; i < s->nb_globals; i++) { 2066 temp_save(s, &s->temps[i], allocated_regs); 2067 } 2068 } 2069 2070 /* sync globals to their canonical location and assume they can be 2071 read by the following code. 'allocated_regs' is used in case a 2072 temporary registers needs to be allocated to store a constant. */ 2073 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 2074 { 2075 int i; 2076 2077 for (i = 0; i < s->nb_globals; i++) { 2078 TCGTemp *ts = &s->temps[i]; 2079 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 2080 || ts->fixed_reg 2081 || ts->mem_coherent); 2082 } 2083 } 2084 2085 /* at the end of a basic block, we assume all temporaries are dead and 2086 all globals are stored at their canonical location. */ 2087 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 2088 { 2089 int i; 2090 2091 for (i = s->nb_globals; i < s->nb_temps; i++) { 2092 TCGTemp *ts = &s->temps[i]; 2093 if (ts->temp_local) { 2094 temp_save(s, ts, allocated_regs); 2095 } else { 2096 /* The liveness analysis already ensures that temps are dead. 2097 Keep an tcg_debug_assert for safety. */ 2098 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 2099 } 2100 } 2101 2102 save_globals(s, allocated_regs); 2103 } 2104 2105 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 2106 tcg_target_ulong val, TCGLifeData arg_life) 2107 { 2108 if (ots->fixed_reg) { 2109 /* For fixed registers, we do not do any constant propagation. */ 2110 tcg_out_movi(s, ots->type, ots->reg, val); 2111 return; 2112 } 2113 2114 /* The movi is not explicitly generated here. */ 2115 if (ots->val_type == TEMP_VAL_REG) { 2116 s->reg_to_temp[ots->reg] = NULL; 2117 } 2118 ots->val_type = TEMP_VAL_CONST; 2119 ots->val = val; 2120 ots->mem_coherent = 0; 2121 if (NEED_SYNC_ARG(0)) { 2122 temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0)); 2123 } else if (IS_DEAD_ARG(0)) { 2124 temp_dead(s, ots); 2125 } 2126 } 2127 2128 static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args, 2129 TCGLifeData arg_life) 2130 { 2131 TCGTemp *ots = &s->temps[args[0]]; 2132 tcg_target_ulong val = args[1]; 2133 2134 tcg_reg_alloc_do_movi(s, ots, val, arg_life); 2135 } 2136 2137 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, 2138 const TCGArg *args, TCGLifeData arg_life) 2139 { 2140 TCGRegSet allocated_regs; 2141 TCGTemp *ts, *ots; 2142 TCGType otype, itype; 2143 2144 tcg_regset_set(allocated_regs, s->reserved_regs); 2145 ots = &s->temps[args[0]]; 2146 ts = &s->temps[args[1]]; 2147 2148 /* Note that otype != itype for no-op truncation. */ 2149 otype = ots->type; 2150 itype = ts->type; 2151 2152 if (ts->val_type == TEMP_VAL_CONST) { 2153 /* propagate constant or generate sti */ 2154 tcg_target_ulong val = ts->val; 2155 if (IS_DEAD_ARG(1)) { 2156 temp_dead(s, ts); 2157 } 2158 tcg_reg_alloc_do_movi(s, ots, val, arg_life); 2159 return; 2160 } 2161 2162 /* If the source value is in memory we're going to be forced 2163 to have it in a register in order to perform the copy. Copy 2164 the SOURCE value into its own register first, that way we 2165 don't have to reload SOURCE the next time it is used. */ 2166 if (ts->val_type == TEMP_VAL_MEM) { 2167 temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs); 2168 } 2169 2170 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 2171 if (IS_DEAD_ARG(0) && !ots->fixed_reg) { 2172 /* mov to a non-saved dead register makes no sense (even with 2173 liveness analysis disabled). */ 2174 tcg_debug_assert(NEED_SYNC_ARG(0)); 2175 if (!ots->mem_allocated) { 2176 temp_allocate_frame(s, args[0]); 2177 } 2178 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); 2179 if (IS_DEAD_ARG(1)) { 2180 temp_dead(s, ts); 2181 } 2182 temp_dead(s, ots); 2183 } else { 2184 if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) { 2185 /* the mov can be suppressed */ 2186 if (ots->val_type == TEMP_VAL_REG) { 2187 s->reg_to_temp[ots->reg] = NULL; 2188 } 2189 ots->reg = ts->reg; 2190 temp_dead(s, ts); 2191 } else { 2192 if (ots->val_type != TEMP_VAL_REG) { 2193 /* When allocating a new register, make sure to not spill the 2194 input one. */ 2195 tcg_regset_set_reg(allocated_regs, ts->reg); 2196 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 2197 allocated_regs, ots->indirect_base); 2198 } 2199 tcg_out_mov(s, otype, ots->reg, ts->reg); 2200 } 2201 ots->val_type = TEMP_VAL_REG; 2202 ots->mem_coherent = 0; 2203 s->reg_to_temp[ots->reg] = ots; 2204 if (NEED_SYNC_ARG(0)) { 2205 temp_sync(s, ots, allocated_regs, 0); 2206 } 2207 } 2208 } 2209 2210 static void tcg_reg_alloc_op(TCGContext *s, 2211 const TCGOpDef *def, TCGOpcode opc, 2212 const TCGArg *args, TCGLifeData arg_life) 2213 { 2214 TCGRegSet i_allocated_regs; 2215 TCGRegSet o_allocated_regs; 2216 int i, k, nb_iargs, nb_oargs; 2217 TCGReg reg; 2218 TCGArg arg; 2219 const TCGArgConstraint *arg_ct; 2220 TCGTemp *ts; 2221 TCGArg new_args[TCG_MAX_OP_ARGS]; 2222 int const_args[TCG_MAX_OP_ARGS]; 2223 2224 nb_oargs = def->nb_oargs; 2225 nb_iargs = def->nb_iargs; 2226 2227 /* copy constants */ 2228 memcpy(new_args + nb_oargs + nb_iargs, 2229 args + nb_oargs + nb_iargs, 2230 sizeof(TCGArg) * def->nb_cargs); 2231 2232 tcg_regset_set(i_allocated_regs, s->reserved_regs); 2233 tcg_regset_set(o_allocated_regs, s->reserved_regs); 2234 2235 /* satisfy input constraints */ 2236 for(k = 0; k < nb_iargs; k++) { 2237 i = def->sorted_args[nb_oargs + k]; 2238 arg = args[i]; 2239 arg_ct = &def->args_ct[i]; 2240 ts = &s->temps[arg]; 2241 2242 if (ts->val_type == TEMP_VAL_CONST 2243 && tcg_target_const_match(ts->val, ts->type, arg_ct)) { 2244 /* constant is OK for instruction */ 2245 const_args[i] = 1; 2246 new_args[i] = ts->val; 2247 goto iarg_end; 2248 } 2249 2250 temp_load(s, ts, arg_ct->u.regs, i_allocated_regs); 2251 2252 if (arg_ct->ct & TCG_CT_IALIAS) { 2253 if (ts->fixed_reg) { 2254 /* if fixed register, we must allocate a new register 2255 if the alias is not the same register */ 2256 if (arg != args[arg_ct->alias_index]) 2257 goto allocate_in_reg; 2258 } else { 2259 /* if the input is aliased to an output and if it is 2260 not dead after the instruction, we must allocate 2261 a new register and move it */ 2262 if (!IS_DEAD_ARG(i)) { 2263 goto allocate_in_reg; 2264 } 2265 /* check if the current register has already been allocated 2266 for another input aliased to an output */ 2267 int k2, i2; 2268 for (k2 = 0 ; k2 < k ; k2++) { 2269 i2 = def->sorted_args[nb_oargs + k2]; 2270 if ((def->args_ct[i2].ct & TCG_CT_IALIAS) && 2271 (new_args[i2] == ts->reg)) { 2272 goto allocate_in_reg; 2273 } 2274 } 2275 } 2276 } 2277 reg = ts->reg; 2278 if (tcg_regset_test_reg(arg_ct->u.regs, reg)) { 2279 /* nothing to do : the constraint is satisfied */ 2280 } else { 2281 allocate_in_reg: 2282 /* allocate a new register matching the constraint 2283 and move the temporary register into it */ 2284 reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs, 2285 ts->indirect_base); 2286 tcg_out_mov(s, ts->type, reg, ts->reg); 2287 } 2288 new_args[i] = reg; 2289 const_args[i] = 0; 2290 tcg_regset_set_reg(i_allocated_regs, reg); 2291 iarg_end: ; 2292 } 2293 2294 /* mark dead temporaries and free the associated registers */ 2295 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2296 if (IS_DEAD_ARG(i)) { 2297 temp_dead(s, &s->temps[args[i]]); 2298 } 2299 } 2300 2301 if (def->flags & TCG_OPF_BB_END) { 2302 tcg_reg_alloc_bb_end(s, i_allocated_regs); 2303 } else { 2304 if (def->flags & TCG_OPF_CALL_CLOBBER) { 2305 /* XXX: permit generic clobber register list ? */ 2306 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 2307 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 2308 tcg_reg_free(s, i, i_allocated_regs); 2309 } 2310 } 2311 } 2312 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2313 /* sync globals if the op has side effects and might trigger 2314 an exception. */ 2315 sync_globals(s, i_allocated_regs); 2316 } 2317 2318 /* satisfy the output constraints */ 2319 for(k = 0; k < nb_oargs; k++) { 2320 i = def->sorted_args[k]; 2321 arg = args[i]; 2322 arg_ct = &def->args_ct[i]; 2323 ts = &s->temps[arg]; 2324 if ((arg_ct->ct & TCG_CT_ALIAS) 2325 && !const_args[arg_ct->alias_index]) { 2326 reg = new_args[arg_ct->alias_index]; 2327 } else if (arg_ct->ct & TCG_CT_NEWREG) { 2328 reg = tcg_reg_alloc(s, arg_ct->u.regs, 2329 i_allocated_regs | o_allocated_regs, 2330 ts->indirect_base); 2331 } else { 2332 /* if fixed register, we try to use it */ 2333 reg = ts->reg; 2334 if (ts->fixed_reg && 2335 tcg_regset_test_reg(arg_ct->u.regs, reg)) { 2336 goto oarg_end; 2337 } 2338 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs, 2339 ts->indirect_base); 2340 } 2341 tcg_regset_set_reg(o_allocated_regs, reg); 2342 /* if a fixed register is used, then a move will be done afterwards */ 2343 if (!ts->fixed_reg) { 2344 if (ts->val_type == TEMP_VAL_REG) { 2345 s->reg_to_temp[ts->reg] = NULL; 2346 } 2347 ts->val_type = TEMP_VAL_REG; 2348 ts->reg = reg; 2349 /* temp value is modified, so the value kept in memory is 2350 potentially not the same */ 2351 ts->mem_coherent = 0; 2352 s->reg_to_temp[reg] = ts; 2353 } 2354 oarg_end: 2355 new_args[i] = reg; 2356 } 2357 } 2358 2359 /* emit instruction */ 2360 tcg_out_op(s, opc, new_args, const_args); 2361 2362 /* move the outputs in the correct register if needed */ 2363 for(i = 0; i < nb_oargs; i++) { 2364 ts = &s->temps[args[i]]; 2365 reg = new_args[i]; 2366 if (ts->fixed_reg && ts->reg != reg) { 2367 tcg_out_mov(s, ts->type, ts->reg, reg); 2368 } 2369 if (NEED_SYNC_ARG(i)) { 2370 temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i)); 2371 } else if (IS_DEAD_ARG(i)) { 2372 temp_dead(s, ts); 2373 } 2374 } 2375 } 2376 2377 #ifdef TCG_TARGET_STACK_GROWSUP 2378 #define STACK_DIR(x) (-(x)) 2379 #else 2380 #define STACK_DIR(x) (x) 2381 #endif 2382 2383 static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs, 2384 const TCGArg * const args, TCGLifeData arg_life) 2385 { 2386 int flags, nb_regs, i; 2387 TCGReg reg; 2388 TCGArg arg; 2389 TCGTemp *ts; 2390 intptr_t stack_offset; 2391 size_t call_stack_size; 2392 tcg_insn_unit *func_addr; 2393 int allocate_args; 2394 TCGRegSet allocated_regs; 2395 2396 func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs]; 2397 flags = args[nb_oargs + nb_iargs + 1]; 2398 2399 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 2400 if (nb_regs > nb_iargs) { 2401 nb_regs = nb_iargs; 2402 } 2403 2404 /* assign stack slots first */ 2405 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); 2406 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 2407 ~(TCG_TARGET_STACK_ALIGN - 1); 2408 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); 2409 if (allocate_args) { 2410 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed, 2411 preallocate call stack */ 2412 tcg_abort(); 2413 } 2414 2415 stack_offset = TCG_TARGET_CALL_STACK_OFFSET; 2416 for(i = nb_regs; i < nb_iargs; i++) { 2417 arg = args[nb_oargs + i]; 2418 #ifdef TCG_TARGET_STACK_GROWSUP 2419 stack_offset -= sizeof(tcg_target_long); 2420 #endif 2421 if (arg != TCG_CALL_DUMMY_ARG) { 2422 ts = &s->temps[arg]; 2423 temp_load(s, ts, tcg_target_available_regs[ts->type], 2424 s->reserved_regs); 2425 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); 2426 } 2427 #ifndef TCG_TARGET_STACK_GROWSUP 2428 stack_offset += sizeof(tcg_target_long); 2429 #endif 2430 } 2431 2432 /* assign input registers */ 2433 tcg_regset_set(allocated_regs, s->reserved_regs); 2434 for(i = 0; i < nb_regs; i++) { 2435 arg = args[nb_oargs + i]; 2436 if (arg != TCG_CALL_DUMMY_ARG) { 2437 ts = &s->temps[arg]; 2438 reg = tcg_target_call_iarg_regs[i]; 2439 tcg_reg_free(s, reg, allocated_regs); 2440 2441 if (ts->val_type == TEMP_VAL_REG) { 2442 if (ts->reg != reg) { 2443 tcg_out_mov(s, ts->type, reg, ts->reg); 2444 } 2445 } else { 2446 TCGRegSet arg_set; 2447 2448 tcg_regset_clear(arg_set); 2449 tcg_regset_set_reg(arg_set, reg); 2450 temp_load(s, ts, arg_set, allocated_regs); 2451 } 2452 2453 tcg_regset_set_reg(allocated_regs, reg); 2454 } 2455 } 2456 2457 /* mark dead temporaries and free the associated registers */ 2458 for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2459 if (IS_DEAD_ARG(i)) { 2460 temp_dead(s, &s->temps[args[i]]); 2461 } 2462 } 2463 2464 /* clobber call registers */ 2465 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 2466 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 2467 tcg_reg_free(s, i, allocated_regs); 2468 } 2469 } 2470 2471 /* Save globals if they might be written by the helper, sync them if 2472 they might be read. */ 2473 if (flags & TCG_CALL_NO_READ_GLOBALS) { 2474 /* Nothing to do */ 2475 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) { 2476 sync_globals(s, allocated_regs); 2477 } else { 2478 save_globals(s, allocated_regs); 2479 } 2480 2481 tcg_out_call(s, func_addr); 2482 2483 /* assign output registers and emit moves if needed */ 2484 for(i = 0; i < nb_oargs; i++) { 2485 arg = args[i]; 2486 ts = &s->temps[arg]; 2487 reg = tcg_target_call_oarg_regs[i]; 2488 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 2489 2490 if (ts->fixed_reg) { 2491 if (ts->reg != reg) { 2492 tcg_out_mov(s, ts->type, ts->reg, reg); 2493 } 2494 } else { 2495 if (ts->val_type == TEMP_VAL_REG) { 2496 s->reg_to_temp[ts->reg] = NULL; 2497 } 2498 ts->val_type = TEMP_VAL_REG; 2499 ts->reg = reg; 2500 ts->mem_coherent = 0; 2501 s->reg_to_temp[reg] = ts; 2502 if (NEED_SYNC_ARG(i)) { 2503 temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i)); 2504 } else if (IS_DEAD_ARG(i)) { 2505 temp_dead(s, ts); 2506 } 2507 } 2508 } 2509 } 2510 2511 #ifdef CONFIG_PROFILER 2512 2513 static int64_t tcg_table_op_count[NB_OPS]; 2514 2515 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) 2516 { 2517 int i; 2518 2519 for (i = 0; i < NB_OPS; i++) { 2520 cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name, 2521 tcg_table_op_count[i]); 2522 } 2523 } 2524 #else 2525 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) 2526 { 2527 cpu_fprintf(f, "[TCG profiler not compiled]\n"); 2528 } 2529 #endif 2530 2531 2532 int tcg_gen_code(TCGContext *s, TranslationBlock *tb) 2533 { 2534 int i, oi, oi_next, num_insns; 2535 2536 #ifdef CONFIG_PROFILER 2537 { 2538 int n; 2539 2540 n = s->gen_op_buf[0].prev + 1; 2541 s->op_count += n; 2542 if (n > s->op_count_max) { 2543 s->op_count_max = n; 2544 } 2545 2546 n = s->nb_temps; 2547 s->temp_count += n; 2548 if (n > s->temp_count_max) { 2549 s->temp_count_max = n; 2550 } 2551 } 2552 #endif 2553 2554 #ifdef DEBUG_DISAS 2555 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 2556 && qemu_log_in_addr_range(tb->pc))) { 2557 qemu_log_lock(); 2558 qemu_log("OP:\n"); 2559 tcg_dump_ops(s); 2560 qemu_log("\n"); 2561 qemu_log_unlock(); 2562 } 2563 #endif 2564 2565 #ifdef CONFIG_PROFILER 2566 s->opt_time -= profile_getclock(); 2567 #endif 2568 2569 #ifdef USE_TCG_OPTIMIZATIONS 2570 tcg_optimize(s); 2571 #endif 2572 2573 #ifdef CONFIG_PROFILER 2574 s->opt_time += profile_getclock(); 2575 s->la_time -= profile_getclock(); 2576 #endif 2577 2578 { 2579 uint8_t *temp_state = tcg_malloc(s->nb_temps + s->nb_indirects); 2580 2581 liveness_pass_1(s, temp_state); 2582 2583 if (s->nb_indirects > 0) { 2584 #ifdef DEBUG_DISAS 2585 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 2586 && qemu_log_in_addr_range(tb->pc))) { 2587 qemu_log_lock(); 2588 qemu_log("OP before indirect lowering:\n"); 2589 tcg_dump_ops(s); 2590 qemu_log("\n"); 2591 qemu_log_unlock(); 2592 } 2593 #endif 2594 /* Replace indirect temps with direct temps. */ 2595 if (liveness_pass_2(s, temp_state)) { 2596 /* If changes were made, re-run liveness. */ 2597 liveness_pass_1(s, temp_state); 2598 } 2599 } 2600 } 2601 2602 #ifdef CONFIG_PROFILER 2603 s->la_time += profile_getclock(); 2604 #endif 2605 2606 #ifdef DEBUG_DISAS 2607 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 2608 && qemu_log_in_addr_range(tb->pc))) { 2609 qemu_log_lock(); 2610 qemu_log("OP after optimization and liveness analysis:\n"); 2611 tcg_dump_ops(s); 2612 qemu_log("\n"); 2613 qemu_log_unlock(); 2614 } 2615 #endif 2616 2617 tcg_reg_alloc_start(s); 2618 2619 s->code_buf = tb->tc_ptr; 2620 s->code_ptr = tb->tc_ptr; 2621 2622 tcg_out_tb_init(s); 2623 2624 num_insns = -1; 2625 for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) { 2626 TCGOp * const op = &s->gen_op_buf[oi]; 2627 TCGArg * const args = &s->gen_opparam_buf[op->args]; 2628 TCGOpcode opc = op->opc; 2629 const TCGOpDef *def = &tcg_op_defs[opc]; 2630 TCGLifeData arg_life = op->life; 2631 2632 oi_next = op->next; 2633 #ifdef CONFIG_PROFILER 2634 tcg_table_op_count[opc]++; 2635 #endif 2636 2637 switch (opc) { 2638 case INDEX_op_mov_i32: 2639 case INDEX_op_mov_i64: 2640 tcg_reg_alloc_mov(s, def, args, arg_life); 2641 break; 2642 case INDEX_op_movi_i32: 2643 case INDEX_op_movi_i64: 2644 tcg_reg_alloc_movi(s, args, arg_life); 2645 break; 2646 case INDEX_op_insn_start: 2647 if (num_insns >= 0) { 2648 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 2649 } 2650 num_insns++; 2651 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 2652 target_ulong a; 2653 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 2654 a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2]; 2655 #else 2656 a = args[i]; 2657 #endif 2658 s->gen_insn_data[num_insns][i] = a; 2659 } 2660 break; 2661 case INDEX_op_discard: 2662 temp_dead(s, &s->temps[args[0]]); 2663 break; 2664 case INDEX_op_set_label: 2665 tcg_reg_alloc_bb_end(s, s->reserved_regs); 2666 tcg_out_label(s, arg_label(args[0]), s->code_ptr); 2667 break; 2668 case INDEX_op_call: 2669 tcg_reg_alloc_call(s, op->callo, op->calli, args, arg_life); 2670 break; 2671 default: 2672 /* Sanity check that we've not introduced any unhandled opcodes. */ 2673 if (def->flags & TCG_OPF_NOT_PRESENT) { 2674 tcg_abort(); 2675 } 2676 /* Note: in order to speed up the code, it would be much 2677 faster to have specialized register allocator functions for 2678 some common argument patterns */ 2679 tcg_reg_alloc_op(s, def, opc, args, arg_life); 2680 break; 2681 } 2682 #ifdef CONFIG_DEBUG_TCG 2683 check_regs(s); 2684 #endif 2685 /* Test for (pending) buffer overflow. The assumption is that any 2686 one operation beginning below the high water mark cannot overrun 2687 the buffer completely. Thus we can test for overflow after 2688 generating code without having to check during generation. */ 2689 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 2690 return -1; 2691 } 2692 } 2693 tcg_debug_assert(num_insns >= 0); 2694 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 2695 2696 /* Generate TB finalization at the end of block */ 2697 if (!tcg_out_tb_finalize(s)) { 2698 return -1; 2699 } 2700 2701 /* flush instruction cache */ 2702 flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); 2703 2704 return tcg_current_code_size(s); 2705 } 2706 2707 #ifdef CONFIG_PROFILER 2708 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) 2709 { 2710 TCGContext *s = &tcg_ctx; 2711 int64_t tb_count = s->tb_count; 2712 int64_t tb_div_count = tb_count ? tb_count : 1; 2713 int64_t tot = s->interm_time + s->code_time; 2714 2715 cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n", 2716 tot, tot / 2.4e9); 2717 cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n", 2718 tb_count, s->tb_count1 - tb_count, 2719 (double)(s->tb_count1 - s->tb_count) 2720 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0); 2721 cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n", 2722 (double)s->op_count / tb_div_count, s->op_count_max); 2723 cpu_fprintf(f, "deleted ops/TB %0.2f\n", 2724 (double)s->del_op_count / tb_div_count); 2725 cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n", 2726 (double)s->temp_count / tb_div_count, s->temp_count_max); 2727 cpu_fprintf(f, "avg host code/TB %0.1f\n", 2728 (double)s->code_out_len / tb_div_count); 2729 cpu_fprintf(f, "avg search data/TB %0.1f\n", 2730 (double)s->search_out_len / tb_div_count); 2731 2732 cpu_fprintf(f, "cycles/op %0.1f\n", 2733 s->op_count ? (double)tot / s->op_count : 0); 2734 cpu_fprintf(f, "cycles/in byte %0.1f\n", 2735 s->code_in_len ? (double)tot / s->code_in_len : 0); 2736 cpu_fprintf(f, "cycles/out byte %0.1f\n", 2737 s->code_out_len ? (double)tot / s->code_out_len : 0); 2738 cpu_fprintf(f, "cycles/search byte %0.1f\n", 2739 s->search_out_len ? (double)tot / s->search_out_len : 0); 2740 if (tot == 0) { 2741 tot = 1; 2742 } 2743 cpu_fprintf(f, " gen_interm time %0.1f%%\n", 2744 (double)s->interm_time / tot * 100.0); 2745 cpu_fprintf(f, " gen_code time %0.1f%%\n", 2746 (double)s->code_time / tot * 100.0); 2747 cpu_fprintf(f, "optim./code time %0.1f%%\n", 2748 (double)s->opt_time / (s->code_time ? s->code_time : 1) 2749 * 100.0); 2750 cpu_fprintf(f, "liveness/code time %0.1f%%\n", 2751 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0); 2752 cpu_fprintf(f, "cpu_restore count %" PRId64 "\n", 2753 s->restore_count); 2754 cpu_fprintf(f, " avg cycles %0.1f\n", 2755 s->restore_count ? (double)s->restore_time / s->restore_count : 0); 2756 } 2757 #else 2758 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) 2759 { 2760 cpu_fprintf(f, "[TCG profiler not compiled]\n"); 2761 } 2762 #endif 2763 2764 #ifdef ELF_HOST_MACHINE 2765 /* In order to use this feature, the backend needs to do three things: 2766 2767 (1) Define ELF_HOST_MACHINE to indicate both what value to 2768 put into the ELF image and to indicate support for the feature. 2769 2770 (2) Define tcg_register_jit. This should create a buffer containing 2771 the contents of a .debug_frame section that describes the post- 2772 prologue unwind info for the tcg machine. 2773 2774 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 2775 */ 2776 2777 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */ 2778 typedef enum { 2779 JIT_NOACTION = 0, 2780 JIT_REGISTER_FN, 2781 JIT_UNREGISTER_FN 2782 } jit_actions_t; 2783 2784 struct jit_code_entry { 2785 struct jit_code_entry *next_entry; 2786 struct jit_code_entry *prev_entry; 2787 const void *symfile_addr; 2788 uint64_t symfile_size; 2789 }; 2790 2791 struct jit_descriptor { 2792 uint32_t version; 2793 uint32_t action_flag; 2794 struct jit_code_entry *relevant_entry; 2795 struct jit_code_entry *first_entry; 2796 }; 2797 2798 void __jit_debug_register_code(void) __attribute__((noinline)); 2799 void __jit_debug_register_code(void) 2800 { 2801 asm(""); 2802 } 2803 2804 /* Must statically initialize the version, because GDB may check 2805 the version before we can set it. */ 2806 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 2807 2808 /* End GDB interface. */ 2809 2810 static int find_string(const char *strtab, const char *str) 2811 { 2812 const char *p = strtab + 1; 2813 2814 while (1) { 2815 if (strcmp(p, str) == 0) { 2816 return p - strtab; 2817 } 2818 p += strlen(p) + 1; 2819 } 2820 } 2821 2822 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size, 2823 const void *debug_frame, 2824 size_t debug_frame_size) 2825 { 2826 struct __attribute__((packed)) DebugInfo { 2827 uint32_t len; 2828 uint16_t version; 2829 uint32_t abbrev; 2830 uint8_t ptr_size; 2831 uint8_t cu_die; 2832 uint16_t cu_lang; 2833 uintptr_t cu_low_pc; 2834 uintptr_t cu_high_pc; 2835 uint8_t fn_die; 2836 char fn_name[16]; 2837 uintptr_t fn_low_pc; 2838 uintptr_t fn_high_pc; 2839 uint8_t cu_eoc; 2840 }; 2841 2842 struct ElfImage { 2843 ElfW(Ehdr) ehdr; 2844 ElfW(Phdr) phdr; 2845 ElfW(Shdr) shdr[7]; 2846 ElfW(Sym) sym[2]; 2847 struct DebugInfo di; 2848 uint8_t da[24]; 2849 char str[80]; 2850 }; 2851 2852 struct ElfImage *img; 2853 2854 static const struct ElfImage img_template = { 2855 .ehdr = { 2856 .e_ident[EI_MAG0] = ELFMAG0, 2857 .e_ident[EI_MAG1] = ELFMAG1, 2858 .e_ident[EI_MAG2] = ELFMAG2, 2859 .e_ident[EI_MAG3] = ELFMAG3, 2860 .e_ident[EI_CLASS] = ELF_CLASS, 2861 .e_ident[EI_DATA] = ELF_DATA, 2862 .e_ident[EI_VERSION] = EV_CURRENT, 2863 .e_type = ET_EXEC, 2864 .e_machine = ELF_HOST_MACHINE, 2865 .e_version = EV_CURRENT, 2866 .e_phoff = offsetof(struct ElfImage, phdr), 2867 .e_shoff = offsetof(struct ElfImage, shdr), 2868 .e_ehsize = sizeof(ElfW(Shdr)), 2869 .e_phentsize = sizeof(ElfW(Phdr)), 2870 .e_phnum = 1, 2871 .e_shentsize = sizeof(ElfW(Shdr)), 2872 .e_shnum = ARRAY_SIZE(img->shdr), 2873 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 2874 #ifdef ELF_HOST_FLAGS 2875 .e_flags = ELF_HOST_FLAGS, 2876 #endif 2877 #ifdef ELF_OSABI 2878 .e_ident[EI_OSABI] = ELF_OSABI, 2879 #endif 2880 }, 2881 .phdr = { 2882 .p_type = PT_LOAD, 2883 .p_flags = PF_X, 2884 }, 2885 .shdr = { 2886 [0] = { .sh_type = SHT_NULL }, 2887 /* Trick: The contents of code_gen_buffer are not present in 2888 this fake ELF file; that got allocated elsewhere. Therefore 2889 we mark .text as SHT_NOBITS (similar to .bss) so that readers 2890 will not look for contents. We can record any address. */ 2891 [1] = { /* .text */ 2892 .sh_type = SHT_NOBITS, 2893 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 2894 }, 2895 [2] = { /* .debug_info */ 2896 .sh_type = SHT_PROGBITS, 2897 .sh_offset = offsetof(struct ElfImage, di), 2898 .sh_size = sizeof(struct DebugInfo), 2899 }, 2900 [3] = { /* .debug_abbrev */ 2901 .sh_type = SHT_PROGBITS, 2902 .sh_offset = offsetof(struct ElfImage, da), 2903 .sh_size = sizeof(img->da), 2904 }, 2905 [4] = { /* .debug_frame */ 2906 .sh_type = SHT_PROGBITS, 2907 .sh_offset = sizeof(struct ElfImage), 2908 }, 2909 [5] = { /* .symtab */ 2910 .sh_type = SHT_SYMTAB, 2911 .sh_offset = offsetof(struct ElfImage, sym), 2912 .sh_size = sizeof(img->sym), 2913 .sh_info = 1, 2914 .sh_link = ARRAY_SIZE(img->shdr) - 1, 2915 .sh_entsize = sizeof(ElfW(Sym)), 2916 }, 2917 [6] = { /* .strtab */ 2918 .sh_type = SHT_STRTAB, 2919 .sh_offset = offsetof(struct ElfImage, str), 2920 .sh_size = sizeof(img->str), 2921 } 2922 }, 2923 .sym = { 2924 [1] = { /* code_gen_buffer */ 2925 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 2926 .st_shndx = 1, 2927 } 2928 }, 2929 .di = { 2930 .len = sizeof(struct DebugInfo) - 4, 2931 .version = 2, 2932 .ptr_size = sizeof(void *), 2933 .cu_die = 1, 2934 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 2935 .fn_die = 2, 2936 .fn_name = "code_gen_buffer" 2937 }, 2938 .da = { 2939 1, /* abbrev number (the cu) */ 2940 0x11, 1, /* DW_TAG_compile_unit, has children */ 2941 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 2942 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 2943 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 2944 0, 0, /* end of abbrev */ 2945 2, /* abbrev number (the fn) */ 2946 0x2e, 0, /* DW_TAG_subprogram, no children */ 2947 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 2948 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 2949 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 2950 0, 0, /* end of abbrev */ 2951 0 /* no more abbrev */ 2952 }, 2953 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 2954 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 2955 }; 2956 2957 /* We only need a single jit entry; statically allocate it. */ 2958 static struct jit_code_entry one_entry; 2959 2960 uintptr_t buf = (uintptr_t)buf_ptr; 2961 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 2962 DebugFrameHeader *dfh; 2963 2964 img = g_malloc(img_size); 2965 *img = img_template; 2966 2967 img->phdr.p_vaddr = buf; 2968 img->phdr.p_paddr = buf; 2969 img->phdr.p_memsz = buf_size; 2970 2971 img->shdr[1].sh_name = find_string(img->str, ".text"); 2972 img->shdr[1].sh_addr = buf; 2973 img->shdr[1].sh_size = buf_size; 2974 2975 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 2976 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 2977 2978 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 2979 img->shdr[4].sh_size = debug_frame_size; 2980 2981 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 2982 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 2983 2984 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 2985 img->sym[1].st_value = buf; 2986 img->sym[1].st_size = buf_size; 2987 2988 img->di.cu_low_pc = buf; 2989 img->di.cu_high_pc = buf + buf_size; 2990 img->di.fn_low_pc = buf; 2991 img->di.fn_high_pc = buf + buf_size; 2992 2993 dfh = (DebugFrameHeader *)(img + 1); 2994 memcpy(dfh, debug_frame, debug_frame_size); 2995 dfh->fde.func_start = buf; 2996 dfh->fde.func_len = buf_size; 2997 2998 #ifdef DEBUG_JIT 2999 /* Enable this block to be able to debug the ELF image file creation. 3000 One can use readelf, objdump, or other inspection utilities. */ 3001 { 3002 FILE *f = fopen("/tmp/qemu.jit", "w+b"); 3003 if (f) { 3004 if (fwrite(img, img_size, 1, f) != img_size) { 3005 /* Avoid stupid unused return value warning for fwrite. */ 3006 } 3007 fclose(f); 3008 } 3009 } 3010 #endif 3011 3012 one_entry.symfile_addr = img; 3013 one_entry.symfile_size = img_size; 3014 3015 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 3016 __jit_debug_descriptor.relevant_entry = &one_entry; 3017 __jit_debug_descriptor.first_entry = &one_entry; 3018 __jit_debug_register_code(); 3019 } 3020 #else 3021 /* No support for the feature. Provide the entry point expected by exec.c, 3022 and implement the internal function we declared earlier. */ 3023 3024 static void tcg_register_jit_int(void *buf, size_t size, 3025 const void *debug_frame, 3026 size_t debug_frame_size) 3027 { 3028 } 3029 3030 void tcg_register_jit(void *buf, size_t buf_size) 3031 { 3032 } 3033 #endif /* ELF_HOST_MACHINE */ 3034