/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/timer.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions.  Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/cpu-common.h"
#include "exec/exec-all.h"

#include "tcg-op.h"

/* Pick the ELF class/endianness matching the *host*, for the in-memory
   ELF image handed to GDB's JIT interface (see tcg_register_jit_int).  */
#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"

/* Forward declarations for functions declared in tcg-target.inc.c and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */

/* DWARF .debug_frame Common Information Entry header.  'len' counts the
   bytes that follow it; alignment to the pointer size lets the structure
   be emitted directly into the debug-frame image.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

/* DWARF .debug_frame Frame Description Entry header; packed because
   'func_start'/'func_len' follow the 32-bit 'cie_offset' without padding.  */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

/* A CIE immediately followed by one FDE, as emitted for the JIT image.  */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

/* Marked unused: not every host backend calls this (only via
   tcg_register_jit in tcg-target.inc.c).  */
static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.inc.c.
 */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
static void tcg_out_tb_init(TCGContext *s);
static bool tcg_out_tb_finalize(TCGContext *s);


/* Registers the backend makes available for allocation, presumably
   indexed by base TCGType (I32/I64) -- confirm against tcg.h.  */
static TCGRegSet tcg_target_available_regs[2];
/* Registers clobbered across a host call; filled in by tcg_target_init.  */
static TCGRegSet tcg_target_call_clobber_regs;

/* Raw code emission helpers.  Each tcg_outN appends an N-bit value to
   s->code_ptr; each tcg_patchN overwrites an N-bit value in place.
   Only the widths that divide evenly into the backend's insn unit size
   are compiled in.  */

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        /* Insn unit narrower than 16 bits: emit via memcpy and advance
           by the equivalent number of units.  */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
/* Emit a 32-bit value into the generated code stream.  */
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a 32-bit value already present in the code stream.  */
static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
/* Emit a 64-bit value into the generated code stream.  */
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a 64-bit value already present in the code stream.  */
static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

/* Record (or immediately resolve) a relocation at CODE_PTR against
   label L.  If the label already has a value, patch in place; otherwise
   queue the relocation on the label's list for tcg_out_label.  */
static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r;

    if (l->has_value) {
        /* FIXME: This may break relocations on RISC targets that
           modify instruction fields in place.  The caller may not have
           written the initial value.  */
        patch_reloc(code_ptr, type, l->u.value, addend);
    } else {
        /* add a new relocation entry */
        r = tcg_malloc(sizeof(TCGRelocation));
        r->type = type;
        r->ptr = code_ptr;
        r->addend = addend;
        r->next = l->u.first_reloc;
        l->u.first_reloc = r;
    }
}

/* Bind label L to code address PTR and patch every relocation queued
   against it.  The label must not have been bound already.  */
static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
{
    intptr_t value = (intptr_t)ptr;
    TCGRelocation *r;

    tcg_debug_assert(!l->has_value);

    for (r = l->u.first_reloc; r != NULL; r = r->next) {
        patch_reloc(r->ptr, r->type, value, r->addend);
    }

    /* Note: u is a union -- setting value_ptr discards first_reloc.  */
    l->has_value = 1;
    l->u.value_ptr = ptr;
}

/* Allocate a fresh, unbound label from the context's pool.  */
TCGLabel *gen_new_label(void)
{
    TCGContext *s = &tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    *l = (TCGLabel){
        .id = s->nb_labels++
    };

    return l;
}

#include "tcg-target.inc.c"

/* pool based memory allocation */

/* Slow path for tcg_malloc(): carve SIZE bytes out of the context's
   chunk pool.  Oversized requests (> TCG_POOL_CHUNK_SIZE) get their own
   dedicated entry on the pool_first_large list; everything in the pool
   is released en masse by tcg_pool_reset().  */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        /* Advance to the next chunk with room, allocating a new one and
           linking it at the tail if the chain is exhausted.  */
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current)
                    s->pool_current->next = p;
                else
                    s->pool_first = p;
            } else {
                p = p->next;
            }
        }
    }
    /* The caller's block occupies the start of the chunk; subsequent
       tcg_malloc() calls bump pool_cur until pool_end.  */
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

/* Release all large allocations and rewind the chunk chain so the next
   tcg_malloc() starts over from pool_first (chunks are kept for reuse).  */
void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large =
NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/* Descriptor for one helper function callable from generated code.  */
typedef struct TCGHelperInfo {
    void *func;          /* host entry point; also the hash-table key */
    const char *name;    /* for disassembly/dump output */
    unsigned flags;      /* TCG_CALL_* flags */
    unsigned sizemask;   /* per-argument 64-bit/signedness bits */
} TCGHelperInfo;

#include "exec/helper-proto.h"

/* Table of all helpers, generated from the DEF_HELPER_* expansions.  */
static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};

/* Allocation order used when indirect globals are present: call-saved
   registers reversed to the front (computed in tcg_context_init).  */
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);

/* One-time initialisation of a TCGContext: sizes the per-opcode
   constraint arrays, registers helpers, and calls the backend's init.  */
void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    int *sorted_args;
    GHashTable *helper_table;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
    sorted_args = g_malloc(sizeof(int) * total_args);

    /* Hand each opcode its slice of the two shared arrays.  */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        def->sorted_args = sorted_args;
        n = def->nb_iargs + def->nb_oargs;
        sorted_args += n;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.
       (NULL hash/equal funcs select exactly those defaults.)  */
    s->helpers = helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* n now indexes the first call-clobbered register (or the array end
       if every register is call-saved).  */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }
}

/* Generate the host prologue/epilogue into the start of code_gen_buffer,
   then shrink the buffer so translated blocks begin after it.  */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->code_gen_prologue = buf0;

    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);
    buf1 = s->code_ptr;
    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size = s->code_gen_buffer_size - prologue_size;
    s->code_gen_buffer_size = total_size;

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - 1024);

    tcg_register_jit(s->code_gen_buffer, total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        log_disas(buf0, prologue_size);
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock();
    }
#endif

    /* Assert that goto_ptr is implemented completely.
 */
    if (TCG_TARGET_HAS_goto_ptr) {
        /* A backend advertising goto_ptr must have emitted the shared
           epilogue during tcg_target_qemu_prologue above.  */
        tcg_debug_assert(s->code_gen_epilogue != NULL);
    }
}

/* Reset per-translation state before generating a new TB: drop all
   non-global temps, labels, and the op/param buffers.  */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    /* gen_op_buf[0] is the list sentinel; real ops start at index 1.  */
    s->gen_op_buf[0].next = 1;
    s->gen_op_buf[0].prev = 0;
    s->gen_next_op_idx = 1;
    s->gen_next_parm_idx = 0;

    s->be = tcg_malloc(sizeof(TCGBackendData));
}

/* Index of TS within the context's temps array.  */
static inline int temp_idx(TCGContext *s, TCGTemp *ts)
{
    ptrdiff_t n = ts - s->temps;
    tcg_debug_assert(n >= 0 && n < s->nb_temps);
    return n;
}

/* Claim the next temps slot, zero-initialised.  */
static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;
    tcg_debug_assert(n < TCG_MAX_TEMPS);
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

/* Claim the next temps slot as a global.  Globals must all be created
   before any ordinary temps (hence the equality assertion).  */
static inline TCGTemp *tcg_global_alloc(TCGContext *s)
{
    tcg_debug_assert(s->nb_globals == s->nb_temps);
    s->nb_globals++;
    return tcg_temp_alloc(s);
}

/* Create a global temp permanently bound to host register REG, and
   reserve that register from the allocator.  Returns the temp index.  */
static int tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                       TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        /* A 32-bit host cannot hold a 64-bit global in one register.  */
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->fixed_reg = 1;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return temp_idx(s, ts);
}

/* Declare the register and extent of the TCG spill frame, and create
   the fixed "_frame" global that addresses it.  */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    int idx;
    s->frame_start = start;
    s->frame_end = start + size;
    idx = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
    s->frame_temp = &s->temps[idx];
}

/* Public wrapper: new 32-bit global in host register REG.  Aborts if
   REG was already reserved.  */
TCGv_i32 tcg_global_reg_new_i32(TCGReg reg, const char *name)
{
    TCGContext *s = &tcg_ctx;
    int idx;

    if (tcg_regset_test_reg(s->reserved_regs, reg)) {
        tcg_abort();
    }
    idx = tcg_global_reg_new_internal(s, TCG_TYPE_I32, reg, name);
    return MAKE_TCGV_I32(idx);
}

/* Public wrapper: new 64-bit global in host register REG.  Aborts if
   REG was already reserved.  */
TCGv_i64 tcg_global_reg_new_i64(TCGReg reg, const char *name)
{
    TCGContext *s = &tcg_ctx;
    int idx;

    if (tcg_regset_test_reg(s->reserved_regs, reg)) {
        tcg_abort();
    }
    idx = tcg_global_reg_new_internal(s, TCG_TYPE_I64, reg, name);
    return MAKE_TCGV_I64(idx);
}

/* Create a memory-backed global at BASE+OFFSET.  On 32-bit hosts a
   64-bit global is split into two adjacent 32-bit halves ("_0"/"_1").
   If BASE itself is not a fixed register, the global is "indirect" and
   must be reloaded through it.  Returns the temp index.  */
int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                intptr_t offset, const char *name)
{
    TCGContext *s = &tcg_ctx;
    TCGTemp *base_ts = &s->temps[GET_TCGV_PTR(base)];
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    if (!base_ts->fixed_reg) {
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        /* A split 64-bit global counts as two indirect temps.  */
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ?
2 : 1);
        indirect_reg = 1;
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        /* Split into two consecutive I32 temps; the big-endian host sees
           the high half first in memory.  */
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return temp_idx(s, ts);
}

/* Allocate (or reuse from the free list) a temp of TYPE.  TEMP_LOCAL
   temps survive across basic-block boundaries.  Returns the temp index.  */
static int tcg_temp_new_internal(TCGType type, int temp_local)
{
    TCGContext *s = &tcg_ctx;
    TCGTemp *ts;
    int idx, k;

    /* Free lists are kept per (type, locality) pair.  */
    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->temp_local == temp_local);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* 64-bit value on a 32-bit host: two adjacent I32 temps.  */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->temp_local = temp_local;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
        }
        idx = temp_idx(s, ts);
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return idx;
}

/* Typed wrapper around tcg_temp_new_internal for I32.  */
TCGv_i32 tcg_temp_new_internal_i32(int temp_local)
{
    int idx;

    idx = tcg_temp_new_internal(TCG_TYPE_I32, temp_local);
    return MAKE_TCGV_I32(idx);
}

/* Typed wrapper around tcg_temp_new_internal for I64.  */
TCGv_i64 tcg_temp_new_internal_i64(int temp_local)
{
    int idx;

    idx = tcg_temp_new_internal(TCG_TYPE_I64, temp_local);
    return MAKE_TCGV_I64(idx);
}

/* Return temp IDX to the matching (type, locality) free list.
   Globals may not be freed.  */
static void tcg_temp_free_internal(int idx)
{
    TCGContext *s = &tcg_ctx;
    TCGTemp *ts;
    int k;

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(idx >= s->nb_globals && idx < s->nb_temps);
    ts = &s->temps[idx];
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
    set_bit(idx, s->free_temps[k].l);
}

void tcg_temp_free_i32(TCGv_i32 arg)
{
    tcg_temp_free_internal(GET_TCGV_I32(arg));
}

void tcg_temp_free_i64(TCGv_i64 arg)
{
    tcg_temp_free_internal(GET_TCGV_I64(arg));
}

/* New I32 temp initialised to VAL (caller frees).  */
TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

/* New I64 temp initialised to VAL (caller frees).  */
TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

/* As tcg_const_i32, but as a local temp (survives branches).  */
TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

/* As tcg_const_i64, but as a local temp (survives branches).  */
TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
/* Reset the leak-detection counter (e.g. at the start of a TB).  */
void tcg_clear_temp_count(void)
{
    TCGContext *s = &tcg_ctx;
    s->temps_in_use = 0;
}

/* Returns nonzero if temps were leaked since the last clear/check.  */
int tcg_check_temp_count(void)
{
    TCGContext *s = &tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap.  Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call().
 */
/* Emit a call to helper FUNC returning into RET (or TCG_CALL_DUMMY_ARG
   for void) with NARGS arguments from ARGS.  The helper must have been
   registered in s->helpers; its flags/sizemask drive the 32/64-bit
   argument splitting and extension performed here.  */
void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
                   int nargs, TCGArg *args)
{
    int i, real_args, nb_rets, pi, pi_first;
    unsigned sizemask, flags;
    TCGHelperInfo *info;

    info = g_hash_table_lookup(s->helpers, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;

    TCGV_UNUSED_I64(retl);
    TCGV_UNUSED_I64(reth);
    if (sizemask != 0) {
        TCGArg *split_args = __builtin_alloca(sizeof(TCGArg) * nargs * 2);
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = GET_TCGV_I32(h);
                split_args[real_args++] = GET_TCGV_I32(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Backend wants 32-bit arguments widened to 64 bits, with the
       signedness recorded in the sizemask.  */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = GET_TCGV_I64(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    /* Write return slot(s), inputs, function pointer and flags into the
       parameter buffer, starting at pi_first.  */
    pi_first = pi = s->gen_next_parm_idx;
    if (ret != TCG_CALL_DUMMY_ARG) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            s->gen_opparam_buf[pi++] = GET_TCGV_I64(reth);
            s->gen_opparam_buf[pi++] = GET_TCGV_I64(retl);
            nb_rets = 2;
        } else {
            s->gen_opparam_buf[pi++] = ret;
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
            /* 64-bit return on a 32-bit host: two halves, host-endian
               order.  */
#ifdef HOST_WORDS_BIGENDIAN
            s->gen_opparam_buf[pi++] = ret + 1;
            s->gen_opparam_buf[pi++] = ret;
#else
            s->gen_opparam_buf[pi++] = ret;
            s->gen_opparam_buf[pi++] = ret + 1;
#endif
            nb_rets = 2;
        } else {
            s->gen_opparam_buf[pi++] = ret;
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                s->gen_opparam_buf[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
            /* If stack grows up, then we will be placing successive
               arguments at lower addresses, which means we need to
               reverse the order compared to how we would normally
               treat either big or little-endian.  For those arguments
               that will wind up in registers, this still works for
               HPPA (the only current STACK_GROWSUP target) since the
               argument registers are *also* allocated in decreasing
               order.  If another such target is added, this logic may
               have to get more complicated to differentiate between
               stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            s->gen_opparam_buf[pi++] = args[i] + 1;
            s->gen_opparam_buf[pi++] = args[i];
#else
            s->gen_opparam_buf[pi++] = args[i];
            s->gen_opparam_buf[pi++] = args[i] + 1;
#endif
            real_args += 2;
            continue;
        }

        s->gen_opparam_buf[pi++] = args[i];
        real_args++;
    }
    s->gen_opparam_buf[pi++] = (uintptr_t)func;
    s->gen_opparam_buf[pi++] = flags;

    i = s->gen_next_op_idx;
    tcg_debug_assert(i < OPC_BUF_SIZE);
    tcg_debug_assert(pi <= OPPARAM_BUF_SIZE);

    /* Set links for sequential allocation during translation.  */
    s->gen_op_buf[i] = (TCGOp){
        .opc = INDEX_op_call,
        .callo = nb_rets,
        .calli = real_args,
        .args = pi_first,
        .prev = i - 1,
        .next = i + 1
    };

    /* Make sure the calli field didn't overflow.  */
    tcg_debug_assert(s->gen_op_buf[i].calli == real_args);

    s->gen_op_buf[0].prev = i;
    s->gen_next_op_idx = i + 1;
    s->gen_next_parm_idx = pi;

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            TCGv_i32 h = MAKE_TCGV_I32(args[real_args++]);
            TCGv_i32 l = MAKE_TCGV_I32(args[real_args++]);
            tcg_temp_free_i32(h);
            tcg_temp_free_i32(l);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(MAKE_TCGV_I64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Free the extension temps created before the call.  */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = MAKE_TCGV_I64(args[i]);
            tcg_temp_free_i64(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}

/* Initialise allocator state at the start of register allocation:
   fixed-reg globals live in registers, other globals in memory; local
   temps start in memory, plain temps start dead.  */
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i;
    TCGTemp *ts;
    for(i = 0; i < s->nb_globals; i++) {
        ts = &s->temps[i];
        if (ts->fixed_reg) {
            ts->val_type = TEMP_VAL_REG;
        } else {
            ts->val_type = TEMP_VAL_MEM;
        }
    }
    for(i = s->nb_globals; i < s->nb_temps; i++) {
        ts = &s->temps[i];
        if (ts->temp_local) {
            ts->val_type = TEMP_VAL_MEM;
        } else {
            ts->val_type = TEMP_VAL_DEAD;
        }
        ts->mem_allocated = 0;
        ts->fixed_reg = 0;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

/* Format a human-readable name for TS into BUF: the global's own name,
   or "locN"/"tmpN" for locals and plain temps.  Returns BUF.  */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(s, ts);

    if (idx < s->nb_globals) {
        pstrcpy(buf, buf_size, ts->name);
    } else if (ts->temp_local) {
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
    } else {
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
    }
    return buf;
}

/* As tcg_get_arg_str_ptr, keyed by temp index.  */
static char *tcg_get_arg_str_idx(TCGContext *s, char *buf,
                                 int buf_size, int idx)
{
    tcg_debug_assert(idx >= 0 && idx < s->nb_temps);
    return tcg_get_arg_str_ptr(s, buf, buf_size, &s->temps[idx]);
}

/* Find helper name.
 */
/* Map a helper's entry-point address back to its registered name, or
   NULL if unknown.  */
static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
{
    const char *ret = NULL;
    if (s->helpers) {
        TCGHelperInfo *info = g_hash_table_lookup(s->helpers, (gpointer)val);
        if (info) {
            ret = info->name;
        }
    }
    return ret;
}

/* Printable names for TCGCond values, indexed by condition code.  */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

/* Printable names for the size/sign/endian part of a TCGMemOp.  */
static const char * const ldst_name[] =
{
    [MO_UB] = "ub",
    [MO_SB] = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEQ] = "beq",
};

/* Printable prefixes for the alignment part of a TCGMemOp; the default
   case prints as "" depending on whether the port is ALIGNED_ONLY.  */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT] = "un+",
    [MO_ALIGN >> MO_ASHIFT] = "",
#else
    [MO_UNALN >> MO_ASHIFT] = "",
    [MO_ALIGN >> MO_ASHIFT] = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

/* Pretty-print the current op list to the qemu log, one op per line,
   with special formatting for insn_start, calls, conditions, memory
   ops, branch targets, and (when present) liveness info.  */
void tcg_dump_ops(TCGContext *s)
{
    char buf[128];
    TCGOp *op;
    int oi;

    for (oi = s->gen_op_buf[0].next; oi != 0; oi = op->next) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        const TCGArg *args;
        TCGOpcode c;
        int col = 0;   /* running column count, for aligning life info */

        op = &s->gen_op_buf[oi];
        c = op->opc;
        def = &tcg_op_defs[c];
        args = &s->gen_opparam_buf[op->args];

        if (c == INDEX_op_insn_start) {
            col += qemu_log("%s ----", oi != s->gen_op_buf[0].next ? "\n" : "");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each target word was stored as two host-word args.  */
                a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
#else
                a = args[i];
#endif
                col += qemu_log(" " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            /* variable number of arguments */
            nb_oargs = op->callo;
            nb_iargs = op->calli;
            nb_cargs = def->nb_cargs;

            /* function name, flags, out args */
            col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
                            tcg_find_helper(s, args[nb_oargs + nb_iargs]),
                            args[nb_oargs + nb_iargs + 1], nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
                                                           args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = args[nb_oargs + i];
                const char *t = "<dummy>";
                if (arg != TCG_CALL_DUMMY_ARG) {
                    t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg);
                }
                col += qemu_log(",%s", t);
            }
        } else {
            col += qemu_log(" %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            /* k walks the flat args array across out/in/const args.  */
            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                if (k != 0) {
                    col += qemu_log(",");
                }
                col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
                                                          args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                if (k != 0) {
                    col += qemu_log(",");
                }
                col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
                                                          args[k++]));
            }
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
                /* First const arg is a condition code.  */
                if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
                    col += qemu_log(",%s", cond_name[args[k++]]);
                } else {
                    col += qemu_log(",$0x%" TCG_PRIlx, args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                {
                    /* NB: this 'oi'/'op' pair shadows the loop variables.  */
                    TCGMemOpIdx oi = args[k++];
                    TCGMemOp op = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
                        /* Unrecognized bits: dump raw.  */
                        col += qemu_log(",$0x%x,%u", op, ix);
                    } else {
                        const char *s_al, *s_op;
                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
                        col += qemu_log(",%s%s,%u", s_al, s_op, ix);
                    }
                    i = 1;
                }
                break;
            default:
                i = 0;
                break;
            }
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                /* Last const arg is a branch target label.  */
                col += qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id);
                i++, k++;
                break;
            default:
                break;
            }
            /* Any remaining const args print as raw hex.  */
            for (; i < nb_cargs; i++, k++) {
                col += qemu_log("%s$0x%" TCG_PRIlx, k ?
"," : "", args[k]); 1158 } 1159 } 1160 if (op->life) { 1161 unsigned life = op->life; 1162 1163 for (; col < 48; ++col) { 1164 putc(' ', qemu_logfile); 1165 } 1166 1167 if (life & (SYNC_ARG * 3)) { 1168 qemu_log(" sync:"); 1169 for (i = 0; i < 2; ++i) { 1170 if (life & (SYNC_ARG << i)) { 1171 qemu_log(" %d", i); 1172 } 1173 } 1174 } 1175 life /= DEAD_ARG; 1176 if (life) { 1177 qemu_log(" dead:"); 1178 for (i = 0; life; ++i, life >>= 1) { 1179 if (life & 1) { 1180 qemu_log(" %d", i); 1181 } 1182 } 1183 } 1184 } 1185 qemu_log("\n"); 1186 } 1187 } 1188 1189 /* we give more priority to constraints with less registers */ 1190 static int get_constraint_priority(const TCGOpDef *def, int k) 1191 { 1192 const TCGArgConstraint *arg_ct; 1193 1194 int i, n; 1195 arg_ct = &def->args_ct[k]; 1196 if (arg_ct->ct & TCG_CT_ALIAS) { 1197 /* an alias is equivalent to a single register */ 1198 n = 1; 1199 } else { 1200 if (!(arg_ct->ct & TCG_CT_REG)) 1201 return 0; 1202 n = 0; 1203 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 1204 if (tcg_regset_test_reg(arg_ct->u.regs, i)) 1205 n++; 1206 } 1207 } 1208 return TCG_TARGET_NB_REGS - n + 1; 1209 } 1210 1211 /* sort from highest priority to lowest */ 1212 static void sort_constraints(TCGOpDef *def, int start, int n) 1213 { 1214 int i, j, p1, p2, tmp; 1215 1216 for(i = 0; i < n; i++) 1217 def->sorted_args[start + i] = start + i; 1218 if (n <= 1) 1219 return; 1220 for(i = 0; i < n - 1; i++) { 1221 for(j = i + 1; j < n; j++) { 1222 p1 = get_constraint_priority(def, def->sorted_args[start + i]); 1223 p2 = get_constraint_priority(def, def->sorted_args[start + j]); 1224 if (p1 < p2) { 1225 tmp = def->sorted_args[start + i]; 1226 def->sorted_args[start + i] = def->sorted_args[start + j]; 1227 def->sorted_args[start + j] = tmp; 1228 } 1229 } 1230 } 1231 } 1232 1233 static void process_op_defs(TCGContext *s) 1234 { 1235 TCGOpcode op; 1236 1237 for (op = 0; op < NB_OPS; op++) { 1238 TCGOpDef *def = &tcg_op_defs[op]; 1239 const TCGTargetOpDef *tdefs; 
1240 TCGType type; 1241 int i, nb_args; 1242 1243 if (def->flags & TCG_OPF_NOT_PRESENT) { 1244 continue; 1245 } 1246 1247 nb_args = def->nb_iargs + def->nb_oargs; 1248 if (nb_args == 0) { 1249 continue; 1250 } 1251 1252 tdefs = tcg_target_op_def(op); 1253 /* Missing TCGTargetOpDef entry. */ 1254 tcg_debug_assert(tdefs != NULL); 1255 1256 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32); 1257 for (i = 0; i < nb_args; i++) { 1258 const char *ct_str = tdefs->args_ct_str[i]; 1259 /* Incomplete TCGTargetOpDef entry. */ 1260 tcg_debug_assert(ct_str != NULL); 1261 1262 tcg_regset_clear(def->args_ct[i].u.regs); 1263 def->args_ct[i].ct = 0; 1264 while (*ct_str != '\0') { 1265 switch(*ct_str) { 1266 case '0' ... '9': 1267 { 1268 int oarg = *ct_str - '0'; 1269 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); 1270 tcg_debug_assert(oarg < def->nb_oargs); 1271 tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG); 1272 /* TCG_CT_ALIAS is for the output arguments. 1273 The input is tagged with TCG_CT_IALIAS. */ 1274 def->args_ct[i] = def->args_ct[oarg]; 1275 def->args_ct[oarg].ct |= TCG_CT_ALIAS; 1276 def->args_ct[oarg].alias_index = i; 1277 def->args_ct[i].ct |= TCG_CT_IALIAS; 1278 def->args_ct[i].alias_index = oarg; 1279 } 1280 ct_str++; 1281 break; 1282 case '&': 1283 def->args_ct[i].ct |= TCG_CT_NEWREG; 1284 ct_str++; 1285 break; 1286 case 'i': 1287 def->args_ct[i].ct |= TCG_CT_CONST; 1288 ct_str++; 1289 break; 1290 default: 1291 ct_str = target_parse_constraint(&def->args_ct[i], 1292 ct_str, type); 1293 /* Typo in TCGTargetOpDef constraint. */ 1294 tcg_debug_assert(ct_str != NULL); 1295 } 1296 } 1297 } 1298 1299 /* TCGTargetOpDef entry with too much information? 
*/ 1300 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 1301 1302 /* sort the constraints (XXX: this is just an heuristic) */ 1303 sort_constraints(def, 0, def->nb_oargs); 1304 sort_constraints(def, def->nb_oargs, def->nb_iargs); 1305 } 1306 } 1307 1308 void tcg_op_remove(TCGContext *s, TCGOp *op) 1309 { 1310 int next = op->next; 1311 int prev = op->prev; 1312 1313 /* We should never attempt to remove the list terminator. */ 1314 tcg_debug_assert(op != &s->gen_op_buf[0]); 1315 1316 s->gen_op_buf[next].prev = prev; 1317 s->gen_op_buf[prev].next = next; 1318 1319 memset(op, 0, sizeof(*op)); 1320 1321 #ifdef CONFIG_PROFILER 1322 s->del_op_count++; 1323 #endif 1324 } 1325 1326 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 1327 TCGOpcode opc, int nargs) 1328 { 1329 int oi = s->gen_next_op_idx; 1330 int pi = s->gen_next_parm_idx; 1331 int prev = old_op->prev; 1332 int next = old_op - s->gen_op_buf; 1333 TCGOp *new_op; 1334 1335 tcg_debug_assert(oi < OPC_BUF_SIZE); 1336 tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE); 1337 s->gen_next_op_idx = oi + 1; 1338 s->gen_next_parm_idx = pi + nargs; 1339 1340 new_op = &s->gen_op_buf[oi]; 1341 *new_op = (TCGOp){ 1342 .opc = opc, 1343 .args = pi, 1344 .prev = prev, 1345 .next = next 1346 }; 1347 s->gen_op_buf[prev].next = oi; 1348 old_op->prev = oi; 1349 1350 return new_op; 1351 } 1352 1353 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 1354 TCGOpcode opc, int nargs) 1355 { 1356 int oi = s->gen_next_op_idx; 1357 int pi = s->gen_next_parm_idx; 1358 int prev = old_op - s->gen_op_buf; 1359 int next = old_op->next; 1360 TCGOp *new_op; 1361 1362 tcg_debug_assert(oi < OPC_BUF_SIZE); 1363 tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE); 1364 s->gen_next_op_idx = oi + 1; 1365 s->gen_next_parm_idx = pi + nargs; 1366 1367 new_op = &s->gen_op_buf[oi]; 1368 *new_op = (TCGOp){ 1369 .opc = opc, 1370 .args = pi, 1371 .prev = prev, 1372 .next = next 1373 }; 1374 s->gen_op_buf[next].prev = oi; 1375 
    old_op->next = oi;

    return new_op;
}

/* Per-temp liveness state bits used by the passes below.  */
#define TS_DEAD  1
#define TS_MEM   2

#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))

/* liveness analysis: end of function: all temps are dead, and globals
   should be in memory. */
static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state)
{
    memset(temp_state, TS_DEAD | TS_MEM, s->nb_globals);
    memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals);
}

/* liveness analysis: end of basic block: all temps are dead, globals
   and local temps should be in memory. */
static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state)
{
    int i, n;

    tcg_la_func_end(s, temp_state);
    for (i = s->nb_globals, n = s->nb_temps; i < n; i++) {
        if (s->temps[i].temp_local) {
            temp_state[i] |= TS_MEM;
        }
    }
}

/* Liveness analysis : update the opc_arg_life array to tell if a
   given input arguments is dead. Instructions updating dead
   temporaries are removed. */
static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
{
    int nb_globals = s->nb_globals;
    int oi, oi_prev;

    tcg_la_func_end(s, temp_state);

    /* Walk the op list backwards, from the last op to the first.  */
    for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
        int i, nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGArg arg;

        TCGOp * const op = &s->gen_op_buf[oi];
        TCGArg * const args = &s->gen_opparam_buf[op->args];
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        oi_prev = op->prev;

        switch (opc) {
        case INDEX_op_call:
            {
                int call_flags;

                nb_oargs = op->callo;
                nb_iargs = op->calli;
                call_flags = args[nb_oargs + nb_iargs + 1];

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        arg = args[i];
                        if (temp_state[arg] != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                } else {
                do_not_remove_call:

                    /* output args are dead */
                    for (i = 0; i < nb_oargs; i++) {
                        arg = args[i];
                        if (temp_state[arg] & TS_DEAD) {
                            arg_life |= DEAD_ARG << i;
                        }
                        if (temp_state[arg] & TS_MEM) {
                            arg_life |= SYNC_ARG << i;
                        }
                        temp_state[arg] = TS_DEAD;
                    }

                    if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                        TCG_CALL_NO_READ_GLOBALS))) {
                        /* globals should go back to memory */
                        memset(temp_state, TS_DEAD | TS_MEM, nb_globals);
                    } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                        /* globals should be synced to memory */
                        for (i = 0; i < nb_globals; i++) {
                            temp_state[i] |= TS_MEM;
                        }
                    }

                    /* record arguments that die in this helper */
                    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                        arg = args[i];
                        if (arg != TCG_CALL_DUMMY_ARG) {
                            if (temp_state[arg] & TS_DEAD) {
                                arg_life |= DEAD_ARG << i;
                            }
                        }
                    }
                    /* input arguments are live for preceding opcodes */
                    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                        arg = args[i];
                        if (arg != TCG_CALL_DUMMY_ARG) {
                            temp_state[arg] &= ~TS_DEAD;
                        }
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            temp_state[args[0]] = TS_DEAD;
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part. The result can be optimized to a simple
               add or sub. This happens often for x86_64 guest when the
               cpu mode is set to 32 bit. */
            if (temp_state[args[1]] == TS_DEAD) {
                if (temp_state[args[0]] == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end. */
                op->opc = opc = opc_new;
                args[1] = args[2];
                args[2] = args[4];
                /* Fall through and mark the single-word operation live. */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (temp_state[args[1]] == TS_DEAD) {
                if (temp_state[args[0]] == TS_DEAD) {
                    /* Both parts of the operation are dead. */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                args[1] = args[2];
                args[2] = args[3];
            } else if (temp_state[args[0]] == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                args[0] = args[1];
                args[1] = args[2];
                args[2] = args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live. */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (temp_state[args[i]] != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
            do_remove:
                tcg_op_remove(s, op);
            } else {
            do_not_remove:
                /* output args are dead */
                for (i = 0; i < nb_oargs; i++) {
                    arg = args[i];
                    if (temp_state[arg] & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (temp_state[arg] & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    temp_state[arg] = TS_DEAD;
                }

                /* if end of basic block, update */
                if (def->flags & TCG_OPF_BB_END) {
                    tcg_la_bb_end(s, temp_state);
                } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                    /* globals should be synced to memory */
                    for (i = 0; i < nb_globals; i++) {
                        temp_state[i] |= TS_MEM;
                    }
                }

                /* record arguments that die in this opcode */
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    arg = args[i];
                    if (temp_state[arg] & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }
                /* input arguments are live for preceding opcodes */
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    temp_state[args[i]] &= ~TS_DEAD;
                }
            }
            break;
        }
        op->life = arg_life;
    }
}

/* Liveness analysis: Convert indirect regs to direct temporaries. */
static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
{
    int nb_globals = s->nb_globals;
    int16_t *dir_temps;
    int i, oi, oi_next;
    bool changes = false;

    dir_temps = tcg_malloc(nb_globals * sizeof(int16_t));
    memset(dir_temps, 0, nb_globals * sizeof(int16_t));

    /* Create a temporary for each indirect global.
     */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dir_temps[i] = temp_idx(s, dts);
        }
    }

    /* All indirect globals start out unloaded (TS_DEAD).  */
    memset(temp_state, TS_DEAD, nb_globals);

    for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
        TCGOp *op = &s->gen_op_buf[oi];
        TCGArg *args = &s->gen_opparam_buf[op->args];
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGArg arg, dir;

        oi_next = op->next;

        if (opc == INDEX_op_call) {
            nb_oargs = op->callo;
            nb_iargs = op->calli;
            call_flags = args[nb_oargs + nb_iargs + 1];
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require. */
            if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals. */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available. */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg = args[i];
            /* Note this unsigned test catches TCG_CALL_ARG_DUMMY too. */
            if (arg < nb_globals) {
                dir = dir_temps[arg];
                if (dir != 0 && temp_state[arg] == TS_DEAD) {
                    /* insert a load from the global's canonical memory slot */
                    TCGTemp *its = &s->temps[arg];
                    TCGOpcode lopc = (its->type == TCG_TYPE_I32
                                      ? INDEX_op_ld_i32
                                      : INDEX_op_ld_i64);
                    TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
                    TCGArg *largs = &s->gen_opparam_buf[lop->args];

                    largs[0] = dir;
                    largs[1] = temp_idx(s, its->mem_base);
                    largs[2] = its->mem_offset;

                    /* Loaded, but synced with memory. */
                    temp_state[arg] = TS_MEM;
                }
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed. */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg = args[i];
            if (arg < nb_globals) {
                dir = dir_temps[arg];
                if (dir != 0) {
                    args[i] = dir;
                    changes = true;
                    if (IS_DEAD_ARG(i)) {
                        temp_state[arg] = TS_DEAD;
                    }
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points. */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM. */
                tcg_debug_assert(dir_temps[i] == 0
                                 || temp_state[i] != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded. */
                tcg_debug_assert(dir_temps[i] == 0
                                 || temp_state[i] == TS_DEAD);
            }
        }

        /* Outputs become available. */
        for (i = 0; i < nb_oargs; i++) {
            arg = args[i];
            if (arg >= nb_globals) {
                continue;
            }
            dir = dir_temps[arg];
            if (dir == 0) {
                continue;
            }
            args[i] = dir;
            changes = true;

            /* The output is now live and modified. */
            temp_state[arg] = 0;

            /* Sync outputs upon their last write. */
            if (NEED_SYNC_ARG(i)) {
                /* insert a store back to the global's memory slot */
                TCGTemp *its = &s->temps[arg];
                TCGOpcode sopc = (its->type == TCG_TYPE_I32
                                  ? INDEX_op_st_i32
                                  : INDEX_op_st_i64);
                TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
                TCGArg *sargs = &s->gen_opparam_buf[sop->args];

                sargs[0] = dir;
                sargs[1] = temp_idx(s, its->mem_base);
                sargs[2] = its->mem_offset;

                temp_state[arg] = TS_MEM;
            }
            /* Drop outputs that are dead. */
            if (IS_DEAD_ARG(i)) {
                temp_state[arg] = TS_DEAD;
            }
        }
    }

    return changes;
}

#ifdef CONFIG_DEBUG_TCG
/* Debug dump of every temporary's location and the reg->temp map.  */
static void dump_regs(TCGContext *s)
{
    TCGTemp *ts;
    int i;
    char buf[64];

    for(i = 0; i < s->nb_temps; i++) {
        ts = &s->temps[i];
        printf(" %10s: ", tcg_get_arg_str_idx(s, buf, sizeof(buf), i));
        switch(ts->val_type) {
        case TEMP_VAL_REG:
            printf("%s", tcg_target_reg_names[ts->reg]);
            break;
        case TEMP_VAL_MEM:
            printf("%d(%s)", (int)ts->mem_offset,
                   tcg_target_reg_names[ts->mem_base->reg]);
            break;
        case TEMP_VAL_CONST:
            printf("$0x%" TCG_PRIlx, ts->val);
            break;
        case TEMP_VAL_DEAD:
            printf("D");
            break;
        default:
            printf("???");
            break;
        }
        printf("\n");
    }

    for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (s->reg_to_temp[i] != NULL) {
            printf("%s: %s\n",
                   tcg_target_reg_names[i],
                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
        }
    }
}

/* Consistency check: reg_to_temp[] and each temp's val_type/reg must
   agree in both directions; aborts (after dumping state) on mismatch.  */
static void check_regs(TCGContext *s)
{
    int reg;
    int k;
    TCGTemp *ts;
    char buf[64];

    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
        ts = s->reg_to_temp[reg];
        if (ts != NULL) {
            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
                printf("Inconsistency for register %s:\n",
                       tcg_target_reg_names[reg]);
                goto fail;
            }
        }
    }
    for (k = 0; k < s->nb_temps; k++) {
        ts = &s->temps[k];
        if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
            && s->reg_to_temp[ts->reg] != ts) {
            printf("Inconsistency for temp %s:\n",
                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
        fail:
            printf("reg state:\n");
            dump_regs(s);
            tcg_abort();
        }
    }
}
#endif

/* Assign a stack-frame slot to temp TEMP; aborts if the frame is full.  */
static void temp_allocate_frame(TCGContext *s, int temp)
{
    TCGTemp *ts;
    ts = &s->temps[temp];
#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
    /* Sparc64 stack is accessed with offset of 2047 */
    s->current_frame_offset = (s->current_frame_offset +
                               (tcg_target_long)sizeof(tcg_target_long) - 1) &
        ~(sizeof(tcg_target_long) - 1);
#endif
    if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
        s->frame_end) {
        tcg_abort();
    }
    ts->mem_offset = s->current_frame_offset;
    ts->mem_base = s->frame_temp;
    ts->mem_allocated = 1;
    s->current_frame_offset += sizeof(tcg_target_long);
}

static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet);

/* Mark a temporary as free or dead. If 'free_or_dead' is negative,
   mark it free; otherwise mark it dead. */
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
{
    if (ts->fixed_reg) {
        return;
    }
    if (ts->val_type == TEMP_VAL_REG) {
        s->reg_to_temp[ts->reg] = NULL;
    }
    /* Freed temps, locals, and globals keep their memory home;
       other temps become simply dead.  */
    ts->val_type = (free_or_dead < 0
                    || ts->temp_local
                    || temp_idx(s, ts) < s->nb_globals
                    ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
}

/* Mark a temporary as dead. */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}

/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   registers needs to be allocated to store a constant. If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free. */
static void temp_sync(TCGContext *s, TCGTemp *ts,
                      TCGRegSet allocated_regs, int free_or_dead)
{
    if (ts->fixed_reg) {
        return;
    }
    if (!ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, temp_idx(s, ts));
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly. */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            tcg_abort();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}

/* free register 'reg' by spilling the corresponding temporary if necessary */
static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
{
    TCGTemp *ts = s->reg_to_temp[reg];
    if (ts != NULL) {
        temp_sync(s, ts, allocated_regs, -1);
    }
}

/* Allocate a register belonging to reg1 & ~reg2 */
static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs,
                            TCGRegSet allocated_regs, bool rev)
{
    int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    const int *order;
    TCGReg reg;
    TCGRegSet reg_ct;

    tcg_regset_andnot(reg_ct, desired_regs, allocated_regs);
    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /* first try free registers */
    for(i = 0; i < n; i++) {
        reg = order[i];
        if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL)
            return reg;
    }

    /* XXX: do better spill choice */
    for(i = 0; i < n; i++) {
        reg = order[i];
        if (tcg_regset_test_reg(reg_ct, reg)) {
            tcg_reg_free(s, reg, allocated_regs);
            return reg;
        }
    }

    tcg_abort();
}

/* Make sure the temporary is in a register. If needed, allocate the register
   from DESIRED while avoiding ALLOCATED. */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
        tcg_out_movi(s, ts->type, reg, ts->val);
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        tcg_abort();
    }
    ts->reg = reg;
    ts->val_type = TEMP_VAL_REG;
    s->reg_to_temp[reg] = ts;
}

/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary registers needs to be allocated to store a constant. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep an tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
}

/* save globals to their canonical location and assume they can be
   modified be the following code. 'allocated_regs' is used in case a
'allocated_regs' is used in case a 2040 temporary registers needs to be allocated to store a constant. */ 2041 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 2042 { 2043 int i; 2044 2045 for (i = 0; i < s->nb_globals; i++) { 2046 temp_save(s, &s->temps[i], allocated_regs); 2047 } 2048 } 2049 2050 /* sync globals to their canonical location and assume they can be 2051 read by the following code. 'allocated_regs' is used in case a 2052 temporary registers needs to be allocated to store a constant. */ 2053 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 2054 { 2055 int i; 2056 2057 for (i = 0; i < s->nb_globals; i++) { 2058 TCGTemp *ts = &s->temps[i]; 2059 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 2060 || ts->fixed_reg 2061 || ts->mem_coherent); 2062 } 2063 } 2064 2065 /* at the end of a basic block, we assume all temporaries are dead and 2066 all globals are stored at their canonical location. */ 2067 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 2068 { 2069 int i; 2070 2071 for (i = s->nb_globals; i < s->nb_temps; i++) { 2072 TCGTemp *ts = &s->temps[i]; 2073 if (ts->temp_local) { 2074 temp_save(s, ts, allocated_regs); 2075 } else { 2076 /* The liveness analysis already ensures that temps are dead. 2077 Keep an tcg_debug_assert for safety. */ 2078 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 2079 } 2080 } 2081 2082 save_globals(s, allocated_regs); 2083 } 2084 2085 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 2086 tcg_target_ulong val, TCGLifeData arg_life) 2087 { 2088 if (ots->fixed_reg) { 2089 /* For fixed registers, we do not do any constant propagation. */ 2090 tcg_out_movi(s, ots->type, ots->reg, val); 2091 return; 2092 } 2093 2094 /* The movi is not explicitly generated here. 
*/ 2095 if (ots->val_type == TEMP_VAL_REG) { 2096 s->reg_to_temp[ots->reg] = NULL; 2097 } 2098 ots->val_type = TEMP_VAL_CONST; 2099 ots->val = val; 2100 ots->mem_coherent = 0; 2101 if (NEED_SYNC_ARG(0)) { 2102 temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0)); 2103 } else if (IS_DEAD_ARG(0)) { 2104 temp_dead(s, ots); 2105 } 2106 } 2107 2108 static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args, 2109 TCGLifeData arg_life) 2110 { 2111 TCGTemp *ots = &s->temps[args[0]]; 2112 tcg_target_ulong val = args[1]; 2113 2114 tcg_reg_alloc_do_movi(s, ots, val, arg_life); 2115 } 2116 2117 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, 2118 const TCGArg *args, TCGLifeData arg_life) 2119 { 2120 TCGRegSet allocated_regs; 2121 TCGTemp *ts, *ots; 2122 TCGType otype, itype; 2123 2124 tcg_regset_set(allocated_regs, s->reserved_regs); 2125 ots = &s->temps[args[0]]; 2126 ts = &s->temps[args[1]]; 2127 2128 /* Note that otype != itype for no-op truncation. */ 2129 otype = ots->type; 2130 itype = ts->type; 2131 2132 if (ts->val_type == TEMP_VAL_CONST) { 2133 /* propagate constant or generate sti */ 2134 tcg_target_ulong val = ts->val; 2135 if (IS_DEAD_ARG(1)) { 2136 temp_dead(s, ts); 2137 } 2138 tcg_reg_alloc_do_movi(s, ots, val, arg_life); 2139 return; 2140 } 2141 2142 /* If the source value is in memory we're going to be forced 2143 to have it in a register in order to perform the copy. Copy 2144 the SOURCE value into its own register first, that way we 2145 don't have to reload SOURCE the next time it is used. */ 2146 if (ts->val_type == TEMP_VAL_MEM) { 2147 temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs); 2148 } 2149 2150 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 2151 if (IS_DEAD_ARG(0) && !ots->fixed_reg) { 2152 /* mov to a non-saved dead register makes no sense (even with 2153 liveness analysis disabled). 
*/ 2154 tcg_debug_assert(NEED_SYNC_ARG(0)); 2155 if (!ots->mem_allocated) { 2156 temp_allocate_frame(s, args[0]); 2157 } 2158 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); 2159 if (IS_DEAD_ARG(1)) { 2160 temp_dead(s, ts); 2161 } 2162 temp_dead(s, ots); 2163 } else { 2164 if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) { 2165 /* the mov can be suppressed */ 2166 if (ots->val_type == TEMP_VAL_REG) { 2167 s->reg_to_temp[ots->reg] = NULL; 2168 } 2169 ots->reg = ts->reg; 2170 temp_dead(s, ts); 2171 } else { 2172 if (ots->val_type != TEMP_VAL_REG) { 2173 /* When allocating a new register, make sure to not spill the 2174 input one. */ 2175 tcg_regset_set_reg(allocated_regs, ts->reg); 2176 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 2177 allocated_regs, ots->indirect_base); 2178 } 2179 tcg_out_mov(s, otype, ots->reg, ts->reg); 2180 } 2181 ots->val_type = TEMP_VAL_REG; 2182 ots->mem_coherent = 0; 2183 s->reg_to_temp[ots->reg] = ots; 2184 if (NEED_SYNC_ARG(0)) { 2185 temp_sync(s, ots, allocated_regs, 0); 2186 } 2187 } 2188 } 2189 2190 static void tcg_reg_alloc_op(TCGContext *s, 2191 const TCGOpDef *def, TCGOpcode opc, 2192 const TCGArg *args, TCGLifeData arg_life) 2193 { 2194 TCGRegSet i_allocated_regs; 2195 TCGRegSet o_allocated_regs; 2196 int i, k, nb_iargs, nb_oargs; 2197 TCGReg reg; 2198 TCGArg arg; 2199 const TCGArgConstraint *arg_ct; 2200 TCGTemp *ts; 2201 TCGArg new_args[TCG_MAX_OP_ARGS]; 2202 int const_args[TCG_MAX_OP_ARGS]; 2203 2204 nb_oargs = def->nb_oargs; 2205 nb_iargs = def->nb_iargs; 2206 2207 /* copy constants */ 2208 memcpy(new_args + nb_oargs + nb_iargs, 2209 args + nb_oargs + nb_iargs, 2210 sizeof(TCGArg) * def->nb_cargs); 2211 2212 tcg_regset_set(i_allocated_regs, s->reserved_regs); 2213 tcg_regset_set(o_allocated_regs, s->reserved_regs); 2214 2215 /* satisfy input constraints */ 2216 for(k = 0; k < nb_iargs; k++) { 2217 i = def->sorted_args[nb_oargs + k]; 2218 arg = args[i]; 2219 arg_ct = &def->args_ct[i]; 
        ts = &s->temps[arg];

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            goto iarg_end;
        }

        temp_load(s, ts, arg_ct->u.regs, i_allocated_regs);

        if (arg_ct->ct & TCG_CT_IALIAS) {
            if (ts->fixed_reg) {
                /* if fixed register, we must allocate a new register
                   if the alias is not the same register */
                if (arg != args[arg_ct->alias_index])
                    goto allocate_in_reg;
            } else {
                /* if the input is aliased to an output and if it is
                   not dead after the instruction, we must allocate
                   a new register and move it */
                if (!IS_DEAD_ARG(i)) {
                    goto allocate_in_reg;
                }
                /* check if the current register has already been allocated
                   for another input aliased to an output */
                int k2, i2;
                for (k2 = 0 ; k2 < k ; k2++) {
                    i2 = def->sorted_args[nb_oargs + k2];
                    if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
                        (new_args[i2] == ts->reg)) {
                        goto allocate_in_reg;
                    }
                }
            }
        }
        reg = ts->reg;
        if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
            /* nothing to do : the constraint is satisfied */
        } else {
        allocate_in_reg:
            /* allocate a new register matching the constraint
               and move the temporary register into it */
            reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
                                ts->indirect_base);
            tcg_out_mov(s, ts->type, reg, ts->reg);
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    iarg_end: ;
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, &s->temps[args[i]]);
        }
    }

    if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->sorted_args[k];
            arg = args[i];
            arg_ct = &def->args_ct[i];
            ts = &s->temps[arg];
            if ((arg_ct->ct & TCG_CT_ALIAS)
                && !const_args[arg_ct->alias_index]) {
                /* aliased output reuses its input's register */
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->ct & TCG_CT_NEWREG) {
                /* '&' constraint: must not overlap any input register */
                reg = tcg_reg_alloc(s, arg_ct->u.regs,
                                    i_allocated_regs | o_allocated_regs,
                                    ts->indirect_base);
            } else {
                /* if fixed register, we try to use it */
                reg = ts->reg;
                if (ts->fixed_reg &&
                    tcg_regset_test_reg(arg_ct->u.regs, reg)) {
                    goto oarg_end;
                }
                reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
                                    ts->indirect_base);
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            /* if a fixed register is used, then a move will be done afterwards */
            if (!ts->fixed_reg) {
                if (ts->val_type == TEMP_VAL_REG) {
                    s->reg_to_temp[ts->reg] = NULL;
                }
                ts->val_type = TEMP_VAL_REG;
                ts->reg = reg;
                /* temp value is modified, so the value kept in memory is
                   potentially not the same */
                ts->mem_coherent = 0;
                s->reg_to_temp[reg] = ts;
            }
        oarg_end:
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    tcg_out_op(s, opc, new_args, const_args);

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = &s->temps[args[i]];
        reg = new_args[i];
        if (ts->fixed_reg && ts->reg != reg) {
            tcg_out_mov(s, ts->type, ts->reg, reg);
        }
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}

#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif

/* Register allocation for a call op.
   NOTE(review): this function continues in the next chunk.  */
static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
                               const TCGArg * const args, TCGLifeData arg_life)
{
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs];
    flags = args[nb_oargs + nb_iargs + 1];

    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for(i = nb_regs; i < nb_iargs; i++) {
        arg = args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = &s->temps[arg];
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs);
            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    tcg_regset_set(allocated_regs,
                   s->reserved_regs);
    for(i = 0; i < nb_regs; i++) {
        arg = args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = &s->temps[arg];
            reg = tcg_target_call_iarg_regs[i];
            /* Evict whatever currently occupies the ABI argument reg. */
            tcg_reg_free(s, reg, allocated_regs);

            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    tcg_out_mov(s, ts->type, reg, ts->reg);
                }
            } else {
                TCGRegSet arg_set;

                /* Load the temp directly into the required register by
                   constraining the allocation set to that one reg. */
                tcg_regset_clear(arg_set);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs);
            }

            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, &s->temps[args[i]]);
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, func_addr);

    /* assign output registers and emit moves if needed */
    for(i = 0; i < nb_oargs; i++) {
        arg = args[i];
        ts = &s->temps[arg];
        reg = tcg_target_call_oarg_regs[i];
        /* The return register must be free: it was clobbered above. */
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);

        if (ts->fixed_reg) {
            if (ts->reg != reg) {
                tcg_out_mov(s, ts->type, ts->reg, reg);
            }
        } else {
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            if (NEED_SYNC_ARG(i)) {
                temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i));
            } else if (IS_DEAD_ARG(i)) {
                temp_dead(s, ts);
            }
        }
    }
}

#ifdef CONFIG_PROFILER

/* Per-opcode execution counters, bumped in the tcg_gen_code() loop. */
static int64_t tcg_table_op_count[NB_OPS];

/* Print one line per opcode with its accumulated count. */
void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
{
    int i;

    for (i = 0; i < NB_OPS; i++) {
        cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
                    tcg_table_op_count[i]);
    }
}
#else
void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
{
    cpu_fprintf(f, "[TCG profiler not compiled]\n");
}
#endif


/*
 * Translate the accumulated ops into host code for TB.  Returns the
 * generated code size, or -1 if the code buffer high-water mark was
 * crossed (caller must retry with a fresh buffer).
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
    int i, oi, oi_next, num_insns;

#ifdef CONFIG_PROFILER
    {
        int n;

        n = s->gen_op_buf[0].prev + 1;
        s->op_count += n;
        if (n > s->op_count_max) {
            s->op_count_max = n;
        }

        n = s->nb_temps;
        s->temp_count += n;
        if (n > s->temp_count_max) {
            s->temp_count_max = n;
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 &&
                 qemu_log_in_addr_range(tb->pc))) {
        qemu_log_lock();
        qemu_log("OP:\n");
        tcg_dump_ops(s);
        qemu_log("\n");
        qemu_log_unlock();
    }
#endif

#ifdef CONFIG_PROFILER
    s->opt_time -= profile_getclock();
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    s->opt_time += profile_getclock();
    s->la_time -= profile_getclock();
#endif

    {
        /* One byte of liveness state per temp (plus indirect temps). */
        uint8_t *temp_state = tcg_malloc(s->nb_temps + s->nb_indirects);

        liveness_pass_1(s, temp_state);

        if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
            if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                         && qemu_log_in_addr_range(tb->pc))) {
                qemu_log_lock();
                qemu_log("OP before indirect lowering:\n");
                tcg_dump_ops(s);
                qemu_log("\n");
                qemu_log_unlock();
            }
#endif
            /* Replace indirect temps with direct temps. */
            if (liveness_pass_2(s, temp_state)) {
                /* If changes were made, re-run liveness. */
                liveness_pass_1(s, temp_state);
            }
        }
    }

#ifdef CONFIG_PROFILER
    s->la_time += profile_getclock();
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        qemu_log_lock();
        qemu_log("OP after optimization and liveness analysis:\n");
        tcg_dump_ops(s);
        qemu_log("\n");
        qemu_log_unlock();
    }
#endif

    tcg_reg_alloc_start(s);

    s->code_buf = tb->tc_ptr;
    s->code_ptr = tb->tc_ptr;

    tcg_out_tb_init(s);

    /* Main codegen loop: walk the op list, dispatching each op to the
       appropriate register allocator / emitter. */
    num_insns = -1;
    for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
        TCGOp * const op = &s->gen_op_buf[oi];
        TCGArg * const args = &s->gen_opparam_buf[op->args];
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;

        oi_next = op->next;
#ifdef CONFIG_PROFILER
        tcg_table_op_count[opc]++;
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
            tcg_reg_alloc_mov(s, def, args, arg_life);
            break;
        case INDEX_op_movi_i32:
        case INDEX_op_movi_i64:
            tcg_reg_alloc_movi(s, args, arg_life);
            break;
        case INDEX_op_insn_start:
            /* Record the code offset where the previous guest insn
               ended and capture this insn's start data. */
            if (num_insns >= 0) {
                s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* 64-bit guest words are split over two 32-bit args. */
                a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
#else
                a = args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, &s->temps[args[0]]);
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(args[0]), s->code_ptr);
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op->callo, op->calli, args, arg_life);
            break;
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            if (def->flags & TCG_OPF_NOT_PRESENT) {
                tcg_abort();
            }
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, def, opc, args, arg_life);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow. The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely. Thus we can test for overflow after
           generating code without having to check during generation. */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
    if (!tcg_out_tb_finalize(s)) {
        return -1;
    }

    /* flush instruction cache */
    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);

    return tcg_current_code_size(s);
}

#ifdef CONFIG_PROFILER
/* Dump the accumulated translation/codegen statistics. */
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
    TCGContext *s = &tcg_ctx;
    int64_t tb_count = s->tb_count;
    int64_t tb_div_count = tb_count ? tb_count : 1;
    int64_t tot = s->interm_time + s->code_time;

    cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                tot, tot / 2.4e9);
    cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
                tb_count, s->tb_count1 - tb_count,
                (double)(s->tb_count1 - s->tb_count)
                / (s->tb_count1 ?
                   s->tb_count1 : 1) * 100.0);
    cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n",
                (double)s->op_count / tb_div_count, s->op_count_max);
    cpu_fprintf(f, "deleted ops/TB %0.2f\n",
                (double)s->del_op_count / tb_div_count);
    cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n",
                (double)s->temp_count / tb_div_count, s->temp_count_max);
    cpu_fprintf(f, "avg host code/TB %0.1f\n",
                (double)s->code_out_len / tb_div_count);
    cpu_fprintf(f, "avg search data/TB %0.1f\n",
                (double)s->search_out_len / tb_div_count);

    /* Guard every ratio against a zero divisor. */
    cpu_fprintf(f, "cycles/op %0.1f\n",
                s->op_count ? (double)tot / s->op_count : 0);
    cpu_fprintf(f, "cycles/in byte %0.1f\n",
                s->code_in_len ? (double)tot / s->code_in_len : 0);
    cpu_fprintf(f, "cycles/out byte %0.1f\n",
                s->code_out_len ? (double)tot / s->code_out_len : 0);
    cpu_fprintf(f, "cycles/search byte %0.1f\n",
                s->search_out_len ? (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    cpu_fprintf(f, " gen_interm time %0.1f%%\n",
                (double)s->interm_time / tot * 100.0);
    cpu_fprintf(f, " gen_code time %0.1f%%\n",
                (double)s->code_time / tot * 100.0);
    cpu_fprintf(f, "optim./code time %0.1f%%\n",
                (double)s->opt_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    cpu_fprintf(f, "liveness/code time %0.1f%%\n",
                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
    cpu_fprintf(f, "cpu_restore count %" PRId64 "\n",
                s->restore_count);
    cpu_fprintf(f, " avg cycles %0.1f\n",
                s->restore_count ?
                (double)s->restore_time / s->restore_count : 0);
}
#else
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
    cpu_fprintf(f, "[TCG profiler not compiled]\n");
}
#endif

#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
       put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit. This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/

/* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
/* The struct layouts and symbol names below are fixed by the GDB JIT
   interface contract and must not be renamed or reordered. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/* GDB sets a breakpoint on this function; noinline keeps it callable. */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it. */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.
*/ 2789 2790 static int find_string(const char *strtab, const char *str) 2791 { 2792 const char *p = strtab + 1; 2793 2794 while (1) { 2795 if (strcmp(p, str) == 0) { 2796 return p - strtab; 2797 } 2798 p += strlen(p) + 1; 2799 } 2800 } 2801 2802 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size, 2803 const void *debug_frame, 2804 size_t debug_frame_size) 2805 { 2806 struct __attribute__((packed)) DebugInfo { 2807 uint32_t len; 2808 uint16_t version; 2809 uint32_t abbrev; 2810 uint8_t ptr_size; 2811 uint8_t cu_die; 2812 uint16_t cu_lang; 2813 uintptr_t cu_low_pc; 2814 uintptr_t cu_high_pc; 2815 uint8_t fn_die; 2816 char fn_name[16]; 2817 uintptr_t fn_low_pc; 2818 uintptr_t fn_high_pc; 2819 uint8_t cu_eoc; 2820 }; 2821 2822 struct ElfImage { 2823 ElfW(Ehdr) ehdr; 2824 ElfW(Phdr) phdr; 2825 ElfW(Shdr) shdr[7]; 2826 ElfW(Sym) sym[2]; 2827 struct DebugInfo di; 2828 uint8_t da[24]; 2829 char str[80]; 2830 }; 2831 2832 struct ElfImage *img; 2833 2834 static const struct ElfImage img_template = { 2835 .ehdr = { 2836 .e_ident[EI_MAG0] = ELFMAG0, 2837 .e_ident[EI_MAG1] = ELFMAG1, 2838 .e_ident[EI_MAG2] = ELFMAG2, 2839 .e_ident[EI_MAG3] = ELFMAG3, 2840 .e_ident[EI_CLASS] = ELF_CLASS, 2841 .e_ident[EI_DATA] = ELF_DATA, 2842 .e_ident[EI_VERSION] = EV_CURRENT, 2843 .e_type = ET_EXEC, 2844 .e_machine = ELF_HOST_MACHINE, 2845 .e_version = EV_CURRENT, 2846 .e_phoff = offsetof(struct ElfImage, phdr), 2847 .e_shoff = offsetof(struct ElfImage, shdr), 2848 .e_ehsize = sizeof(ElfW(Shdr)), 2849 .e_phentsize = sizeof(ElfW(Phdr)), 2850 .e_phnum = 1, 2851 .e_shentsize = sizeof(ElfW(Shdr)), 2852 .e_shnum = ARRAY_SIZE(img->shdr), 2853 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 2854 #ifdef ELF_HOST_FLAGS 2855 .e_flags = ELF_HOST_FLAGS, 2856 #endif 2857 #ifdef ELF_OSABI 2858 .e_ident[EI_OSABI] = ELF_OSABI, 2859 #endif 2860 }, 2861 .phdr = { 2862 .p_type = PT_LOAD, 2863 .p_flags = PF_X, 2864 }, 2865 .shdr = { 2866 [0] = { .sh_type = SHT_NULL }, 2867 /* Trick: The contents of 
code_gen_buffer are not present in 2868 this fake ELF file; that got allocated elsewhere. Therefore 2869 we mark .text as SHT_NOBITS (similar to .bss) so that readers 2870 will not look for contents. We can record any address. */ 2871 [1] = { /* .text */ 2872 .sh_type = SHT_NOBITS, 2873 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 2874 }, 2875 [2] = { /* .debug_info */ 2876 .sh_type = SHT_PROGBITS, 2877 .sh_offset = offsetof(struct ElfImage, di), 2878 .sh_size = sizeof(struct DebugInfo), 2879 }, 2880 [3] = { /* .debug_abbrev */ 2881 .sh_type = SHT_PROGBITS, 2882 .sh_offset = offsetof(struct ElfImage, da), 2883 .sh_size = sizeof(img->da), 2884 }, 2885 [4] = { /* .debug_frame */ 2886 .sh_type = SHT_PROGBITS, 2887 .sh_offset = sizeof(struct ElfImage), 2888 }, 2889 [5] = { /* .symtab */ 2890 .sh_type = SHT_SYMTAB, 2891 .sh_offset = offsetof(struct ElfImage, sym), 2892 .sh_size = sizeof(img->sym), 2893 .sh_info = 1, 2894 .sh_link = ARRAY_SIZE(img->shdr) - 1, 2895 .sh_entsize = sizeof(ElfW(Sym)), 2896 }, 2897 [6] = { /* .strtab */ 2898 .sh_type = SHT_STRTAB, 2899 .sh_offset = offsetof(struct ElfImage, str), 2900 .sh_size = sizeof(img->str), 2901 } 2902 }, 2903 .sym = { 2904 [1] = { /* code_gen_buffer */ 2905 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 2906 .st_shndx = 1, 2907 } 2908 }, 2909 .di = { 2910 .len = sizeof(struct DebugInfo) - 4, 2911 .version = 2, 2912 .ptr_size = sizeof(void *), 2913 .cu_die = 1, 2914 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 2915 .fn_die = 2, 2916 .fn_name = "code_gen_buffer" 2917 }, 2918 .da = { 2919 1, /* abbrev number (the cu) */ 2920 0x11, 1, /* DW_TAG_compile_unit, has children */ 2921 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 2922 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 2923 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 2924 0, 0, /* end of abbrev */ 2925 2, /* abbrev number (the fn) */ 2926 0x2e, 0, /* DW_TAG_subprogram, no children */ 2927 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 2928 0x11, 0x1, /* DW_AT_low_pc, 
DW_FORM_addr */ 2929 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 2930 0, 0, /* end of abbrev */ 2931 0 /* no more abbrev */ 2932 }, 2933 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 2934 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 2935 }; 2936 2937 /* We only need a single jit entry; statically allocate it. */ 2938 static struct jit_code_entry one_entry; 2939 2940 uintptr_t buf = (uintptr_t)buf_ptr; 2941 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 2942 DebugFrameHeader *dfh; 2943 2944 img = g_malloc(img_size); 2945 *img = img_template; 2946 2947 img->phdr.p_vaddr = buf; 2948 img->phdr.p_paddr = buf; 2949 img->phdr.p_memsz = buf_size; 2950 2951 img->shdr[1].sh_name = find_string(img->str, ".text"); 2952 img->shdr[1].sh_addr = buf; 2953 img->shdr[1].sh_size = buf_size; 2954 2955 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 2956 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 2957 2958 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 2959 img->shdr[4].sh_size = debug_frame_size; 2960 2961 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 2962 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 2963 2964 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 2965 img->sym[1].st_value = buf; 2966 img->sym[1].st_size = buf_size; 2967 2968 img->di.cu_low_pc = buf; 2969 img->di.cu_high_pc = buf + buf_size; 2970 img->di.fn_low_pc = buf; 2971 img->di.fn_high_pc = buf + buf_size; 2972 2973 dfh = (DebugFrameHeader *)(img + 1); 2974 memcpy(dfh, debug_frame, debug_frame_size); 2975 dfh->fde.func_start = buf; 2976 dfh->fde.func_len = buf_size; 2977 2978 #ifdef DEBUG_JIT 2979 /* Enable this block to be able to debug the ELF image file creation. 2980 One can use readelf, objdump, or other inspection utilities. 
*/ 2981 { 2982 FILE *f = fopen("/tmp/qemu.jit", "w+b"); 2983 if (f) { 2984 if (fwrite(img, img_size, 1, f) != img_size) { 2985 /* Avoid stupid unused return value warning for fwrite. */ 2986 } 2987 fclose(f); 2988 } 2989 } 2990 #endif 2991 2992 one_entry.symfile_addr = img; 2993 one_entry.symfile_size = img_size; 2994 2995 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 2996 __jit_debug_descriptor.relevant_entry = &one_entry; 2997 __jit_debug_descriptor.first_entry = &one_entry; 2998 __jit_debug_register_code(); 2999 } 3000 #else 3001 /* No support for the feature. Provide the entry point expected by exec.c, 3002 and implement the internal function we declared earlier. */ 3003 3004 static void tcg_register_jit_int(void *buf, size_t size, 3005 const void *debug_frame, 3006 size_t debug_frame_size) 3007 { 3008 } 3009 3010 void tcg_register_jit(void *buf, size_t buf_size) 3011 { 3012 } 3013 #endif /* ELF_HOST_MACHINE */ 3014