1 /* 2 * Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 /* define it to use liveness analysis (better code) */ 26 #define USE_TCG_OPTIMIZATIONS 27 28 #include "qemu/osdep.h" 29 30 /* Define to jump the ELF file used to communicate with GDB. */ 31 #undef DEBUG_JIT 32 33 #include "qemu/cutils.h" 34 #include "qemu/host-utils.h" 35 #include "qemu/timer.h" 36 37 /* Note: the long term plan is to reduce the dependencies on the QEMU 38 CPU definitions. Currently they are used for qemu_ld/st 39 instructions */ 40 #define NO_CPU_IO_DEFS 41 #include "cpu.h" 42 43 #include "exec/cpu-common.h" 44 #include "exec/exec-all.h" 45 46 #include "tcg-op.h" 47 48 #if UINTPTR_MAX == UINT32_MAX 49 # define ELF_CLASS ELFCLASS32 50 #else 51 # define ELF_CLASS ELFCLASS64 52 #endif 53 #ifdef HOST_WORDS_BIGENDIAN 54 # define ELF_DATA ELFDATA2MSB 55 #else 56 # define ELF_DATA ELFDATA2LSB 57 #endif 58 59 #include "elf.h" 60 #include "exec/log.h" 61 62 /* Forward declarations for functions declared in tcg-target.inc.c and 63 used here. */ 64 static void tcg_target_init(TCGContext *s); 65 static void tcg_target_qemu_prologue(TCGContext *s); 66 static void patch_reloc(tcg_insn_unit *code_ptr, int type, 67 intptr_t value, intptr_t addend); 68 69 /* The CIE and FDE header definitions will be common to all hosts. */ 70 typedef struct { 71 uint32_t len __attribute__((aligned((sizeof(void *))))); 72 uint32_t id; 73 uint8_t version; 74 char augmentation[1]; 75 uint8_t code_align; 76 uint8_t data_align; 77 uint8_t return_column; 78 } DebugFrameCIE; 79 80 typedef struct QEMU_PACKED { 81 uint32_t len __attribute__((aligned((sizeof(void *))))); 82 uint32_t cie_offset; 83 uintptr_t func_start; 84 uintptr_t func_len; 85 } DebugFrameFDEHeader; 86 87 typedef struct QEMU_PACKED { 88 DebugFrameCIE cie; 89 DebugFrameFDEHeader fde; 90 } DebugFrameHeader; 91 92 static void tcg_register_jit_int(void *buf, size_t size, 93 const void *debug_frame, 94 size_t debug_frame_size) 95 __attribute__((unused)); 96 97 /* Forward declarations for functions declared and used in tcg-target.inc.c. */ 98 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str); 99 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, 100 intptr_t arg2); 101 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 102 static void tcg_out_movi(TCGContext *s, TCGType type, 103 TCGReg ret, tcg_target_long arg); 104 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, 105 const int *const_args); 106 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, 107 intptr_t arg2); 108 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 109 TCGReg base, intptr_t ofs); 110 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target); 111 static int tcg_target_const_match(tcg_target_long val, TCGType type, 112 const TCGArgConstraint *arg_ct); 113 static void tcg_out_tb_init(TCGContext *s); 114 static bool tcg_out_tb_finalize(TCGContext *s); 115 116 117 118 static TCGRegSet tcg_target_available_regs[2]; 119 static TCGRegSet tcg_target_call_clobber_regs; 120 121 #if TCG_TARGET_INSN_UNIT_SIZE == 1 122 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v) 123 { 124 *s->code_ptr++ = v; 125 } 126 127 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p, 128 uint8_t v) 129 { 130 *p = v; 131 } 132 #endif 133 134 #if TCG_TARGET_INSN_UNIT_SIZE <= 2 135 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v) 136 { 137 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 138 *s->code_ptr++ = v; 139 } else { 140 tcg_insn_unit *p = s->code_ptr; 141 memcpy(p, &v, sizeof(v)); 142 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE); 143 } 144 } 145 146 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p, 147 uint16_t v) 148 { 149 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 150 *p = v; 151 } else { 152 memcpy(p, &v, sizeof(v)); 153 } 154 } 155 #endif 156 157 #if TCG_TARGET_INSN_UNIT_SIZE <= 4 158 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v) 159 { 160 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 161 *s->code_ptr++ = v; 162 } else { 163 tcg_insn_unit *p = s->code_ptr; 164 memcpy(p, &v, sizeof(v)); 165 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE); 166 } 167 } 168 169 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p, 170 uint32_t v) 171 { 172 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 173 *p = v; 174 } else { 175 memcpy(p, &v, sizeof(v)); 176 } 177 } 178 #endif 179 180 #if TCG_TARGET_INSN_UNIT_SIZE <= 8 181 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v) 182 { 183 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 184 *s->code_ptr++ = v; 185 } else { 186 tcg_insn_unit *p = s->code_ptr; 187 memcpy(p, &v, sizeof(v)); 188 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE); 189 } 190 } 191 192 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p, 193 uint64_t v) 194 { 195 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 196 *p = v; 197 } else { 198 memcpy(p, &v, sizeof(v)); 199 } 200 } 201 #endif 202 203 /* label relocation processing */ 204 205 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, 206 TCGLabel *l, intptr_t addend) 207 { 208 TCGRelocation *r; 209 210 if (l->has_value) { 211 /* FIXME: This may break relocations on RISC targets that 212 modify instruction fields in place. The caller may not have 213 written the initial value. */ 214 patch_reloc(code_ptr, type, l->u.value, addend); 215 } else { 216 /* add a new relocation entry */ 217 r = tcg_malloc(sizeof(TCGRelocation)); 218 r->type = type; 219 r->ptr = code_ptr; 220 r->addend = addend; 221 r->next = l->u.first_reloc; 222 l->u.first_reloc = r; 223 } 224 } 225 226 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr) 227 { 228 intptr_t value = (intptr_t)ptr; 229 TCGRelocation *r; 230 231 tcg_debug_assert(!l->has_value); 232 233 for (r = l->u.first_reloc; r != NULL; r = r->next) { 234 patch_reloc(r->ptr, r->type, value, r->addend); 235 } 236 237 l->has_value = 1; 238 l->u.value_ptr = ptr; 239 } 240 241 TCGLabel *gen_new_label(void) 242 { 243 TCGContext *s = &tcg_ctx; 244 TCGLabel *l = tcg_malloc(sizeof(TCGLabel)); 245 246 *l = (TCGLabel){ 247 .id = s->nb_labels++ 248 }; 249 250 return l; 251 } 252 253 #include "tcg-target.inc.c" 254 255 /* pool based memory allocation */ 256 void *tcg_malloc_internal(TCGContext *s, int size) 257 { 258 TCGPool *p; 259 int pool_size; 260 261 if (size > TCG_POOL_CHUNK_SIZE) { 262 /* big malloc: insert a new pool (XXX: could optimize) */ 263 p = g_malloc(sizeof(TCGPool) + size); 264 p->size = size; 265 p->next = s->pool_first_large; 266 s->pool_first_large = p; 267 return p->data; 268 } else { 269 p = s->pool_current; 270 if (!p) { 271 p = s->pool_first; 272 if (!p) 273 goto new_pool; 274 } else { 275 if (!p->next) { 276 new_pool: 277 pool_size = TCG_POOL_CHUNK_SIZE; 278 p = g_malloc(sizeof(TCGPool) + pool_size); 279 p->size = pool_size; 280 p->next = NULL; 281 if (s->pool_current) 282 s->pool_current->next = p; 283 else 284 s->pool_first = p; 285 } else { 286 p = p->next; 287 } 288 } 289 } 290 s->pool_current = p; 291 s->pool_cur = p->data + size; 292 s->pool_end = p->data + p->size; 293 return p->data; 294 } 295 296 void tcg_pool_reset(TCGContext *s) 297 { 298 TCGPool *p, *t; 299 for (p = s->pool_first_large; p; p = t) { 300 t = p->next; 301 g_free(p); 302 } 303 s->pool_first_large = NULL; 304 s->pool_cur = s->pool_end = NULL; 305 s->pool_current = NULL; 306 } 307 308 typedef struct TCGHelperInfo { 309 void *func; 310 const char *name; 311 unsigned flags; 312 unsigned sizemask; 313 } TCGHelperInfo; 314 315 #include "exec/helper-proto.h" 316 317 static const TCGHelperInfo all_helpers[] = { 318 #include "exec/helper-tcg.h" 319 }; 320 321 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; 322 323 void tcg_context_init(TCGContext *s) 324 { 325 int op, total_args, n, i; 326 TCGOpDef *def; 327 TCGArgConstraint *args_ct; 328 int *sorted_args; 329 GHashTable *helper_table; 330 331 memset(s, 0, sizeof(*s)); 332 s->nb_globals = 0; 333 334 /* Count total number of arguments and allocate the corresponding 335 space */ 336 total_args = 0; 337 for(op = 0; op < NB_OPS; op++) { 338 def = &tcg_op_defs[op]; 339 n = def->nb_iargs + def->nb_oargs; 340 total_args += n; 341 } 342 343 args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args); 344 sorted_args = g_malloc(sizeof(int) * total_args); 345 346 for(op = 0; op < NB_OPS; op++) { 347 def = &tcg_op_defs[op]; 348 def->args_ct = args_ct; 349 def->sorted_args = sorted_args; 350 n = def->nb_iargs + def->nb_oargs; 351 sorted_args += n; 352 args_ct += n; 353 } 354 355 /* Register helpers. */ 356 /* Use g_direct_hash/equal for direct pointer comparisons on func. */ 357 s->helpers = helper_table = g_hash_table_new(NULL, NULL); 358 359 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { 360 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func, 361 (gpointer)&all_helpers[i]); 362 } 363 364 tcg_target_init(s); 365 366 /* Reverse the order of the saved registers, assuming they're all at 367 the start of tcg_target_reg_alloc_order. */ 368 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) { 369 int r = tcg_target_reg_alloc_order[n]; 370 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) { 371 break; 372 } 373 } 374 for (i = 0; i < n; ++i) { 375 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i]; 376 } 377 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) { 378 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; 379 } 380 } 381 382 void tcg_prologue_init(TCGContext *s) 383 { 384 size_t prologue_size, total_size; 385 void *buf0, *buf1; 386 387 /* Put the prologue at the beginning of code_gen_buffer. */ 388 buf0 = s->code_gen_buffer; 389 s->code_ptr = buf0; 390 s->code_buf = buf0; 391 s->code_gen_prologue = buf0; 392 393 /* Generate the prologue. */ 394 tcg_target_qemu_prologue(s); 395 buf1 = s->code_ptr; 396 flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1); 397 398 /* Deduct the prologue from the buffer. */ 399 prologue_size = tcg_current_code_size(s); 400 s->code_gen_ptr = buf1; 401 s->code_gen_buffer = buf1; 402 s->code_buf = buf1; 403 total_size = s->code_gen_buffer_size - prologue_size; 404 s->code_gen_buffer_size = total_size; 405 406 /* Compute a high-water mark, at which we voluntarily flush the buffer 407 and start over. The size here is arbitrary, significantly larger 408 than we expect the code generation for any one opcode to require. */ 409 s->code_gen_highwater = s->code_gen_buffer + (total_size - 1024); 410 411 tcg_register_jit(s->code_gen_buffer, total_size); 412 413 #ifdef DEBUG_DISAS 414 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 415 qemu_log_lock(); 416 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size); 417 log_disas(buf0, prologue_size); 418 qemu_log("\n"); 419 qemu_log_flush(); 420 qemu_log_unlock(); 421 } 422 #endif 423 } 424 425 void tcg_func_start(TCGContext *s) 426 { 427 tcg_pool_reset(s); 428 s->nb_temps = s->nb_globals; 429 430 /* No temps have been previously allocated for size or locality. */ 431 memset(s->free_temps, 0, sizeof(s->free_temps)); 432 433 s->nb_labels = 0; 434 s->current_frame_offset = s->frame_start; 435 436 #ifdef CONFIG_DEBUG_TCG 437 s->goto_tb_issue_mask = 0; 438 #endif 439 440 s->gen_op_buf[0].next = 1; 441 s->gen_op_buf[0].prev = 0; 442 s->gen_next_op_idx = 1; 443 s->gen_next_parm_idx = 0; 444 445 s->be = tcg_malloc(sizeof(TCGBackendData)); 446 } 447 448 static inline int temp_idx(TCGContext *s, TCGTemp *ts) 449 { 450 ptrdiff_t n = ts - s->temps; 451 tcg_debug_assert(n >= 0 && n < s->nb_temps); 452 return n; 453 } 454 455 static inline TCGTemp *tcg_temp_alloc(TCGContext *s) 456 { 457 int n = s->nb_temps++; 458 tcg_debug_assert(n < TCG_MAX_TEMPS); 459 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 460 } 461 462 static inline TCGTemp *tcg_global_alloc(TCGContext *s) 463 { 464 tcg_debug_assert(s->nb_globals == s->nb_temps); 465 s->nb_globals++; 466 return tcg_temp_alloc(s); 467 } 468 469 static int tcg_global_reg_new_internal(TCGContext *s, TCGType type, 470 TCGReg reg, const char *name) 471 { 472 TCGTemp *ts; 473 474 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) { 475 tcg_abort(); 476 } 477 478 ts = tcg_global_alloc(s); 479 ts->base_type = type; 480 ts->type = type; 481 ts->fixed_reg = 1; 482 ts->reg = reg; 483 ts->name = name; 484 tcg_regset_set_reg(s->reserved_regs, reg); 485 486 return temp_idx(s, ts); 487 } 488 489 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 490 { 491 int idx; 492 s->frame_start = start; 493 s->frame_end = start + size; 494 idx = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 495 s->frame_temp = &s->temps[idx]; 496 } 497 498 TCGv_i32 tcg_global_reg_new_i32(TCGReg reg, const char *name) 499 { 500 TCGContext *s = &tcg_ctx; 501 int idx; 502 503 if (tcg_regset_test_reg(s->reserved_regs, reg)) { 504 tcg_abort(); 505 } 506 idx = tcg_global_reg_new_internal(s, TCG_TYPE_I32, reg, name); 507 return MAKE_TCGV_I32(idx); 508 } 509 510 TCGv_i64 tcg_global_reg_new_i64(TCGReg reg, const char *name) 511 { 512 TCGContext *s = &tcg_ctx; 513 int idx; 514 515 if (tcg_regset_test_reg(s->reserved_regs, reg)) { 516 tcg_abort(); 517 } 518 idx = tcg_global_reg_new_internal(s, TCG_TYPE_I64, reg, name); 519 return MAKE_TCGV_I64(idx); 520 } 521 522 int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, 523 intptr_t offset, const char *name) 524 { 525 TCGContext *s = &tcg_ctx; 526 TCGTemp *base_ts = &s->temps[GET_TCGV_PTR(base)]; 527 TCGTemp *ts = tcg_global_alloc(s); 528 int indirect_reg = 0, bigendian = 0; 529 #ifdef HOST_WORDS_BIGENDIAN 530 bigendian = 1; 531 #endif 532 533 if (!base_ts->fixed_reg) { 534 /* We do not support double-indirect registers. */ 535 tcg_debug_assert(!base_ts->indirect_reg); 536 base_ts->indirect_base = 1; 537 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 538 ? 2 : 1); 539 indirect_reg = 1; 540 } 541 542 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 543 TCGTemp *ts2 = tcg_global_alloc(s); 544 char buf[64]; 545 546 ts->base_type = TCG_TYPE_I64; 547 ts->type = TCG_TYPE_I32; 548 ts->indirect_reg = indirect_reg; 549 ts->mem_allocated = 1; 550 ts->mem_base = base_ts; 551 ts->mem_offset = offset + bigendian * 4; 552 pstrcpy(buf, sizeof(buf), name); 553 pstrcat(buf, sizeof(buf), "_0"); 554 ts->name = strdup(buf); 555 556 tcg_debug_assert(ts2 == ts + 1); 557 ts2->base_type = TCG_TYPE_I64; 558 ts2->type = TCG_TYPE_I32; 559 ts2->indirect_reg = indirect_reg; 560 ts2->mem_allocated = 1; 561 ts2->mem_base = base_ts; 562 ts2->mem_offset = offset + (1 - bigendian) * 4; 563 pstrcpy(buf, sizeof(buf), name); 564 pstrcat(buf, sizeof(buf), "_1"); 565 ts2->name = strdup(buf); 566 } else { 567 ts->base_type = type; 568 ts->type = type; 569 ts->indirect_reg = indirect_reg; 570 ts->mem_allocated = 1; 571 ts->mem_base = base_ts; 572 ts->mem_offset = offset; 573 ts->name = name; 574 } 575 return temp_idx(s, ts); 576 } 577 578 static int tcg_temp_new_internal(TCGType type, int temp_local) 579 { 580 TCGContext *s = &tcg_ctx; 581 TCGTemp *ts; 582 int idx, k; 583 584 k = type + (temp_local ? TCG_TYPE_COUNT : 0); 585 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS); 586 if (idx < TCG_MAX_TEMPS) { 587 /* There is already an available temp with the right type. */ 588 clear_bit(idx, s->free_temps[k].l); 589 590 ts = &s->temps[idx]; 591 ts->temp_allocated = 1; 592 tcg_debug_assert(ts->base_type == type); 593 tcg_debug_assert(ts->temp_local == temp_local); 594 } else { 595 ts = tcg_temp_alloc(s); 596 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 597 TCGTemp *ts2 = tcg_temp_alloc(s); 598 599 ts->base_type = type; 600 ts->type = TCG_TYPE_I32; 601 ts->temp_allocated = 1; 602 ts->temp_local = temp_local; 603 604 tcg_debug_assert(ts2 == ts + 1); 605 ts2->base_type = TCG_TYPE_I64; 606 ts2->type = TCG_TYPE_I32; 607 ts2->temp_allocated = 1; 608 ts2->temp_local = temp_local; 609 } else { 610 ts->base_type = type; 611 ts->type = type; 612 ts->temp_allocated = 1; 613 ts->temp_local = temp_local; 614 } 615 idx = temp_idx(s, ts); 616 } 617 618 #if defined(CONFIG_DEBUG_TCG) 619 s->temps_in_use++; 620 #endif 621 return idx; 622 } 623 624 TCGv_i32 tcg_temp_new_internal_i32(int temp_local) 625 { 626 int idx; 627 628 idx = tcg_temp_new_internal(TCG_TYPE_I32, temp_local); 629 return MAKE_TCGV_I32(idx); 630 } 631 632 TCGv_i64 tcg_temp_new_internal_i64(int temp_local) 633 { 634 int idx; 635 636 idx = tcg_temp_new_internal(TCG_TYPE_I64, temp_local); 637 return MAKE_TCGV_I64(idx); 638 } 639 640 static void tcg_temp_free_internal(int idx) 641 { 642 TCGContext *s = &tcg_ctx; 643 TCGTemp *ts; 644 int k; 645 646 #if defined(CONFIG_DEBUG_TCG) 647 s->temps_in_use--; 648 if (s->temps_in_use < 0) { 649 fprintf(stderr, "More temporaries freed than allocated!\n"); 650 } 651 #endif 652 653 tcg_debug_assert(idx >= s->nb_globals && idx < s->nb_temps); 654 ts = &s->temps[idx]; 655 tcg_debug_assert(ts->temp_allocated != 0); 656 ts->temp_allocated = 0; 657 658 k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0); 659 set_bit(idx, s->free_temps[k].l); 660 } 661 662 void tcg_temp_free_i32(TCGv_i32 arg) 663 { 664 tcg_temp_free_internal(GET_TCGV_I32(arg)); 665 } 666 667 void tcg_temp_free_i64(TCGv_i64 arg) 668 { 669 tcg_temp_free_internal(GET_TCGV_I64(arg)); 670 } 671 672 TCGv_i32 tcg_const_i32(int32_t val) 673 { 674 TCGv_i32 t0; 675 t0 = tcg_temp_new_i32(); 676 tcg_gen_movi_i32(t0, val); 677 return t0; 678 } 679 680 TCGv_i64 tcg_const_i64(int64_t val) 681 { 682 TCGv_i64 t0; 683 t0 = tcg_temp_new_i64(); 684 tcg_gen_movi_i64(t0, val); 685 return t0; 686 } 687 688 TCGv_i32 tcg_const_local_i32(int32_t val) 689 { 690 TCGv_i32 t0; 691 t0 = tcg_temp_local_new_i32(); 692 tcg_gen_movi_i32(t0, val); 693 return t0; 694 } 695 696 TCGv_i64 tcg_const_local_i64(int64_t val) 697 { 698 TCGv_i64 t0; 699 t0 = tcg_temp_local_new_i64(); 700 tcg_gen_movi_i64(t0, val); 701 return t0; 702 } 703 704 #if defined(CONFIG_DEBUG_TCG) 705 void tcg_clear_temp_count(void) 706 { 707 TCGContext *s = &tcg_ctx; 708 s->temps_in_use = 0; 709 } 710 711 int tcg_check_temp_count(void) 712 { 713 TCGContext *s = &tcg_ctx; 714 if (s->temps_in_use) { 715 /* Clear the count so that we don't give another 716 * warning immediately next time around. 717 */ 718 s->temps_in_use = 0; 719 return 1; 720 } 721 return 0; 722 } 723 #endif 724 725 /* Note: we convert the 64 bit args to 32 bit and do some alignment 726 and endian swap. Maybe it would be better to do the alignment 727 and endian swap in tcg_reg_alloc_call(). */ 728 void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret, 729 int nargs, TCGArg *args) 730 { 731 int i, real_args, nb_rets, pi, pi_first; 732 unsigned sizemask, flags; 733 TCGHelperInfo *info; 734 735 info = g_hash_table_lookup(s->helpers, (gpointer)func); 736 flags = info->flags; 737 sizemask = info->sizemask; 738 739 #if defined(__sparc__) && !defined(__arch64__) \ 740 && !defined(CONFIG_TCG_INTERPRETER) 741 /* We have 64-bit values in one register, but need to pass as two 742 separate parameters. Split them. */ 743 int orig_sizemask = sizemask; 744 int orig_nargs = nargs; 745 TCGv_i64 retl, reth; 746 747 TCGV_UNUSED_I64(retl); 748 TCGV_UNUSED_I64(reth); 749 if (sizemask != 0) { 750 TCGArg *split_args = __builtin_alloca(sizeof(TCGArg) * nargs * 2); 751 for (i = real_args = 0; i < nargs; ++i) { 752 int is_64bit = sizemask & (1 << (i+1)*2); 753 if (is_64bit) { 754 TCGv_i64 orig = MAKE_TCGV_I64(args[i]); 755 TCGv_i32 h = tcg_temp_new_i32(); 756 TCGv_i32 l = tcg_temp_new_i32(); 757 tcg_gen_extr_i64_i32(l, h, orig); 758 split_args[real_args++] = GET_TCGV_I32(h); 759 split_args[real_args++] = GET_TCGV_I32(l); 760 } else { 761 split_args[real_args++] = args[i]; 762 } 763 } 764 nargs = real_args; 765 args = split_args; 766 sizemask = 0; 767 } 768 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 769 for (i = 0; i < nargs; ++i) { 770 int is_64bit = sizemask & (1 << (i+1)*2); 771 int is_signed = sizemask & (2 << (i+1)*2); 772 if (!is_64bit) { 773 TCGv_i64 temp = tcg_temp_new_i64(); 774 TCGv_i64 orig = MAKE_TCGV_I64(args[i]); 775 if (is_signed) { 776 tcg_gen_ext32s_i64(temp, orig); 777 } else { 778 tcg_gen_ext32u_i64(temp, orig); 779 } 780 args[i] = GET_TCGV_I64(temp); 781 } 782 } 783 #endif /* TCG_TARGET_EXTEND_ARGS */ 784 785 pi_first = pi = s->gen_next_parm_idx; 786 if (ret != TCG_CALL_DUMMY_ARG) { 787 #if defined(__sparc__) && !defined(__arch64__) \ 788 && !defined(CONFIG_TCG_INTERPRETER) 789 if (orig_sizemask & 1) { 790 /* The 32-bit ABI is going to return the 64-bit value in 791 the %o0/%o1 register pair. Prepare for this by using 792 two return temporaries, and reassemble below. */ 793 retl = tcg_temp_new_i64(); 794 reth = tcg_temp_new_i64(); 795 s->gen_opparam_buf[pi++] = GET_TCGV_I64(reth); 796 s->gen_opparam_buf[pi++] = GET_TCGV_I64(retl); 797 nb_rets = 2; 798 } else { 799 s->gen_opparam_buf[pi++] = ret; 800 nb_rets = 1; 801 } 802 #else 803 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) { 804 #ifdef HOST_WORDS_BIGENDIAN 805 s->gen_opparam_buf[pi++] = ret + 1; 806 s->gen_opparam_buf[pi++] = ret; 807 #else 808 s->gen_opparam_buf[pi++] = ret; 809 s->gen_opparam_buf[pi++] = ret + 1; 810 #endif 811 nb_rets = 2; 812 } else { 813 s->gen_opparam_buf[pi++] = ret; 814 nb_rets = 1; 815 } 816 #endif 817 } else { 818 nb_rets = 0; 819 } 820 real_args = 0; 821 for (i = 0; i < nargs; i++) { 822 int is_64bit = sizemask & (1 << (i+1)*2); 823 if (TCG_TARGET_REG_BITS < 64 && is_64bit) { 824 #ifdef TCG_TARGET_CALL_ALIGN_ARGS 825 /* some targets want aligned 64 bit args */ 826 if (real_args & 1) { 827 s->gen_opparam_buf[pi++] = TCG_CALL_DUMMY_ARG; 828 real_args++; 829 } 830 #endif 831 /* If stack grows up, then we will be placing successive 832 arguments at lower addresses, which means we need to 833 reverse the order compared to how we would normally 834 treat either big or little-endian. For those arguments 835 that will wind up in registers, this still works for 836 HPPA (the only current STACK_GROWSUP target) since the 837 argument registers are *also* allocated in decreasing 838 order. If another such target is added, this logic may 839 have to get more complicated to differentiate between 840 stack arguments and register arguments. */ 841 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) 842 s->gen_opparam_buf[pi++] = args[i] + 1; 843 s->gen_opparam_buf[pi++] = args[i]; 844 #else 845 s->gen_opparam_buf[pi++] = args[i]; 846 s->gen_opparam_buf[pi++] = args[i] + 1; 847 #endif 848 real_args += 2; 849 continue; 850 } 851 852 s->gen_opparam_buf[pi++] = args[i]; 853 real_args++; 854 } 855 s->gen_opparam_buf[pi++] = (uintptr_t)func; 856 s->gen_opparam_buf[pi++] = flags; 857 858 i = s->gen_next_op_idx; 859 tcg_debug_assert(i < OPC_BUF_SIZE); 860 tcg_debug_assert(pi <= OPPARAM_BUF_SIZE); 861 862 /* Set links for sequential allocation during translation. */ 863 s->gen_op_buf[i] = (TCGOp){ 864 .opc = INDEX_op_call, 865 .callo = nb_rets, 866 .calli = real_args, 867 .args = pi_first, 868 .prev = i - 1, 869 .next = i + 1 870 }; 871 872 /* Make sure the calli field didn't overflow. */ 873 tcg_debug_assert(s->gen_op_buf[i].calli == real_args); 874 875 s->gen_op_buf[0].prev = i; 876 s->gen_next_op_idx = i + 1; 877 s->gen_next_parm_idx = pi; 878 879 #if defined(__sparc__) && !defined(__arch64__) \ 880 && !defined(CONFIG_TCG_INTERPRETER) 881 /* Free all of the parts we allocated above. */ 882 for (i = real_args = 0; i < orig_nargs; ++i) { 883 int is_64bit = orig_sizemask & (1 << (i+1)*2); 884 if (is_64bit) { 885 TCGv_i32 h = MAKE_TCGV_I32(args[real_args++]); 886 TCGv_i32 l = MAKE_TCGV_I32(args[real_args++]); 887 tcg_temp_free_i32(h); 888 tcg_temp_free_i32(l); 889 } else { 890 real_args++; 891 } 892 } 893 if (orig_sizemask & 1) { 894 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them. 895 Note that describing these as TCGv_i64 eliminates an unnecessary 896 zero-extension that tcg_gen_concat_i32_i64 would create. */ 897 tcg_gen_concat32_i64(MAKE_TCGV_I64(ret), retl, reth); 898 tcg_temp_free_i64(retl); 899 tcg_temp_free_i64(reth); 900 } 901 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 902 for (i = 0; i < nargs; ++i) { 903 int is_64bit = sizemask & (1 << (i+1)*2); 904 if (!is_64bit) { 905 TCGv_i64 temp = MAKE_TCGV_I64(args[i]); 906 tcg_temp_free_i64(temp); 907 } 908 } 909 #endif /* TCG_TARGET_EXTEND_ARGS */ 910 } 911 912 static void tcg_reg_alloc_start(TCGContext *s) 913 { 914 int i; 915 TCGTemp *ts; 916 for(i = 0; i < s->nb_globals; i++) { 917 ts = &s->temps[i]; 918 if (ts->fixed_reg) { 919 ts->val_type = TEMP_VAL_REG; 920 } else { 921 ts->val_type = TEMP_VAL_MEM; 922 } 923 } 924 for(i = s->nb_globals; i < s->nb_temps; i++) { 925 ts = &s->temps[i]; 926 if (ts->temp_local) { 927 ts->val_type = TEMP_VAL_MEM; 928 } else { 929 ts->val_type = TEMP_VAL_DEAD; 930 } 931 ts->mem_allocated = 0; 932 ts->fixed_reg = 0; 933 } 934 935 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 936 } 937 938 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 939 TCGTemp *ts) 940 { 941 int idx = temp_idx(s, ts); 942 943 if (idx < s->nb_globals) { 944 pstrcpy(buf, buf_size, ts->name); 945 } else if (ts->temp_local) { 946 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 947 } else { 948 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 949 } 950 return buf; 951 } 952 953 static char *tcg_get_arg_str_idx(TCGContext *s, char *buf, 954 int buf_size, int idx) 955 { 956 tcg_debug_assert(idx >= 0 && idx < s->nb_temps); 957 return tcg_get_arg_str_ptr(s, buf, buf_size, &s->temps[idx]); 958 } 959 960 /* Find helper name. */ 961 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val) 962 { 963 const char *ret = NULL; 964 if (s->helpers) { 965 TCGHelperInfo *info = g_hash_table_lookup(s->helpers, (gpointer)val); 966 if (info) { 967 ret = info->name; 968 } 969 } 970 return ret; 971 } 972 973 static const char * const cond_name[] = 974 { 975 [TCG_COND_NEVER] = "never", 976 [TCG_COND_ALWAYS] = "always", 977 [TCG_COND_EQ] = "eq", 978 [TCG_COND_NE] = "ne", 979 [TCG_COND_LT] = "lt", 980 [TCG_COND_GE] = "ge", 981 [TCG_COND_LE] = "le", 982 [TCG_COND_GT] = "gt", 983 [TCG_COND_LTU] = "ltu", 984 [TCG_COND_GEU] = "geu", 985 [TCG_COND_LEU] = "leu", 986 [TCG_COND_GTU] = "gtu" 987 }; 988 989 static const char * const ldst_name[] = 990 { 991 [MO_UB] = "ub", 992 [MO_SB] = "sb", 993 [MO_LEUW] = "leuw", 994 [MO_LESW] = "lesw", 995 [MO_LEUL] = "leul", 996 [MO_LESL] = "lesl", 997 [MO_LEQ] = "leq", 998 [MO_BEUW] = "beuw", 999 [MO_BESW] = "besw", 1000 [MO_BEUL] = "beul", 1001 [MO_BESL] = "besl", 1002 [MO_BEQ] = "beq", 1003 }; 1004 1005 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 1006 #ifdef ALIGNED_ONLY 1007 [MO_UNALN >> MO_ASHIFT] = "un+", 1008 [MO_ALIGN >> MO_ASHIFT] = "", 1009 #else 1010 [MO_UNALN >> MO_ASHIFT] = "", 1011 [MO_ALIGN >> MO_ASHIFT] = "al+", 1012 #endif 1013 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 1014 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 1015 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 1016 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 1017 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 1018 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 1019 }; 1020 1021 void tcg_dump_ops(TCGContext *s) 1022 { 1023 char buf[128]; 1024 TCGOp *op; 1025 int oi; 1026 1027 for (oi = s->gen_op_buf[0].next; oi != 0; oi = op->next) { 1028 int i, k, nb_oargs, nb_iargs, nb_cargs; 1029 const TCGOpDef *def; 1030 const TCGArg *args; 1031 TCGOpcode c; 1032 int col = 0; 1033 1034 op = &s->gen_op_buf[oi]; 1035 c = op->opc; 1036 def = &tcg_op_defs[c]; 1037 args = &s->gen_opparam_buf[op->args]; 1038 1039 if (c == INDEX_op_insn_start) { 1040 col += qemu_log("%s ----", oi != s->gen_op_buf[0].next ? "\n" : ""); 1041 1042 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 1043 target_ulong a; 1044 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 1045 a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2]; 1046 #else 1047 a = args[i]; 1048 #endif 1049 col += qemu_log(" " TARGET_FMT_lx, a); 1050 } 1051 } else if (c == INDEX_op_call) { 1052 /* variable number of arguments */ 1053 nb_oargs = op->callo; 1054 nb_iargs = op->calli; 1055 nb_cargs = def->nb_cargs; 1056 1057 /* function name, flags, out args */ 1058 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name, 1059 tcg_find_helper(s, args[nb_oargs + nb_iargs]), 1060 args[nb_oargs + nb_iargs + 1], nb_oargs); 1061 for (i = 0; i < nb_oargs; i++) { 1062 col += qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), 1063 args[i])); 1064 } 1065 for (i = 0; i < nb_iargs; i++) { 1066 TCGArg arg = args[nb_oargs + i]; 1067 const char *t = "<dummy>"; 1068 if (arg != TCG_CALL_DUMMY_ARG) { 1069 t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg); 1070 } 1071 col += qemu_log(",%s", t); 1072 } 1073 } else { 1074 col += qemu_log(" %s ", def->name); 1075 1076 nb_oargs = def->nb_oargs; 1077 nb_iargs = def->nb_iargs; 1078 nb_cargs = def->nb_cargs; 1079 1080 k = 0; 1081 for (i = 0; i < nb_oargs; i++) { 1082 if (k != 0) { 1083 col += qemu_log(","); 1084 } 1085 col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), 1086 args[k++])); 1087 } 1088 for (i = 0; i < nb_iargs; i++) { 1089 if (k != 0) { 1090 col += qemu_log(","); 1091 } 1092 col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), 1093 args[k++])); 1094 } 1095 switch (c) { 1096 case INDEX_op_brcond_i32: 1097 case INDEX_op_setcond_i32: 1098 case INDEX_op_movcond_i32: 1099 case INDEX_op_brcond2_i32: 1100 case INDEX_op_setcond2_i32: 1101 case INDEX_op_brcond_i64: 1102 case INDEX_op_setcond_i64: 1103 case INDEX_op_movcond_i64: 1104 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) { 1105 col += qemu_log(",%s", cond_name[args[k++]]); 1106 } else { 1107 col += qemu_log(",$0x%" TCG_PRIlx, args[k++]); 1108 } 1109 i = 1; 1110 break; 1111 case INDEX_op_qemu_ld_i32: 1112 case INDEX_op_qemu_st_i32: 1113 case INDEX_op_qemu_ld_i64: 1114 case INDEX_op_qemu_st_i64: 1115 { 1116 TCGMemOpIdx oi = args[k++]; 1117 TCGMemOp op = get_memop(oi); 1118 unsigned ix = get_mmuidx(oi); 1119 1120 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 1121 col += qemu_log(",$0x%x,%u", op, ix); 1122 } else { 1123 const char *s_al, *s_op; 1124 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 1125 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 1126 col += qemu_log(",%s%s,%u", s_al, s_op, ix); 1127 } 1128 i = 1; 1129 } 1130 break; 1131 default: 1132 i = 0; 1133 break; 1134 } 1135 switch (c) { 1136 case INDEX_op_set_label: 1137 case INDEX_op_br: 1138 case INDEX_op_brcond_i32: 1139 case INDEX_op_brcond_i64: 1140 case INDEX_op_brcond2_i32: 1141 col += qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id); 1142 i++, k++; 1143 break; 1144 default: 1145 break; 1146 } 1147 for (; i < nb_cargs; i++, k++) { 1148 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]); 1149 } 1150 } 1151 if (op->life) { 1152 unsigned life = op->life; 1153 1154 for (; col < 48; ++col) { 1155 putc(' ', qemu_logfile); 1156 } 1157 1158 if (life & (SYNC_ARG * 3)) { 1159 qemu_log(" sync:"); 1160 for (i = 0; i < 2; ++i) { 1161 if (life & (SYNC_ARG << i)) { 1162 qemu_log(" %d", i); 1163 } 1164 } 1165 } 1166 life /= DEAD_ARG; 1167 if (life) { 1168 qemu_log(" dead:"); 1169 for (i = 0; life; ++i, life >>= 1) { 1170 if (life & 1) { 1171 qemu_log(" %d", i); 1172 } 1173 } 1174 } 1175 } 1176 qemu_log("\n"); 1177 } 1178 } 1179 1180 /* we give more priority to constraints with less registers */ 1181 static int get_constraint_priority(const TCGOpDef *def, int k) 1182 { 1183 const TCGArgConstraint *arg_ct; 1184 1185 int i, n; 1186 arg_ct = &def->args_ct[k]; 1187 if (arg_ct->ct & TCG_CT_ALIAS) { 1188 /* an alias is equivalent to a single register */ 1189 n = 1; 1190 } else { 1191 if (!(arg_ct->ct & TCG_CT_REG)) 1192 return 0; 1193 n = 0; 1194 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 1195 if (tcg_regset_test_reg(arg_ct->u.regs, i)) 1196 n++; 1197 } 1198 } 1199 return TCG_TARGET_NB_REGS - n + 1; 1200 } 1201 1202 /* sort from highest priority to lowest */ 1203 static void sort_constraints(TCGOpDef *def, int start, int n) 1204 { 1205 int i, j, p1, p2, tmp; 1206 1207 for(i = 0; i < n; i++) 1208 def->sorted_args[start + i] = start + i; 1209 if (n <= 1) 1210 return; 1211 for(i = 0; i < n - 1; i++) { 1212 for(j = i + 1; j < n; j++) { 1213 p1 = get_constraint_priority(def, def->sorted_args[start + i]); 1214 p2 = get_constraint_priority(def, def->sorted_args[start + j]); 1215 if (p1 < p2) { 1216 tmp = def->sorted_args[start + i]; 1217 def->sorted_args[start + i] = def->sorted_args[start + j]; 1218 def->sorted_args[start + j] = tmp; 1219 } 1220 } 1221 } 1222 } 1223 1224 void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs) 1225 { 1226 TCGOpcode op; 1227 TCGOpDef *def; 1228 const char *ct_str; 1229 int i, nb_args; 1230 1231 for(;;) { 1232 if (tdefs->op == (TCGOpcode)-1) 1233 break; 1234 op = tdefs->op; 1235 tcg_debug_assert((unsigned)op < NB_OPS); 1236 def = &tcg_op_defs[op]; 1237 #if defined(CONFIG_DEBUG_TCG) 1238 /* Duplicate entry in op definitions? */ 1239 tcg_debug_assert(!def->used); 1240 def->used = 1; 1241 #endif 1242 nb_args = def->nb_iargs + def->nb_oargs; 1243 for(i = 0; i < nb_args; i++) { 1244 ct_str = tdefs->args_ct_str[i]; 1245 /* Incomplete TCGTargetOpDef entry? */ 1246 tcg_debug_assert(ct_str != NULL); 1247 tcg_regset_clear(def->args_ct[i].u.regs); 1248 def->args_ct[i].ct = 0; 1249 if (ct_str[0] >= '0' && ct_str[0] <= '9') { 1250 int oarg; 1251 oarg = ct_str[0] - '0'; 1252 tcg_debug_assert(oarg < def->nb_oargs); 1253 tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG); 1254 /* TCG_CT_ALIAS is for the output arguments. The input 1255 argument is tagged with TCG_CT_IALIAS. */ 1256 def->args_ct[i] = def->args_ct[oarg]; 1257 def->args_ct[oarg].ct = TCG_CT_ALIAS; 1258 def->args_ct[oarg].alias_index = i; 1259 def->args_ct[i].ct |= TCG_CT_IALIAS; 1260 def->args_ct[i].alias_index = oarg; 1261 } else { 1262 for(;;) { 1263 if (*ct_str == '\0') 1264 break; 1265 switch(*ct_str) { 1266 case 'i': 1267 def->args_ct[i].ct |= TCG_CT_CONST; 1268 ct_str++; 1269 break; 1270 default: 1271 if (target_parse_constraint(&def->args_ct[i], &ct_str) < 0) { 1272 fprintf(stderr, "Invalid constraint '%s' for arg %d of operation '%s'\n", 1273 ct_str, i, def->name); 1274 exit(1); 1275 } 1276 } 1277 } 1278 } 1279 } 1280 1281 /* TCGTargetOpDef entry with too much information? */ 1282 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 1283 1284 /* sort the constraints (XXX: this is just an heuristic) */ 1285 sort_constraints(def, 0, def->nb_oargs); 1286 sort_constraints(def, def->nb_oargs, def->nb_iargs); 1287 1288 #if 0 1289 { 1290 int i; 1291 1292 printf("%s: sorted=", def->name); 1293 for(i = 0; i < def->nb_oargs + def->nb_iargs; i++) 1294 printf(" %d", def->sorted_args[i]); 1295 printf("\n"); 1296 } 1297 #endif 1298 tdefs++; 1299 } 1300 1301 #if defined(CONFIG_DEBUG_TCG) 1302 i = 0; 1303 for (op = 0; op < tcg_op_defs_max; op++) { 1304 const TCGOpDef *def = &tcg_op_defs[op]; 1305 if (def->flags & TCG_OPF_NOT_PRESENT) { 1306 /* Wrong entry in op definitions? */ 1307 if (def->used) { 1308 fprintf(stderr, "Invalid op definition for %s\n", def->name); 1309 i = 1; 1310 } 1311 } else { 1312 /* Missing entry in op definitions? */ 1313 if (!def->used) { 1314 fprintf(stderr, "Missing op definition for %s\n", def->name); 1315 i = 1; 1316 } 1317 } 1318 } 1319 if (i == 1) { 1320 tcg_abort(); 1321 } 1322 #endif 1323 } 1324 1325 void tcg_op_remove(TCGContext *s, TCGOp *op) 1326 { 1327 int next = op->next; 1328 int prev = op->prev; 1329 1330 /* We should never attempt to remove the list terminator. */ 1331 tcg_debug_assert(op != &s->gen_op_buf[0]); 1332 1333 s->gen_op_buf[next].prev = prev; 1334 s->gen_op_buf[prev].next = next; 1335 1336 memset(op, 0, sizeof(*op)); 1337 1338 #ifdef CONFIG_PROFILER 1339 s->del_op_count++; 1340 #endif 1341 } 1342 1343 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 1344 TCGOpcode opc, int nargs) 1345 { 1346 int oi = s->gen_next_op_idx; 1347 int pi = s->gen_next_parm_idx; 1348 int prev = old_op->prev; 1349 int next = old_op - s->gen_op_buf; 1350 TCGOp *new_op; 1351 1352 tcg_debug_assert(oi < OPC_BUF_SIZE); 1353 tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE); 1354 s->gen_next_op_idx = oi + 1; 1355 s->gen_next_parm_idx = pi + nargs; 1356 1357 new_op = &s->gen_op_buf[oi]; 1358 *new_op = (TCGOp){ 1359 .opc = opc, 1360 .args = pi, 1361 .prev = prev, 1362 .next = next 1363 }; 1364 s->gen_op_buf[prev].next = oi; 1365 old_op->prev = oi; 1366 1367 return new_op; 1368 } 1369 1370 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 1371 TCGOpcode opc, int nargs) 1372 { 1373 int oi = s->gen_next_op_idx; 1374 int pi = s->gen_next_parm_idx; 1375 int prev = old_op - s->gen_op_buf; 1376 int next = old_op->next; 1377 TCGOp *new_op; 1378 1379 tcg_debug_assert(oi < OPC_BUF_SIZE); 1380 tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE); 1381 s->gen_next_op_idx = oi + 1; 1382 s->gen_next_parm_idx = pi + nargs; 1383 1384 new_op = &s->gen_op_buf[oi]; 1385 *new_op = (TCGOp){ 1386 .opc = opc, 1387 .args = pi, 1388 .prev = prev, 1389 .next = next 1390 }; 1391 s->gen_op_buf[next].prev = oi; 1392 old_op->next = oi; 1393 1394 return new_op; 1395 } 1396 1397 #define TS_DEAD 1 1398 #define TS_MEM 2 1399 1400 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 1401 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 1402 1403 /* liveness analysis: end of function: all temps are dead, and globals 1404 should be in memory. */ 1405 static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state) 1406 { 1407 memset(temp_state, TS_DEAD | TS_MEM, s->nb_globals); 1408 memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals); 1409 } 1410 1411 /* liveness analysis: end of basic block: all temps are dead, globals 1412 and local temps should be in memory. */ 1413 static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state) 1414 { 1415 int i, n; 1416 1417 tcg_la_func_end(s, temp_state); 1418 for (i = s->nb_globals, n = s->nb_temps; i < n; i++) { 1419 if (s->temps[i].temp_local) { 1420 temp_state[i] |= TS_MEM; 1421 } 1422 } 1423 } 1424 1425 /* Liveness analysis : update the opc_arg_life array to tell if a 1426 given input arguments is dead. Instructions updating dead 1427 temporaries are removed. */ 1428 static void liveness_pass_1(TCGContext *s, uint8_t *temp_state) 1429 { 1430 int nb_globals = s->nb_globals; 1431 int oi, oi_prev; 1432 1433 tcg_la_func_end(s, temp_state); 1434 1435 for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) { 1436 int i, nb_iargs, nb_oargs; 1437 TCGOpcode opc_new, opc_new2; 1438 bool have_opc_new2; 1439 TCGLifeData arg_life = 0; 1440 TCGArg arg; 1441 1442 TCGOp * const op = &s->gen_op_buf[oi]; 1443 TCGArg * const args = &s->gen_opparam_buf[op->args]; 1444 TCGOpcode opc = op->opc; 1445 const TCGOpDef *def = &tcg_op_defs[opc]; 1446 1447 oi_prev = op->prev; 1448 1449 switch (opc) { 1450 case INDEX_op_call: 1451 { 1452 int call_flags; 1453 1454 nb_oargs = op->callo; 1455 nb_iargs = op->calli; 1456 call_flags = args[nb_oargs + nb_iargs + 1]; 1457 1458 /* pure functions can be removed if their result is unused */ 1459 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 1460 for (i = 0; i < nb_oargs; i++) { 1461 arg = args[i]; 1462 if (temp_state[arg] != TS_DEAD) { 1463 goto do_not_remove_call; 1464 } 1465 } 1466 goto do_remove; 1467 } else { 1468 do_not_remove_call: 1469 1470 /* output args are dead */ 1471 for (i = 0; i < nb_oargs; i++) { 1472 arg = args[i]; 1473 if (temp_state[arg] & TS_DEAD) { 1474 arg_life |= DEAD_ARG << i; 1475 } 1476 if (temp_state[arg] & TS_MEM) { 1477 arg_life |= SYNC_ARG << i; 1478 } 1479 temp_state[arg] = TS_DEAD; 1480 } 1481 1482 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 1483 TCG_CALL_NO_READ_GLOBALS))) { 1484 /* globals should go back to memory */ 1485 memset(temp_state, TS_DEAD | TS_MEM, nb_globals); 1486 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 1487 /* globals should be synced to memory */ 1488 for (i = 0; i < nb_globals; i++) { 1489 temp_state[i] |= TS_MEM; 1490 } 1491 } 1492 1493 /* record arguments that die in this helper */ 1494 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 1495 arg = args[i]; 1496 if (arg != TCG_CALL_DUMMY_ARG) { 1497 if (temp_state[arg] & TS_DEAD) { 1498 arg_life |= DEAD_ARG << i; 1499 } 1500 } 1501 } 1502 /* input arguments are live for preceding opcodes */ 1503 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 1504 arg = args[i]; 1505 if (arg != TCG_CALL_DUMMY_ARG) { 1506 temp_state[arg] &= ~TS_DEAD; 1507 } 1508 } 1509 } 1510 } 1511 break; 1512 case INDEX_op_insn_start: 1513 break; 1514 case INDEX_op_discard: 1515 /* mark the temporary as dead */ 1516 temp_state[args[0]] = TS_DEAD; 1517 break; 1518 1519 case INDEX_op_add2_i32: 1520 opc_new = INDEX_op_add_i32; 1521 goto do_addsub2; 1522 case INDEX_op_sub2_i32: 1523 opc_new = INDEX_op_sub_i32; 1524 goto do_addsub2; 1525 case INDEX_op_add2_i64: 1526 opc_new = INDEX_op_add_i64; 1527 goto do_addsub2; 1528 case INDEX_op_sub2_i64: 1529 opc_new = INDEX_op_sub_i64; 1530 do_addsub2: 1531 nb_iargs = 4; 1532 nb_oargs = 2; 1533 /* Test if the high part of the operation is dead, but not 1534 the low part. The result can be optimized to a simple 1535 add or sub. This happens often for x86_64 guest when the 1536 cpu mode is set to 32 bit. */ 1537 if (temp_state[args[1]] == TS_DEAD) { 1538 if (temp_state[args[0]] == TS_DEAD) { 1539 goto do_remove; 1540 } 1541 /* Replace the opcode and adjust the args in place, 1542 leaving 3 unused args at the end. */ 1543 op->opc = opc = opc_new; 1544 args[1] = args[2]; 1545 args[2] = args[4]; 1546 /* Fall through and mark the single-word operation live. */ 1547 nb_iargs = 2; 1548 nb_oargs = 1; 1549 } 1550 goto do_not_remove; 1551 1552 case INDEX_op_mulu2_i32: 1553 opc_new = INDEX_op_mul_i32; 1554 opc_new2 = INDEX_op_muluh_i32; 1555 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 1556 goto do_mul2; 1557 case INDEX_op_muls2_i32: 1558 opc_new = INDEX_op_mul_i32; 1559 opc_new2 = INDEX_op_mulsh_i32; 1560 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 1561 goto do_mul2; 1562 case INDEX_op_mulu2_i64: 1563 opc_new = INDEX_op_mul_i64; 1564 opc_new2 = INDEX_op_muluh_i64; 1565 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 1566 goto do_mul2; 1567 case INDEX_op_muls2_i64: 1568 opc_new = INDEX_op_mul_i64; 1569 opc_new2 = INDEX_op_mulsh_i64; 1570 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 1571 goto do_mul2; 1572 do_mul2: 1573 nb_iargs = 2; 1574 nb_oargs = 2; 1575 if (temp_state[args[1]] == TS_DEAD) { 1576 if (temp_state[args[0]] == TS_DEAD) { 1577 /* Both parts of the operation are dead. */ 1578 goto do_remove; 1579 } 1580 /* The high part of the operation is dead; generate the low. */ 1581 op->opc = opc = opc_new; 1582 args[1] = args[2]; 1583 args[2] = args[3]; 1584 } else if (temp_state[args[0]] == TS_DEAD && have_opc_new2) { 1585 /* The low part of the operation is dead; generate the high. */ 1586 op->opc = opc = opc_new2; 1587 args[0] = args[1]; 1588 args[1] = args[2]; 1589 args[2] = args[3]; 1590 } else { 1591 goto do_not_remove; 1592 } 1593 /* Mark the single-word operation live. */ 1594 nb_oargs = 1; 1595 goto do_not_remove; 1596 1597 default: 1598 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 1599 nb_iargs = def->nb_iargs; 1600 nb_oargs = def->nb_oargs; 1601 1602 /* Test if the operation can be removed because all 1603 its outputs are dead. We assume that nb_oargs == 0 1604 implies side effects */ 1605 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 1606 for (i = 0; i < nb_oargs; i++) { 1607 if (temp_state[args[i]] != TS_DEAD) { 1608 goto do_not_remove; 1609 } 1610 } 1611 do_remove: 1612 tcg_op_remove(s, op); 1613 } else { 1614 do_not_remove: 1615 /* output args are dead */ 1616 for (i = 0; i < nb_oargs; i++) { 1617 arg = args[i]; 1618 if (temp_state[arg] & TS_DEAD) { 1619 arg_life |= DEAD_ARG << i; 1620 } 1621 if (temp_state[arg] & TS_MEM) { 1622 arg_life |= SYNC_ARG << i; 1623 } 1624 temp_state[arg] = TS_DEAD; 1625 } 1626 1627 /* if end of basic block, update */ 1628 if (def->flags & TCG_OPF_BB_END) { 1629 tcg_la_bb_end(s, temp_state); 1630 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 1631 /* globals should be synced to memory */ 1632 for (i = 0; i < nb_globals; i++) { 1633 temp_state[i] |= TS_MEM; 1634 } 1635 } 1636 1637 /* record arguments that die in this opcode */ 1638 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 1639 arg = args[i]; 1640 if (temp_state[arg] & TS_DEAD) { 1641 arg_life |= DEAD_ARG << i; 1642 } 1643 } 1644 /* input arguments are live for preceding opcodes */ 1645 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 1646 temp_state[args[i]] &= ~TS_DEAD; 1647 } 1648 } 1649 break; 1650 } 1651 op->life = arg_life; 1652 } 1653 } 1654 1655 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 1656 static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state) 1657 { 1658 int nb_globals = s->nb_globals; 1659 int16_t *dir_temps; 1660 int i, oi, oi_next; 1661 bool changes = false; 1662 1663 dir_temps = tcg_malloc(nb_globals * sizeof(int16_t)); 1664 memset(dir_temps, 0, nb_globals * sizeof(int16_t)); 1665 1666 /* Create a temporary for each indirect global. */ 1667 for (i = 0; i < nb_globals; ++i) { 1668 TCGTemp *its = &s->temps[i]; 1669 if (its->indirect_reg) { 1670 TCGTemp *dts = tcg_temp_alloc(s); 1671 dts->type = its->type; 1672 dts->base_type = its->base_type; 1673 dir_temps[i] = temp_idx(s, dts); 1674 } 1675 } 1676 1677 memset(temp_state, TS_DEAD, nb_globals); 1678 1679 for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) { 1680 TCGOp *op = &s->gen_op_buf[oi]; 1681 TCGArg *args = &s->gen_opparam_buf[op->args]; 1682 TCGOpcode opc = op->opc; 1683 const TCGOpDef *def = &tcg_op_defs[opc]; 1684 TCGLifeData arg_life = op->life; 1685 int nb_iargs, nb_oargs, call_flags; 1686 TCGArg arg, dir; 1687 1688 oi_next = op->next; 1689 1690 if (opc == INDEX_op_call) { 1691 nb_oargs = op->callo; 1692 nb_iargs = op->calli; 1693 call_flags = args[nb_oargs + nb_iargs + 1]; 1694 } else { 1695 nb_iargs = def->nb_iargs; 1696 nb_oargs = def->nb_oargs; 1697 1698 /* Set flags similar to how calls require. */ 1699 if (def->flags & TCG_OPF_BB_END) { 1700 /* Like writing globals: save_globals */ 1701 call_flags = 0; 1702 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 1703 /* Like reading globals: sync_globals */ 1704 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 1705 } else { 1706 /* No effect on globals. */ 1707 call_flags = (TCG_CALL_NO_READ_GLOBALS | 1708 TCG_CALL_NO_WRITE_GLOBALS); 1709 } 1710 } 1711 1712 /* Make sure that input arguments are available. */ 1713 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 1714 arg = args[i]; 1715 /* Note this unsigned test catches TCG_CALL_ARG_DUMMY too. */ 1716 if (arg < nb_globals) { 1717 dir = dir_temps[arg]; 1718 if (dir != 0 && temp_state[arg] == TS_DEAD) { 1719 TCGTemp *its = &s->temps[arg]; 1720 TCGOpcode lopc = (its->type == TCG_TYPE_I32 1721 ? INDEX_op_ld_i32 1722 : INDEX_op_ld_i64); 1723 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); 1724 TCGArg *largs = &s->gen_opparam_buf[lop->args]; 1725 1726 largs[0] = dir; 1727 largs[1] = temp_idx(s, its->mem_base); 1728 largs[2] = its->mem_offset; 1729 1730 /* Loaded, but synced with memory. */ 1731 temp_state[arg] = TS_MEM; 1732 } 1733 } 1734 } 1735 1736 /* Perform input replacement, and mark inputs that became dead. 1737 No action is required except keeping temp_state up to date 1738 so that we reload when needed. */ 1739 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 1740 arg = args[i]; 1741 if (arg < nb_globals) { 1742 dir = dir_temps[arg]; 1743 if (dir != 0) { 1744 args[i] = dir; 1745 changes = true; 1746 if (IS_DEAD_ARG(i)) { 1747 temp_state[arg] = TS_DEAD; 1748 } 1749 } 1750 } 1751 } 1752 1753 /* Liveness analysis should ensure that the following are 1754 all correct, for call sites and basic block end points. */ 1755 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 1756 /* Nothing to do */ 1757 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 1758 for (i = 0; i < nb_globals; ++i) { 1759 /* Liveness should see that globals are synced back, 1760 that is, either TS_DEAD or TS_MEM. */ 1761 tcg_debug_assert(dir_temps[i] == 0 1762 || temp_state[i] != 0); 1763 } 1764 } else { 1765 for (i = 0; i < nb_globals; ++i) { 1766 /* Liveness should see that globals are saved back, 1767 that is, TS_DEAD, waiting to be reloaded. */ 1768 tcg_debug_assert(dir_temps[i] == 0 1769 || temp_state[i] == TS_DEAD); 1770 } 1771 } 1772 1773 /* Outputs become available. */ 1774 for (i = 0; i < nb_oargs; i++) { 1775 arg = args[i]; 1776 if (arg >= nb_globals) { 1777 continue; 1778 } 1779 dir = dir_temps[arg]; 1780 if (dir == 0) { 1781 continue; 1782 } 1783 args[i] = dir; 1784 changes = true; 1785 1786 /* The output is now live and modified. */ 1787 temp_state[arg] = 0; 1788 1789 /* Sync outputs upon their last write. */ 1790 if (NEED_SYNC_ARG(i)) { 1791 TCGTemp *its = &s->temps[arg]; 1792 TCGOpcode sopc = (its->type == TCG_TYPE_I32 1793 ? INDEX_op_st_i32 1794 : INDEX_op_st_i64); 1795 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 1796 TCGArg *sargs = &s->gen_opparam_buf[sop->args]; 1797 1798 sargs[0] = dir; 1799 sargs[1] = temp_idx(s, its->mem_base); 1800 sargs[2] = its->mem_offset; 1801 1802 temp_state[arg] = TS_MEM; 1803 } 1804 /* Drop outputs that are dead. */ 1805 if (IS_DEAD_ARG(i)) { 1806 temp_state[arg] = TS_DEAD; 1807 } 1808 } 1809 } 1810 1811 return changes; 1812 } 1813 1814 #ifdef CONFIG_DEBUG_TCG 1815 static void dump_regs(TCGContext *s) 1816 { 1817 TCGTemp *ts; 1818 int i; 1819 char buf[64]; 1820 1821 for(i = 0; i < s->nb_temps; i++) { 1822 ts = &s->temps[i]; 1823 printf(" %10s: ", tcg_get_arg_str_idx(s, buf, sizeof(buf), i)); 1824 switch(ts->val_type) { 1825 case TEMP_VAL_REG: 1826 printf("%s", tcg_target_reg_names[ts->reg]); 1827 break; 1828 case TEMP_VAL_MEM: 1829 printf("%d(%s)", (int)ts->mem_offset, 1830 tcg_target_reg_names[ts->mem_base->reg]); 1831 break; 1832 case TEMP_VAL_CONST: 1833 printf("$0x%" TCG_PRIlx, ts->val); 1834 break; 1835 case TEMP_VAL_DEAD: 1836 printf("D"); 1837 break; 1838 default: 1839 printf("???"); 1840 break; 1841 } 1842 printf("\n"); 1843 } 1844 1845 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 1846 if (s->reg_to_temp[i] != NULL) { 1847 printf("%s: %s\n", 1848 tcg_target_reg_names[i], 1849 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); 1850 } 1851 } 1852 } 1853 1854 static void check_regs(TCGContext *s) 1855 { 1856 int reg; 1857 int k; 1858 TCGTemp *ts; 1859 char buf[64]; 1860 1861 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { 1862 ts = s->reg_to_temp[reg]; 1863 if (ts != NULL) { 1864 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { 1865 printf("Inconsistency for register %s:\n", 1866 tcg_target_reg_names[reg]); 1867 goto fail; 1868 } 1869 } 1870 } 1871 for (k = 0; k < s->nb_temps; k++) { 1872 ts = &s->temps[k]; 1873 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg 1874 && s->reg_to_temp[ts->reg] != ts) { 1875 printf("Inconsistency for temp %s:\n", 1876 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 1877 fail: 1878 printf("reg state:\n"); 1879 dump_regs(s); 1880 tcg_abort(); 1881 } 1882 } 1883 } 1884 #endif 1885 1886 static void temp_allocate_frame(TCGContext *s, int temp) 1887 { 1888 TCGTemp *ts; 1889 ts = &s->temps[temp]; 1890 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64) 1891 /* Sparc64 stack is accessed with offset of 2047 */ 1892 s->current_frame_offset = (s->current_frame_offset + 1893 (tcg_target_long)sizeof(tcg_target_long) - 1) & 1894 ~(sizeof(tcg_target_long) - 1); 1895 #endif 1896 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) > 1897 s->frame_end) { 1898 tcg_abort(); 1899 } 1900 ts->mem_offset = s->current_frame_offset; 1901 ts->mem_base = s->frame_temp; 1902 ts->mem_allocated = 1; 1903 s->current_frame_offset += sizeof(tcg_target_long); 1904 } 1905 1906 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet); 1907 1908 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 1909 mark it free; otherwise mark it dead. */ 1910 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 1911 { 1912 if (ts->fixed_reg) { 1913 return; 1914 } 1915 if (ts->val_type == TEMP_VAL_REG) { 1916 s->reg_to_temp[ts->reg] = NULL; 1917 } 1918 ts->val_type = (free_or_dead < 0 1919 || ts->temp_local 1920 || temp_idx(s, ts) < s->nb_globals 1921 ? TEMP_VAL_MEM : TEMP_VAL_DEAD); 1922 } 1923 1924 /* Mark a temporary as dead. */ 1925 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 1926 { 1927 temp_free_or_dead(s, ts, 1); 1928 } 1929 1930 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 1931 registers needs to be allocated to store a constant. If 'free_or_dead' 1932 is non-zero, subsequently release the temporary; if it is positive, the 1933 temp is dead; if it is negative, the temp is free. */ 1934 static void temp_sync(TCGContext *s, TCGTemp *ts, 1935 TCGRegSet allocated_regs, int free_or_dead) 1936 { 1937 if (ts->fixed_reg) { 1938 return; 1939 } 1940 if (!ts->mem_coherent) { 1941 if (!ts->mem_allocated) { 1942 temp_allocate_frame(s, temp_idx(s, ts)); 1943 } 1944 switch (ts->val_type) { 1945 case TEMP_VAL_CONST: 1946 /* If we're going to free the temp immediately, then we won't 1947 require it later in a register, so attempt to store the 1948 constant to memory directly. */ 1949 if (free_or_dead 1950 && tcg_out_sti(s, ts->type, ts->val, 1951 ts->mem_base->reg, ts->mem_offset)) { 1952 break; 1953 } 1954 temp_load(s, ts, tcg_target_available_regs[ts->type], 1955 allocated_regs); 1956 /* fallthrough */ 1957 1958 case TEMP_VAL_REG: 1959 tcg_out_st(s, ts->type, ts->reg, 1960 ts->mem_base->reg, ts->mem_offset); 1961 break; 1962 1963 case TEMP_VAL_MEM: 1964 break; 1965 1966 case TEMP_VAL_DEAD: 1967 default: 1968 tcg_abort(); 1969 } 1970 ts->mem_coherent = 1; 1971 } 1972 if (free_or_dead) { 1973 temp_free_or_dead(s, ts, free_or_dead); 1974 } 1975 } 1976 1977 /* free register 'reg' by spilling the corresponding temporary if necessary */ 1978 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 1979 { 1980 TCGTemp *ts = s->reg_to_temp[reg]; 1981 if (ts != NULL) { 1982 temp_sync(s, ts, allocated_regs, -1); 1983 } 1984 } 1985 1986 /* Allocate a register belonging to reg1 & ~reg2 */ 1987 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs, 1988 TCGRegSet allocated_regs, bool rev) 1989 { 1990 int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 1991 const int *order; 1992 TCGReg reg; 1993 TCGRegSet reg_ct; 1994 1995 tcg_regset_andnot(reg_ct, desired_regs, allocated_regs); 1996 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 1997 1998 /* first try free registers */ 1999 for(i = 0; i < n; i++) { 2000 reg = order[i]; 2001 if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL) 2002 return reg; 2003 } 2004 2005 /* XXX: do better spill choice */ 2006 for(i = 0; i < n; i++) { 2007 reg = order[i]; 2008 if (tcg_regset_test_reg(reg_ct, reg)) { 2009 tcg_reg_free(s, reg, allocated_regs); 2010 return reg; 2011 } 2012 } 2013 2014 tcg_abort(); 2015 } 2016 2017 /* Make sure the temporary is in a register. If needed, allocate the register 2018 from DESIRED while avoiding ALLOCATED. */ 2019 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 2020 TCGRegSet allocated_regs) 2021 { 2022 TCGReg reg; 2023 2024 switch (ts->val_type) { 2025 case TEMP_VAL_REG: 2026 return; 2027 case TEMP_VAL_CONST: 2028 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base); 2029 tcg_out_movi(s, ts->type, reg, ts->val); 2030 ts->mem_coherent = 0; 2031 break; 2032 case TEMP_VAL_MEM: 2033 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base); 2034 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 2035 ts->mem_coherent = 1; 2036 break; 2037 case TEMP_VAL_DEAD: 2038 default: 2039 tcg_abort(); 2040 } 2041 ts->reg = reg; 2042 ts->val_type = TEMP_VAL_REG; 2043 s->reg_to_temp[reg] = ts; 2044 } 2045 2046 /* Save a temporary to memory. 'allocated_regs' is used in case a 2047 temporary registers needs to be allocated to store a constant. */ 2048 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 2049 { 2050 /* The liveness analysis already ensures that globals are back 2051 in memory. Keep an tcg_debug_assert for safety. */ 2052 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg); 2053 } 2054 2055 /* save globals to their canonical location and assume they can be 2056 modified be the following code. 'allocated_regs' is used in case a 2057 temporary registers needs to be allocated to store a constant. */ 2058 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 2059 { 2060 int i; 2061 2062 for (i = 0; i < s->nb_globals; i++) { 2063 temp_save(s, &s->temps[i], allocated_regs); 2064 } 2065 } 2066 2067 /* sync globals to their canonical location and assume they can be 2068 read by the following code. 'allocated_regs' is used in case a 2069 temporary registers needs to be allocated to store a constant. */ 2070 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 2071 { 2072 int i; 2073 2074 for (i = 0; i < s->nb_globals; i++) { 2075 TCGTemp *ts = &s->temps[i]; 2076 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 2077 || ts->fixed_reg 2078 || ts->mem_coherent); 2079 } 2080 } 2081 2082 /* at the end of a basic block, we assume all temporaries are dead and 2083 all globals are stored at their canonical location. */ 2084 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 2085 { 2086 int i; 2087 2088 for (i = s->nb_globals; i < s->nb_temps; i++) { 2089 TCGTemp *ts = &s->temps[i]; 2090 if (ts->temp_local) { 2091 temp_save(s, ts, allocated_regs); 2092 } else { 2093 /* The liveness analysis already ensures that temps are dead. 2094 Keep an tcg_debug_assert for safety. */ 2095 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 2096 } 2097 } 2098 2099 save_globals(s, allocated_regs); 2100 } 2101 2102 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 2103 tcg_target_ulong val, TCGLifeData arg_life) 2104 { 2105 if (ots->fixed_reg) { 2106 /* For fixed registers, we do not do any constant propagation. */ 2107 tcg_out_movi(s, ots->type, ots->reg, val); 2108 return; 2109 } 2110 2111 /* The movi is not explicitly generated here. */ 2112 if (ots->val_type == TEMP_VAL_REG) { 2113 s->reg_to_temp[ots->reg] = NULL; 2114 } 2115 ots->val_type = TEMP_VAL_CONST; 2116 ots->val = val; 2117 ots->mem_coherent = 0; 2118 if (NEED_SYNC_ARG(0)) { 2119 temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0)); 2120 } else if (IS_DEAD_ARG(0)) { 2121 temp_dead(s, ots); 2122 } 2123 } 2124 2125 static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args, 2126 TCGLifeData arg_life) 2127 { 2128 TCGTemp *ots = &s->temps[args[0]]; 2129 tcg_target_ulong val = args[1]; 2130 2131 tcg_reg_alloc_do_movi(s, ots, val, arg_life); 2132 } 2133 2134 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, 2135 const TCGArg *args, TCGLifeData arg_life) 2136 { 2137 TCGRegSet allocated_regs; 2138 TCGTemp *ts, *ots; 2139 TCGType otype, itype; 2140 2141 tcg_regset_set(allocated_regs, s->reserved_regs); 2142 ots = &s->temps[args[0]]; 2143 ts = &s->temps[args[1]]; 2144 2145 /* Note that otype != itype for no-op truncation. */ 2146 otype = ots->type; 2147 itype = ts->type; 2148 2149 if (ts->val_type == TEMP_VAL_CONST) { 2150 /* propagate constant or generate sti */ 2151 tcg_target_ulong val = ts->val; 2152 if (IS_DEAD_ARG(1)) { 2153 temp_dead(s, ts); 2154 } 2155 tcg_reg_alloc_do_movi(s, ots, val, arg_life); 2156 return; 2157 } 2158 2159 /* If the source value is in memory we're going to be forced 2160 to have it in a register in order to perform the copy. Copy 2161 the SOURCE value into its own register first, that way we 2162 don't have to reload SOURCE the next time it is used. */ 2163 if (ts->val_type == TEMP_VAL_MEM) { 2164 temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs); 2165 } 2166 2167 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 2168 if (IS_DEAD_ARG(0) && !ots->fixed_reg) { 2169 /* mov to a non-saved dead register makes no sense (even with 2170 liveness analysis disabled). */ 2171 tcg_debug_assert(NEED_SYNC_ARG(0)); 2172 if (!ots->mem_allocated) { 2173 temp_allocate_frame(s, args[0]); 2174 } 2175 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); 2176 if (IS_DEAD_ARG(1)) { 2177 temp_dead(s, ts); 2178 } 2179 temp_dead(s, ots); 2180 } else { 2181 if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) { 2182 /* the mov can be suppressed */ 2183 if (ots->val_type == TEMP_VAL_REG) { 2184 s->reg_to_temp[ots->reg] = NULL; 2185 } 2186 ots->reg = ts->reg; 2187 temp_dead(s, ts); 2188 } else { 2189 if (ots->val_type != TEMP_VAL_REG) { 2190 /* When allocating a new register, make sure to not spill the 2191 input one. */ 2192 tcg_regset_set_reg(allocated_regs, ts->reg); 2193 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 2194 allocated_regs, ots->indirect_base); 2195 } 2196 tcg_out_mov(s, otype, ots->reg, ts->reg); 2197 } 2198 ots->val_type = TEMP_VAL_REG; 2199 ots->mem_coherent = 0; 2200 s->reg_to_temp[ots->reg] = ots; 2201 if (NEED_SYNC_ARG(0)) { 2202 temp_sync(s, ots, allocated_regs, 0); 2203 } 2204 } 2205 } 2206 2207 static void tcg_reg_alloc_op(TCGContext *s, 2208 const TCGOpDef *def, TCGOpcode opc, 2209 const TCGArg *args, TCGLifeData arg_life) 2210 { 2211 TCGRegSet allocated_regs; 2212 int i, k, nb_iargs, nb_oargs; 2213 TCGReg reg; 2214 TCGArg arg; 2215 const TCGArgConstraint *arg_ct; 2216 TCGTemp *ts; 2217 TCGArg new_args[TCG_MAX_OP_ARGS]; 2218 int const_args[TCG_MAX_OP_ARGS]; 2219 2220 nb_oargs = def->nb_oargs; 2221 nb_iargs = def->nb_iargs; 2222 2223 /* copy constants */ 2224 memcpy(new_args + nb_oargs + nb_iargs, 2225 args + nb_oargs + nb_iargs, 2226 sizeof(TCGArg) * def->nb_cargs); 2227 2228 /* satisfy input constraints */ 2229 tcg_regset_set(allocated_regs, s->reserved_regs); 2230 for(k = 0; k < nb_iargs; k++) { 2231 i = def->sorted_args[nb_oargs + k]; 2232 arg = args[i]; 2233 arg_ct = &def->args_ct[i]; 2234 ts = &s->temps[arg]; 2235 2236 if (ts->val_type == TEMP_VAL_CONST 2237 && tcg_target_const_match(ts->val, ts->type, arg_ct)) { 2238 /* constant is OK for instruction */ 2239 const_args[i] = 1; 2240 new_args[i] = ts->val; 2241 goto iarg_end; 2242 } 2243 2244 temp_load(s, ts, arg_ct->u.regs, allocated_regs); 2245 2246 if (arg_ct->ct & TCG_CT_IALIAS) { 2247 if (ts->fixed_reg) { 2248 /* if fixed register, we must allocate a new register 2249 if the alias is not the same register */ 2250 if (arg != args[arg_ct->alias_index]) 2251 goto allocate_in_reg; 2252 } else { 2253 /* if the input is aliased to an output and if it is 2254 not dead after the instruction, we must allocate 2255 a new register and move it */ 2256 if (!IS_DEAD_ARG(i)) { 2257 goto allocate_in_reg; 2258 } 2259 /* check if the current register has already been allocated 2260 for another input aliased to an output */ 2261 int k2, i2; 2262 for (k2 = 0 ; k2 < k ; k2++) { 2263 i2 = def->sorted_args[nb_oargs + k2]; 2264 if ((def->args_ct[i2].ct & TCG_CT_IALIAS) && 2265 (new_args[i2] == ts->reg)) { 2266 goto allocate_in_reg; 2267 } 2268 } 2269 } 2270 } 2271 reg = ts->reg; 2272 if (tcg_regset_test_reg(arg_ct->u.regs, reg)) { 2273 /* nothing to do : the constraint is satisfied */ 2274 } else { 2275 allocate_in_reg: 2276 /* allocate a new register matching the constraint 2277 and move the temporary register into it */ 2278 reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs, 2279 ts->indirect_base); 2280 tcg_out_mov(s, ts->type, reg, ts->reg); 2281 } 2282 new_args[i] = reg; 2283 const_args[i] = 0; 2284 tcg_regset_set_reg(allocated_regs, reg); 2285 iarg_end: ; 2286 } 2287 2288 /* mark dead temporaries and free the associated registers */ 2289 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2290 if (IS_DEAD_ARG(i)) { 2291 temp_dead(s, &s->temps[args[i]]); 2292 } 2293 } 2294 2295 if (def->flags & TCG_OPF_BB_END) { 2296 tcg_reg_alloc_bb_end(s, allocated_regs); 2297 } else { 2298 if (def->flags & TCG_OPF_CALL_CLOBBER) { 2299 /* XXX: permit generic clobber register list ? */ 2300 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 2301 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 2302 tcg_reg_free(s, i, allocated_regs); 2303 } 2304 } 2305 } 2306 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2307 /* sync globals if the op has side effects and might trigger 2308 an exception. */ 2309 sync_globals(s, allocated_regs); 2310 } 2311 2312 /* satisfy the output constraints */ 2313 tcg_regset_set(allocated_regs, s->reserved_regs); 2314 for(k = 0; k < nb_oargs; k++) { 2315 i = def->sorted_args[k]; 2316 arg = args[i]; 2317 arg_ct = &def->args_ct[i]; 2318 ts = &s->temps[arg]; 2319 if (arg_ct->ct & TCG_CT_ALIAS) { 2320 reg = new_args[arg_ct->alias_index]; 2321 } else { 2322 /* if fixed register, we try to use it */ 2323 reg = ts->reg; 2324 if (ts->fixed_reg && 2325 tcg_regset_test_reg(arg_ct->u.regs, reg)) { 2326 goto oarg_end; 2327 } 2328 reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs, 2329 ts->indirect_base); 2330 } 2331 tcg_regset_set_reg(allocated_regs, reg); 2332 /* if a fixed register is used, then a move will be done afterwards */ 2333 if (!ts->fixed_reg) { 2334 if (ts->val_type == TEMP_VAL_REG) { 2335 s->reg_to_temp[ts->reg] = NULL; 2336 } 2337 ts->val_type = TEMP_VAL_REG; 2338 ts->reg = reg; 2339 /* temp value is modified, so the value kept in memory is 2340 potentially not the same */ 2341 ts->mem_coherent = 0; 2342 s->reg_to_temp[reg] = ts; 2343 } 2344 oarg_end: 2345 new_args[i] = reg; 2346 } 2347 } 2348 2349 /* emit instruction */ 2350 tcg_out_op(s, opc, new_args, const_args); 2351 2352 /* move the outputs in the correct register if needed */ 2353 for(i = 0; i < nb_oargs; i++) { 2354 ts = &s->temps[args[i]]; 2355 reg = new_args[i]; 2356 if (ts->fixed_reg && ts->reg != reg) { 2357 tcg_out_mov(s, ts->type, ts->reg, reg); 2358 } 2359 if (NEED_SYNC_ARG(i)) { 2360 temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i)); 2361 } else if (IS_DEAD_ARG(i)) { 2362 temp_dead(s, ts); 2363 } 2364 } 2365 } 2366 2367 #ifdef TCG_TARGET_STACK_GROWSUP 2368 #define STACK_DIR(x) (-(x)) 2369 #else 2370 #define STACK_DIR(x) (x) 2371 #endif 2372 2373 static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs, 2374 const TCGArg * const args, TCGLifeData arg_life) 2375 { 2376 int flags, nb_regs, i; 2377 TCGReg reg; 2378 TCGArg arg; 2379 TCGTemp *ts; 2380 intptr_t stack_offset; 2381 size_t call_stack_size; 2382 tcg_insn_unit *func_addr; 2383 int allocate_args; 2384 TCGRegSet allocated_regs; 2385 2386 func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs]; 2387 flags = args[nb_oargs + nb_iargs + 1]; 2388 2389 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 2390 if (nb_regs > nb_iargs) { 2391 nb_regs = nb_iargs; 2392 } 2393 2394 /* assign stack slots first */ 2395 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); 2396 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 2397 ~(TCG_TARGET_STACK_ALIGN - 1); 2398 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); 2399 if (allocate_args) { 2400 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed, 2401 preallocate call stack */ 2402 tcg_abort(); 2403 } 2404 2405 stack_offset = TCG_TARGET_CALL_STACK_OFFSET; 2406 for(i = nb_regs; i < nb_iargs; i++) { 2407 arg = args[nb_oargs + i]; 2408 #ifdef TCG_TARGET_STACK_GROWSUP 2409 stack_offset -= sizeof(tcg_target_long); 2410 #endif 2411 if (arg != TCG_CALL_DUMMY_ARG) { 2412 ts = &s->temps[arg]; 2413 temp_load(s, ts, tcg_target_available_regs[ts->type], 2414 s->reserved_regs); 2415 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); 2416 } 2417 #ifndef TCG_TARGET_STACK_GROWSUP 2418 stack_offset += sizeof(tcg_target_long); 2419 #endif 2420 } 2421 2422 /* assign input registers */ 2423 tcg_regset_set(allocated_regs, s->reserved_regs); 2424 for(i = 0; i < nb_regs; i++) { 2425 arg = args[nb_oargs + i]; 2426 if (arg != TCG_CALL_DUMMY_ARG) { 2427 ts = &s->temps[arg]; 2428 reg = tcg_target_call_iarg_regs[i]; 2429 tcg_reg_free(s, reg, allocated_regs); 2430 2431 if (ts->val_type == TEMP_VAL_REG) { 2432 if (ts->reg != reg) { 2433 tcg_out_mov(s, ts->type, reg, ts->reg); 2434 } 2435 } else { 2436 TCGRegSet arg_set; 2437 2438 tcg_regset_clear(arg_set); 2439 tcg_regset_set_reg(arg_set, reg); 2440 temp_load(s, ts, arg_set, allocated_regs); 2441 } 2442 2443 tcg_regset_set_reg(allocated_regs, reg); 2444 } 2445 } 2446 2447 /* mark dead temporaries and free the associated registers */ 2448 for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2449 if (IS_DEAD_ARG(i)) { 2450 temp_dead(s, &s->temps[args[i]]); 2451 } 2452 } 2453 2454 /* clobber call registers */ 2455 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 2456 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 2457 tcg_reg_free(s, i, allocated_regs); 2458 } 2459 } 2460 2461 /* Save globals if they might be written by the helper, sync them if 2462 they might be read. */ 2463 if (flags & TCG_CALL_NO_READ_GLOBALS) { 2464 /* Nothing to do */ 2465 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) { 2466 sync_globals(s, allocated_regs); 2467 } else { 2468 save_globals(s, allocated_regs); 2469 } 2470 2471 tcg_out_call(s, func_addr); 2472 2473 /* assign output registers and emit moves if needed */ 2474 for(i = 0; i < nb_oargs; i++) { 2475 arg = args[i]; 2476 ts = &s->temps[arg]; 2477 reg = tcg_target_call_oarg_regs[i]; 2478 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 2479 2480 if (ts->fixed_reg) { 2481 if (ts->reg != reg) { 2482 tcg_out_mov(s, ts->type, ts->reg, reg); 2483 } 2484 } else { 2485 if (ts->val_type == TEMP_VAL_REG) { 2486 s->reg_to_temp[ts->reg] = NULL; 2487 } 2488 ts->val_type = TEMP_VAL_REG; 2489 ts->reg = reg; 2490 ts->mem_coherent = 0; 2491 s->reg_to_temp[reg] = ts; 2492 if (NEED_SYNC_ARG(i)) { 2493 temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i)); 2494 } else if (IS_DEAD_ARG(i)) { 2495 temp_dead(s, ts); 2496 } 2497 } 2498 } 2499 } 2500 2501 #ifdef CONFIG_PROFILER 2502 2503 static int64_t tcg_table_op_count[NB_OPS]; 2504 2505 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) 2506 { 2507 int i; 2508 2509 for (i = 0; i < NB_OPS; i++) { 2510 cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name, 2511 tcg_table_op_count[i]); 2512 } 2513 } 2514 #else 2515 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) 2516 { 2517 cpu_fprintf(f, "[TCG profiler not compiled]\n"); 2518 } 2519 #endif 2520 2521 2522 int tcg_gen_code(TCGContext *s, TranslationBlock *tb) 2523 { 2524 int i, oi, oi_next, num_insns; 2525 2526 #ifdef CONFIG_PROFILER 2527 { 2528 int n; 2529 2530 n = s->gen_op_buf[0].prev + 1; 2531 s->op_count += n; 2532 if (n > s->op_count_max) { 2533 s->op_count_max = n; 2534 } 2535 2536 n = s->nb_temps; 2537 s->temp_count += n; 2538 if (n > s->temp_count_max) { 2539 s->temp_count_max = n; 2540 } 2541 } 2542 #endif 2543 2544 #ifdef DEBUG_DISAS 2545 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 2546 && qemu_log_in_addr_range(tb->pc))) { 2547 qemu_log_lock(); 2548 qemu_log("OP:\n"); 2549 tcg_dump_ops(s); 2550 qemu_log("\n"); 2551 qemu_log_unlock(); 2552 } 2553 #endif 2554 2555 #ifdef CONFIG_PROFILER 2556 s->opt_time -= profile_getclock(); 2557 #endif 2558 2559 #ifdef USE_TCG_OPTIMIZATIONS 2560 tcg_optimize(s); 2561 #endif 2562 2563 #ifdef CONFIG_PROFILER 2564 s->opt_time += profile_getclock(); 2565 s->la_time -= profile_getclock(); 2566 #endif 2567 2568 { 2569 uint8_t *temp_state = tcg_malloc(s->nb_temps + s->nb_indirects); 2570 2571 liveness_pass_1(s, temp_state); 2572 2573 if (s->nb_indirects > 0) { 2574 #ifdef DEBUG_DISAS 2575 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 2576 && qemu_log_in_addr_range(tb->pc))) { 2577 qemu_log_lock(); 2578 qemu_log("OP before indirect lowering:\n"); 2579 tcg_dump_ops(s); 2580 qemu_log("\n"); 2581 qemu_log_unlock(); 2582 } 2583 #endif 2584 /* Replace indirect temps with direct temps. */ 2585 if (liveness_pass_2(s, temp_state)) { 2586 /* If changes were made, re-run liveness. */ 2587 liveness_pass_1(s, temp_state); 2588 } 2589 } 2590 } 2591 2592 #ifdef CONFIG_PROFILER 2593 s->la_time += profile_getclock(); 2594 #endif 2595 2596 #ifdef DEBUG_DISAS 2597 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 2598 && qemu_log_in_addr_range(tb->pc))) { 2599 qemu_log_lock(); 2600 qemu_log("OP after optimization and liveness analysis:\n"); 2601 tcg_dump_ops(s); 2602 qemu_log("\n"); 2603 qemu_log_unlock(); 2604 } 2605 #endif 2606 2607 tcg_reg_alloc_start(s); 2608 2609 s->code_buf = tb->tc_ptr; 2610 s->code_ptr = tb->tc_ptr; 2611 2612 tcg_out_tb_init(s); 2613 2614 num_insns = -1; 2615 for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) { 2616 TCGOp * const op = &s->gen_op_buf[oi]; 2617 TCGArg * const args = &s->gen_opparam_buf[op->args]; 2618 TCGOpcode opc = op->opc; 2619 const TCGOpDef *def = &tcg_op_defs[opc]; 2620 TCGLifeData arg_life = op->life; 2621 2622 oi_next = op->next; 2623 #ifdef CONFIG_PROFILER 2624 tcg_table_op_count[opc]++; 2625 #endif 2626 2627 switch (opc) { 2628 case INDEX_op_mov_i32: 2629 case INDEX_op_mov_i64: 2630 tcg_reg_alloc_mov(s, def, args, arg_life); 2631 break; 2632 case INDEX_op_movi_i32: 2633 case INDEX_op_movi_i64: 2634 tcg_reg_alloc_movi(s, args, arg_life); 2635 break; 2636 case INDEX_op_insn_start: 2637 if (num_insns >= 0) { 2638 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 2639 } 2640 num_insns++; 2641 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 2642 target_ulong a; 2643 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 2644 a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2]; 2645 #else 2646 a = args[i]; 2647 #endif 2648 s->gen_insn_data[num_insns][i] = a; 2649 } 2650 break; 2651 case INDEX_op_discard: 2652 temp_dead(s, &s->temps[args[0]]); 2653 break; 2654 case INDEX_op_set_label: 2655 tcg_reg_alloc_bb_end(s, s->reserved_regs); 2656 tcg_out_label(s, arg_label(args[0]), s->code_ptr); 2657 break; 2658 case INDEX_op_call: 2659 tcg_reg_alloc_call(s, op->callo, op->calli, args, arg_life); 2660 break; 2661 default: 2662 /* Sanity check that we've not introduced any unhandled opcodes. */ 2663 if (def->flags & TCG_OPF_NOT_PRESENT) { 2664 tcg_abort(); 2665 } 2666 /* Note: in order to speed up the code, it would be much 2667 faster to have specialized register allocator functions for 2668 some common argument patterns */ 2669 tcg_reg_alloc_op(s, def, opc, args, arg_life); 2670 break; 2671 } 2672 #ifdef CONFIG_DEBUG_TCG 2673 check_regs(s); 2674 #endif 2675 /* Test for (pending) buffer overflow. The assumption is that any 2676 one operation beginning below the high water mark cannot overrun 2677 the buffer completely. Thus we can test for overflow after 2678 generating code without having to check during generation. */ 2679 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 2680 return -1; 2681 } 2682 } 2683 tcg_debug_assert(num_insns >= 0); 2684 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 2685 2686 /* Generate TB finalization at the end of block */ 2687 if (!tcg_out_tb_finalize(s)) { 2688 return -1; 2689 } 2690 2691 /* flush instruction cache */ 2692 flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); 2693 2694 return tcg_current_code_size(s); 2695 } 2696 2697 #ifdef CONFIG_PROFILER 2698 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) 2699 { 2700 TCGContext *s = &tcg_ctx; 2701 int64_t tb_count = s->tb_count; 2702 int64_t tb_div_count = tb_count ? tb_count : 1; 2703 int64_t tot = s->interm_time + s->code_time; 2704 2705 cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n", 2706 tot, tot / 2.4e9); 2707 cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n", 2708 tb_count, s->tb_count1 - tb_count, 2709 (double)(s->tb_count1 - s->tb_count) 2710 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0); 2711 cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n", 2712 (double)s->op_count / tb_div_count, s->op_count_max); 2713 cpu_fprintf(f, "deleted ops/TB %0.2f\n", 2714 (double)s->del_op_count / tb_div_count); 2715 cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n", 2716 (double)s->temp_count / tb_div_count, s->temp_count_max); 2717 cpu_fprintf(f, "avg host code/TB %0.1f\n", 2718 (double)s->code_out_len / tb_div_count); 2719 cpu_fprintf(f, "avg search data/TB %0.1f\n", 2720 (double)s->search_out_len / tb_div_count); 2721 2722 cpu_fprintf(f, "cycles/op %0.1f\n", 2723 s->op_count ? (double)tot / s->op_count : 0); 2724 cpu_fprintf(f, "cycles/in byte %0.1f\n", 2725 s->code_in_len ? (double)tot / s->code_in_len : 0); 2726 cpu_fprintf(f, "cycles/out byte %0.1f\n", 2727 s->code_out_len ? (double)tot / s->code_out_len : 0); 2728 cpu_fprintf(f, "cycles/search byte %0.1f\n", 2729 s->search_out_len ? (double)tot / s->search_out_len : 0); 2730 if (tot == 0) { 2731 tot = 1; 2732 } 2733 cpu_fprintf(f, " gen_interm time %0.1f%%\n", 2734 (double)s->interm_time / tot * 100.0); 2735 cpu_fprintf(f, " gen_code time %0.1f%%\n", 2736 (double)s->code_time / tot * 100.0); 2737 cpu_fprintf(f, "optim./code time %0.1f%%\n", 2738 (double)s->opt_time / (s->code_time ? s->code_time : 1) 2739 * 100.0); 2740 cpu_fprintf(f, "liveness/code time %0.1f%%\n", 2741 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0); 2742 cpu_fprintf(f, "cpu_restore count %" PRId64 "\n", 2743 s->restore_count); 2744 cpu_fprintf(f, " avg cycles %0.1f\n", 2745 s->restore_count ? (double)s->restore_time / s->restore_count : 0); 2746 } 2747 #else 2748 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) 2749 { 2750 cpu_fprintf(f, "[TCG profiler not compiled]\n"); 2751 } 2752 #endif 2753 2754 #ifdef ELF_HOST_MACHINE 2755 /* In order to use this feature, the backend needs to do three things: 2756 2757 (1) Define ELF_HOST_MACHINE to indicate both what value to 2758 put into the ELF image and to indicate support for the feature. 2759 2760 (2) Define tcg_register_jit. This should create a buffer containing 2761 the contents of a .debug_frame section that describes the post- 2762 prologue unwind info for the tcg machine. 2763 2764 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 2765 */ 2766 2767 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */ 2768 typedef enum { 2769 JIT_NOACTION = 0, 2770 JIT_REGISTER_FN, 2771 JIT_UNREGISTER_FN 2772 } jit_actions_t; 2773 2774 struct jit_code_entry { 2775 struct jit_code_entry *next_entry; 2776 struct jit_code_entry *prev_entry; 2777 const void *symfile_addr; 2778 uint64_t symfile_size; 2779 }; 2780 2781 struct jit_descriptor { 2782 uint32_t version; 2783 uint32_t action_flag; 2784 struct jit_code_entry *relevant_entry; 2785 struct jit_code_entry *first_entry; 2786 }; 2787 2788 void __jit_debug_register_code(void) __attribute__((noinline)); 2789 void __jit_debug_register_code(void) 2790 { 2791 asm(""); 2792 } 2793 2794 /* Must statically initialize the version, because GDB may check 2795 the version before we can set it. */ 2796 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 2797 2798 /* End GDB interface. */ 2799 2800 static int find_string(const char *strtab, const char *str) 2801 { 2802 const char *p = strtab + 1; 2803 2804 while (1) { 2805 if (strcmp(p, str) == 0) { 2806 return p - strtab; 2807 } 2808 p += strlen(p) + 1; 2809 } 2810 } 2811 2812 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size, 2813 const void *debug_frame, 2814 size_t debug_frame_size) 2815 { 2816 struct __attribute__((packed)) DebugInfo { 2817 uint32_t len; 2818 uint16_t version; 2819 uint32_t abbrev; 2820 uint8_t ptr_size; 2821 uint8_t cu_die; 2822 uint16_t cu_lang; 2823 uintptr_t cu_low_pc; 2824 uintptr_t cu_high_pc; 2825 uint8_t fn_die; 2826 char fn_name[16]; 2827 uintptr_t fn_low_pc; 2828 uintptr_t fn_high_pc; 2829 uint8_t cu_eoc; 2830 }; 2831 2832 struct ElfImage { 2833 ElfW(Ehdr) ehdr; 2834 ElfW(Phdr) phdr; 2835 ElfW(Shdr) shdr[7]; 2836 ElfW(Sym) sym[2]; 2837 struct DebugInfo di; 2838 uint8_t da[24]; 2839 char str[80]; 2840 }; 2841 2842 struct ElfImage *img; 2843 2844 static const struct ElfImage img_template = { 2845 .ehdr = { 2846 .e_ident[EI_MAG0] = ELFMAG0, 2847 .e_ident[EI_MAG1] = ELFMAG1, 2848 .e_ident[EI_MAG2] = ELFMAG2, 2849 .e_ident[EI_MAG3] = ELFMAG3, 2850 .e_ident[EI_CLASS] = ELF_CLASS, 2851 .e_ident[EI_DATA] = ELF_DATA, 2852 .e_ident[EI_VERSION] = EV_CURRENT, 2853 .e_type = ET_EXEC, 2854 .e_machine = ELF_HOST_MACHINE, 2855 .e_version = EV_CURRENT, 2856 .e_phoff = offsetof(struct ElfImage, phdr), 2857 .e_shoff = offsetof(struct ElfImage, shdr), 2858 .e_ehsize = sizeof(ElfW(Shdr)), 2859 .e_phentsize = sizeof(ElfW(Phdr)), 2860 .e_phnum = 1, 2861 .e_shentsize = sizeof(ElfW(Shdr)), 2862 .e_shnum = ARRAY_SIZE(img->shdr), 2863 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 2864 #ifdef ELF_HOST_FLAGS 2865 .e_flags = ELF_HOST_FLAGS, 2866 #endif 2867 #ifdef ELF_OSABI 2868 .e_ident[EI_OSABI] = ELF_OSABI, 2869 #endif 2870 }, 2871 .phdr = { 2872 .p_type = PT_LOAD, 2873 .p_flags = PF_X, 2874 }, 2875 .shdr = { 2876 [0] = { .sh_type = SHT_NULL }, 2877 /* Trick: The contents of code_gen_buffer are not present in 2878 this fake ELF file; that got allocated elsewhere. Therefore 2879 we mark .text as SHT_NOBITS (similar to .bss) so that readers 2880 will not look for contents. We can record any address. */ 2881 [1] = { /* .text */ 2882 .sh_type = SHT_NOBITS, 2883 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 2884 }, 2885 [2] = { /* .debug_info */ 2886 .sh_type = SHT_PROGBITS, 2887 .sh_offset = offsetof(struct ElfImage, di), 2888 .sh_size = sizeof(struct DebugInfo), 2889 }, 2890 [3] = { /* .debug_abbrev */ 2891 .sh_type = SHT_PROGBITS, 2892 .sh_offset = offsetof(struct ElfImage, da), 2893 .sh_size = sizeof(img->da), 2894 }, 2895 [4] = { /* .debug_frame */ 2896 .sh_type = SHT_PROGBITS, 2897 .sh_offset = sizeof(struct ElfImage), 2898 }, 2899 [5] = { /* .symtab */ 2900 .sh_type = SHT_SYMTAB, 2901 .sh_offset = offsetof(struct ElfImage, sym), 2902 .sh_size = sizeof(img->sym), 2903 .sh_info = 1, 2904 .sh_link = ARRAY_SIZE(img->shdr) - 1, 2905 .sh_entsize = sizeof(ElfW(Sym)), 2906 }, 2907 [6] = { /* .strtab */ 2908 .sh_type = SHT_STRTAB, 2909 .sh_offset = offsetof(struct ElfImage, str), 2910 .sh_size = sizeof(img->str), 2911 } 2912 }, 2913 .sym = { 2914 [1] = { /* code_gen_buffer */ 2915 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 2916 .st_shndx = 1, 2917 } 2918 }, 2919 .di = { 2920 .len = sizeof(struct DebugInfo) - 4, 2921 .version = 2, 2922 .ptr_size = sizeof(void *), 2923 .cu_die = 1, 2924 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 2925 .fn_die = 2, 2926 .fn_name = "code_gen_buffer" 2927 }, 2928 .da = { 2929 1, /* abbrev number (the cu) */ 2930 0x11, 1, /* DW_TAG_compile_unit, has children */ 2931 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 2932 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 2933 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 2934 0, 0, /* end of abbrev */ 2935 2, /* abbrev number (the fn) */ 2936 0x2e, 0, /* DW_TAG_subprogram, no children */ 2937 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 2938 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 2939 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 2940 0, 0, /* end of abbrev */ 2941 0 /* no more abbrev */ 2942 }, 2943 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 2944 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 2945 }; 2946 2947 /* We only need a single jit entry; statically allocate it. */ 2948 static struct jit_code_entry one_entry; 2949 2950 uintptr_t buf = (uintptr_t)buf_ptr; 2951 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 2952 DebugFrameHeader *dfh; 2953 2954 img = g_malloc(img_size); 2955 *img = img_template; 2956 2957 img->phdr.p_vaddr = buf; 2958 img->phdr.p_paddr = buf; 2959 img->phdr.p_memsz = buf_size; 2960 2961 img->shdr[1].sh_name = find_string(img->str, ".text"); 2962 img->shdr[1].sh_addr = buf; 2963 img->shdr[1].sh_size = buf_size; 2964 2965 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 2966 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 2967 2968 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 2969 img->shdr[4].sh_size = debug_frame_size; 2970 2971 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 2972 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 2973 2974 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 2975 img->sym[1].st_value = buf; 2976 img->sym[1].st_size = buf_size; 2977 2978 img->di.cu_low_pc = buf; 2979 img->di.cu_high_pc = buf + buf_size; 2980 img->di.fn_low_pc = buf; 2981 img->di.fn_high_pc = buf + buf_size; 2982 2983 dfh = (DebugFrameHeader *)(img + 1); 2984 memcpy(dfh, debug_frame, debug_frame_size); 2985 dfh->fde.func_start = buf; 2986 dfh->fde.func_len = buf_size; 2987 2988 #ifdef DEBUG_JIT 2989 /* Enable this block to be able to debug the ELF image file creation. 2990 One can use readelf, objdump, or other inspection utilities. */ 2991 { 2992 FILE *f = fopen("/tmp/qemu.jit", "w+b"); 2993 if (f) { 2994 if (fwrite(img, img_size, 1, f) != img_size) { 2995 /* Avoid stupid unused return value warning for fwrite. */ 2996 } 2997 fclose(f); 2998 } 2999 } 3000 #endif 3001 3002 one_entry.symfile_addr = img; 3003 one_entry.symfile_size = img_size; 3004 3005 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 3006 __jit_debug_descriptor.relevant_entry = &one_entry; 3007 __jit_debug_descriptor.first_entry = &one_entry; 3008 __jit_debug_register_code(); 3009 } 3010 #else 3011 /* No support for the feature. Provide the entry point expected by exec.c, 3012 and implement the internal function we declared earlier. */ 3013 3014 static void tcg_register_jit_int(void *buf, size_t size, 3015 const void *debug_frame, 3016 size_t debug_frame_size) 3017 { 3018 } 3019 3020 void tcg_register_jit(void *buf, size_t buf_size) 3021 { 3022 } 3023 #endif /* ELF_HOST_MACHINE */ 3024