/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/timer.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions.
   Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/cpu-common.h"
#include "exec/exec-all.h"

#include "tcg-op.h"

/* Select the ELF class and byte order of the host, for the in-memory
   ELF image registered with GDB (see tcg_register_jit_int below). */
#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"

/* Forward declarations for functions declared in tcg-target.inc.c and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */

/* DWARF Common Information Entry header, as emitted into the debug-frame
   image handed to GDB.  The len field is pointer-aligned so the record
   can be laid out directly in the image buffer. */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

/* DWARF Frame Description Entry header; func_start/func_len describe the
   generated-code region the FDE covers. */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

/* A CIE immediately followed by an FDE header: the fixed prefix of the
   per-host debug_frame blobs. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.inc.c.
*/ 99 static const char *target_parse_constraint(TCGArgConstraint *ct, 100 const char *ct_str, TCGType type); 101 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, 102 intptr_t arg2); 103 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 104 static void tcg_out_movi(TCGContext *s, TCGType type, 105 TCGReg ret, tcg_target_long arg); 106 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, 107 const int *const_args); 108 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, 109 intptr_t arg2); 110 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 111 TCGReg base, intptr_t ofs); 112 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target); 113 static int tcg_target_const_match(tcg_target_long val, TCGType type, 114 const TCGArgConstraint *arg_ct); 115 static void tcg_out_tb_init(TCGContext *s); 116 static bool tcg_out_tb_finalize(TCGContext *s); 117 118 119 120 static TCGRegSet tcg_target_available_regs[2]; 121 static TCGRegSet tcg_target_call_clobber_regs; 122 123 #if TCG_TARGET_INSN_UNIT_SIZE == 1 124 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v) 125 { 126 *s->code_ptr++ = v; 127 } 128 129 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p, 130 uint8_t v) 131 { 132 *p = v; 133 } 134 #endif 135 136 #if TCG_TARGET_INSN_UNIT_SIZE <= 2 137 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v) 138 { 139 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 140 *s->code_ptr++ = v; 141 } else { 142 tcg_insn_unit *p = s->code_ptr; 143 memcpy(p, &v, sizeof(v)); 144 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE); 145 } 146 } 147 148 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p, 149 uint16_t v) 150 { 151 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 152 *p = v; 153 } else { 154 memcpy(p, &v, sizeof(v)); 155 } 156 } 157 #endif 158 159 #if TCG_TARGET_INSN_UNIT_SIZE <= 4 160 
/* Emit a 32-bit value into the code stream; when the host instruction
   unit is narrower, copy bytewise and advance by the unit count. */
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a previously emitted 32-bit value at P. */
static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
/* Emit a 64-bit value into the code stream; when the host instruction
   unit is narrower, copy bytewise and advance by the unit count. */
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a previously emitted 64-bit value at P. */
static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

/* Record (or immediately apply) a relocation of kind TYPE at CODE_PTR
   against label L.  If the label already has a value the relocation is
   patched in place; otherwise it is queued on the label's list and
   resolved later by tcg_out_label. */
static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r;

    if (l->has_value) {
        /* FIXME: This may break relocations on RISC targets that
           modify instruction fields in place.  The caller may not have
           written the initial value.
 */
        patch_reloc(code_ptr, type, l->u.value, addend);
    } else {
        /* add a new relocation entry */
        r = tcg_malloc(sizeof(TCGRelocation));
        r->type = type;
        r->ptr = code_ptr;
        r->addend = addend;
        r->next = l->u.first_reloc;
        l->u.first_reloc = r;
    }
}

/* Bind label L to code address PTR and patch every relocation that was
   queued against it.  A label may only be resolved once. */
static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
{
    intptr_t value = (intptr_t)ptr;
    TCGRelocation *r;

    tcg_debug_assert(!l->has_value);

    for (r = l->u.first_reloc; r != NULL; r = r->next) {
        patch_reloc(r->ptr, r->type, value, r->addend);
    }

    l->has_value = 1;
    l->u.value_ptr = ptr;
}

/* Allocate a fresh, unresolved label from the translation-lifetime pool. */
TCGLabel *gen_new_label(void)
{
    TCGContext *s = &tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    *l = (TCGLabel){
        .id = s->nb_labels++
    };

    return l;
}

#include "tcg-target.inc.c"

/* pool based memory allocation */

/* Slow path of tcg_malloc: allocate SIZE bytes from the per-context pool.
   Oversized requests get a dedicated "large" pool chunk that is freed at
   reset; normal requests advance through a chain of reusable chunks. */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                /* No reusable chunk available: append a fresh one. */
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current)
                    s->pool_current->next = p;
                else
                    s->pool_first = p;
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

/* Release all pool allocations.  Large chunks are freed outright; the
   regular chunk chain (pool_first) is kept for reuse and merely rewound. */
void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/* Descriptor for one helper function: entry point, printable name, call
   flags and the 64-bit/signedness mask of its arguments. */
typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);

/* One-time initialization of a TCGContext: constraint storage, helper
   lookup table, target backend, and the indirect register order. */
void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    int *sorted_args;
    GHashTable *helper_table;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
    sorted_args = g_malloc(sizeof(int) * total_args);

    /* Carve the two flat arrays into per-opcode slices. */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        def->sorted_args = sorted_args;
        n = def->nb_iargs + def->nb_oargs;
        sorted_args += n;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    s->helpers = helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.
 */
    /* n ends as the count of the leading call-saved registers
       (the first call-clobbered entry stops the scan). */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* Saved registers reversed ... */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    /* ... remaining registers in original order. */
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }
}

/* Generate the host prologue at the start of code_gen_buffer, then shrink
   the buffer so translated code is placed after it. */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->code_gen_prologue = buf0;

    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);
    buf1 = s->code_ptr;
    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size = s->code_gen_buffer_size - prologue_size;
    s->code_gen_buffer_size = total_size;

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - 1024);

    tcg_register_jit(s->code_gen_buffer, total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        log_disas(buf0, prologue_size);
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock();
    }
#endif
}

/* Reset per-translation state before generating a new translation block. */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.
 */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    /* Op 0 is the list sentinel; real ops start at index 1. */
    s->gen_op_buf[0].next = 1;
    s->gen_op_buf[0].prev = 0;
    s->gen_next_op_idx = 1;
    s->gen_next_parm_idx = 0;

    s->be = tcg_malloc(sizeof(TCGBackendData));
}

/* Return the index of TS within the context's temps array. */
static inline int temp_idx(TCGContext *s, TCGTemp *ts)
{
    ptrdiff_t n = ts - s->temps;
    tcg_debug_assert(n >= 0 && n < s->nb_temps);
    return n;
}

/* Allocate and zero the next TCGTemp slot. */
static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;
    tcg_debug_assert(n < TCG_MAX_TEMPS);
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

/* Allocate a global: only valid while no non-global temps exist yet. */
static inline TCGTemp *tcg_global_alloc(TCGContext *s)
{
    tcg_debug_assert(s->nb_globals == s->nb_temps);
    s->nb_globals++;
    return tcg_temp_alloc(s);
}

/* Create a fixed-register global of the given type and reserve the
   register; returns the temp index. */
static int tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                       TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->fixed_reg = 1;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return temp_idx(s, ts);
}

/* Declare the stack-frame region and create the "_frame" base register. */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    int idx;
    s->frame_start = start;
    s->frame_end = start + size;
    idx = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
    s->frame_temp = &s->temps[idx];
}

/* Create a 32-bit global living permanently in host register REG. */
TCGv_i32 tcg_global_reg_new_i32(TCGReg reg, const char *name)
{
    TCGContext *s = &tcg_ctx;
    int idx;

    if (tcg_regset_test_reg(s->reserved_regs, reg)) {
        tcg_abort();
    }
    idx = tcg_global_reg_new_internal(s, TCG_TYPE_I32, reg, name);
    return MAKE_TCGV_I32(idx);
}

/* Create a 64-bit global living permanently in host register REG. */
TCGv_i64 tcg_global_reg_new_i64(TCGReg reg, const char *name)
{
    TCGContext *s = &tcg_ctx;
    int idx;

    if (tcg_regset_test_reg(s->reserved_regs, reg)) {
        tcg_abort();
    }
    idx = tcg_global_reg_new_internal(s, TCG_TYPE_I64, reg, name);
    return MAKE_TCGV_I64(idx);
}

/* Create a memory-backed global at BASE+OFFSET.  On 32-bit hosts a
   64-bit global becomes two adjacent 32-bit halves ("name_0"/"name_1"),
   ordered by host endianness. */
int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                intptr_t offset, const char *name)
{
    TCGContext *s = &tcg_ctx;
    TCGTemp *base_ts = &s->temps[GET_TCGV_PTR(base)];
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    if (!base_ts->fixed_reg) {
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        /* Low/high half placement depends on host endianness. */
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        /* The pair must be adjacent in the temps array. */
        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return temp_idx(s, ts);
}

/* Allocate a temporary (local across branches if TEMP_LOCAL), reusing a
   freed temp of the same kind when one is available. */
static int tcg_temp_new_internal(TCGType type, int temp_local)
{
    TCGContext *s = &tcg_ctx;
    TCGTemp *ts;
    int idx, k;

    /* Free lists are indexed by (type, locality). */
    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->temp_local == temp_local);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* 64-bit values use two adjacent 32-bit temps. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->temp_local = temp_local;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
        }
        idx = temp_idx(s, ts);
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return idx;
}

/* Allocate a 32-bit temporary. */
TCGv_i32 tcg_temp_new_internal_i32(int temp_local)
{
    int idx;

    idx = tcg_temp_new_internal(TCG_TYPE_I32, temp_local);
    return MAKE_TCGV_I32(idx);
}

/* Allocate a 64-bit temporary. */
TCGv_i64 tcg_temp_new_internal_i64(int temp_local)
{
    int idx;

    idx = tcg_temp_new_internal(TCG_TYPE_I64, temp_local);
    return MAKE_TCGV_I64(idx);
}

/* Return temp IDX to the free list matching its type and locality. */
static void tcg_temp_free_internal(int idx)
{
    TCGContext *s = &tcg_ctx;
    TCGTemp *ts;
    int k;

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    /* Globals are never freed. */
    tcg_debug_assert(idx >= s->nb_globals && idx < s->nb_temps);
    ts = &s->temps[idx];
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
    set_bit(idx, s->free_temps[k].l);
}

void tcg_temp_free_i32(TCGv_i32 arg)
{
    tcg_temp_free_internal(GET_TCGV_I32(arg));
}

void tcg_temp_free_i64(TCGv_i64 arg)
{
    tcg_temp_free_internal(GET_TCGV_I64(arg));
}

/* Allocate a 32-bit temp initialized to VAL. */
TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

/* Allocate a 64-bit temp initialized to VAL. */
TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

/* Allocate a 32-bit local temp initialized to VAL. */
TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

/* Allocate a 64-bit local temp initialized to VAL. */
TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
/* Reset the live-temp counter (debug leak detection). */
void tcg_clear_temp_count(void)
{
    TCGContext *s = &tcg_ctx;
    s->temps_in_use = 0;
}

/* Report (and clear) whether any temps are still live; used to detect
   temp leaks at the end of a translation. */
int tcg_check_temp_count(void)
{
    TCGContext *s = &tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call().
 */
/* Emit an INDEX_op_call op invoking helper FUNC with NARGS arguments and
   return slot RET (TCG_CALL_DUMMY_ARG for void).  Handles per-host
   splitting/extension of 64-bit arguments before building the op. */
void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
                   int nargs, TCGArg *args)
{
    int i, real_args, nb_rets, pi, pi_first;
    unsigned sizemask, flags;
    TCGHelperInfo *info;

    /* FUNC must have been registered in tcg_context_init. */
    info = g_hash_table_lookup(s->helpers, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;

    TCGV_UNUSED_I64(retl);
    TCGV_UNUSED_I64(reth);
    if (sizemask != 0) {
        TCGArg *split_args = __builtin_alloca(sizeof(TCGArg) * nargs * 2);
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = GET_TCGV_I32(h);
                split_args[real_args++] = GET_TCGV_I32(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Hosts that require it get 32-bit arguments widened to 64 bits,
       sign- or zero-extended per the helper's sizemask. */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = GET_TCGV_I64(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    pi_first = pi = s->gen_next_parm_idx;
    if (ret != TCG_CALL_DUMMY_ARG) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            s->gen_opparam_buf[pi++] = GET_TCGV_I64(reth);
            s->gen_opparam_buf[pi++] = GET_TCGV_I64(retl);
            nb_rets = 2;
        } else {
            s->gen_opparam_buf[pi++] = ret;
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
            /* 64-bit return on a 32-bit host: two return slots, ordered
               by host endianness. */
#ifdef HOST_WORDS_BIGENDIAN
            s->gen_opparam_buf[pi++] = ret + 1;
            s->gen_opparam_buf[pi++] = ret;
#else
            s->gen_opparam_buf[pi++] = ret;
            s->gen_opparam_buf[pi++] = ret + 1;
#endif
            nb_rets = 2;
        } else {
            s->gen_opparam_buf[pi++] = ret;
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                s->gen_opparam_buf[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
            /* If stack grows up, then we will be placing successive
               arguments at lower addresses, which means we need to
               reverse the order compared to how we would normally
               treat either big or little-endian.  For those arguments
               that will wind up in registers, this still works for
               HPPA (the only current STACK_GROWSUP target) since the
               argument registers are *also* allocated in decreasing
               order.  If another such target is added, this logic may
               have to get more complicated to differentiate between
               stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            s->gen_opparam_buf[pi++] = args[i] + 1;
            s->gen_opparam_buf[pi++] = args[i];
#else
            s->gen_opparam_buf[pi++] = args[i];
            s->gen_opparam_buf[pi++] = args[i] + 1;
#endif
            real_args += 2;
            continue;
        }

        s->gen_opparam_buf[pi++] = args[i];
        real_args++;
    }
    s->gen_opparam_buf[pi++] = (uintptr_t)func;
    s->gen_opparam_buf[pi++] = flags;

    i = s->gen_next_op_idx;
    tcg_debug_assert(i < OPC_BUF_SIZE);
    tcg_debug_assert(pi <= OPPARAM_BUF_SIZE);

    /* Set links for sequential allocation during translation.  */
    s->gen_op_buf[i] = (TCGOp){
        .opc = INDEX_op_call,
        .callo = nb_rets,
        .calli = real_args,
        .args = pi_first,
        .prev = i - 1,
        .next = i + 1
    };

    /* Make sure the calli field didn't overflow.  */
    tcg_debug_assert(s->gen_op_buf[i].calli == real_args);

    s->gen_op_buf[0].prev = i;
    s->gen_next_op_idx = i + 1;
    s->gen_next_parm_idx = pi;

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            TCGv_i32 h = MAKE_TCGV_I32(args[real_args++]);
            TCGv_i32 l = MAKE_TCGV_I32(args[real_args++]);
            tcg_temp_free_i32(h);
            tcg_temp_free_i32(l);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(MAKE_TCGV_I64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Free the widened copies created before the call. */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = MAKE_TCGV_I64(args[i]);
            tcg_temp_free_i64(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}

/* Reset the value state of all temps before register allocation:
   fixed-reg globals live in registers, other globals in memory, locals
   in memory, plain temps dead. */
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i;
    TCGTemp *ts;
    for(i = 0; i < s->nb_globals; i++) {
        ts = &s->temps[i];
        if (ts->fixed_reg) {
            ts->val_type = TEMP_VAL_REG;
        } else {
            ts->val_type = TEMP_VAL_MEM;
        }
    }
    for(i = s->nb_globals; i < s->nb_temps; i++) {
        ts = &s->temps[i];
        if (ts->temp_local) {
            ts->val_type = TEMP_VAL_MEM;
        } else {
            ts->val_type = TEMP_VAL_DEAD;
        }
        ts->mem_allocated = 0;
        ts->fixed_reg = 0;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

/* Format a printable name for TS into BUF: the global's own name, or
   "locN"/"tmpN" for locals and plain temps. */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(s, ts);

    if (idx < s->nb_globals) {
        pstrcpy(buf, buf_size, ts->name);
    } else if (ts->temp_local) {
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
    } else {
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
    }
    return buf;
}

/* As tcg_get_arg_str_ptr, but starting from a temp index. */
static char *tcg_get_arg_str_idx(TCGContext *s, char *buf,
                                 int buf_size, int idx)
{
    tcg_debug_assert(idx >= 0 && idx < s->nb_temps);
    return tcg_get_arg_str_ptr(s, buf, buf_size, &s->temps[idx]);
}

/* Find helper name.
*/ 965 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val) 966 { 967 const char *ret = NULL; 968 if (s->helpers) { 969 TCGHelperInfo *info = g_hash_table_lookup(s->helpers, (gpointer)val); 970 if (info) { 971 ret = info->name; 972 } 973 } 974 return ret; 975 } 976 977 static const char * const cond_name[] = 978 { 979 [TCG_COND_NEVER] = "never", 980 [TCG_COND_ALWAYS] = "always", 981 [TCG_COND_EQ] = "eq", 982 [TCG_COND_NE] = "ne", 983 [TCG_COND_LT] = "lt", 984 [TCG_COND_GE] = "ge", 985 [TCG_COND_LE] = "le", 986 [TCG_COND_GT] = "gt", 987 [TCG_COND_LTU] = "ltu", 988 [TCG_COND_GEU] = "geu", 989 [TCG_COND_LEU] = "leu", 990 [TCG_COND_GTU] = "gtu" 991 }; 992 993 static const char * const ldst_name[] = 994 { 995 [MO_UB] = "ub", 996 [MO_SB] = "sb", 997 [MO_LEUW] = "leuw", 998 [MO_LESW] = "lesw", 999 [MO_LEUL] = "leul", 1000 [MO_LESL] = "lesl", 1001 [MO_LEQ] = "leq", 1002 [MO_BEUW] = "beuw", 1003 [MO_BESW] = "besw", 1004 [MO_BEUL] = "beul", 1005 [MO_BESL] = "besl", 1006 [MO_BEQ] = "beq", 1007 }; 1008 1009 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 1010 #ifdef ALIGNED_ONLY 1011 [MO_UNALN >> MO_ASHIFT] = "un+", 1012 [MO_ALIGN >> MO_ASHIFT] = "", 1013 #else 1014 [MO_UNALN >> MO_ASHIFT] = "", 1015 [MO_ALIGN >> MO_ASHIFT] = "al+", 1016 #endif 1017 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 1018 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 1019 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 1020 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 1021 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 1022 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 1023 }; 1024 1025 void tcg_dump_ops(TCGContext *s) 1026 { 1027 char buf[128]; 1028 TCGOp *op; 1029 int oi; 1030 1031 for (oi = s->gen_op_buf[0].next; oi != 0; oi = op->next) { 1032 int i, k, nb_oargs, nb_iargs, nb_cargs; 1033 const TCGOpDef *def; 1034 const TCGArg *args; 1035 TCGOpcode c; 1036 int col = 0; 1037 1038 op = &s->gen_op_buf[oi]; 1039 c = op->opc; 1040 def = &tcg_op_defs[c]; 1041 args = &s->gen_opparam_buf[op->args]; 1042 1043 if (c 
== INDEX_op_insn_start) { 1044 col += qemu_log("%s ----", oi != s->gen_op_buf[0].next ? "\n" : ""); 1045 1046 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 1047 target_ulong a; 1048 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 1049 a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2]; 1050 #else 1051 a = args[i]; 1052 #endif 1053 col += qemu_log(" " TARGET_FMT_lx, a); 1054 } 1055 } else if (c == INDEX_op_call) { 1056 /* variable number of arguments */ 1057 nb_oargs = op->callo; 1058 nb_iargs = op->calli; 1059 nb_cargs = def->nb_cargs; 1060 1061 /* function name, flags, out args */ 1062 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name, 1063 tcg_find_helper(s, args[nb_oargs + nb_iargs]), 1064 args[nb_oargs + nb_iargs + 1], nb_oargs); 1065 for (i = 0; i < nb_oargs; i++) { 1066 col += qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), 1067 args[i])); 1068 } 1069 for (i = 0; i < nb_iargs; i++) { 1070 TCGArg arg = args[nb_oargs + i]; 1071 const char *t = "<dummy>"; 1072 if (arg != TCG_CALL_DUMMY_ARG) { 1073 t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg); 1074 } 1075 col += qemu_log(",%s", t); 1076 } 1077 } else { 1078 col += qemu_log(" %s ", def->name); 1079 1080 nb_oargs = def->nb_oargs; 1081 nb_iargs = def->nb_iargs; 1082 nb_cargs = def->nb_cargs; 1083 1084 k = 0; 1085 for (i = 0; i < nb_oargs; i++) { 1086 if (k != 0) { 1087 col += qemu_log(","); 1088 } 1089 col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), 1090 args[k++])); 1091 } 1092 for (i = 0; i < nb_iargs; i++) { 1093 if (k != 0) { 1094 col += qemu_log(","); 1095 } 1096 col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), 1097 args[k++])); 1098 } 1099 switch (c) { 1100 case INDEX_op_brcond_i32: 1101 case INDEX_op_setcond_i32: 1102 case INDEX_op_movcond_i32: 1103 case INDEX_op_brcond2_i32: 1104 case INDEX_op_setcond2_i32: 1105 case INDEX_op_brcond_i64: 1106 case INDEX_op_setcond_i64: 1107 case INDEX_op_movcond_i64: 1108 if (args[k] < ARRAY_SIZE(cond_name) && 
cond_name[args[k]]) { 1109 col += qemu_log(",%s", cond_name[args[k++]]); 1110 } else { 1111 col += qemu_log(",$0x%" TCG_PRIlx, args[k++]); 1112 } 1113 i = 1; 1114 break; 1115 case INDEX_op_qemu_ld_i32: 1116 case INDEX_op_qemu_st_i32: 1117 case INDEX_op_qemu_ld_i64: 1118 case INDEX_op_qemu_st_i64: 1119 { 1120 TCGMemOpIdx oi = args[k++]; 1121 TCGMemOp op = get_memop(oi); 1122 unsigned ix = get_mmuidx(oi); 1123 1124 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 1125 col += qemu_log(",$0x%x,%u", op, ix); 1126 } else { 1127 const char *s_al, *s_op; 1128 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 1129 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 1130 col += qemu_log(",%s%s,%u", s_al, s_op, ix); 1131 } 1132 i = 1; 1133 } 1134 break; 1135 default: 1136 i = 0; 1137 break; 1138 } 1139 switch (c) { 1140 case INDEX_op_set_label: 1141 case INDEX_op_br: 1142 case INDEX_op_brcond_i32: 1143 case INDEX_op_brcond_i64: 1144 case INDEX_op_brcond2_i32: 1145 col += qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id); 1146 i++, k++; 1147 break; 1148 default: 1149 break; 1150 } 1151 for (; i < nb_cargs; i++, k++) { 1152 col += qemu_log("%s$0x%" TCG_PRIlx, k ? 
"," : "", args[k]); 1153 } 1154 } 1155 if (op->life) { 1156 unsigned life = op->life; 1157 1158 for (; col < 48; ++col) { 1159 putc(' ', qemu_logfile); 1160 } 1161 1162 if (life & (SYNC_ARG * 3)) { 1163 qemu_log(" sync:"); 1164 for (i = 0; i < 2; ++i) { 1165 if (life & (SYNC_ARG << i)) { 1166 qemu_log(" %d", i); 1167 } 1168 } 1169 } 1170 life /= DEAD_ARG; 1171 if (life) { 1172 qemu_log(" dead:"); 1173 for (i = 0; life; ++i, life >>= 1) { 1174 if (life & 1) { 1175 qemu_log(" %d", i); 1176 } 1177 } 1178 } 1179 } 1180 qemu_log("\n"); 1181 } 1182 } 1183 1184 /* we give more priority to constraints with less registers */ 1185
/* NOTE(review): returns 0 for a non-register constraint, else
   TCG_TARGET_NB_REGS - n + 1 where n is the number of allowed registers
   (an alias counts as n == 1), so tighter constraints get higher priority. */
static int get_constraint_priority(const TCGOpDef *def, int k) 1186 { 1187 const TCGArgConstraint *arg_ct; 1188 1189 int i, n; 1190 arg_ct = &def->args_ct[k]; 1191 if (arg_ct->ct & TCG_CT_ALIAS) { 1192 /* an alias is equivalent to a single register */ 1193 n = 1; 1194 } else { 1195 if (!(arg_ct->ct & TCG_CT_REG)) 1196 return 0; 1197 n = 0; 1198 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 1199 if (tcg_regset_test_reg(arg_ct->u.regs, i)) 1200 n++; 1201 } 1202 } 1203 return TCG_TARGET_NB_REGS - n + 1; 1204 } 1205 1206 /* sort from highest priority to lowest */ 1207
/* Simple O(n^2) exchange sort of sorted_args[start .. start+n); n is the
   (small) argument count of a single op, so this is cheap. */
static void sort_constraints(TCGOpDef *def, int start, int n) 1208 { 1209 int i, j, p1, p2, tmp; 1210 1211 for(i = 0; i < n; i++) 1212 def->sorted_args[start + i] = start + i; 1213 if (n <= 1) 1214 return; 1215 for(i = 0; i < n - 1; i++) { 1216 for(j = i + 1; j < n; j++) { 1217 p1 = get_constraint_priority(def, def->sorted_args[start + i]); 1218 p2 = get_constraint_priority(def, def->sorted_args[start + j]); 1219 if (p1 < p2) { 1220 tmp = def->sorted_args[start + i]; 1221 def->sorted_args[start + i] = def->sorted_args[start + j]; 1222 def->sorted_args[start + j] = tmp; 1223 } 1224 } 1225 } 1226 } 1227 1228
/* Parse every backend-provided TCGTargetOpDef constraint string (e.g. "r",
   "ri", "0", "&r") into tcg_op_defs[op].args_ct, then order the arguments
   by constraint priority for the register allocator. */
static void process_op_defs(TCGContext *s) 1229 { 1230 TCGOpcode op; 1231 1232 for (op = 0; op < NB_OPS; op++) { 1233 TCGOpDef *def = &tcg_op_defs[op]; 1234 const TCGTargetOpDef *tdefs; 
1235 TCGType type; 1236 int i, nb_args; 1237 1238 if (def->flags & TCG_OPF_NOT_PRESENT) { 1239 continue; 1240 } 1241 1242 nb_args = def->nb_iargs + def->nb_oargs; 1243 if (nb_args == 0) { 1244 continue; 1245 } 1246 1247 tdefs = tcg_target_op_def(op); 1248 /* Missing TCGTargetOpDef entry. */ 1249 tcg_debug_assert(tdefs != NULL); 1250 1251 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32); 1252 for (i = 0; i < nb_args; i++) { 1253 const char *ct_str = tdefs->args_ct_str[i]; 1254 /* Incomplete TCGTargetOpDef entry. */ 1255 tcg_debug_assert(ct_str != NULL); 1256 1257 tcg_regset_clear(def->args_ct[i].u.regs); 1258 def->args_ct[i].ct = 0; 1259 while (*ct_str != '\0') { 1260 switch(*ct_str) { 1261 case '0' ... '9': 1262 { 1263 int oarg = *ct_str - '0'; 1264 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); 1265 tcg_debug_assert(oarg < def->nb_oargs); 1266 tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG); 1267 /* TCG_CT_ALIAS is for the output arguments. 1268 The input is tagged with TCG_CT_IALIAS. */ 1269 def->args_ct[i] = def->args_ct[oarg]; 1270 def->args_ct[oarg].ct |= TCG_CT_ALIAS; 1271 def->args_ct[oarg].alias_index = i; 1272 def->args_ct[i].ct |= TCG_CT_IALIAS; 1273 def->args_ct[i].alias_index = oarg; 1274 } 1275 ct_str++; 1276 break; 1277 case '&': 1278 def->args_ct[i].ct |= TCG_CT_NEWREG; 1279 ct_str++; 1280 break; 1281 case 'i': 1282 def->args_ct[i].ct |= TCG_CT_CONST; 1283 ct_str++; 1284 break; 1285 default: 1286 ct_str = target_parse_constraint(&def->args_ct[i], 1287 ct_str, type); 1288 /* Typo in TCGTargetOpDef constraint. */ 1289 tcg_debug_assert(ct_str != NULL); 1290 } 1291 } 1292 } 1293 1294 /* TCGTargetOpDef entry with too much information? 
*/ 1295 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 1296 1297 /* sort the constraints (XXX: this is just an heuristic) */ 1298 sort_constraints(def, 0, def->nb_oargs); 1299 sort_constraints(def, def->nb_oargs, def->nb_iargs); 1300 } 1301 } 1302 1303
/* Unlink 'op' from the doubly-linked op list (prev/next are indices into
   gen_op_buf) and zero the slot; gen_op_buf[0] is the list terminator and
   must never be removed. */
void tcg_op_remove(TCGContext *s, TCGOp *op) 1304 { 1305 int next = op->next; 1306 int prev = op->prev; 1307 1308 /* We should never attempt to remove the list terminator. */ 1309 tcg_debug_assert(op != &s->gen_op_buf[0]); 1310 1311 s->gen_op_buf[next].prev = prev; 1312 s->gen_op_buf[prev].next = next; 1313 1314 memset(op, 0, sizeof(*op)); 1315 1316 #ifdef CONFIG_PROFILER 1317 s->del_op_count++; 1318 #endif 1319 } 1320 1321
/* Allocate a fresh op (and 'nargs' parameter slots) from the tails of
   gen_op_buf/gen_opparam_buf and splice it immediately before 'old_op'. */
TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 1322 TCGOpcode opc, int nargs) 1323 { 1324 int oi = s->gen_next_op_idx; 1325 int pi = s->gen_next_parm_idx; 1326 int prev = old_op->prev; 1327 int next = old_op - s->gen_op_buf; 1328 TCGOp *new_op; 1329 1330 tcg_debug_assert(oi < OPC_BUF_SIZE); 1331 tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE); 1332 s->gen_next_op_idx = oi + 1; 1333 s->gen_next_parm_idx = pi + nargs; 1334 1335 new_op = &s->gen_op_buf[oi]; 1336 *new_op = (TCGOp){ 1337 .opc = opc, 1338 .args = pi, 1339 .prev = prev, 1340 .next = next 1341 }; 1342 s->gen_op_buf[prev].next = oi; 1343 old_op->prev = oi; 1344 1345 return new_op; 1346 } 1347 1348
/* Mirror image of tcg_op_insert_before: allocate a fresh op and splice it
   immediately after 'old_op'. */
TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 1349 TCGOpcode opc, int nargs) 1350 { 1351 int oi = s->gen_next_op_idx; 1352 int pi = s->gen_next_parm_idx; 1353 int prev = old_op - s->gen_op_buf; 1354 int next = old_op->next; 1355 TCGOp *new_op; 1356 1357 tcg_debug_assert(oi < OPC_BUF_SIZE); 1358 tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE); 1359 s->gen_next_op_idx = oi + 1; 1360 s->gen_next_parm_idx = pi + nargs; 1361 1362 new_op = &s->gen_op_buf[oi]; 1363 *new_op = (TCGOp){ 1364 .opc = opc, 1365 .args = pi, 1366 .prev = prev, 1367 .next = next 1368 }; 1369 s->gen_op_buf[next].prev = oi; 1370 
old_op->next = oi; 1371 1372 return new_op; 1373 } 1374 1375
/* Per-temp liveness state bits used by the two liveness passes below:
   TS_DEAD = value not needed in a register, TS_MEM = value must be
   (or already is) in its canonical memory slot. */
#define TS_DEAD 1 1376 #define TS_MEM 2 1377 1378 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 1379 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 1380 1381 /* liveness analysis: end of function: all temps are dead, and globals 1382 should be in memory. */ 1383 static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state) 1384 { 1385 memset(temp_state, TS_DEAD | TS_MEM, s->nb_globals); 1386 memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals); 1387 } 1388 1389 /* liveness analysis: end of basic block: all temps are dead, globals 1390 and local temps should be in memory. */ 1391 static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state) 1392 { 1393 int i, n; 1394 1395 tcg_la_func_end(s, temp_state); 1396 for (i = s->nb_globals, n = s->nb_temps; i < n; i++) { 1397 if (s->temps[i].temp_local) { 1398 temp_state[i] |= TS_MEM; 1399 } 1400 } 1401 } 1402 1403 /* Liveness analysis : update the opc_arg_life array to tell if a 1404 given input arguments is dead. Instructions updating dead 1405 temporaries are removed. 
*/ 1406
/* NOTE(review): walks the op list BACKWARDS (from gen_op_buf[0].prev),
   computing op->life (DEAD_ARG/SYNC_ARG bits per argument) and removing
   ops whose outputs are all dead; double-word add/sub/mul ops may be
   degraded in place to their single-word equivalents. */
static void liveness_pass_1(TCGContext *s, uint8_t *temp_state) 1407 { 1408 int nb_globals = s->nb_globals; 1409 int oi, oi_prev; 1410 1411 tcg_la_func_end(s, temp_state); 1412 1413 for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) { 1414 int i, nb_iargs, nb_oargs; 1415 TCGOpcode opc_new, opc_new2; 1416 bool have_opc_new2; 1417 TCGLifeData arg_life = 0; 1418 TCGArg arg; 1419 1420 TCGOp * const op = &s->gen_op_buf[oi]; 1421 TCGArg * const args = &s->gen_opparam_buf[op->args]; 1422 TCGOpcode opc = op->opc; 1423 const TCGOpDef *def = &tcg_op_defs[opc]; 1424 1425 oi_prev = op->prev; 1426 1427 switch (opc) { 1428 case INDEX_op_call: 1429 { 1430 int call_flags; 1431 1432 nb_oargs = op->callo; 1433 nb_iargs = op->calli; 1434 call_flags = args[nb_oargs + nb_iargs + 1]; 1435 1436 /* pure functions can be removed if their result is unused */ 1437 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 1438 for (i = 0; i < nb_oargs; i++) { 1439 arg = args[i]; 1440 if (temp_state[arg] != TS_DEAD) { 1441 goto do_not_remove_call; 1442 } 1443 } 1444 goto do_remove; 1445 } else { 1446 do_not_remove_call: 1447 1448 /* output args are dead */ 1449 for (i = 0; i < nb_oargs; i++) { 1450 arg = args[i]; 1451 if (temp_state[arg] & TS_DEAD) { 1452 arg_life |= DEAD_ARG << i; 1453 } 1454 if (temp_state[arg] & TS_MEM) { 1455 arg_life |= SYNC_ARG << i; 1456 } 1457 temp_state[arg] = TS_DEAD; 1458 } 1459 1460 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 1461 TCG_CALL_NO_READ_GLOBALS))) { 1462 /* globals should go back to memory */ 1463 memset(temp_state, TS_DEAD | TS_MEM, nb_globals); 1464 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 1465 /* globals should be synced to memory */ 1466 for (i = 0; i < nb_globals; i++) { 1467 temp_state[i] |= TS_MEM; 1468 } 1469 } 1470 1471 /* record arguments that die in this helper */ 1472 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 1473 arg = args[i]; 1474 if (arg != TCG_CALL_DUMMY_ARG) { 1475 if (temp_state[arg] & TS_DEAD) { 1476 
arg_life |= DEAD_ARG << i; 1477 } 1478 } 1479 } 1480 /* input arguments are live for preceding opcodes */ 1481 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 1482 arg = args[i]; 1483 if (arg != TCG_CALL_DUMMY_ARG) { 1484 temp_state[arg] &= ~TS_DEAD; 1485 } 1486 } 1487 } 1488 } 1489 break; 1490 case INDEX_op_insn_start: 1491 break; 1492 case INDEX_op_discard: 1493 /* mark the temporary as dead */ 1494 temp_state[args[0]] = TS_DEAD; 1495 break; 1496 1497 case INDEX_op_add2_i32: 1498 opc_new = INDEX_op_add_i32; 1499 goto do_addsub2; 1500 case INDEX_op_sub2_i32: 1501 opc_new = INDEX_op_sub_i32; 1502 goto do_addsub2; 1503 case INDEX_op_add2_i64: 1504 opc_new = INDEX_op_add_i64; 1505 goto do_addsub2; 1506 case INDEX_op_sub2_i64: 1507 opc_new = INDEX_op_sub_i64; 1508 do_addsub2: 1509 nb_iargs = 4; 1510 nb_oargs = 2; 1511 /* Test if the high part of the operation is dead, but not 1512 the low part. The result can be optimized to a simple 1513 add or sub. This happens often for x86_64 guest when the 1514 cpu mode is set to 32 bit. */ 1515 if (temp_state[args[1]] == TS_DEAD) { 1516 if (temp_state[args[0]] == TS_DEAD) { 1517 goto do_remove; 1518 } 1519 /* Replace the opcode and adjust the args in place, 1520 leaving 3 unused args at the end. */ 1521 op->opc = opc = opc_new; 1522 args[1] = args[2]; 1523 args[2] = args[4]; 1524 /* Fall through and mark the single-word operation live. 
*/ 1525 nb_iargs = 2; 1526 nb_oargs = 1; 1527 } 1528 goto do_not_remove; 1529 1530
/* Double-word multiplies: if only one half of the result is live,
   degrade to mul (low half) or muluh/mulsh (high half, when the
   target provides it). */
case INDEX_op_mulu2_i32: 1531 opc_new = INDEX_op_mul_i32; 1532 opc_new2 = INDEX_op_muluh_i32; 1533 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 1534 goto do_mul2; 1535 case INDEX_op_muls2_i32: 1536 opc_new = INDEX_op_mul_i32; 1537 opc_new2 = INDEX_op_mulsh_i32; 1538 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 1539 goto do_mul2; 1540 case INDEX_op_mulu2_i64: 1541 opc_new = INDEX_op_mul_i64; 1542 opc_new2 = INDEX_op_muluh_i64; 1543 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 1544 goto do_mul2; 1545 case INDEX_op_muls2_i64: 1546 opc_new = INDEX_op_mul_i64; 1547 opc_new2 = INDEX_op_mulsh_i64; 1548 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 1549 goto do_mul2; 1550 do_mul2: 1551 nb_iargs = 2; 1552 nb_oargs = 2; 1553 if (temp_state[args[1]] == TS_DEAD) { 1554 if (temp_state[args[0]] == TS_DEAD) { 1555 /* Both parts of the operation are dead. */ 1556 goto do_remove; 1557 } 1558 /* The high part of the operation is dead; generate the low. */ 1559 op->opc = opc = opc_new; 1560 args[1] = args[2]; 1561 args[2] = args[3]; 1562 } else if (temp_state[args[0]] == TS_DEAD && have_opc_new2) { 1563 /* The low part of the operation is dead; generate the high. */ 1564 op->opc = opc = opc_new2; 1565 args[0] = args[1]; 1566 args[1] = args[2]; 1567 args[2] = args[3]; 1568 } else { 1569 goto do_not_remove; 1570 } 1571 /* Mark the single-word operation live. */ 1572 nb_oargs = 1; 1573 goto do_not_remove; 1574 1575 default: 1576 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 1577 nb_iargs = def->nb_iargs; 1578 nb_oargs = def->nb_oargs; 1579 1580 /* Test if the operation can be removed because all 1581 its outputs are dead. 
We assume that nb_oargs == 0 1582 implies side effects */ 1583 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 1584 for (i = 0; i < nb_oargs; i++) { 1585 if (temp_state[args[i]] != TS_DEAD) { 1586 goto do_not_remove; 1587 } 1588 } 1589 do_remove: 1590 tcg_op_remove(s, op); 1591 } else { 1592 do_not_remove: 1593 /* output args are dead */ 1594 for (i = 0; i < nb_oargs; i++) { 1595 arg = args[i]; 1596 if (temp_state[arg] & TS_DEAD) { 1597 arg_life |= DEAD_ARG << i; 1598 } 1599 if (temp_state[arg] & TS_MEM) { 1600 arg_life |= SYNC_ARG << i; 1601 } 1602 temp_state[arg] = TS_DEAD; 1603 } 1604 1605 /* if end of basic block, update */ 1606 if (def->flags & TCG_OPF_BB_END) { 1607 tcg_la_bb_end(s, temp_state); 1608 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 1609 /* globals should be synced to memory */ 1610 for (i = 0; i < nb_globals; i++) { 1611 temp_state[i] |= TS_MEM; 1612 } 1613 } 1614 1615 /* record arguments that die in this opcode */ 1616 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 1617 arg = args[i]; 1618 if (temp_state[arg] & TS_DEAD) { 1619 arg_life |= DEAD_ARG << i; 1620 } 1621 } 1622 /* input arguments are live for preceding opcodes */ 1623 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 1624 temp_state[args[i]] &= ~TS_DEAD; 1625 } 1626 } 1627 break; 1628 } 1629 op->life = arg_life; 1630 } 1631 } 1632 1633 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 1634
/* Returns true if any op argument was rewritten to a direct temporary. */
static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state) 1635 { 1636 int nb_globals = s->nb_globals; 1637 int16_t *dir_temps; 1638 int i, oi, oi_next; 1639 bool changes = false; 1640 1641 dir_temps = tcg_malloc(nb_globals * sizeof(int16_t)); 1642 memset(dir_temps, 0, nb_globals * sizeof(int16_t)); 1643 1644 /* Create a temporary for each indirect global. 
*/
/* NOTE(review): dir_temps[i] holds the temp index of the direct temporary
   shadowing indirect global i, or 0 when global i is not indirect. */
1645 for (i = 0; i < nb_globals; ++i) { 1646 TCGTemp *its = &s->temps[i]; 1647 if (its->indirect_reg) { 1648 TCGTemp *dts = tcg_temp_alloc(s); 1649 dts->type = its->type; 1650 dts->base_type = its->base_type; 1651 dir_temps[i] = temp_idx(s, dts); 1652 } 1653 } 1654 1655 memset(temp_state, TS_DEAD, nb_globals); 1656 1657 for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) { 1658 TCGOp *op = &s->gen_op_buf[oi]; 1659 TCGArg *args = &s->gen_opparam_buf[op->args]; 1660 TCGOpcode opc = op->opc; 1661 const TCGOpDef *def = &tcg_op_defs[opc]; 1662 TCGLifeData arg_life = op->life; 1663 int nb_iargs, nb_oargs, call_flags; 1664 TCGArg arg, dir; 1665 1666 oi_next = op->next; 1667 1668 if (opc == INDEX_op_call) { 1669 nb_oargs = op->callo; 1670 nb_iargs = op->calli; 1671 call_flags = args[nb_oargs + nb_iargs + 1]; 1672 } else { 1673 nb_iargs = def->nb_iargs; 1674 nb_oargs = def->nb_oargs; 1675 1676 /* Set flags similar to how calls require. */ 1677 if (def->flags & TCG_OPF_BB_END) { 1678 /* Like writing globals: save_globals */ 1679 call_flags = 0; 1680 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 1681 /* Like reading globals: sync_globals */ 1682 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 1683 } else { 1684 /* No effect on globals. */ 1685 call_flags = (TCG_CALL_NO_READ_GLOBALS | 1686 TCG_CALL_NO_WRITE_GLOBALS); 1687 } 1688 } 1689 1690 /* Make sure that input arguments are available. */ 1691 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 1692 arg = args[i]; 1693 /* Note this unsigned test catches TCG_CALL_ARG_DUMMY too. */ 1694 if (arg < nb_globals) { 1695 dir = dir_temps[arg]; 1696 if (dir != 0 && temp_state[arg] == TS_DEAD) { 1697 TCGTemp *its = &s->temps[arg]; 1698 TCGOpcode lopc = (its->type == TCG_TYPE_I32 1699 ? 
INDEX_op_ld_i32 1700 : INDEX_op_ld_i64); 1701
/* Insert a load of the indirect global into its direct temp
   just before the op that consumes it. */
TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); 1702 TCGArg *largs = &s->gen_opparam_buf[lop->args]; 1703 1704 largs[0] = dir; 1705 largs[1] = temp_idx(s, its->mem_base); 1706 largs[2] = its->mem_offset; 1707 1708 /* Loaded, but synced with memory. */ 1709 temp_state[arg] = TS_MEM; 1710 } 1711 } 1712 } 1713 1714 /* Perform input replacement, and mark inputs that became dead. 1715 No action is required except keeping temp_state up to date 1716 so that we reload when needed. */ 1717 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 1718 arg = args[i]; 1719 if (arg < nb_globals) { 1720 dir = dir_temps[arg]; 1721 if (dir != 0) { 1722 args[i] = dir; 1723 changes = true; 1724 if (IS_DEAD_ARG(i)) { 1725 temp_state[arg] = TS_DEAD; 1726 } 1727 } 1728 } 1729 } 1730 1731 /* Liveness analysis should ensure that the following are 1732 all correct, for call sites and basic block end points. */ 1733 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 1734 /* Nothing to do */ 1735 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 1736 for (i = 0; i < nb_globals; ++i) { 1737 /* Liveness should see that globals are synced back, 1738 that is, either TS_DEAD or TS_MEM. */ 1739 tcg_debug_assert(dir_temps[i] == 0 1740 || temp_state[i] != 0); 1741 } 1742 } else { 1743 for (i = 0; i < nb_globals; ++i) { 1744 /* Liveness should see that globals are saved back, 1745 that is, TS_DEAD, waiting to be reloaded. */ 1746 tcg_debug_assert(dir_temps[i] == 0 1747 || temp_state[i] == TS_DEAD); 1748 } 1749 } 1750 1751 /* Outputs become available. */ 1752 for (i = 0; i < nb_oargs; i++) { 1753 arg = args[i]; 1754 if (arg >= nb_globals) { 1755 continue; 1756 } 1757 dir = dir_temps[arg]; 1758 if (dir == 0) { 1759 continue; 1760 } 1761 args[i] = dir; 1762 changes = true; 1763 1764 /* The output is now live and modified. */ 1765 temp_state[arg] = 0; 1766 1767 /* Sync outputs upon their last write. 
*/ 1768 if (NEED_SYNC_ARG(i)) { 1769 TCGTemp *its = &s->temps[arg]; 1770 TCGOpcode sopc = (its->type == TCG_TYPE_I32 1771 ? INDEX_op_st_i32 1772 : INDEX_op_st_i64); 1773
/* Insert a store of the direct temp back to the indirect
   global's memory slot right after the defining op. */
TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 1774 TCGArg *sargs = &s->gen_opparam_buf[sop->args]; 1775 1776 sargs[0] = dir; 1777 sargs[1] = temp_idx(s, its->mem_base); 1778 sargs[2] = its->mem_offset; 1779 1780 temp_state[arg] = TS_MEM; 1781 } 1782 /* Drop outputs that are dead. */ 1783 if (IS_DEAD_ARG(i)) { 1784 temp_state[arg] = TS_DEAD; 1785 } 1786 } 1787 } 1788 1789 return changes; 1790 } 1791 1792 #ifdef CONFIG_DEBUG_TCG 1793
/* Debug-only dump of every temp's current location (register, memory,
   constant, or dead) plus the reg_to_temp[] map, to stdout. */
static void dump_regs(TCGContext *s) 1794 { 1795 TCGTemp *ts; 1796 int i; 1797 char buf[64]; 1798 1799 for(i = 0; i < s->nb_temps; i++) { 1800 ts = &s->temps[i]; 1801 printf(" %10s: ", tcg_get_arg_str_idx(s, buf, sizeof(buf), i)); 1802 switch(ts->val_type) { 1803 case TEMP_VAL_REG: 1804 printf("%s", tcg_target_reg_names[ts->reg]); 1805 break; 1806 case TEMP_VAL_MEM: 1807 printf("%d(%s)", (int)ts->mem_offset, 1808 tcg_target_reg_names[ts->mem_base->reg]); 1809 break; 1810 case TEMP_VAL_CONST: 1811 printf("$0x%" TCG_PRIlx, ts->val); 1812 break; 1813 case TEMP_VAL_DEAD: 1814 printf("D"); 1815 break; 1816 default: 1817 printf("???"); 1818 break; 1819 } 1820 printf("\n"); 1821 } 1822 1823 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 1824 if (s->reg_to_temp[i] != NULL) { 1825 printf("%s: %s\n", 1826 tcg_target_reg_names[i], 1827 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); 1828 } 1829 } 1830 } 1831 1832
/* Debug-only cross-check that reg_to_temp[] and each temp's
   val_type/reg agree in both directions; aborts on any mismatch. */
static void check_regs(TCGContext *s) 1833 { 1834 int reg; 1835 int k; 1836 TCGTemp *ts; 1837 char buf[64]; 1838 1839 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { 1840 ts = s->reg_to_temp[reg]; 1841 if (ts != NULL) { 1842 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { 1843 printf("Inconsistency for register %s:\n", 1844 tcg_target_reg_names[reg]); 1845 goto fail; 1846 } 1847 } 1848 } 1849 for (k = 0; k < s->nb_temps; k++) { 1850 ts = &s->temps[k]; 
1851 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg 1852 && s->reg_to_temp[ts->reg] != ts) { 1853 printf("Inconsistency for temp %s:\n", 1854 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 1855 fail: 1856 printf("reg state:\n"); 1857 dump_regs(s); 1858 tcg_abort(); 1859 } 1860 } 1861 } 1862 #endif 1863 1864
/* Assign a stack-frame slot (mem_base/mem_offset) to temp 'temp';
   aborts when the frame is exhausted. */
static void temp_allocate_frame(TCGContext *s, int temp) 1865 { 1866 TCGTemp *ts; 1867 ts = &s->temps[temp]; 1868 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64) 1869 /* Sparc64 stack is accessed with offset of 2047 */ 1870 s->current_frame_offset = (s->current_frame_offset + 1871 (tcg_target_long)sizeof(tcg_target_long) - 1) & 1872 ~(sizeof(tcg_target_long) - 1); 1873 #endif 1874 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) > 1875 s->frame_end) { 1876 tcg_abort(); 1877 } 1878 ts->mem_offset = s->current_frame_offset; 1879 ts->mem_base = s->frame_temp; 1880 ts->mem_allocated = 1; 1881 s->current_frame_offset += sizeof(tcg_target_long); 1882 } 1883 1884 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet); 1885 1886 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 1887 mark it free; otherwise mark it dead. */ 1888 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 1889 { 1890 if (ts->fixed_reg) { 1891 return; 1892 } 1893 if (ts->val_type == TEMP_VAL_REG) { 1894 s->reg_to_temp[ts->reg] = NULL; 1895 } 1896 ts->val_type = (free_or_dead < 0 1897 || ts->temp_local 1898 || temp_idx(s, ts) < s->nb_globals 1899 ? TEMP_VAL_MEM : TEMP_VAL_DEAD); 1900 } 1901 1902 /* Mark a temporary as dead. */ 1903 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 1904 { 1905 temp_free_or_dead(s, ts, 1); 1906 } 1907 1908 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 1909 registers needs to be allocated to store a constant. 
If 'free_or_dead' 1910 is non-zero, subsequently release the temporary; if it is positive, the 1911 temp is dead; if it is negative, the temp is free. */ 1912 static void temp_sync(TCGContext *s, TCGTemp *ts, 1913 TCGRegSet allocated_regs, int free_or_dead) 1914 { 1915 if (ts->fixed_reg) { 1916 return; 1917 } 1918 if (!ts->mem_coherent) { 1919 if (!ts->mem_allocated) { 1920 temp_allocate_frame(s, temp_idx(s, ts)); 1921 } 1922 switch (ts->val_type) { 1923 case TEMP_VAL_CONST: 1924 /* If we're going to free the temp immediately, then we won't 1925 require it later in a register, so attempt to store the 1926 constant to memory directly. */ 1927 if (free_or_dead 1928 && tcg_out_sti(s, ts->type, ts->val, 1929 ts->mem_base->reg, ts->mem_offset)) { 1930 break; 1931 } 1932 temp_load(s, ts, tcg_target_available_regs[ts->type], 1933 allocated_regs); 1934 /* fallthrough */ 1935 1936 case TEMP_VAL_REG: 1937 tcg_out_st(s, ts->type, ts->reg, 1938 ts->mem_base->reg, ts->mem_offset); 1939 break; 1940 1941 case TEMP_VAL_MEM: 1942 break; 1943 1944 case TEMP_VAL_DEAD: 1945 default: 1946 tcg_abort(); 1947 } 1948 ts->mem_coherent = 1; 1949 } 1950 if (free_or_dead) { 1951 temp_free_or_dead(s, ts, free_or_dead); 1952 } 1953 } 1954 1955 /* free register 'reg' by spilling the corresponding temporary if necessary */ 1956 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 1957 { 1958 TCGTemp *ts = s->reg_to_temp[reg]; 1959 if (ts != NULL) { 1960 temp_sync(s, ts, allocated_regs, -1); 1961 } 1962 } 1963 1964 /* Allocate a register belonging to reg1 & ~reg2 */ 1965
/* 'rev' selects the indirect_reg_alloc_order ordering instead of the
   default tcg_target_reg_alloc_order; if no register in the allowed set
   is free, an occupied one is spilled via tcg_reg_free. */
static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs, 1966 TCGRegSet allocated_regs, bool rev) 1967 { 1968 int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 1969 const int *order; 1970 TCGReg reg; 1971 TCGRegSet reg_ct; 1972 1973 tcg_regset_andnot(reg_ct, desired_regs, allocated_regs); 1974 order = rev ? 
indirect_reg_alloc_order : tcg_target_reg_alloc_order; 1975 1976 /* first try free registers */ 1977 for(i = 0; i < n; i++) { 1978 reg = order[i]; 1979 if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL) 1980 return reg; 1981 } 1982 1983 /* XXX: do better spill choice */ 1984 for(i = 0; i < n; i++) { 1985 reg = order[i]; 1986 if (tcg_regset_test_reg(reg_ct, reg)) { 1987 tcg_reg_free(s, reg, allocated_regs); 1988 return reg; 1989 } 1990 } 1991 1992 tcg_abort(); 1993 } 1994 1995 /* Make sure the temporary is in a register. If needed, allocate the register 1996 from DESIRED while avoiding ALLOCATED. */ 1997 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 1998 TCGRegSet allocated_regs) 1999 { 2000 TCGReg reg; 2001 2002 switch (ts->val_type) { 2003 case TEMP_VAL_REG: 2004 return; 2005 case TEMP_VAL_CONST: 2006 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base); 2007 tcg_out_movi(s, ts->type, reg, ts->val); 2008 ts->mem_coherent = 0; 2009 break; 2010 case TEMP_VAL_MEM: 2011 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base); 2012 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 2013 ts->mem_coherent = 1; 2014 break; 2015 case TEMP_VAL_DEAD: 2016 default: 2017 tcg_abort(); 2018 } 2019 ts->reg = reg; 2020 ts->val_type = TEMP_VAL_REG; 2021 s->reg_to_temp[reg] = ts; 2022 } 2023 2024 /* Save a temporary to memory. 'allocated_regs' is used in case a 2025 temporary registers needs to be allocated to store a constant. */ 2026 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 2027 { 2028 /* The liveness analysis already ensures that globals are back 2029 in memory. Keep an tcg_debug_assert for safety. */ 2030 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg); 2031 } 2032 2033 /* save globals to their canonical location and assume they can be 2034 modified be the following code. 
'allocated_regs' is used in case a 2035 temporary registers needs to be allocated to store a constant. */ 2036 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 2037 { 2038 int i; 2039 2040 for (i = 0; i < s->nb_globals; i++) { 2041 temp_save(s, &s->temps[i], allocated_regs); 2042 } 2043 } 2044 2045 /* sync globals to their canonical location and assume they can be 2046 read by the following code. 'allocated_regs' is used in case a 2047 temporary registers needs to be allocated to store a constant. */ 2048 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 2049 { 2050 int i; 2051 2052 for (i = 0; i < s->nb_globals; i++) { 2053 TCGTemp *ts = &s->temps[i]; 2054 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 2055 || ts->fixed_reg 2056 || ts->mem_coherent); 2057 } 2058 } 2059 2060 /* at the end of a basic block, we assume all temporaries are dead and 2061 all globals are stored at their canonical location. */ 2062 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 2063 { 2064 int i; 2065 2066 for (i = s->nb_globals; i < s->nb_temps; i++) { 2067 TCGTemp *ts = &s->temps[i]; 2068 if (ts->temp_local) { 2069 temp_save(s, ts, allocated_regs); 2070 } else { 2071 /* The liveness analysis already ensures that temps are dead. 2072 Keep an tcg_debug_assert for safety. */ 2073 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 2074 } 2075 } 2076 2077 save_globals(s, allocated_regs); 2078 } 2079 2080
/* Record a constant assignment to 'ots': for a fixed register emit the
   movi immediately; otherwise only mark the temp TEMP_VAL_CONST and let
   temp_sync/temp_load materialize the value if and when it is needed. */
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 2081 tcg_target_ulong val, TCGLifeData arg_life) 2082 { 2083 if (ots->fixed_reg) { 2084 /* For fixed registers, we do not do any constant propagation. */ 2085 tcg_out_movi(s, ots->type, ots->reg, val); 2086 return; 2087 } 2088 2089 /* The movi is not explicitly generated here. 
*/ 2090 if (ots->val_type == TEMP_VAL_REG) { 2091 s->reg_to_temp[ots->reg] = NULL; 2092 } 2093 ots->val_type = TEMP_VAL_CONST; 2094 ots->val = val; 2095 ots->mem_coherent = 0; 2096 if (NEED_SYNC_ARG(0)) { 2097 temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0)); 2098 } else if (IS_DEAD_ARG(0)) { 2099 temp_dead(s, ots); 2100 } 2101 } 2102 2103 static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args, 2104 TCGLifeData arg_life) 2105 { 2106 TCGTemp *ots = &s->temps[args[0]]; 2107 tcg_target_ulong val = args[1]; 2108 2109 tcg_reg_alloc_do_movi(s, ots, val, arg_life); 2110 } 2111 2112
/* Register-allocate a mov op: propagate constants, reuse the source
   register when the source dies here, or emit a real tcg_out_mov. */
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, 2113 const TCGArg *args, TCGLifeData arg_life) 2114 { 2115 TCGRegSet allocated_regs; 2116 TCGTemp *ts, *ots; 2117 TCGType otype, itype; 2118 2119 tcg_regset_set(allocated_regs, s->reserved_regs); 2120 ots = &s->temps[args[0]]; 2121 ts = &s->temps[args[1]]; 2122 2123 /* Note that otype != itype for no-op truncation. */ 2124 otype = ots->type; 2125 itype = ts->type; 2126 2127 if (ts->val_type == TEMP_VAL_CONST) { 2128 /* propagate constant or generate sti */ 2129 tcg_target_ulong val = ts->val; 2130 if (IS_DEAD_ARG(1)) { 2131 temp_dead(s, ts); 2132 } 2133 tcg_reg_alloc_do_movi(s, ots, val, arg_life); 2134 return; 2135 } 2136 2137 /* If the source value is in memory we're going to be forced 2138 to have it in a register in order to perform the copy. Copy 2139 the SOURCE value into its own register first, that way we 2140 don't have to reload SOURCE the next time it is used. */ 2141 if (ts->val_type == TEMP_VAL_MEM) { 2142 temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs); 2143 } 2144 2145 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 2146 if (IS_DEAD_ARG(0) && !ots->fixed_reg) { 2147 /* mov to a non-saved dead register makes no sense (even with 2148 liveness analysis disabled). 
*/ 2149 tcg_debug_assert(NEED_SYNC_ARG(0)); 2150 if (!ots->mem_allocated) { 2151 temp_allocate_frame(s, args[0]); 2152 } 2153 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); 2154 if (IS_DEAD_ARG(1)) { 2155 temp_dead(s, ts); 2156 } 2157 temp_dead(s, ots); 2158 } else { 2159 if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) { 2160 /* the mov can be suppressed */ 2161 if (ots->val_type == TEMP_VAL_REG) { 2162 s->reg_to_temp[ots->reg] = NULL; 2163 } 2164 ots->reg = ts->reg; 2165 temp_dead(s, ts); 2166 } else { 2167 if (ots->val_type != TEMP_VAL_REG) { 2168 /* When allocating a new register, make sure to not spill the 2169 input one. */ 2170 tcg_regset_set_reg(allocated_regs, ts->reg); 2171 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 2172 allocated_regs, ots->indirect_base); 2173 } 2174 tcg_out_mov(s, otype, ots->reg, ts->reg); 2175 } 2176 ots->val_type = TEMP_VAL_REG; 2177 ots->mem_coherent = 0; 2178 s->reg_to_temp[ots->reg] = ots; 2179 if (NEED_SYNC_ARG(0)) { 2180 temp_sync(s, ots, allocated_regs, 0); 2181 } 2182 } 2183 } 2184 2185
/* Generic op emission: satisfy each input constraint (emitting moves or
   loads as needed, honoring input/output aliasing), allocate outputs per
   the ALIAS/NEWREG/fixed-reg rules, emit the target instruction, then
   sync or free outputs according to arg_life. */
static void tcg_reg_alloc_op(TCGContext *s, 2186 const TCGOpDef *def, TCGOpcode opc, 2187 const TCGArg *args, TCGLifeData arg_life) 2188 { 2189 TCGRegSet i_allocated_regs; 2190 TCGRegSet o_allocated_regs; 2191 int i, k, nb_iargs, nb_oargs; 2192 TCGReg reg; 2193 TCGArg arg; 2194 const TCGArgConstraint *arg_ct; 2195 TCGTemp *ts; 2196 TCGArg new_args[TCG_MAX_OP_ARGS]; 2197 int const_args[TCG_MAX_OP_ARGS]; 2198 2199 nb_oargs = def->nb_oargs; 2200 nb_iargs = def->nb_iargs; 2201 2202 /* copy constants */ 2203 memcpy(new_args + nb_oargs + nb_iargs, 2204 args + nb_oargs + nb_iargs, 2205 sizeof(TCGArg) * def->nb_cargs); 2206 2207 tcg_regset_set(i_allocated_regs, s->reserved_regs); 2208 tcg_regset_set(o_allocated_regs, s->reserved_regs); 2209 2210 /* satisfy input constraints */ 2211 for(k = 0; k < nb_iargs; k++) { 2212 i = def->sorted_args[nb_oargs + k]; 2213 arg = args[i]; 2214 arg_ct = &def->args_ct[i]; 
2215 ts = &s->temps[arg]; 2216 2217 if (ts->val_type == TEMP_VAL_CONST 2218 && tcg_target_const_match(ts->val, ts->type, arg_ct)) { 2219 /* constant is OK for instruction */ 2220 const_args[i] = 1; 2221 new_args[i] = ts->val; 2222 goto iarg_end; 2223 } 2224 2225 temp_load(s, ts, arg_ct->u.regs, i_allocated_regs); 2226 2227 if (arg_ct->ct & TCG_CT_IALIAS) { 2228 if (ts->fixed_reg) { 2229 /* if fixed register, we must allocate a new register 2230 if the alias is not the same register */ 2231 if (arg != args[arg_ct->alias_index]) 2232 goto allocate_in_reg; 2233 } else { 2234 /* if the input is aliased to an output and if it is 2235 not dead after the instruction, we must allocate 2236 a new register and move it */ 2237 if (!IS_DEAD_ARG(i)) { 2238 goto allocate_in_reg; 2239 } 2240 /* check if the current register has already been allocated 2241 for another input aliased to an output */ 2242 int k2, i2; 2243 for (k2 = 0 ; k2 < k ; k2++) { 2244 i2 = def->sorted_args[nb_oargs + k2]; 2245 if ((def->args_ct[i2].ct & TCG_CT_IALIAS) && 2246 (new_args[i2] == ts->reg)) { 2247 goto allocate_in_reg; 2248 } 2249 } 2250 } 2251 } 2252 reg = ts->reg; 2253 if (tcg_regset_test_reg(arg_ct->u.regs, reg)) { 2254 /* nothing to do : the constraint is satisfied */ 2255 } else { 2256 allocate_in_reg: 2257 /* allocate a new register matching the constraint 2258 and move the temporary register into it */ 2259 reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs, 2260 ts->indirect_base); 2261 tcg_out_mov(s, ts->type, reg, ts->reg); 2262 } 2263 new_args[i] = reg; 2264 const_args[i] = 0; 2265 tcg_regset_set_reg(i_allocated_regs, reg); 2266 iarg_end: ; 2267 } 2268 2269 /* mark dead temporaries and free the associated registers */ 2270 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2271 if (IS_DEAD_ARG(i)) { 2272 temp_dead(s, &s->temps[args[i]]); 2273 } 2274 } 2275 2276 if (def->flags & TCG_OPF_BB_END) { 2277 tcg_reg_alloc_bb_end(s, i_allocated_regs); 2278 } else { 2279 if (def->flags & 
TCG_OPF_CALL_CLOBBER) { 2280 /* XXX: permit generic clobber register list ? */ 2281 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 2282 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 2283 tcg_reg_free(s, i, i_allocated_regs); 2284 } 2285 } 2286 } 2287 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2288 /* sync globals if the op has side effects and might trigger 2289 an exception. */ 2290 sync_globals(s, i_allocated_regs); 2291 } 2292 2293 /* satisfy the output constraints */ 2294 for(k = 0; k < nb_oargs; k++) { 2295 i = def->sorted_args[k]; 2296 arg = args[i]; 2297 arg_ct = &def->args_ct[i]; 2298 ts = &s->temps[arg]; 2299 if ((arg_ct->ct & TCG_CT_ALIAS) 2300 && !const_args[arg_ct->alias_index]) { 2301 reg = new_args[arg_ct->alias_index]; 2302 } else if (arg_ct->ct & TCG_CT_NEWREG) { 2303 reg = tcg_reg_alloc(s, arg_ct->u.regs, 2304 i_allocated_regs | o_allocated_regs, 2305 ts->indirect_base); 2306 } else { 2307 /* if fixed register, we try to use it */ 2308 reg = ts->reg; 2309 if (ts->fixed_reg && 2310 tcg_regset_test_reg(arg_ct->u.regs, reg)) { 2311 goto oarg_end; 2312 } 2313 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs, 2314 ts->indirect_base); 2315 } 2316 tcg_regset_set_reg(o_allocated_regs, reg); 2317 /* if a fixed register is used, then a move will be done afterwards */ 2318 if (!ts->fixed_reg) { 2319 if (ts->val_type == TEMP_VAL_REG) { 2320 s->reg_to_temp[ts->reg] = NULL; 2321 } 2322 ts->val_type = TEMP_VAL_REG; 2323 ts->reg = reg; 2324 /* temp value is modified, so the value kept in memory is 2325 potentially not the same */ 2326 ts->mem_coherent = 0; 2327 s->reg_to_temp[reg] = ts; 2328 } 2329 oarg_end: 2330 new_args[i] = reg; 2331 } 2332 } 2333 2334 /* emit instruction */ 2335 tcg_out_op(s, opc, new_args, const_args); 2336 2337 /* move the outputs in the correct register if needed */ 2338 for(i = 0; i < nb_oargs; i++) { 2339 ts = &s->temps[args[i]]; 2340 reg = new_args[i]; 2341 if (ts->fixed_reg && ts->reg != reg) { 2342 tcg_out_mov(s, 
ts->type, ts->reg, reg); 2343 } 2344 if (NEED_SYNC_ARG(i)) { 2345 temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i)); 2346 } else if (IS_DEAD_ARG(i)) { 2347 temp_dead(s, ts); 2348 } 2349 } 2350 } 2351 2352 #ifdef TCG_TARGET_STACK_GROWSUP 2353 #define STACK_DIR(x) (-(x)) 2354 #else 2355 #define STACK_DIR(x) (x) 2356 #endif 2357 2358 static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs, 2359 const TCGArg * const args, TCGLifeData arg_life) 2360 { 2361 int flags, nb_regs, i; 2362 TCGReg reg; 2363 TCGArg arg; 2364 TCGTemp *ts; 2365 intptr_t stack_offset; 2366 size_t call_stack_size; 2367 tcg_insn_unit *func_addr; 2368 int allocate_args; 2369 TCGRegSet allocated_regs; 2370 2371 func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs]; 2372 flags = args[nb_oargs + nb_iargs + 1]; 2373 2374 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 2375 if (nb_regs > nb_iargs) { 2376 nb_regs = nb_iargs; 2377 } 2378 2379 /* assign stack slots first */ 2380 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); 2381 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 2382 ~(TCG_TARGET_STACK_ALIGN - 1); 2383 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); 2384 if (allocate_args) { 2385 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed, 2386 preallocate call stack */ 2387 tcg_abort(); 2388 } 2389 2390 stack_offset = TCG_TARGET_CALL_STACK_OFFSET; 2391 for(i = nb_regs; i < nb_iargs; i++) { 2392 arg = args[nb_oargs + i]; 2393 #ifdef TCG_TARGET_STACK_GROWSUP 2394 stack_offset -= sizeof(tcg_target_long); 2395 #endif 2396 if (arg != TCG_CALL_DUMMY_ARG) { 2397 ts = &s->temps[arg]; 2398 temp_load(s, ts, tcg_target_available_regs[ts->type], 2399 s->reserved_regs); 2400 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); 2401 } 2402 #ifndef TCG_TARGET_STACK_GROWSUP 2403 stack_offset += sizeof(tcg_target_long); 2404 #endif 2405 } 2406 2407 /* assign input registers */ 2408 tcg_regset_set(allocated_regs, 
s->reserved_regs); 2409 for(i = 0; i < nb_regs; i++) { 2410 arg = args[nb_oargs + i]; 2411 if (arg != TCG_CALL_DUMMY_ARG) { 2412 ts = &s->temps[arg]; 2413 reg = tcg_target_call_iarg_regs[i]; 2414 tcg_reg_free(s, reg, allocated_regs); 2415 2416 if (ts->val_type == TEMP_VAL_REG) { 2417 if (ts->reg != reg) { 2418 tcg_out_mov(s, ts->type, reg, ts->reg); 2419 } 2420 } else { 2421 TCGRegSet arg_set; 2422 2423 tcg_regset_clear(arg_set); 2424 tcg_regset_set_reg(arg_set, reg); 2425 temp_load(s, ts, arg_set, allocated_regs); 2426 } 2427 2428 tcg_regset_set_reg(allocated_regs, reg); 2429 } 2430 } 2431 2432 /* mark dead temporaries and free the associated registers */ 2433 for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2434 if (IS_DEAD_ARG(i)) { 2435 temp_dead(s, &s->temps[args[i]]); 2436 } 2437 } 2438 2439 /* clobber call registers */ 2440 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 2441 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 2442 tcg_reg_free(s, i, allocated_regs); 2443 } 2444 } 2445 2446 /* Save globals if they might be written by the helper, sync them if 2447 they might be read. 
*/ 2448 if (flags & TCG_CALL_NO_READ_GLOBALS) { 2449 /* Nothing to do */ 2450 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) { 2451 sync_globals(s, allocated_regs); 2452 } else { 2453 save_globals(s, allocated_regs); 2454 } 2455 2456 tcg_out_call(s, func_addr); 2457 2458 /* assign output registers and emit moves if needed */ 2459 for(i = 0; i < nb_oargs; i++) { 2460 arg = args[i]; 2461 ts = &s->temps[arg]; 2462 reg = tcg_target_call_oarg_regs[i]; 2463 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 2464 2465 if (ts->fixed_reg) { 2466 if (ts->reg != reg) { 2467 tcg_out_mov(s, ts->type, ts->reg, reg); 2468 } 2469 } else { 2470 if (ts->val_type == TEMP_VAL_REG) { 2471 s->reg_to_temp[ts->reg] = NULL; 2472 } 2473 ts->val_type = TEMP_VAL_REG; 2474 ts->reg = reg; 2475 ts->mem_coherent = 0; 2476 s->reg_to_temp[reg] = ts; 2477 if (NEED_SYNC_ARG(i)) { 2478 temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i)); 2479 } else if (IS_DEAD_ARG(i)) { 2480 temp_dead(s, ts); 2481 } 2482 } 2483 } 2484 } 2485 2486 #ifdef CONFIG_PROFILER 2487 2488 static int64_t tcg_table_op_count[NB_OPS]; 2489 2490 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) 2491 { 2492 int i; 2493 2494 for (i = 0; i < NB_OPS; i++) { 2495 cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name, 2496 tcg_table_op_count[i]); 2497 } 2498 } 2499 #else 2500 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) 2501 { 2502 cpu_fprintf(f, "[TCG profiler not compiled]\n"); 2503 } 2504 #endif 2505 2506 2507 int tcg_gen_code(TCGContext *s, TranslationBlock *tb) 2508 { 2509 int i, oi, oi_next, num_insns; 2510 2511 #ifdef CONFIG_PROFILER 2512 { 2513 int n; 2514 2515 n = s->gen_op_buf[0].prev + 1; 2516 s->op_count += n; 2517 if (n > s->op_count_max) { 2518 s->op_count_max = n; 2519 } 2520 2521 n = s->nb_temps; 2522 s->temp_count += n; 2523 if (n > s->temp_count_max) { 2524 s->temp_count_max = n; 2525 } 2526 } 2527 #endif 2528 2529 #ifdef DEBUG_DISAS 2530 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 2531 && 
qemu_log_in_addr_range(tb->pc))) { 2532 qemu_log_lock(); 2533 qemu_log("OP:\n"); 2534 tcg_dump_ops(s); 2535 qemu_log("\n"); 2536 qemu_log_unlock(); 2537 } 2538 #endif 2539 2540 #ifdef CONFIG_PROFILER 2541 s->opt_time -= profile_getclock(); 2542 #endif 2543 2544 #ifdef USE_TCG_OPTIMIZATIONS 2545 tcg_optimize(s); 2546 #endif 2547 2548 #ifdef CONFIG_PROFILER 2549 s->opt_time += profile_getclock(); 2550 s->la_time -= profile_getclock(); 2551 #endif 2552 2553 { 2554 uint8_t *temp_state = tcg_malloc(s->nb_temps + s->nb_indirects); 2555 2556 liveness_pass_1(s, temp_state); 2557 2558 if (s->nb_indirects > 0) { 2559 #ifdef DEBUG_DISAS 2560 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 2561 && qemu_log_in_addr_range(tb->pc))) { 2562 qemu_log_lock(); 2563 qemu_log("OP before indirect lowering:\n"); 2564 tcg_dump_ops(s); 2565 qemu_log("\n"); 2566 qemu_log_unlock(); 2567 } 2568 #endif 2569 /* Replace indirect temps with direct temps. */ 2570 if (liveness_pass_2(s, temp_state)) { 2571 /* If changes were made, re-run liveness. 
*/ 2572 liveness_pass_1(s, temp_state); 2573 } 2574 } 2575 } 2576 2577 #ifdef CONFIG_PROFILER 2578 s->la_time += profile_getclock(); 2579 #endif 2580 2581 #ifdef DEBUG_DISAS 2582 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 2583 && qemu_log_in_addr_range(tb->pc))) { 2584 qemu_log_lock(); 2585 qemu_log("OP after optimization and liveness analysis:\n"); 2586 tcg_dump_ops(s); 2587 qemu_log("\n"); 2588 qemu_log_unlock(); 2589 } 2590 #endif 2591 2592 tcg_reg_alloc_start(s); 2593 2594 s->code_buf = tb->tc_ptr; 2595 s->code_ptr = tb->tc_ptr; 2596 2597 tcg_out_tb_init(s); 2598 2599 num_insns = -1; 2600 for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) { 2601 TCGOp * const op = &s->gen_op_buf[oi]; 2602 TCGArg * const args = &s->gen_opparam_buf[op->args]; 2603 TCGOpcode opc = op->opc; 2604 const TCGOpDef *def = &tcg_op_defs[opc]; 2605 TCGLifeData arg_life = op->life; 2606 2607 oi_next = op->next; 2608 #ifdef CONFIG_PROFILER 2609 tcg_table_op_count[opc]++; 2610 #endif 2611 2612 switch (opc) { 2613 case INDEX_op_mov_i32: 2614 case INDEX_op_mov_i64: 2615 tcg_reg_alloc_mov(s, def, args, arg_life); 2616 break; 2617 case INDEX_op_movi_i32: 2618 case INDEX_op_movi_i64: 2619 tcg_reg_alloc_movi(s, args, arg_life); 2620 break; 2621 case INDEX_op_insn_start: 2622 if (num_insns >= 0) { 2623 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 2624 } 2625 num_insns++; 2626 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 2627 target_ulong a; 2628 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 2629 a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2]; 2630 #else 2631 a = args[i]; 2632 #endif 2633 s->gen_insn_data[num_insns][i] = a; 2634 } 2635 break; 2636 case INDEX_op_discard: 2637 temp_dead(s, &s->temps[args[0]]); 2638 break; 2639 case INDEX_op_set_label: 2640 tcg_reg_alloc_bb_end(s, s->reserved_regs); 2641 tcg_out_label(s, arg_label(args[0]), s->code_ptr); 2642 break; 2643 case INDEX_op_call: 2644 tcg_reg_alloc_call(s, op->callo, op->calli, args, arg_life); 2645 break; 
2646 default: 2647 /* Sanity check that we've not introduced any unhandled opcodes. */ 2648 if (def->flags & TCG_OPF_NOT_PRESENT) { 2649 tcg_abort(); 2650 } 2651 /* Note: in order to speed up the code, it would be much 2652 faster to have specialized register allocator functions for 2653 some common argument patterns */ 2654 tcg_reg_alloc_op(s, def, opc, args, arg_life); 2655 break; 2656 } 2657 #ifdef CONFIG_DEBUG_TCG 2658 check_regs(s); 2659 #endif 2660 /* Test for (pending) buffer overflow. The assumption is that any 2661 one operation beginning below the high water mark cannot overrun 2662 the buffer completely. Thus we can test for overflow after 2663 generating code without having to check during generation. */ 2664 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 2665 return -1; 2666 } 2667 } 2668 tcg_debug_assert(num_insns >= 0); 2669 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 2670 2671 /* Generate TB finalization at the end of block */ 2672 if (!tcg_out_tb_finalize(s)) { 2673 return -1; 2674 } 2675 2676 /* flush instruction cache */ 2677 flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); 2678 2679 return tcg_current_code_size(s); 2680 } 2681 2682 #ifdef CONFIG_PROFILER 2683 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) 2684 { 2685 TCGContext *s = &tcg_ctx; 2686 int64_t tb_count = s->tb_count; 2687 int64_t tb_div_count = tb_count ? tb_count : 1; 2688 int64_t tot = s->interm_time + s->code_time; 2689 2690 cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n", 2691 tot, tot / 2.4e9); 2692 cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n", 2693 tb_count, s->tb_count1 - tb_count, 2694 (double)(s->tb_count1 - s->tb_count) 2695 / (s->tb_count1 ? 
s->tb_count1 : 1) * 100.0); 2696 cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n", 2697 (double)s->op_count / tb_div_count, s->op_count_max); 2698 cpu_fprintf(f, "deleted ops/TB %0.2f\n", 2699 (double)s->del_op_count / tb_div_count); 2700 cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n", 2701 (double)s->temp_count / tb_div_count, s->temp_count_max); 2702 cpu_fprintf(f, "avg host code/TB %0.1f\n", 2703 (double)s->code_out_len / tb_div_count); 2704 cpu_fprintf(f, "avg search data/TB %0.1f\n", 2705 (double)s->search_out_len / tb_div_count); 2706 2707 cpu_fprintf(f, "cycles/op %0.1f\n", 2708 s->op_count ? (double)tot / s->op_count : 0); 2709 cpu_fprintf(f, "cycles/in byte %0.1f\n", 2710 s->code_in_len ? (double)tot / s->code_in_len : 0); 2711 cpu_fprintf(f, "cycles/out byte %0.1f\n", 2712 s->code_out_len ? (double)tot / s->code_out_len : 0); 2713 cpu_fprintf(f, "cycles/search byte %0.1f\n", 2714 s->search_out_len ? (double)tot / s->search_out_len : 0); 2715 if (tot == 0) { 2716 tot = 1; 2717 } 2718 cpu_fprintf(f, " gen_interm time %0.1f%%\n", 2719 (double)s->interm_time / tot * 100.0); 2720 cpu_fprintf(f, " gen_code time %0.1f%%\n", 2721 (double)s->code_time / tot * 100.0); 2722 cpu_fprintf(f, "optim./code time %0.1f%%\n", 2723 (double)s->opt_time / (s->code_time ? s->code_time : 1) 2724 * 100.0); 2725 cpu_fprintf(f, "liveness/code time %0.1f%%\n", 2726 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0); 2727 cpu_fprintf(f, "cpu_restore count %" PRId64 "\n", 2728 s->restore_count); 2729 cpu_fprintf(f, " avg cycles %0.1f\n", 2730 s->restore_count ? 
(double)s->restore_time / s->restore_count : 0); 2731 } 2732 #else 2733 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) 2734 { 2735 cpu_fprintf(f, "[TCG profiler not compiled]\n"); 2736 } 2737 #endif 2738 2739 #ifdef ELF_HOST_MACHINE 2740 /* In order to use this feature, the backend needs to do three things: 2741 2742 (1) Define ELF_HOST_MACHINE to indicate both what value to 2743 put into the ELF image and to indicate support for the feature. 2744 2745 (2) Define tcg_register_jit. This should create a buffer containing 2746 the contents of a .debug_frame section that describes the post- 2747 prologue unwind info for the tcg machine. 2748 2749 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 2750 */ 2751 2752 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */ 2753 typedef enum { 2754 JIT_NOACTION = 0, 2755 JIT_REGISTER_FN, 2756 JIT_UNREGISTER_FN 2757 } jit_actions_t; 2758 2759 struct jit_code_entry { 2760 struct jit_code_entry *next_entry; 2761 struct jit_code_entry *prev_entry; 2762 const void *symfile_addr; 2763 uint64_t symfile_size; 2764 }; 2765 2766 struct jit_descriptor { 2767 uint32_t version; 2768 uint32_t action_flag; 2769 struct jit_code_entry *relevant_entry; 2770 struct jit_code_entry *first_entry; 2771 }; 2772 2773 void __jit_debug_register_code(void) __attribute__((noinline)); 2774 void __jit_debug_register_code(void) 2775 { 2776 asm(""); 2777 } 2778 2779 /* Must statically initialize the version, because GDB may check 2780 the version before we can set it. */ 2781 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 2782 2783 /* End GDB interface. 
*/ 2784 2785 static int find_string(const char *strtab, const char *str) 2786 { 2787 const char *p = strtab + 1; 2788 2789 while (1) { 2790 if (strcmp(p, str) == 0) { 2791 return p - strtab; 2792 } 2793 p += strlen(p) + 1; 2794 } 2795 } 2796 2797 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size, 2798 const void *debug_frame, 2799 size_t debug_frame_size) 2800 { 2801 struct __attribute__((packed)) DebugInfo { 2802 uint32_t len; 2803 uint16_t version; 2804 uint32_t abbrev; 2805 uint8_t ptr_size; 2806 uint8_t cu_die; 2807 uint16_t cu_lang; 2808 uintptr_t cu_low_pc; 2809 uintptr_t cu_high_pc; 2810 uint8_t fn_die; 2811 char fn_name[16]; 2812 uintptr_t fn_low_pc; 2813 uintptr_t fn_high_pc; 2814 uint8_t cu_eoc; 2815 }; 2816 2817 struct ElfImage { 2818 ElfW(Ehdr) ehdr; 2819 ElfW(Phdr) phdr; 2820 ElfW(Shdr) shdr[7]; 2821 ElfW(Sym) sym[2]; 2822 struct DebugInfo di; 2823 uint8_t da[24]; 2824 char str[80]; 2825 }; 2826 2827 struct ElfImage *img; 2828 2829 static const struct ElfImage img_template = { 2830 .ehdr = { 2831 .e_ident[EI_MAG0] = ELFMAG0, 2832 .e_ident[EI_MAG1] = ELFMAG1, 2833 .e_ident[EI_MAG2] = ELFMAG2, 2834 .e_ident[EI_MAG3] = ELFMAG3, 2835 .e_ident[EI_CLASS] = ELF_CLASS, 2836 .e_ident[EI_DATA] = ELF_DATA, 2837 .e_ident[EI_VERSION] = EV_CURRENT, 2838 .e_type = ET_EXEC, 2839 .e_machine = ELF_HOST_MACHINE, 2840 .e_version = EV_CURRENT, 2841 .e_phoff = offsetof(struct ElfImage, phdr), 2842 .e_shoff = offsetof(struct ElfImage, shdr), 2843 .e_ehsize = sizeof(ElfW(Shdr)), 2844 .e_phentsize = sizeof(ElfW(Phdr)), 2845 .e_phnum = 1, 2846 .e_shentsize = sizeof(ElfW(Shdr)), 2847 .e_shnum = ARRAY_SIZE(img->shdr), 2848 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 2849 #ifdef ELF_HOST_FLAGS 2850 .e_flags = ELF_HOST_FLAGS, 2851 #endif 2852 #ifdef ELF_OSABI 2853 .e_ident[EI_OSABI] = ELF_OSABI, 2854 #endif 2855 }, 2856 .phdr = { 2857 .p_type = PT_LOAD, 2858 .p_flags = PF_X, 2859 }, 2860 .shdr = { 2861 [0] = { .sh_type = SHT_NULL }, 2862 /* Trick: The contents of 
code_gen_buffer are not present in 2863 this fake ELF file; that got allocated elsewhere. Therefore 2864 we mark .text as SHT_NOBITS (similar to .bss) so that readers 2865 will not look for contents. We can record any address. */ 2866 [1] = { /* .text */ 2867 .sh_type = SHT_NOBITS, 2868 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 2869 }, 2870 [2] = { /* .debug_info */ 2871 .sh_type = SHT_PROGBITS, 2872 .sh_offset = offsetof(struct ElfImage, di), 2873 .sh_size = sizeof(struct DebugInfo), 2874 }, 2875 [3] = { /* .debug_abbrev */ 2876 .sh_type = SHT_PROGBITS, 2877 .sh_offset = offsetof(struct ElfImage, da), 2878 .sh_size = sizeof(img->da), 2879 }, 2880 [4] = { /* .debug_frame */ 2881 .sh_type = SHT_PROGBITS, 2882 .sh_offset = sizeof(struct ElfImage), 2883 }, 2884 [5] = { /* .symtab */ 2885 .sh_type = SHT_SYMTAB, 2886 .sh_offset = offsetof(struct ElfImage, sym), 2887 .sh_size = sizeof(img->sym), 2888 .sh_info = 1, 2889 .sh_link = ARRAY_SIZE(img->shdr) - 1, 2890 .sh_entsize = sizeof(ElfW(Sym)), 2891 }, 2892 [6] = { /* .strtab */ 2893 .sh_type = SHT_STRTAB, 2894 .sh_offset = offsetof(struct ElfImage, str), 2895 .sh_size = sizeof(img->str), 2896 } 2897 }, 2898 .sym = { 2899 [1] = { /* code_gen_buffer */ 2900 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 2901 .st_shndx = 1, 2902 } 2903 }, 2904 .di = { 2905 .len = sizeof(struct DebugInfo) - 4, 2906 .version = 2, 2907 .ptr_size = sizeof(void *), 2908 .cu_die = 1, 2909 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 2910 .fn_die = 2, 2911 .fn_name = "code_gen_buffer" 2912 }, 2913 .da = { 2914 1, /* abbrev number (the cu) */ 2915 0x11, 1, /* DW_TAG_compile_unit, has children */ 2916 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 2917 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 2918 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 2919 0, 0, /* end of abbrev */ 2920 2, /* abbrev number (the fn) */ 2921 0x2e, 0, /* DW_TAG_subprogram, no children */ 2922 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 2923 0x11, 0x1, /* DW_AT_low_pc, 
DW_FORM_addr */ 2924 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 2925 0, 0, /* end of abbrev */ 2926 0 /* no more abbrev */ 2927 }, 2928 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 2929 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 2930 }; 2931 2932 /* We only need a single jit entry; statically allocate it. */ 2933 static struct jit_code_entry one_entry; 2934 2935 uintptr_t buf = (uintptr_t)buf_ptr; 2936 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 2937 DebugFrameHeader *dfh; 2938 2939 img = g_malloc(img_size); 2940 *img = img_template; 2941 2942 img->phdr.p_vaddr = buf; 2943 img->phdr.p_paddr = buf; 2944 img->phdr.p_memsz = buf_size; 2945 2946 img->shdr[1].sh_name = find_string(img->str, ".text"); 2947 img->shdr[1].sh_addr = buf; 2948 img->shdr[1].sh_size = buf_size; 2949 2950 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 2951 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 2952 2953 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 2954 img->shdr[4].sh_size = debug_frame_size; 2955 2956 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 2957 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 2958 2959 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 2960 img->sym[1].st_value = buf; 2961 img->sym[1].st_size = buf_size; 2962 2963 img->di.cu_low_pc = buf; 2964 img->di.cu_high_pc = buf + buf_size; 2965 img->di.fn_low_pc = buf; 2966 img->di.fn_high_pc = buf + buf_size; 2967 2968 dfh = (DebugFrameHeader *)(img + 1); 2969 memcpy(dfh, debug_frame, debug_frame_size); 2970 dfh->fde.func_start = buf; 2971 dfh->fde.func_len = buf_size; 2972 2973 #ifdef DEBUG_JIT 2974 /* Enable this block to be able to debug the ELF image file creation. 2975 One can use readelf, objdump, or other inspection utilities. 
*/ 2976 { 2977 FILE *f = fopen("/tmp/qemu.jit", "w+b"); 2978 if (f) { 2979 if (fwrite(img, img_size, 1, f) != img_size) { 2980 /* Avoid stupid unused return value warning for fwrite. */ 2981 } 2982 fclose(f); 2983 } 2984 } 2985 #endif 2986 2987 one_entry.symfile_addr = img; 2988 one_entry.symfile_size = img_size; 2989 2990 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 2991 __jit_debug_descriptor.relevant_entry = &one_entry; 2992 __jit_debug_descriptor.first_entry = &one_entry; 2993 __jit_debug_register_code(); 2994 } 2995 #else 2996 /* No support for the feature. Provide the entry point expected by exec.c, 2997 and implement the internal function we declared earlier. */ 2998 2999 static void tcg_register_jit_int(void *buf, size_t size, 3000 const void *debug_frame, 3001 size_t debug_frame_size) 3002 { 3003 } 3004 3005 void tcg_register_jit(void *buf, size_t buf_size) 3006 { 3007 } 3008 #endif /* ELF_HOST_MACHINE */ 3009