/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/timer.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/cpu-common.h"
#include "exec/exec-all.h"

#include "tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.inc.c and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.inc.c.
*/ 100 static const char *target_parse_constraint(TCGArgConstraint *ct, 101 const char *ct_str, TCGType type); 102 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, 103 intptr_t arg2); 104 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 105 static void tcg_out_movi(TCGContext *s, TCGType type, 106 TCGReg ret, tcg_target_long arg); 107 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, 108 const int *const_args); 109 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, 110 intptr_t arg2); 111 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 112 TCGReg base, intptr_t ofs); 113 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target); 114 static int tcg_target_const_match(tcg_target_long val, TCGType type, 115 const TCGArgConstraint *arg_ct); 116 #ifdef TCG_TARGET_NEED_LDST_LABELS 117 static bool tcg_out_ldst_finalize(TCGContext *s); 118 #endif 119 120 #define TCG_HIGHWATER 1024 121 122 static TCGContext **tcg_ctxs; 123 static unsigned int n_tcg_ctxs; 124 TCGv_env cpu_env = 0; 125 126 /* 127 * We divide code_gen_buffer into equally-sized "regions" that TCG threads 128 * dynamically allocate from as demand dictates. Given appropriate region 129 * sizing, this minimizes flushes even when some TCG threads generate a lot 130 * more code than others. 131 */ 132 struct tcg_region_state { 133 QemuMutex lock; 134 135 /* fields set at init time */ 136 void *start; 137 void *start_aligned; 138 void *end; 139 size_t n; 140 size_t size; /* size of one region */ 141 size_t stride; /* .size + guard size */ 142 143 /* fields protected by the lock */ 144 size_t current; /* current region index */ 145 size_t agg_size_full; /* aggregate size of full regions */ 146 }; 147 148 static struct tcg_region_state region; 149 150 static TCGRegSet tcg_target_available_regs[2]; 151 static TCGRegSet tcg_target_call_clobber_regs; 152 153 #if TCG_TARGET_INSN_UNIT_SIZE == 1 154 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v) 155 { 156 *s->code_ptr++ = v; 157 } 158 159 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p, 160 uint8_t v) 161 { 162 *p = v; 163 } 164 #endif 165 166 #if TCG_TARGET_INSN_UNIT_SIZE <= 2 167 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v) 168 { 169 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 170 *s->code_ptr++ = v; 171 } else { 172 tcg_insn_unit *p = s->code_ptr; 173 memcpy(p, &v, sizeof(v)); 174 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE); 175 } 176 } 177 178 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p, 179 uint16_t v) 180 { 181 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 182 *p = v; 183 } else { 184 memcpy(p, &v, sizeof(v)); 185 } 186 } 187 #endif 188 189 #if TCG_TARGET_INSN_UNIT_SIZE <= 4 190 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v) 191 { 192 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 193 *s->code_ptr++ = v; 194 } else { 195 tcg_insn_unit *p = s->code_ptr; 196 memcpy(p, &v, sizeof(v)); 197 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE); 198 } 199 } 200 201 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p, 202 uint32_t v) 203 { 204 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 205 *p = v; 206 } else { 207 memcpy(p, &v, sizeof(v)); 208 } 209 } 210 #endif 211 212 #if TCG_TARGET_INSN_UNIT_SIZE <= 8 213 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v) 214 { 215 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 
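        /* An insn unit is 64 bits wide, so the value fits in a single unit. */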
216 *s->code_ptr++ = v; 217 } else { 218 tcg_insn_unit *p = s->code_ptr; 219 memcpy(p, &v, sizeof(v)); 220 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE); 221 } 222 } 223 224 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p, 225 uint64_t v) 226 { 227 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 228 *p = v; 229 } else { 230 memcpy(p, &v, sizeof(v)); 231 } 232 } 233 #endif 234 235 /* label relocation processing */ 236 237 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, 238 TCGLabel *l, intptr_t addend) 239 { 240 TCGRelocation *r; 241 242 if (l->has_value) { 243 /* FIXME: This may break relocations on RISC targets that 244 modify instruction fields in place. The caller may not have 245 written the initial value. */ 246 patch_reloc(code_ptr, type, l->u.value, addend); 247 } else { 248 /* add a new relocation entry */ 249 r = tcg_malloc(sizeof(TCGRelocation)); 250 r->type = type; 251 r->ptr = code_ptr; 252 r->addend = addend; 253 r->next = l->u.first_reloc; 254 l->u.first_reloc = r; 255 } 256 } 257 258 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr) 259 { 260 intptr_t value = (intptr_t)ptr; 261 TCGRelocation *r; 262 263 tcg_debug_assert(!l->has_value); 264 265 for (r = l->u.first_reloc; r != NULL; r = r->next) { 266 patch_reloc(r->ptr, r->type, value, r->addend); 267 } 268 269 l->has_value = 1; 270 l->u.value_ptr = ptr; 271 } 272 273 TCGLabel *gen_new_label(void) 274 { 275 TCGContext *s = tcg_ctx; 276 TCGLabel *l = tcg_malloc(sizeof(TCGLabel)); 277 278 *l = (TCGLabel){ 279 .id = s->nb_labels++ 280 }; 281 282 return l; 283 } 284 285 #include "tcg-target.inc.c" 286 287 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend) 288 { 289 void *start, *end; 290 291 start = region.start_aligned + curr_region * region.stride; 292 end = start + region.size; 293 294 if (curr_region == 0) { 295 start = region.start; 296 } 297 if (curr_region == region.n - 1) { 298 end = region.end; 299 } 300 301 *pstart = start; 302 *pend = end; 303 } 304 305 static void tcg_region_assign(TCGContext *s, size_t curr_region) 306 { 307 void *start, *end; 308 309 tcg_region_bounds(curr_region, &start, &end); 310 311 s->code_gen_buffer = start; 312 s->code_gen_ptr = start; 313 s->code_gen_buffer_size = end - start; 314 s->code_gen_highwater = end - TCG_HIGHWATER; 315 } 316 317 static bool tcg_region_alloc__locked(TCGContext *s) 318 { 319 if (region.current == region.n) { 320 return true; 321 } 322 tcg_region_assign(s, region.current); 323 region.current++; 324 return false; 325 } 326 327 /* 328 * Request a new region once the one in use has filled up. 329 * Returns true on error. 330 */ 331 static bool tcg_region_alloc(TCGContext *s) 332 { 333 bool err; 334 /* read the region size now; alloc__locked will overwrite it on success */ 335 size_t size_full = s->code_gen_buffer_size; 336 337 qemu_mutex_lock(®ion.lock); 338 err = tcg_region_alloc__locked(s); 339 if (!err) { 340 region.agg_size_full += size_full - TCG_HIGHWATER; 341 } 342 qemu_mutex_unlock(®ion.lock); 343 return err; 344 } 345 346 /* 347 * Perform a context's first region allocation. 348 * This function does _not_ increment region.agg_size_full. 
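 * There is no previously-filled region to account for at this point.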
349 */ 350 static inline bool tcg_region_initial_alloc__locked(TCGContext *s) 351 { 352 return tcg_region_alloc__locked(s); 353 } 354 355 /* Call from a safe-work context */ 356 void tcg_region_reset_all(void) 357 { 358 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); 359 unsigned int i; 360 361 qemu_mutex_lock(®ion.lock); 362 region.current = 0; 363 region.agg_size_full = 0; 364 365 for (i = 0; i < n_ctxs; i++) { 366 TCGContext *s = atomic_read(&tcg_ctxs[i]); 367 bool err = tcg_region_initial_alloc__locked(s); 368 369 g_assert(!err); 370 } 371 qemu_mutex_unlock(®ion.lock); 372 } 373 374 #ifdef CONFIG_USER_ONLY 375 static size_t tcg_n_regions(void) 376 { 377 return 1; 378 } 379 #else 380 /* 381 * It is likely that some vCPUs will translate more code than others, so we 382 * first try to set more regions than max_cpus, with those regions being of 383 * reasonable size. If that's not possible we make do by evenly dividing 384 * the code_gen_buffer among the vCPUs. 385 */ 386 static size_t tcg_n_regions(void) 387 { 388 size_t i; 389 390 /* Use a single region if all we have is one vCPU thread */ 391 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) { 392 return 1; 393 } 394 395 /* Try to have more regions than max_cpus, with each region being >= 2 MB */ 396 for (i = 8; i > 0; i--) { 397 size_t regions_per_thread = i; 398 size_t region_size; 399 400 region_size = tcg_init_ctx.code_gen_buffer_size; 401 region_size /= max_cpus * regions_per_thread; 402 403 if (region_size >= 2 * 1024u * 1024) { 404 return max_cpus * regions_per_thread; 405 } 406 } 407 /* If we can't, then just allocate one region per vCPU thread */ 408 return max_cpus; 409 } 410 #endif 411 412 /* 413 * Initializes region partitioning. 414 * 415 * Called at init time from the parent thread (i.e. the one calling 416 * tcg_context_init), after the target's TCG globals have been set. 417 * 418 * Region partitioning works by splitting code_gen_buffer into separate regions, 419 * and then assigning regions to TCG threads so that the threads can translate 420 * code in parallel without synchronization. 421 * 422 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at 423 * least max_cpus regions in MTTCG. In !MTTCG we use a single region. 424 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...]) 425 * must have been parsed before calling this function, since it calls 426 * qemu_tcg_mttcg_enabled(). 427 * 428 * In user-mode we use a single region. Having multiple regions in user-mode 429 * is not supported, because the number of vCPU threads (recall that each thread 430 * spawned by the guest corresponds to a vCPU thread) is only bounded by the 431 * OS, and usually this number is huge (tens of thousands is not uncommon). 432 * Thus, given this large bound on the number of vCPU threads and the fact 433 * that code_gen_buffer is allocated at compile-time, we cannot guarantee 434 * that the availability of at least one region per vCPU thread. 435 * 436 * However, this user-mode limitation is unlikely to be a significant problem 437 * in practice. Multi-threaded guests share most if not all of their translated 438 * code, which makes parallel code generation less appealing than in softmmu. 
439 */ 440 void tcg_region_init(void) 441 { 442 void *buf = tcg_init_ctx.code_gen_buffer; 443 void *aligned; 444 size_t size = tcg_init_ctx.code_gen_buffer_size; 445 size_t page_size = qemu_real_host_page_size; 446 size_t region_size; 447 size_t n_regions; 448 size_t i; 449 450 n_regions = tcg_n_regions(); 451 452 /* The first region will be 'aligned - buf' bytes larger than the others */ 453 aligned = QEMU_ALIGN_PTR_UP(buf, page_size); 454 g_assert(aligned < tcg_init_ctx.code_gen_buffer + size); 455 /* 456 * Make region_size a multiple of page_size, using aligned as the start. 457 * As a result of this we might end up with a few extra pages at the end of 458 * the buffer; we will assign those to the last region. 459 */ 460 region_size = (size - (aligned - buf)) / n_regions; 461 region_size = QEMU_ALIGN_DOWN(region_size, page_size); 462 463 /* A region must have at least 2 pages; one code, one guard */ 464 g_assert(region_size >= 2 * page_size); 465 466 /* init the region struct */ 467 qemu_mutex_init(®ion.lock); 468 region.n = n_regions; 469 region.size = region_size - page_size; 470 region.stride = region_size; 471 region.start = buf; 472 region.start_aligned = aligned; 473 /* page-align the end, since its last page will be a guard page */ 474 region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size); 475 /* account for that last guard page */ 476 region.end -= page_size; 477 478 /* set guard pages */ 479 for (i = 0; i < region.n; i++) { 480 void *start, *end; 481 int rc; 482 483 tcg_region_bounds(i, &start, &end); 484 rc = qemu_mprotect_none(end, page_size); 485 g_assert(!rc); 486 } 487 488 /* In user-mode we support only one ctx, so do the initial allocation now */ 489 #ifdef CONFIG_USER_ONLY 490 { 491 bool err = tcg_region_initial_alloc__locked(tcg_ctx); 492 493 g_assert(!err); 494 } 495 #endif 496 } 497 498 /* 499 * All TCG threads except the parent (i.e. the one that called tcg_context_init 500 * and registered the target's TCG globals) must register with this function 501 * before initiating translation. 502 * 503 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation 504 * of tcg_region_init() for the reasoning behind this. 505 * 506 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in 507 * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context 508 * is not used anymore for translation once this function is called. 509 * 510 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates 511 * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode. 512 */ 513 #ifdef CONFIG_USER_ONLY 514 void tcg_register_thread(void) 515 { 516 tcg_ctx = &tcg_init_ctx; 517 } 518 #else 519 void tcg_register_thread(void) 520 { 521 TCGContext *s = g_malloc(sizeof(*s)); 522 unsigned int i, n; 523 bool err; 524 525 *s = tcg_init_ctx; 526 527 /* Relink mem_base. 
*/ 528 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) { 529 if (tcg_init_ctx.temps[i].mem_base) { 530 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps; 531 tcg_debug_assert(b >= 0 && b < n); 532 s->temps[i].mem_base = &s->temps[b]; 533 } 534 } 535 536 /* Claim an entry in tcg_ctxs */ 537 n = atomic_fetch_inc(&n_tcg_ctxs); 538 g_assert(n < max_cpus); 539 atomic_set(&tcg_ctxs[n], s); 540 541 tcg_ctx = s; 542 qemu_mutex_lock(®ion.lock); 543 err = tcg_region_initial_alloc__locked(tcg_ctx); 544 g_assert(!err); 545 qemu_mutex_unlock(®ion.lock); 546 } 547 #endif /* !CONFIG_USER_ONLY */ 548 549 /* 550 * Returns the size (in bytes) of all translated code (i.e. from all regions) 551 * currently in the cache. 552 * See also: tcg_code_capacity() 553 * Do not confuse with tcg_current_code_size(); that one applies to a single 554 * TCG context. 555 */ 556 size_t tcg_code_size(void) 557 { 558 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); 559 unsigned int i; 560 size_t total; 561 562 qemu_mutex_lock(®ion.lock); 563 total = region.agg_size_full; 564 for (i = 0; i < n_ctxs; i++) { 565 const TCGContext *s = atomic_read(&tcg_ctxs[i]); 566 size_t size; 567 568 size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer; 569 g_assert(size <= s->code_gen_buffer_size); 570 total += size; 571 } 572 qemu_mutex_unlock(®ion.lock); 573 return total; 574 } 575 576 /* 577 * Returns the code capacity (in bytes) of the entire cache, i.e. including all 578 * regions. 579 * See also: tcg_code_size() 580 */ 581 size_t tcg_code_capacity(void) 582 { 583 size_t guard_size, capacity; 584 585 /* no need for synchronization; these variables are set at init time */ 586 guard_size = region.stride - region.size; 587 capacity = region.end + guard_size - region.start; 588 capacity -= region.n * (guard_size + TCG_HIGHWATER); 589 return capacity; 590 } 591 592 /* pool based memory allocation */ 593 void *tcg_malloc_internal(TCGContext *s, int size) 594 { 595 TCGPool *p; 596 int pool_size; 597 598 if (size > TCG_POOL_CHUNK_SIZE) { 599 /* big malloc: insert a new pool (XXX: could optimize) */ 600 p = g_malloc(sizeof(TCGPool) + size); 601 p->size = size; 602 p->next = s->pool_first_large; 603 s->pool_first_large = p; 604 return p->data; 605 } else { 606 p = s->pool_current; 607 if (!p) { 608 p = s->pool_first; 609 if (!p) 610 goto new_pool; 611 } else { 612 if (!p->next) { 613 new_pool: 614 pool_size = TCG_POOL_CHUNK_SIZE; 615 p = g_malloc(sizeof(TCGPool) + pool_size); 616 p->size = pool_size; 617 p->next = NULL; 618 if (s->pool_current) 619 s->pool_current->next = p; 620 else 621 s->pool_first = p; 622 } else { 623 p = p->next; 624 } 625 } 626 } 627 s->pool_current = p; 628 s->pool_cur = p->data + size; 629 s->pool_end = p->data + p->size; 630 return p->data; 631 } 632 633 void tcg_pool_reset(TCGContext *s) 634 { 635 TCGPool *p, *t; 636 for (p = s->pool_first_large; p; p = t) { 637 t = p->next; 638 g_free(p); 639 } 640 s->pool_first_large = NULL; 641 s->pool_cur = s->pool_end = NULL; 642 s->pool_current = NULL; 643 } 644 645 typedef struct TCGHelperInfo { 646 void *func; 647 const char *name; 648 unsigned flags; 649 unsigned sizemask; 650 } TCGHelperInfo; 651 652 #include "exec/helper-proto.h" 653 654 static const TCGHelperInfo all_helpers[] = { 655 #include "exec/helper-tcg.h" 656 }; 657 static GHashTable *helper_table; 658 659 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; 660 static void process_op_defs(TCGContext *s); 661 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, 
TCGType type, 662 TCGReg reg, const char *name); 663 664 void tcg_context_init(TCGContext *s) 665 { 666 int op, total_args, n, i; 667 TCGOpDef *def; 668 TCGArgConstraint *args_ct; 669 int *sorted_args; 670 TCGTemp *ts; 671 672 memset(s, 0, sizeof(*s)); 673 s->nb_globals = 0; 674 675 /* Count total number of arguments and allocate the corresponding 676 space */ 677 total_args = 0; 678 for(op = 0; op < NB_OPS; op++) { 679 def = &tcg_op_defs[op]; 680 n = def->nb_iargs + def->nb_oargs; 681 total_args += n; 682 } 683 684 args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args); 685 sorted_args = g_malloc(sizeof(int) * total_args); 686 687 for(op = 0; op < NB_OPS; op++) { 688 def = &tcg_op_defs[op]; 689 def->args_ct = args_ct; 690 def->sorted_args = sorted_args; 691 n = def->nb_iargs + def->nb_oargs; 692 sorted_args += n; 693 args_ct += n; 694 } 695 696 /* Register helpers. */ 697 /* Use g_direct_hash/equal for direct pointer comparisons on func. */ 698 helper_table = g_hash_table_new(NULL, NULL); 699 700 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { 701 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func, 702 (gpointer)&all_helpers[i]); 703 } 704 705 tcg_target_init(s); 706 process_op_defs(s); 707 708 /* Reverse the order of the saved registers, assuming they're all at 709 the start of tcg_target_reg_alloc_order. */ 710 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) { 711 int r = tcg_target_reg_alloc_order[n]; 712 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) { 713 break; 714 } 715 } 716 for (i = 0; i < n; ++i) { 717 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i]; 718 } 719 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) { 720 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; 721 } 722 723 tcg_ctx = s; 724 /* 725 * In user-mode we simply share the init context among threads, since we 726 * use a single region. See the documentation tcg_region_init() for the 727 * reasoning behind this. 728 * In softmmu we will have at most max_cpus TCG threads. 729 */ 730 #ifdef CONFIG_USER_ONLY 731 tcg_ctxs = &tcg_ctx; 732 n_tcg_ctxs = 1; 733 #else 734 tcg_ctxs = g_new(TCGContext *, max_cpus); 735 #endif 736 737 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0)); 738 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env"); 739 cpu_env = temp_tcgv_ptr(ts); 740 } 741 742 /* 743 * Allocate TBs right before their corresponding translated code, making 744 * sure that TBs and code are on different cache lines. 745 */ 746 TranslationBlock *tcg_tb_alloc(TCGContext *s) 747 { 748 uintptr_t align = qemu_icache_linesize; 749 TranslationBlock *tb; 750 void *next; 751 752 retry: 753 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); 754 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); 755 756 if (unlikely(next > s->code_gen_highwater)) { 757 if (tcg_region_alloc(s)) { 758 return NULL; 759 } 760 goto retry; 761 } 762 atomic_set(&s->code_gen_ptr, next); 763 s->data_gen_ptr = NULL; 764 return tb; 765 } 766 767 void tcg_prologue_init(TCGContext *s) 768 { 769 size_t prologue_size, total_size; 770 void *buf0, *buf1; 771 772 /* Put the prologue at the beginning of code_gen_buffer. */ 773 buf0 = s->code_gen_buffer; 774 total_size = s->code_gen_buffer_size; 775 s->code_ptr = buf0; 776 s->code_buf = buf0; 777 s->data_gen_ptr = NULL; 778 s->code_gen_prologue = buf0; 779 780 /* Compute a high-water mark, at which we voluntarily flush the buffer 781 and start over. 
The size here is arbitrary, significantly larger 782 than we expect the code generation for any one opcode to require. */ 783 s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER); 784 785 #ifdef TCG_TARGET_NEED_POOL_LABELS 786 s->pool_labels = NULL; 787 #endif 788 789 /* Generate the prologue. */ 790 tcg_target_qemu_prologue(s); 791 792 #ifdef TCG_TARGET_NEED_POOL_LABELS 793 /* Allow the prologue to put e.g. guest_base into a pool entry. */ 794 { 795 bool ok = tcg_out_pool_finalize(s); 796 tcg_debug_assert(ok); 797 } 798 #endif 799 800 buf1 = s->code_ptr; 801 flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1); 802 803 /* Deduct the prologue from the buffer. */ 804 prologue_size = tcg_current_code_size(s); 805 s->code_gen_ptr = buf1; 806 s->code_gen_buffer = buf1; 807 s->code_buf = buf1; 808 total_size -= prologue_size; 809 s->code_gen_buffer_size = total_size; 810 811 tcg_register_jit(s->code_gen_buffer, total_size); 812 813 #ifdef DEBUG_DISAS 814 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 815 qemu_log_lock(); 816 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size); 817 if (s->data_gen_ptr) { 818 size_t code_size = s->data_gen_ptr - buf0; 819 size_t data_size = prologue_size - code_size; 820 size_t i; 821 822 log_disas(buf0, code_size); 823 824 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { 825 if (sizeof(tcg_target_ulong) == 8) { 826 qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", 827 (uintptr_t)s->data_gen_ptr + i, 828 *(uint64_t *)(s->data_gen_ptr + i)); 829 } else { 830 qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n", 831 (uintptr_t)s->data_gen_ptr + i, 832 *(uint32_t *)(s->data_gen_ptr + i)); 833 } 834 } 835 } else { 836 log_disas(buf0, prologue_size); 837 } 838 qemu_log("\n"); 839 qemu_log_flush(); 840 qemu_log_unlock(); 841 } 842 #endif 843 844 /* Assert that goto_ptr is implemented completely. */ 845 if (TCG_TARGET_HAS_goto_ptr) { 846 tcg_debug_assert(s->code_gen_epilogue != NULL); 847 } 848 } 849 850 void tcg_func_start(TCGContext *s) 851 { 852 tcg_pool_reset(s); 853 s->nb_temps = s->nb_globals; 854 855 /* No temps have been previously allocated for size or locality. 
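       The free_temps bitmaps are indexed by both, so clearing them leaves
       every non-global temp unallocated at the start of the translation.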
*/ 856 memset(s->free_temps, 0, sizeof(s->free_temps)); 857 858 s->nb_labels = 0; 859 s->current_frame_offset = s->frame_start; 860 861 #ifdef CONFIG_DEBUG_TCG 862 s->goto_tb_issue_mask = 0; 863 #endif 864 865 s->gen_op_buf[0].next = 1; 866 s->gen_op_buf[0].prev = 0; 867 s->gen_next_op_idx = 1; 868 } 869 870 static inline TCGTemp *tcg_temp_alloc(TCGContext *s) 871 { 872 int n = s->nb_temps++; 873 tcg_debug_assert(n < TCG_MAX_TEMPS); 874 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 875 } 876 877 static inline TCGTemp *tcg_global_alloc(TCGContext *s) 878 { 879 TCGTemp *ts; 880 881 tcg_debug_assert(s->nb_globals == s->nb_temps); 882 s->nb_globals++; 883 ts = tcg_temp_alloc(s); 884 ts->temp_global = 1; 885 886 return ts; 887 } 888 889 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 890 TCGReg reg, const char *name) 891 { 892 TCGTemp *ts; 893 894 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) { 895 tcg_abort(); 896 } 897 898 ts = tcg_global_alloc(s); 899 ts->base_type = type; 900 ts->type = type; 901 ts->fixed_reg = 1; 902 ts->reg = reg; 903 ts->name = name; 904 tcg_regset_set_reg(s->reserved_regs, reg); 905 906 return ts; 907 } 908 909 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 910 { 911 s->frame_start = start; 912 s->frame_end = start + size; 913 s->frame_temp 914 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 915 } 916 917 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, 918 intptr_t offset, const char *name) 919 { 920 TCGContext *s = tcg_ctx; 921 TCGTemp *base_ts = tcgv_ptr_temp(base); 922 TCGTemp *ts = tcg_global_alloc(s); 923 int indirect_reg = 0, bigendian = 0; 924 #ifdef HOST_WORDS_BIGENDIAN 925 bigendian = 1; 926 #endif 927 928 if (!base_ts->fixed_reg) { 929 /* We do not support double-indirect registers. */ 930 tcg_debug_assert(!base_ts->indirect_reg); 931 base_ts->indirect_base = 1; 932 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 933 ? 2 : 1); 934 indirect_reg = 1; 935 } 936 937 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 938 TCGTemp *ts2 = tcg_global_alloc(s); 939 char buf[64]; 940 941 ts->base_type = TCG_TYPE_I64; 942 ts->type = TCG_TYPE_I32; 943 ts->indirect_reg = indirect_reg; 944 ts->mem_allocated = 1; 945 ts->mem_base = base_ts; 946 ts->mem_offset = offset + bigendian * 4; 947 pstrcpy(buf, sizeof(buf), name); 948 pstrcat(buf, sizeof(buf), "_0"); 949 ts->name = strdup(buf); 950 951 tcg_debug_assert(ts2 == ts + 1); 952 ts2->base_type = TCG_TYPE_I64; 953 ts2->type = TCG_TYPE_I32; 954 ts2->indirect_reg = indirect_reg; 955 ts2->mem_allocated = 1; 956 ts2->mem_base = base_ts; 957 ts2->mem_offset = offset + (1 - bigendian) * 4; 958 pstrcpy(buf, sizeof(buf), name); 959 pstrcat(buf, sizeof(buf), "_1"); 960 ts2->name = strdup(buf); 961 } else { 962 ts->base_type = type; 963 ts->type = type; 964 ts->indirect_reg = indirect_reg; 965 ts->mem_allocated = 1; 966 ts->mem_base = base_ts; 967 ts->mem_offset = offset; 968 ts->name = name; 969 } 970 return ts; 971 } 972 973 static TCGTemp *tcg_temp_new_internal(TCGType type, int temp_local) 974 { 975 TCGContext *s = tcg_ctx; 976 TCGTemp *ts; 977 int idx, k; 978 979 k = type + (temp_local ? TCG_TYPE_COUNT : 0); 980 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS); 981 if (idx < TCG_MAX_TEMPS) { 982 /* There is already an available temp with the right type. 
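           Take it off the free list and reuse it.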
*/ 983 clear_bit(idx, s->free_temps[k].l); 984 985 ts = &s->temps[idx]; 986 ts->temp_allocated = 1; 987 tcg_debug_assert(ts->base_type == type); 988 tcg_debug_assert(ts->temp_local == temp_local); 989 } else { 990 ts = tcg_temp_alloc(s); 991 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 992 TCGTemp *ts2 = tcg_temp_alloc(s); 993 994 ts->base_type = type; 995 ts->type = TCG_TYPE_I32; 996 ts->temp_allocated = 1; 997 ts->temp_local = temp_local; 998 999 tcg_debug_assert(ts2 == ts + 1); 1000 ts2->base_type = TCG_TYPE_I64; 1001 ts2->type = TCG_TYPE_I32; 1002 ts2->temp_allocated = 1; 1003 ts2->temp_local = temp_local; 1004 } else { 1005 ts->base_type = type; 1006 ts->type = type; 1007 ts->temp_allocated = 1; 1008 ts->temp_local = temp_local; 1009 } 1010 } 1011 1012 #if defined(CONFIG_DEBUG_TCG) 1013 s->temps_in_use++; 1014 #endif 1015 return ts; 1016 } 1017 1018 TCGv_i32 tcg_temp_new_internal_i32(int temp_local) 1019 { 1020 TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, temp_local); 1021 return temp_tcgv_i32(t); 1022 } 1023 1024 TCGv_i64 tcg_temp_new_internal_i64(int temp_local) 1025 { 1026 TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, temp_local); 1027 return temp_tcgv_i64(t); 1028 } 1029 1030 static void tcg_temp_free_internal(TCGTemp *ts) 1031 { 1032 TCGContext *s = tcg_ctx; 1033 int k, idx; 1034 1035 #if defined(CONFIG_DEBUG_TCG) 1036 s->temps_in_use--; 1037 if (s->temps_in_use < 0) { 1038 fprintf(stderr, "More temporaries freed than allocated!\n"); 1039 } 1040 #endif 1041 1042 tcg_debug_assert(ts->temp_global == 0); 1043 tcg_debug_assert(ts->temp_allocated != 0); 1044 ts->temp_allocated = 0; 1045 1046 idx = temp_idx(ts); 1047 k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0); 1048 set_bit(idx, s->free_temps[k].l); 1049 } 1050 1051 void tcg_temp_free_i32(TCGv_i32 arg) 1052 { 1053 tcg_temp_free_internal(tcgv_i32_temp(arg)); 1054 } 1055 1056 void tcg_temp_free_i64(TCGv_i64 arg) 1057 { 1058 tcg_temp_free_internal(tcgv_i64_temp(arg)); 1059 } 1060 1061 TCGv_i32 tcg_const_i32(int32_t val) 1062 { 1063 TCGv_i32 t0; 1064 t0 = tcg_temp_new_i32(); 1065 tcg_gen_movi_i32(t0, val); 1066 return t0; 1067 } 1068 1069 TCGv_i64 tcg_const_i64(int64_t val) 1070 { 1071 TCGv_i64 t0; 1072 t0 = tcg_temp_new_i64(); 1073 tcg_gen_movi_i64(t0, val); 1074 return t0; 1075 } 1076 1077 TCGv_i32 tcg_const_local_i32(int32_t val) 1078 { 1079 TCGv_i32 t0; 1080 t0 = tcg_temp_local_new_i32(); 1081 tcg_gen_movi_i32(t0, val); 1082 return t0; 1083 } 1084 1085 TCGv_i64 tcg_const_local_i64(int64_t val) 1086 { 1087 TCGv_i64 t0; 1088 t0 = tcg_temp_local_new_i64(); 1089 tcg_gen_movi_i64(t0, val); 1090 return t0; 1091 } 1092 1093 #if defined(CONFIG_DEBUG_TCG) 1094 void tcg_clear_temp_count(void) 1095 { 1096 TCGContext *s = tcg_ctx; 1097 s->temps_in_use = 0; 1098 } 1099 1100 int tcg_check_temp_count(void) 1101 { 1102 TCGContext *s = tcg_ctx; 1103 if (s->temps_in_use) { 1104 /* Clear the count so that we don't give another 1105 * warning immediately next time around. 1106 */ 1107 s->temps_in_use = 0; 1108 return 1; 1109 } 1110 return 0; 1111 } 1112 #endif 1113 1114 /* Return true if OP may appear in the opcode stream. 1115 Test the runtime variable that controls each opcode. 
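   For example, INDEX_op_movcond_i32 is only accepted when
   TCG_TARGET_HAS_movcond_i32 is nonzero.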
*/ 1116 bool tcg_op_supported(TCGOpcode op) 1117 { 1118 switch (op) { 1119 case INDEX_op_discard: 1120 case INDEX_op_set_label: 1121 case INDEX_op_call: 1122 case INDEX_op_br: 1123 case INDEX_op_mb: 1124 case INDEX_op_insn_start: 1125 case INDEX_op_exit_tb: 1126 case INDEX_op_goto_tb: 1127 case INDEX_op_qemu_ld_i32: 1128 case INDEX_op_qemu_st_i32: 1129 case INDEX_op_qemu_ld_i64: 1130 case INDEX_op_qemu_st_i64: 1131 return true; 1132 1133 case INDEX_op_goto_ptr: 1134 return TCG_TARGET_HAS_goto_ptr; 1135 1136 case INDEX_op_mov_i32: 1137 case INDEX_op_movi_i32: 1138 case INDEX_op_setcond_i32: 1139 case INDEX_op_brcond_i32: 1140 case INDEX_op_ld8u_i32: 1141 case INDEX_op_ld8s_i32: 1142 case INDEX_op_ld16u_i32: 1143 case INDEX_op_ld16s_i32: 1144 case INDEX_op_ld_i32: 1145 case INDEX_op_st8_i32: 1146 case INDEX_op_st16_i32: 1147 case INDEX_op_st_i32: 1148 case INDEX_op_add_i32: 1149 case INDEX_op_sub_i32: 1150 case INDEX_op_mul_i32: 1151 case INDEX_op_and_i32: 1152 case INDEX_op_or_i32: 1153 case INDEX_op_xor_i32: 1154 case INDEX_op_shl_i32: 1155 case INDEX_op_shr_i32: 1156 case INDEX_op_sar_i32: 1157 return true; 1158 1159 case INDEX_op_movcond_i32: 1160 return TCG_TARGET_HAS_movcond_i32; 1161 case INDEX_op_div_i32: 1162 case INDEX_op_divu_i32: 1163 return TCG_TARGET_HAS_div_i32; 1164 case INDEX_op_rem_i32: 1165 case INDEX_op_remu_i32: 1166 return TCG_TARGET_HAS_rem_i32; 1167 case INDEX_op_div2_i32: 1168 case INDEX_op_divu2_i32: 1169 return TCG_TARGET_HAS_div2_i32; 1170 case INDEX_op_rotl_i32: 1171 case INDEX_op_rotr_i32: 1172 return TCG_TARGET_HAS_rot_i32; 1173 case INDEX_op_deposit_i32: 1174 return TCG_TARGET_HAS_deposit_i32; 1175 case INDEX_op_extract_i32: 1176 return TCG_TARGET_HAS_extract_i32; 1177 case INDEX_op_sextract_i32: 1178 return TCG_TARGET_HAS_sextract_i32; 1179 case INDEX_op_add2_i32: 1180 return TCG_TARGET_HAS_add2_i32; 1181 case INDEX_op_sub2_i32: 1182 return TCG_TARGET_HAS_sub2_i32; 1183 case INDEX_op_mulu2_i32: 1184 return TCG_TARGET_HAS_mulu2_i32; 1185 case INDEX_op_muls2_i32: 1186 return TCG_TARGET_HAS_muls2_i32; 1187 case INDEX_op_muluh_i32: 1188 return TCG_TARGET_HAS_muluh_i32; 1189 case INDEX_op_mulsh_i32: 1190 return TCG_TARGET_HAS_mulsh_i32; 1191 case INDEX_op_ext8s_i32: 1192 return TCG_TARGET_HAS_ext8s_i32; 1193 case INDEX_op_ext16s_i32: 1194 return TCG_TARGET_HAS_ext16s_i32; 1195 case INDEX_op_ext8u_i32: 1196 return TCG_TARGET_HAS_ext8u_i32; 1197 case INDEX_op_ext16u_i32: 1198 return TCG_TARGET_HAS_ext16u_i32; 1199 case INDEX_op_bswap16_i32: 1200 return TCG_TARGET_HAS_bswap16_i32; 1201 case INDEX_op_bswap32_i32: 1202 return TCG_TARGET_HAS_bswap32_i32; 1203 case INDEX_op_not_i32: 1204 return TCG_TARGET_HAS_not_i32; 1205 case INDEX_op_neg_i32: 1206 return TCG_TARGET_HAS_neg_i32; 1207 case INDEX_op_andc_i32: 1208 return TCG_TARGET_HAS_andc_i32; 1209 case INDEX_op_orc_i32: 1210 return TCG_TARGET_HAS_orc_i32; 1211 case INDEX_op_eqv_i32: 1212 return TCG_TARGET_HAS_eqv_i32; 1213 case INDEX_op_nand_i32: 1214 return TCG_TARGET_HAS_nand_i32; 1215 case INDEX_op_nor_i32: 1216 return TCG_TARGET_HAS_nor_i32; 1217 case INDEX_op_clz_i32: 1218 return TCG_TARGET_HAS_clz_i32; 1219 case INDEX_op_ctz_i32: 1220 return TCG_TARGET_HAS_ctz_i32; 1221 case INDEX_op_ctpop_i32: 1222 return TCG_TARGET_HAS_ctpop_i32; 1223 1224 case INDEX_op_brcond2_i32: 1225 case INDEX_op_setcond2_i32: 1226 return TCG_TARGET_REG_BITS == 32; 1227 1228 case INDEX_op_mov_i64: 1229 case INDEX_op_movi_i64: 1230 case INDEX_op_setcond_i64: 1231 case INDEX_op_brcond_i64: 1232 case INDEX_op_ld8u_i64: 1233 case 
INDEX_op_ld8s_i64: 1234 case INDEX_op_ld16u_i64: 1235 case INDEX_op_ld16s_i64: 1236 case INDEX_op_ld32u_i64: 1237 case INDEX_op_ld32s_i64: 1238 case INDEX_op_ld_i64: 1239 case INDEX_op_st8_i64: 1240 case INDEX_op_st16_i64: 1241 case INDEX_op_st32_i64: 1242 case INDEX_op_st_i64: 1243 case INDEX_op_add_i64: 1244 case INDEX_op_sub_i64: 1245 case INDEX_op_mul_i64: 1246 case INDEX_op_and_i64: 1247 case INDEX_op_or_i64: 1248 case INDEX_op_xor_i64: 1249 case INDEX_op_shl_i64: 1250 case INDEX_op_shr_i64: 1251 case INDEX_op_sar_i64: 1252 case INDEX_op_ext_i32_i64: 1253 case INDEX_op_extu_i32_i64: 1254 return TCG_TARGET_REG_BITS == 64; 1255 1256 case INDEX_op_movcond_i64: 1257 return TCG_TARGET_HAS_movcond_i64; 1258 case INDEX_op_div_i64: 1259 case INDEX_op_divu_i64: 1260 return TCG_TARGET_HAS_div_i64; 1261 case INDEX_op_rem_i64: 1262 case INDEX_op_remu_i64: 1263 return TCG_TARGET_HAS_rem_i64; 1264 case INDEX_op_div2_i64: 1265 case INDEX_op_divu2_i64: 1266 return TCG_TARGET_HAS_div2_i64; 1267 case INDEX_op_rotl_i64: 1268 case INDEX_op_rotr_i64: 1269 return TCG_TARGET_HAS_rot_i64; 1270 case INDEX_op_deposit_i64: 1271 return TCG_TARGET_HAS_deposit_i64; 1272 case INDEX_op_extract_i64: 1273 return TCG_TARGET_HAS_extract_i64; 1274 case INDEX_op_sextract_i64: 1275 return TCG_TARGET_HAS_sextract_i64; 1276 case INDEX_op_extrl_i64_i32: 1277 return TCG_TARGET_HAS_extrl_i64_i32; 1278 case INDEX_op_extrh_i64_i32: 1279 return TCG_TARGET_HAS_extrh_i64_i32; 1280 case INDEX_op_ext8s_i64: 1281 return TCG_TARGET_HAS_ext8s_i64; 1282 case INDEX_op_ext16s_i64: 1283 return TCG_TARGET_HAS_ext16s_i64; 1284 case INDEX_op_ext32s_i64: 1285 return TCG_TARGET_HAS_ext32s_i64; 1286 case INDEX_op_ext8u_i64: 1287 return TCG_TARGET_HAS_ext8u_i64; 1288 case INDEX_op_ext16u_i64: 1289 return TCG_TARGET_HAS_ext16u_i64; 1290 case INDEX_op_ext32u_i64: 1291 return TCG_TARGET_HAS_ext32u_i64; 1292 case INDEX_op_bswap16_i64: 1293 return TCG_TARGET_HAS_bswap16_i64; 1294 case INDEX_op_bswap32_i64: 1295 return TCG_TARGET_HAS_bswap32_i64; 1296 case INDEX_op_bswap64_i64: 1297 return TCG_TARGET_HAS_bswap64_i64; 1298 case INDEX_op_not_i64: 1299 return TCG_TARGET_HAS_not_i64; 1300 case INDEX_op_neg_i64: 1301 return TCG_TARGET_HAS_neg_i64; 1302 case INDEX_op_andc_i64: 1303 return TCG_TARGET_HAS_andc_i64; 1304 case INDEX_op_orc_i64: 1305 return TCG_TARGET_HAS_orc_i64; 1306 case INDEX_op_eqv_i64: 1307 return TCG_TARGET_HAS_eqv_i64; 1308 case INDEX_op_nand_i64: 1309 return TCG_TARGET_HAS_nand_i64; 1310 case INDEX_op_nor_i64: 1311 return TCG_TARGET_HAS_nor_i64; 1312 case INDEX_op_clz_i64: 1313 return TCG_TARGET_HAS_clz_i64; 1314 case INDEX_op_ctz_i64: 1315 return TCG_TARGET_HAS_ctz_i64; 1316 case INDEX_op_ctpop_i64: 1317 return TCG_TARGET_HAS_ctpop_i64; 1318 case INDEX_op_add2_i64: 1319 return TCG_TARGET_HAS_add2_i64; 1320 case INDEX_op_sub2_i64: 1321 return TCG_TARGET_HAS_sub2_i64; 1322 case INDEX_op_mulu2_i64: 1323 return TCG_TARGET_HAS_mulu2_i64; 1324 case INDEX_op_muls2_i64: 1325 return TCG_TARGET_HAS_muls2_i64; 1326 case INDEX_op_muluh_i64: 1327 return TCG_TARGET_HAS_muluh_i64; 1328 case INDEX_op_mulsh_i64: 1329 return TCG_TARGET_HAS_mulsh_i64; 1330 1331 case NB_OPS: 1332 break; 1333 } 1334 g_assert_not_reached(); 1335 } 1336 1337 /* Note: we convert the 64 bit args to 32 bit and do some alignment 1338 and endian swap. Maybe it would be better to do the alignment 1339 and endian swap in tcg_reg_alloc_call(). 
*/ 1340 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) 1341 { 1342 TCGContext *s = tcg_ctx; 1343 int i, real_args, nb_rets, pi; 1344 unsigned sizemask, flags; 1345 TCGHelperInfo *info; 1346 TCGOp *op; 1347 1348 info = g_hash_table_lookup(helper_table, (gpointer)func); 1349 flags = info->flags; 1350 sizemask = info->sizemask; 1351 1352 #if defined(__sparc__) && !defined(__arch64__) \ 1353 && !defined(CONFIG_TCG_INTERPRETER) 1354 /* We have 64-bit values in one register, but need to pass as two 1355 separate parameters. Split them. */ 1356 int orig_sizemask = sizemask; 1357 int orig_nargs = nargs; 1358 TCGv_i64 retl, reth; 1359 TCGTemp *split_args[MAX_OPC_PARAM]; 1360 1361 retl = NULL; 1362 reth = NULL; 1363 if (sizemask != 0) { 1364 for (i = real_args = 0; i < nargs; ++i) { 1365 int is_64bit = sizemask & (1 << (i+1)*2); 1366 if (is_64bit) { 1367 TCGv_i64 orig = temp_tcgv_i64(args[i]); 1368 TCGv_i32 h = tcg_temp_new_i32(); 1369 TCGv_i32 l = tcg_temp_new_i32(); 1370 tcg_gen_extr_i64_i32(l, h, orig); 1371 split_args[real_args++] = tcgv_i32_temp(h); 1372 split_args[real_args++] = tcgv_i32_temp(l); 1373 } else { 1374 split_args[real_args++] = args[i]; 1375 } 1376 } 1377 nargs = real_args; 1378 args = split_args; 1379 sizemask = 0; 1380 } 1381 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 1382 for (i = 0; i < nargs; ++i) { 1383 int is_64bit = sizemask & (1 << (i+1)*2); 1384 int is_signed = sizemask & (2 << (i+1)*2); 1385 if (!is_64bit) { 1386 TCGv_i64 temp = tcg_temp_new_i64(); 1387 TCGv_i64 orig = temp_tcgv_i64(args[i]); 1388 if (is_signed) { 1389 tcg_gen_ext32s_i64(temp, orig); 1390 } else { 1391 tcg_gen_ext32u_i64(temp, orig); 1392 } 1393 args[i] = tcgv_i64_temp(temp); 1394 } 1395 } 1396 #endif /* TCG_TARGET_EXTEND_ARGS */ 1397 1398 i = s->gen_next_op_idx; 1399 tcg_debug_assert(i < OPC_BUF_SIZE); 1400 s->gen_op_buf[0].prev = i; 1401 s->gen_next_op_idx = i + 1; 1402 op = &s->gen_op_buf[i]; 1403 1404 /* Set links for sequential allocation during translation. */ 1405 memset(op, 0, offsetof(TCGOp, args)); 1406 op->opc = INDEX_op_call; 1407 op->prev = i - 1; 1408 op->next = i + 1; 1409 1410 pi = 0; 1411 if (ret != NULL) { 1412 #if defined(__sparc__) && !defined(__arch64__) \ 1413 && !defined(CONFIG_TCG_INTERPRETER) 1414 if (orig_sizemask & 1) { 1415 /* The 32-bit ABI is going to return the 64-bit value in 1416 the %o0/%o1 register pair. Prepare for this by using 1417 two return temporaries, and reassemble below. 
*/ 1418 retl = tcg_temp_new_i64(); 1419 reth = tcg_temp_new_i64(); 1420 op->args[pi++] = tcgv_i64_arg(reth); 1421 op->args[pi++] = tcgv_i64_arg(retl); 1422 nb_rets = 2; 1423 } else { 1424 op->args[pi++] = temp_arg(ret); 1425 nb_rets = 1; 1426 } 1427 #else 1428 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) { 1429 #ifdef HOST_WORDS_BIGENDIAN 1430 op->args[pi++] = temp_arg(ret + 1); 1431 op->args[pi++] = temp_arg(ret); 1432 #else 1433 op->args[pi++] = temp_arg(ret); 1434 op->args[pi++] = temp_arg(ret + 1); 1435 #endif 1436 nb_rets = 2; 1437 } else { 1438 op->args[pi++] = temp_arg(ret); 1439 nb_rets = 1; 1440 } 1441 #endif 1442 } else { 1443 nb_rets = 0; 1444 } 1445 op->callo = nb_rets; 1446 1447 real_args = 0; 1448 for (i = 0; i < nargs; i++) { 1449 int is_64bit = sizemask & (1 << (i+1)*2); 1450 if (TCG_TARGET_REG_BITS < 64 && is_64bit) { 1451 #ifdef TCG_TARGET_CALL_ALIGN_ARGS 1452 /* some targets want aligned 64 bit args */ 1453 if (real_args & 1) { 1454 op->args[pi++] = TCG_CALL_DUMMY_ARG; 1455 real_args++; 1456 } 1457 #endif 1458 /* If stack grows up, then we will be placing successive 1459 arguments at lower addresses, which means we need to 1460 reverse the order compared to how we would normally 1461 treat either big or little-endian. For those arguments 1462 that will wind up in registers, this still works for 1463 HPPA (the only current STACK_GROWSUP target) since the 1464 argument registers are *also* allocated in decreasing 1465 order. If another such target is added, this logic may 1466 have to get more complicated to differentiate between 1467 stack arguments and register arguments. */ 1468 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) 1469 op->args[pi++] = temp_arg(args[i] + 1); 1470 op->args[pi++] = temp_arg(args[i]); 1471 #else 1472 op->args[pi++] = temp_arg(args[i]); 1473 op->args[pi++] = temp_arg(args[i] + 1); 1474 #endif 1475 real_args += 2; 1476 continue; 1477 } 1478 1479 op->args[pi++] = temp_arg(args[i]); 1480 real_args++; 1481 } 1482 op->args[pi++] = (uintptr_t)func; 1483 op->args[pi++] = flags; 1484 op->calli = real_args; 1485 1486 /* Make sure the fields didn't overflow. */ 1487 tcg_debug_assert(op->calli == real_args); 1488 tcg_debug_assert(pi <= ARRAY_SIZE(op->args)); 1489 1490 #if defined(__sparc__) && !defined(__arch64__) \ 1491 && !defined(CONFIG_TCG_INTERPRETER) 1492 /* Free all of the parts we allocated above. */ 1493 for (i = real_args = 0; i < orig_nargs; ++i) { 1494 int is_64bit = orig_sizemask & (1 << (i+1)*2); 1495 if (is_64bit) { 1496 tcg_temp_free_internal(args[real_args++]); 1497 tcg_temp_free_internal(args[real_args++]); 1498 } else { 1499 real_args++; 1500 } 1501 } 1502 if (orig_sizemask & 1) { 1503 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them. 1504 Note that describing these as TCGv_i64 eliminates an unnecessary 1505 zero-extension that tcg_gen_concat_i32_i64 would create. */ 1506 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth); 1507 tcg_temp_free_i64(retl); 1508 tcg_temp_free_i64(reth); 1509 } 1510 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 1511 for (i = 0; i < nargs; ++i) { 1512 int is_64bit = sizemask & (1 << (i+1)*2); 1513 if (!is_64bit) { 1514 tcg_temp_free_internal(args[i]); 1515 } 1516 } 1517 #endif /* TCG_TARGET_EXTEND_ARGS */ 1518 } 1519 1520 static void tcg_reg_alloc_start(TCGContext *s) 1521 { 1522 int i, n; 1523 TCGTemp *ts; 1524 1525 for (i = 0, n = s->nb_globals; i < n; i++) { 1526 ts = &s->temps[i]; 1527 ts->val_type = (ts->fixed_reg ? 
TEMP_VAL_REG : TEMP_VAL_MEM); 1528 } 1529 for (n = s->nb_temps; i < n; i++) { 1530 ts = &s->temps[i]; 1531 ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD); 1532 ts->mem_allocated = 0; 1533 ts->fixed_reg = 0; 1534 } 1535 1536 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 1537 } 1538 1539 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 1540 TCGTemp *ts) 1541 { 1542 int idx = temp_idx(ts); 1543 1544 if (ts->temp_global) { 1545 pstrcpy(buf, buf_size, ts->name); 1546 } else if (ts->temp_local) { 1547 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 1548 } else { 1549 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 1550 } 1551 return buf; 1552 } 1553 1554 static char *tcg_get_arg_str(TCGContext *s, char *buf, 1555 int buf_size, TCGArg arg) 1556 { 1557 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); 1558 } 1559 1560 /* Find helper name. */ 1561 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val) 1562 { 1563 const char *ret = NULL; 1564 if (helper_table) { 1565 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val); 1566 if (info) { 1567 ret = info->name; 1568 } 1569 } 1570 return ret; 1571 } 1572 1573 static const char * const cond_name[] = 1574 { 1575 [TCG_COND_NEVER] = "never", 1576 [TCG_COND_ALWAYS] = "always", 1577 [TCG_COND_EQ] = "eq", 1578 [TCG_COND_NE] = "ne", 1579 [TCG_COND_LT] = "lt", 1580 [TCG_COND_GE] = "ge", 1581 [TCG_COND_LE] = "le", 1582 [TCG_COND_GT] = "gt", 1583 [TCG_COND_LTU] = "ltu", 1584 [TCG_COND_GEU] = "geu", 1585 [TCG_COND_LEU] = "leu", 1586 [TCG_COND_GTU] = "gtu" 1587 }; 1588 1589 static const char * const ldst_name[] = 1590 { 1591 [MO_UB] = "ub", 1592 [MO_SB] = "sb", 1593 [MO_LEUW] = "leuw", 1594 [MO_LESW] = "lesw", 1595 [MO_LEUL] = "leul", 1596 [MO_LESL] = "lesl", 1597 [MO_LEQ] = "leq", 1598 [MO_BEUW] = "beuw", 1599 [MO_BESW] = "besw", 1600 [MO_BEUL] = "beul", 1601 [MO_BESL] = "besl", 1602 [MO_BEQ] = "beq", 1603 }; 1604 1605 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 1606 #ifdef ALIGNED_ONLY 1607 [MO_UNALN >> MO_ASHIFT] = "un+", 1608 [MO_ALIGN >> MO_ASHIFT] = "", 1609 #else 1610 [MO_UNALN >> MO_ASHIFT] = "", 1611 [MO_ALIGN >> MO_ASHIFT] = "al+", 1612 #endif 1613 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 1614 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 1615 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 1616 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 1617 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 1618 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 1619 }; 1620 1621 void tcg_dump_ops(TCGContext *s) 1622 { 1623 char buf[128]; 1624 TCGOp *op; 1625 int oi; 1626 1627 for (oi = s->gen_op_buf[0].next; oi != 0; oi = op->next) { 1628 int i, k, nb_oargs, nb_iargs, nb_cargs; 1629 const TCGOpDef *def; 1630 TCGOpcode c; 1631 int col = 0; 1632 1633 op = &s->gen_op_buf[oi]; 1634 c = op->opc; 1635 def = &tcg_op_defs[c]; 1636 1637 if (c == INDEX_op_insn_start) { 1638 col += qemu_log("%s ----", oi != s->gen_op_buf[0].next ? 
"\n" : ""); 1639 1640 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 1641 target_ulong a; 1642 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 1643 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 1644 #else 1645 a = op->args[i]; 1646 #endif 1647 col += qemu_log(" " TARGET_FMT_lx, a); 1648 } 1649 } else if (c == INDEX_op_call) { 1650 /* variable number of arguments */ 1651 nb_oargs = op->callo; 1652 nb_iargs = op->calli; 1653 nb_cargs = def->nb_cargs; 1654 1655 /* function name, flags, out args */ 1656 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name, 1657 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]), 1658 op->args[nb_oargs + nb_iargs + 1], nb_oargs); 1659 for (i = 0; i < nb_oargs; i++) { 1660 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf), 1661 op->args[i])); 1662 } 1663 for (i = 0; i < nb_iargs; i++) { 1664 TCGArg arg = op->args[nb_oargs + i]; 1665 const char *t = "<dummy>"; 1666 if (arg != TCG_CALL_DUMMY_ARG) { 1667 t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 1668 } 1669 col += qemu_log(",%s", t); 1670 } 1671 } else { 1672 col += qemu_log(" %s ", def->name); 1673 1674 nb_oargs = def->nb_oargs; 1675 nb_iargs = def->nb_iargs; 1676 nb_cargs = def->nb_cargs; 1677 1678 k = 0; 1679 for (i = 0; i < nb_oargs; i++) { 1680 if (k != 0) { 1681 col += qemu_log(","); 1682 } 1683 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf), 1684 op->args[k++])); 1685 } 1686 for (i = 0; i < nb_iargs; i++) { 1687 if (k != 0) { 1688 col += qemu_log(","); 1689 } 1690 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf), 1691 op->args[k++])); 1692 } 1693 switch (c) { 1694 case INDEX_op_brcond_i32: 1695 case INDEX_op_setcond_i32: 1696 case INDEX_op_movcond_i32: 1697 case INDEX_op_brcond2_i32: 1698 case INDEX_op_setcond2_i32: 1699 case INDEX_op_brcond_i64: 1700 case INDEX_op_setcond_i64: 1701 case INDEX_op_movcond_i64: 1702 if (op->args[k] < ARRAY_SIZE(cond_name) 1703 && cond_name[op->args[k]]) { 1704 col += qemu_log(",%s", cond_name[op->args[k++]]); 1705 } else { 1706 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]); 1707 } 1708 i = 1; 1709 break; 1710 case INDEX_op_qemu_ld_i32: 1711 case INDEX_op_qemu_st_i32: 1712 case INDEX_op_qemu_ld_i64: 1713 case INDEX_op_qemu_st_i64: 1714 { 1715 TCGMemOpIdx oi = op->args[k++]; 1716 TCGMemOp op = get_memop(oi); 1717 unsigned ix = get_mmuidx(oi); 1718 1719 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 1720 col += qemu_log(",$0x%x,%u", op, ix); 1721 } else { 1722 const char *s_al, *s_op; 1723 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 1724 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 1725 col += qemu_log(",%s%s,%u", s_al, s_op, ix); 1726 } 1727 i = 1; 1728 } 1729 break; 1730 default: 1731 i = 0; 1732 break; 1733 } 1734 switch (c) { 1735 case INDEX_op_set_label: 1736 case INDEX_op_br: 1737 case INDEX_op_brcond_i32: 1738 case INDEX_op_brcond_i64: 1739 case INDEX_op_brcond2_i32: 1740 col += qemu_log("%s$L%d", k ? "," : "", 1741 arg_label(op->args[k])->id); 1742 i++, k++; 1743 break; 1744 default: 1745 break; 1746 } 1747 for (; i < nb_cargs; i++, k++) { 1748 col += qemu_log("%s$0x%" TCG_PRIlx, k ? 
"," : "", op->args[k]); 1749 } 1750 } 1751 if (op->life) { 1752 unsigned life = op->life; 1753 1754 for (; col < 48; ++col) { 1755 putc(' ', qemu_logfile); 1756 } 1757 1758 if (life & (SYNC_ARG * 3)) { 1759 qemu_log(" sync:"); 1760 for (i = 0; i < 2; ++i) { 1761 if (life & (SYNC_ARG << i)) { 1762 qemu_log(" %d", i); 1763 } 1764 } 1765 } 1766 life /= DEAD_ARG; 1767 if (life) { 1768 qemu_log(" dead:"); 1769 for (i = 0; life; ++i, life >>= 1) { 1770 if (life & 1) { 1771 qemu_log(" %d", i); 1772 } 1773 } 1774 } 1775 } 1776 qemu_log("\n"); 1777 } 1778 } 1779 1780 /* we give more priority to constraints with less registers */ 1781 static int get_constraint_priority(const TCGOpDef *def, int k) 1782 { 1783 const TCGArgConstraint *arg_ct; 1784 1785 int i, n; 1786 arg_ct = &def->args_ct[k]; 1787 if (arg_ct->ct & TCG_CT_ALIAS) { 1788 /* an alias is equivalent to a single register */ 1789 n = 1; 1790 } else { 1791 if (!(arg_ct->ct & TCG_CT_REG)) 1792 return 0; 1793 n = 0; 1794 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 1795 if (tcg_regset_test_reg(arg_ct->u.regs, i)) 1796 n++; 1797 } 1798 } 1799 return TCG_TARGET_NB_REGS - n + 1; 1800 } 1801 1802 /* sort from highest priority to lowest */ 1803 static void sort_constraints(TCGOpDef *def, int start, int n) 1804 { 1805 int i, j, p1, p2, tmp; 1806 1807 for(i = 0; i < n; i++) 1808 def->sorted_args[start + i] = start + i; 1809 if (n <= 1) 1810 return; 1811 for(i = 0; i < n - 1; i++) { 1812 for(j = i + 1; j < n; j++) { 1813 p1 = get_constraint_priority(def, def->sorted_args[start + i]); 1814 p2 = get_constraint_priority(def, def->sorted_args[start + j]); 1815 if (p1 < p2) { 1816 tmp = def->sorted_args[start + i]; 1817 def->sorted_args[start + i] = def->sorted_args[start + j]; 1818 def->sorted_args[start + j] = tmp; 1819 } 1820 } 1821 } 1822 } 1823 1824 static void process_op_defs(TCGContext *s) 1825 { 1826 TCGOpcode op; 1827 1828 for (op = 0; op < NB_OPS; op++) { 1829 TCGOpDef *def = &tcg_op_defs[op]; 1830 const TCGTargetOpDef *tdefs; 1831 TCGType type; 1832 int i, nb_args; 1833 1834 if (def->flags & TCG_OPF_NOT_PRESENT) { 1835 continue; 1836 } 1837 1838 nb_args = def->nb_iargs + def->nb_oargs; 1839 if (nb_args == 0) { 1840 continue; 1841 } 1842 1843 tdefs = tcg_target_op_def(op); 1844 /* Missing TCGTargetOpDef entry. */ 1845 tcg_debug_assert(tdefs != NULL); 1846 1847 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32); 1848 for (i = 0; i < nb_args; i++) { 1849 const char *ct_str = tdefs->args_ct_str[i]; 1850 /* Incomplete TCGTargetOpDef entry. */ 1851 tcg_debug_assert(ct_str != NULL); 1852 1853 def->args_ct[i].u.regs = 0; 1854 def->args_ct[i].ct = 0; 1855 while (*ct_str != '\0') { 1856 switch(*ct_str) { 1857 case '0' ... '9': 1858 { 1859 int oarg = *ct_str - '0'; 1860 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); 1861 tcg_debug_assert(oarg < def->nb_oargs); 1862 tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG); 1863 /* TCG_CT_ALIAS is for the output arguments. 1864 The input is tagged with TCG_CT_IALIAS. 
*/ 1865 def->args_ct[i] = def->args_ct[oarg]; 1866 def->args_ct[oarg].ct |= TCG_CT_ALIAS; 1867 def->args_ct[oarg].alias_index = i; 1868 def->args_ct[i].ct |= TCG_CT_IALIAS; 1869 def->args_ct[i].alias_index = oarg; 1870 } 1871 ct_str++; 1872 break; 1873 case '&': 1874 def->args_ct[i].ct |= TCG_CT_NEWREG; 1875 ct_str++; 1876 break; 1877 case 'i': 1878 def->args_ct[i].ct |= TCG_CT_CONST; 1879 ct_str++; 1880 break; 1881 default: 1882 ct_str = target_parse_constraint(&def->args_ct[i], 1883 ct_str, type); 1884 /* Typo in TCGTargetOpDef constraint. */ 1885 tcg_debug_assert(ct_str != NULL); 1886 } 1887 } 1888 } 1889 1890 /* TCGTargetOpDef entry with too much information? */ 1891 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 1892 1893 /* sort the constraints (XXX: this is just an heuristic) */ 1894 sort_constraints(def, 0, def->nb_oargs); 1895 sort_constraints(def, def->nb_oargs, def->nb_iargs); 1896 } 1897 } 1898 1899 void tcg_op_remove(TCGContext *s, TCGOp *op) 1900 { 1901 int next = op->next; 1902 int prev = op->prev; 1903 1904 /* We should never attempt to remove the list terminator. */ 1905 tcg_debug_assert(op != &s->gen_op_buf[0]); 1906 1907 s->gen_op_buf[next].prev = prev; 1908 s->gen_op_buf[prev].next = next; 1909 1910 memset(op, 0, sizeof(*op)); 1911 1912 #ifdef CONFIG_PROFILER 1913 atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); 1914 #endif 1915 } 1916 1917 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 1918 TCGOpcode opc, int nargs) 1919 { 1920 int oi = s->gen_next_op_idx; 1921 int prev = old_op->prev; 1922 int next = old_op - s->gen_op_buf; 1923 TCGOp *new_op; 1924 1925 tcg_debug_assert(oi < OPC_BUF_SIZE); 1926 s->gen_next_op_idx = oi + 1; 1927 1928 new_op = &s->gen_op_buf[oi]; 1929 *new_op = (TCGOp){ 1930 .opc = opc, 1931 .prev = prev, 1932 .next = next 1933 }; 1934 s->gen_op_buf[prev].next = oi; 1935 old_op->prev = oi; 1936 1937 return new_op; 1938 } 1939 1940 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 1941 TCGOpcode opc, int nargs) 1942 { 1943 int oi = s->gen_next_op_idx; 1944 int prev = old_op - s->gen_op_buf; 1945 int next = old_op->next; 1946 TCGOp *new_op; 1947 1948 tcg_debug_assert(oi < OPC_BUF_SIZE); 1949 s->gen_next_op_idx = oi + 1; 1950 1951 new_op = &s->gen_op_buf[oi]; 1952 *new_op = (TCGOp){ 1953 .opc = opc, 1954 .prev = prev, 1955 .next = next 1956 }; 1957 s->gen_op_buf[next].prev = oi; 1958 old_op->next = oi; 1959 1960 return new_op; 1961 } 1962 1963 #define TS_DEAD 1 1964 #define TS_MEM 2 1965 1966 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 1967 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 1968 1969 /* liveness analysis: end of function: all temps are dead, and globals 1970 should be in memory. */ 1971 static void tcg_la_func_end(TCGContext *s) 1972 { 1973 int ng = s->nb_globals; 1974 int nt = s->nb_temps; 1975 int i; 1976 1977 for (i = 0; i < ng; ++i) { 1978 s->temps[i].state = TS_DEAD | TS_MEM; 1979 } 1980 for (i = ng; i < nt; ++i) { 1981 s->temps[i].state = TS_DEAD; 1982 } 1983 } 1984 1985 /* liveness analysis: end of basic block: all temps are dead, globals 1986 and local temps should be in memory. */ 1987 static void tcg_la_bb_end(TCGContext *s) 1988 { 1989 int ng = s->nb_globals; 1990 int nt = s->nb_temps; 1991 int i; 1992 1993 for (i = 0; i < ng; ++i) { 1994 s->temps[i].state = TS_DEAD | TS_MEM; 1995 } 1996 for (i = ng; i < nt; ++i) { 1997 s->temps[i].state = (s->temps[i].temp_local 1998 ? 
TS_DEAD | TS_MEM 1999 : TS_DEAD); 2000 } 2001 } 2002 2003 /* Liveness analysis : update the opc_arg_life array to tell if a 2004 given input arguments is dead. Instructions updating dead 2005 temporaries are removed. */ 2006 static void liveness_pass_1(TCGContext *s) 2007 { 2008 int nb_globals = s->nb_globals; 2009 int oi, oi_prev; 2010 2011 tcg_la_func_end(s); 2012 2013 for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) { 2014 int i, nb_iargs, nb_oargs; 2015 TCGOpcode opc_new, opc_new2; 2016 bool have_opc_new2; 2017 TCGLifeData arg_life = 0; 2018 TCGTemp *arg_ts; 2019 2020 TCGOp * const op = &s->gen_op_buf[oi]; 2021 TCGOpcode opc = op->opc; 2022 const TCGOpDef *def = &tcg_op_defs[opc]; 2023 2024 oi_prev = op->prev; 2025 2026 switch (opc) { 2027 case INDEX_op_call: 2028 { 2029 int call_flags; 2030 2031 nb_oargs = op->callo; 2032 nb_iargs = op->calli; 2033 call_flags = op->args[nb_oargs + nb_iargs + 1]; 2034 2035 /* pure functions can be removed if their result is unused */ 2036 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 2037 for (i = 0; i < nb_oargs; i++) { 2038 arg_ts = arg_temp(op->args[i]); 2039 if (arg_ts->state != TS_DEAD) { 2040 goto do_not_remove_call; 2041 } 2042 } 2043 goto do_remove; 2044 } else { 2045 do_not_remove_call: 2046 2047 /* output args are dead */ 2048 for (i = 0; i < nb_oargs; i++) { 2049 arg_ts = arg_temp(op->args[i]); 2050 if (arg_ts->state & TS_DEAD) { 2051 arg_life |= DEAD_ARG << i; 2052 } 2053 if (arg_ts->state & TS_MEM) { 2054 arg_life |= SYNC_ARG << i; 2055 } 2056 arg_ts->state = TS_DEAD; 2057 } 2058 2059 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 2060 TCG_CALL_NO_READ_GLOBALS))) { 2061 /* globals should go back to memory */ 2062 for (i = 0; i < nb_globals; i++) { 2063 s->temps[i].state = TS_DEAD | TS_MEM; 2064 } 2065 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 2066 /* globals should be synced to memory */ 2067 for (i = 0; i < nb_globals; i++) { 2068 s->temps[i].state |= TS_MEM; 2069 } 2070 } 2071 2072 /* record arguments that die in this helper */ 2073 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2074 arg_ts = arg_temp(op->args[i]); 2075 if (arg_ts && arg_ts->state & TS_DEAD) { 2076 arg_life |= DEAD_ARG << i; 2077 } 2078 } 2079 /* input arguments are live for preceding opcodes */ 2080 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2081 arg_ts = arg_temp(op->args[i]); 2082 if (arg_ts) { 2083 arg_ts->state &= ~TS_DEAD; 2084 } 2085 } 2086 } 2087 } 2088 break; 2089 case INDEX_op_insn_start: 2090 break; 2091 case INDEX_op_discard: 2092 /* mark the temporary as dead */ 2093 arg_temp(op->args[0])->state = TS_DEAD; 2094 break; 2095 2096 case INDEX_op_add2_i32: 2097 opc_new = INDEX_op_add_i32; 2098 goto do_addsub2; 2099 case INDEX_op_sub2_i32: 2100 opc_new = INDEX_op_sub_i32; 2101 goto do_addsub2; 2102 case INDEX_op_add2_i64: 2103 opc_new = INDEX_op_add_i64; 2104 goto do_addsub2; 2105 case INDEX_op_sub2_i64: 2106 opc_new = INDEX_op_sub_i64; 2107 do_addsub2: 2108 nb_iargs = 4; 2109 nb_oargs = 2; 2110 /* Test if the high part of the operation is dead, but not 2111 the low part. The result can be optimized to a simple 2112 add or sub. This happens often for x86_64 guest when the 2113 cpu mode is set to 32 bit. */ 2114 if (arg_temp(op->args[1])->state == TS_DEAD) { 2115 if (arg_temp(op->args[0])->state == TS_DEAD) { 2116 goto do_remove; 2117 } 2118 /* Replace the opcode and adjust the args in place, 2119 leaving 3 unused args at the end. 
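/* A standalone toy (not part of the build) of the backward walk this pass
   performs: ops are visited last to first, an op whose outputs are all dead
   is deleted, otherwise its outputs become dead and its inputs live.  The
   op encoding and temp numbering below are invented for illustration.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

struct toy_op { int out, in0, in1; bool removed; };

int main(void)
{
    /* t2 = t0 + t1;  t3 = t0 + t1;  only t2 is needed after the block */
    struct toy_op ops[] = { { 2, 0, 1, false }, { 3, 0, 1, false } };
    bool live[4] = { false, false, true, false };  /* live at block end */
    int i;

    for (i = 1; i >= 0; i--) {                     /* walk backwards */
        struct toy_op *op = &ops[i];
        if (!live[op->out]) {
            op->removed = true;                    /* result unused: drop */
            continue;
        }
        live[op->out] = false;                     /* defined here */
        live[op->in0] = live[op->in1] = true;      /* needed earlier */
    }
    printf("op0 %s, op1 %s\n",                     /* op0 kept, op1 removed */
           ops[0].removed ? "removed" : "kept",
           ops[1].removed ? "removed" : "kept");
    return 0;
}
#endif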
*/ 2120 op->opc = opc = opc_new; 2121 op->args[1] = op->args[2]; 2122 op->args[2] = op->args[4]; 2123 /* Fall through and mark the single-word operation live. */ 2124 nb_iargs = 2; 2125 nb_oargs = 1; 2126 } 2127 goto do_not_remove; 2128 2129 case INDEX_op_mulu2_i32: 2130 opc_new = INDEX_op_mul_i32; 2131 opc_new2 = INDEX_op_muluh_i32; 2132 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 2133 goto do_mul2; 2134 case INDEX_op_muls2_i32: 2135 opc_new = INDEX_op_mul_i32; 2136 opc_new2 = INDEX_op_mulsh_i32; 2137 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 2138 goto do_mul2; 2139 case INDEX_op_mulu2_i64: 2140 opc_new = INDEX_op_mul_i64; 2141 opc_new2 = INDEX_op_muluh_i64; 2142 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 2143 goto do_mul2; 2144 case INDEX_op_muls2_i64: 2145 opc_new = INDEX_op_mul_i64; 2146 opc_new2 = INDEX_op_mulsh_i64; 2147 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 2148 goto do_mul2; 2149 do_mul2: 2150 nb_iargs = 2; 2151 nb_oargs = 2; 2152 if (arg_temp(op->args[1])->state == TS_DEAD) { 2153 if (arg_temp(op->args[0])->state == TS_DEAD) { 2154 /* Both parts of the operation are dead. */ 2155 goto do_remove; 2156 } 2157 /* The high part of the operation is dead; generate the low. */ 2158 op->opc = opc = opc_new; 2159 op->args[1] = op->args[2]; 2160 op->args[2] = op->args[3]; 2161 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 2162 /* The low part of the operation is dead; generate the high. */ 2163 op->opc = opc = opc_new2; 2164 op->args[0] = op->args[1]; 2165 op->args[1] = op->args[2]; 2166 op->args[2] = op->args[3]; 2167 } else { 2168 goto do_not_remove; 2169 } 2170 /* Mark the single-word operation live. */ 2171 nb_oargs = 1; 2172 goto do_not_remove; 2173 2174 default: 2175 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 2176 nb_iargs = def->nb_iargs; 2177 nb_oargs = def->nb_oargs; 2178 2179 /* Test if the operation can be removed because all 2180 its outputs are dead. We assume that nb_oargs == 0 2181 implies side effects */ 2182 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 2183 for (i = 0; i < nb_oargs; i++) { 2184 if (arg_temp(op->args[i])->state != TS_DEAD) { 2185 goto do_not_remove; 2186 } 2187 } 2188 do_remove: 2189 tcg_op_remove(s, op); 2190 } else { 2191 do_not_remove: 2192 /* output args are dead */ 2193 for (i = 0; i < nb_oargs; i++) { 2194 arg_ts = arg_temp(op->args[i]); 2195 if (arg_ts->state & TS_DEAD) { 2196 arg_life |= DEAD_ARG << i; 2197 } 2198 if (arg_ts->state & TS_MEM) { 2199 arg_life |= SYNC_ARG << i; 2200 } 2201 arg_ts->state = TS_DEAD; 2202 } 2203 2204 /* if end of basic block, update */ 2205 if (def->flags & TCG_OPF_BB_END) { 2206 tcg_la_bb_end(s); 2207 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2208 /* globals should be synced to memory */ 2209 for (i = 0; i < nb_globals; i++) { 2210 s->temps[i].state |= TS_MEM; 2211 } 2212 } 2213 2214 /* record arguments that die in this opcode */ 2215 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2216 arg_ts = arg_temp(op->args[i]); 2217 if (arg_ts->state & TS_DEAD) { 2218 arg_life |= DEAD_ARG << i; 2219 } 2220 } 2221 /* input arguments are live for preceding opcodes */ 2222 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2223 arg_temp(op->args[i])->state &= ~TS_DEAD; 2224 } 2225 } 2226 break; 2227 } 2228 op->life = arg_life; 2229 } 2230 } 2231 2232 /* Liveness analysis: Convert indirect regs to direct temporaries. 
*/ 2233 static bool liveness_pass_2(TCGContext *s) 2234 { 2235 int nb_globals = s->nb_globals; 2236 int nb_temps, i, oi, oi_next; 2237 bool changes = false; 2238 2239 /* Create a temporary for each indirect global. */ 2240 for (i = 0; i < nb_globals; ++i) { 2241 TCGTemp *its = &s->temps[i]; 2242 if (its->indirect_reg) { 2243 TCGTemp *dts = tcg_temp_alloc(s); 2244 dts->type = its->type; 2245 dts->base_type = its->base_type; 2246 its->state_ptr = dts; 2247 } else { 2248 its->state_ptr = NULL; 2249 } 2250 /* All globals begin dead. */ 2251 its->state = TS_DEAD; 2252 } 2253 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 2254 TCGTemp *its = &s->temps[i]; 2255 its->state_ptr = NULL; 2256 its->state = TS_DEAD; 2257 } 2258 2259 for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) { 2260 TCGOp *op = &s->gen_op_buf[oi]; 2261 TCGOpcode opc = op->opc; 2262 const TCGOpDef *def = &tcg_op_defs[opc]; 2263 TCGLifeData arg_life = op->life; 2264 int nb_iargs, nb_oargs, call_flags; 2265 TCGTemp *arg_ts, *dir_ts; 2266 2267 oi_next = op->next; 2268 2269 if (opc == INDEX_op_call) { 2270 nb_oargs = op->callo; 2271 nb_iargs = op->calli; 2272 call_flags = op->args[nb_oargs + nb_iargs + 1]; 2273 } else { 2274 nb_iargs = def->nb_iargs; 2275 nb_oargs = def->nb_oargs; 2276 2277 /* Set flags similar to how calls require. */ 2278 if (def->flags & TCG_OPF_BB_END) { 2279 /* Like writing globals: save_globals */ 2280 call_flags = 0; 2281 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2282 /* Like reading globals: sync_globals */ 2283 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 2284 } else { 2285 /* No effect on globals. */ 2286 call_flags = (TCG_CALL_NO_READ_GLOBALS | 2287 TCG_CALL_NO_WRITE_GLOBALS); 2288 } 2289 } 2290 2291 /* Make sure that input arguments are available. */ 2292 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2293 arg_ts = arg_temp(op->args[i]); 2294 if (arg_ts) { 2295 dir_ts = arg_ts->state_ptr; 2296 if (dir_ts && arg_ts->state == TS_DEAD) { 2297 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 2298 ? INDEX_op_ld_i32 2299 : INDEX_op_ld_i64); 2300 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); 2301 2302 lop->args[0] = temp_arg(dir_ts); 2303 lop->args[1] = temp_arg(arg_ts->mem_base); 2304 lop->args[2] = arg_ts->mem_offset; 2305 2306 /* Loaded, but synced with memory. */ 2307 arg_ts->state = TS_MEM; 2308 } 2309 } 2310 } 2311 2312 /* Perform input replacement, and mark inputs that became dead. 2313 No action is required except keeping temp_state up to date 2314 so that we reload when needed. */ 2315 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2316 arg_ts = arg_temp(op->args[i]); 2317 if (arg_ts) { 2318 dir_ts = arg_ts->state_ptr; 2319 if (dir_ts) { 2320 op->args[i] = temp_arg(dir_ts); 2321 changes = true; 2322 if (IS_DEAD_ARG(i)) { 2323 arg_ts->state = TS_DEAD; 2324 } 2325 } 2326 } 2327 } 2328 2329 /* Liveness analysis should ensure that the following are 2330 all correct, for call sites and basic block end points. */ 2331 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 2332 /* Nothing to do */ 2333 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 2334 for (i = 0; i < nb_globals; ++i) { 2335 /* Liveness should see that globals are synced back, 2336 that is, either TS_DEAD or TS_MEM. */ 2337 arg_ts = &s->temps[i]; 2338 tcg_debug_assert(arg_ts->state_ptr == 0 2339 || arg_ts->state != 0); 2340 } 2341 } else { 2342 for (i = 0; i < nb_globals; ++i) { 2343 /* Liveness should see that globals are saved back, 2344 that is, TS_DEAD, waiting to be reloaded. 
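/* Taken as a whole, this pass gives every indirect global a shadow
   temporary, rewrites uses to the shadow, loads it before the first use and
   stores it back at the last write.  A hand-written IR sketch (illustration
   only, not tool output), for one op reading and writing such a global G
   kept at base+off:  */
#if 0
    /*  before:   add_i32  G, G, x                                        */
    /*  after:    ld_i32   tmp, base, off    -- inserted before the use   */
    /*            add_i32  tmp, tmp, x       -- G replaced by its shadow  */
    /*            st_i32   tmp, base, off    -- inserted at the last      */
    /*                                          write (NEED_SYNC_ARG)     */
#endif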
*/ 2345 arg_ts = &s->temps[i]; 2346 tcg_debug_assert(arg_ts->state_ptr == 0 2347 || arg_ts->state == TS_DEAD); 2348 } 2349 } 2350 2351 /* Outputs become available. */ 2352 for (i = 0; i < nb_oargs; i++) { 2353 arg_ts = arg_temp(op->args[i]); 2354 dir_ts = arg_ts->state_ptr; 2355 if (!dir_ts) { 2356 continue; 2357 } 2358 op->args[i] = temp_arg(dir_ts); 2359 changes = true; 2360 2361 /* The output is now live and modified. */ 2362 arg_ts->state = 0; 2363 2364 /* Sync outputs upon their last write. */ 2365 if (NEED_SYNC_ARG(i)) { 2366 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 2367 ? INDEX_op_st_i32 2368 : INDEX_op_st_i64); 2369 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 2370 2371 sop->args[0] = temp_arg(dir_ts); 2372 sop->args[1] = temp_arg(arg_ts->mem_base); 2373 sop->args[2] = arg_ts->mem_offset; 2374 2375 arg_ts->state = TS_MEM; 2376 } 2377 /* Drop outputs that are dead. */ 2378 if (IS_DEAD_ARG(i)) { 2379 arg_ts->state = TS_DEAD; 2380 } 2381 } 2382 } 2383 2384 return changes; 2385 } 2386 2387 #ifdef CONFIG_DEBUG_TCG 2388 static void dump_regs(TCGContext *s) 2389 { 2390 TCGTemp *ts; 2391 int i; 2392 char buf[64]; 2393 2394 for(i = 0; i < s->nb_temps; i++) { 2395 ts = &s->temps[i]; 2396 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 2397 switch(ts->val_type) { 2398 case TEMP_VAL_REG: 2399 printf("%s", tcg_target_reg_names[ts->reg]); 2400 break; 2401 case TEMP_VAL_MEM: 2402 printf("%d(%s)", (int)ts->mem_offset, 2403 tcg_target_reg_names[ts->mem_base->reg]); 2404 break; 2405 case TEMP_VAL_CONST: 2406 printf("$0x%" TCG_PRIlx, ts->val); 2407 break; 2408 case TEMP_VAL_DEAD: 2409 printf("D"); 2410 break; 2411 default: 2412 printf("???"); 2413 break; 2414 } 2415 printf("\n"); 2416 } 2417 2418 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 2419 if (s->reg_to_temp[i] != NULL) { 2420 printf("%s: %s\n", 2421 tcg_target_reg_names[i], 2422 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); 2423 } 2424 } 2425 } 2426 2427 static void check_regs(TCGContext *s) 2428 { 2429 int reg; 2430 int k; 2431 TCGTemp *ts; 2432 char buf[64]; 2433 2434 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { 2435 ts = s->reg_to_temp[reg]; 2436 if (ts != NULL) { 2437 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { 2438 printf("Inconsistency for register %s:\n", 2439 tcg_target_reg_names[reg]); 2440 goto fail; 2441 } 2442 } 2443 } 2444 for (k = 0; k < s->nb_temps; k++) { 2445 ts = &s->temps[k]; 2446 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg 2447 && s->reg_to_temp[ts->reg] != ts) { 2448 printf("Inconsistency for temp %s:\n", 2449 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 2450 fail: 2451 printf("reg state:\n"); 2452 dump_regs(s); 2453 tcg_abort(); 2454 } 2455 } 2456 } 2457 #endif 2458 2459 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 2460 { 2461 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64) 2462 /* Sparc64 stack is accessed with offset of 2047 */ 2463 s->current_frame_offset = (s->current_frame_offset + 2464 (tcg_target_long)sizeof(tcg_target_long) - 1) & 2465 ~(sizeof(tcg_target_long) - 1); 2466 #endif 2467 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) > 2468 s->frame_end) { 2469 tcg_abort(); 2470 } 2471 ts->mem_offset = s->current_frame_offset; 2472 ts->mem_base = s->frame_temp; 2473 ts->mem_allocated = 1; 2474 s->current_frame_offset += sizeof(tcg_target_long); 2475 } 2476 2477 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet); 2478 2479 /* Mark a temporary as free or dead. 
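/* The helpers below move a temporary between the TEMP_VAL_* states; the
   transitions, roughly:
       TEMP_VAL_CONST --temp_load--> TEMP_VAL_REG   (tcg_out_movi emitted)
       TEMP_VAL_MEM   --temp_load--> TEMP_VAL_REG   (tcg_out_ld emitted)
       TEMP_VAL_REG   --temp_sync--> TEMP_VAL_REG   (mem_coherent set)
       any            --free/dead--> TEMP_VAL_MEM or TEMP_VAL_DEAD
   A minimal sketch of the usual calling pattern (names are from this file;
   the surrounding op/arg_life context is omitted):  */
#if 0
    temp_load(s, ts, tcg_target_available_regs[ts->type], s->reserved_regs);
    /* ... emit host code that uses ts->reg ... */
    if (NEED_SYNC_ARG(i)) {
        temp_sync(s, ts, s->reserved_regs, IS_DEAD_ARG(i));
    } else if (IS_DEAD_ARG(i)) {
        temp_dead(s, ts);
    }
#endif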
If 'free_or_dead' is negative, 2480 mark it free; otherwise mark it dead. */ 2481 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 2482 { 2483 if (ts->fixed_reg) { 2484 return; 2485 } 2486 if (ts->val_type == TEMP_VAL_REG) { 2487 s->reg_to_temp[ts->reg] = NULL; 2488 } 2489 ts->val_type = (free_or_dead < 0 2490 || ts->temp_local 2491 || ts->temp_global 2492 ? TEMP_VAL_MEM : TEMP_VAL_DEAD); 2493 } 2494 2495 /* Mark a temporary as dead. */ 2496 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 2497 { 2498 temp_free_or_dead(s, ts, 1); 2499 } 2500 2501 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 2502 registers needs to be allocated to store a constant. If 'free_or_dead' 2503 is non-zero, subsequently release the temporary; if it is positive, the 2504 temp is dead; if it is negative, the temp is free. */ 2505 static void temp_sync(TCGContext *s, TCGTemp *ts, 2506 TCGRegSet allocated_regs, int free_or_dead) 2507 { 2508 if (ts->fixed_reg) { 2509 return; 2510 } 2511 if (!ts->mem_coherent) { 2512 if (!ts->mem_allocated) { 2513 temp_allocate_frame(s, ts); 2514 } 2515 switch (ts->val_type) { 2516 case TEMP_VAL_CONST: 2517 /* If we're going to free the temp immediately, then we won't 2518 require it later in a register, so attempt to store the 2519 constant to memory directly. */ 2520 if (free_or_dead 2521 && tcg_out_sti(s, ts->type, ts->val, 2522 ts->mem_base->reg, ts->mem_offset)) { 2523 break; 2524 } 2525 temp_load(s, ts, tcg_target_available_regs[ts->type], 2526 allocated_regs); 2527 /* fallthrough */ 2528 2529 case TEMP_VAL_REG: 2530 tcg_out_st(s, ts->type, ts->reg, 2531 ts->mem_base->reg, ts->mem_offset); 2532 break; 2533 2534 case TEMP_VAL_MEM: 2535 break; 2536 2537 case TEMP_VAL_DEAD: 2538 default: 2539 tcg_abort(); 2540 } 2541 ts->mem_coherent = 1; 2542 } 2543 if (free_or_dead) { 2544 temp_free_or_dead(s, ts, free_or_dead); 2545 } 2546 } 2547 2548 /* free register 'reg' by spilling the corresponding temporary if necessary */ 2549 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 2550 { 2551 TCGTemp *ts = s->reg_to_temp[reg]; 2552 if (ts != NULL) { 2553 temp_sync(s, ts, allocated_regs, -1); 2554 } 2555 } 2556 2557 /* Allocate a register belonging to reg1 & ~reg2 */ 2558 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs, 2559 TCGRegSet allocated_regs, bool rev) 2560 { 2561 int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 2562 const int *order; 2563 TCGReg reg; 2564 TCGRegSet reg_ct; 2565 2566 reg_ct = desired_regs & ~allocated_regs; 2567 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 2568 2569 /* first try free registers */ 2570 for(i = 0; i < n; i++) { 2571 reg = order[i]; 2572 if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL) 2573 return reg; 2574 } 2575 2576 /* XXX: do better spill choice */ 2577 for(i = 0; i < n; i++) { 2578 reg = order[i]; 2579 if (tcg_regset_test_reg(reg_ct, reg)) { 2580 tcg_reg_free(s, reg, allocated_regs); 2581 return reg; 2582 } 2583 } 2584 2585 tcg_abort(); 2586 } 2587 2588 /* Make sure the temporary is in a register. If needed, allocate the register 2589 from DESIRED while avoiding ALLOCATED. 
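/* tcg_reg_alloc() above is a two-pass policy: prefer a free register in the
   target's preferred order, otherwise spill the first acceptable one.  A
   standalone toy (not part of the build) of just that policy, with the
   desired/allocated set filtering omitted and an invented register file:  */
#if 0
#include <stdbool.h>

static int toy_alloc(const int order[], int n_regs,
                     const bool in_use[], bool *spilled)
{
    int i;

    for (i = 0; i < n_regs; i++) {      /* first pass: any free register */
        if (!in_use[order[i]]) {
            *spilled = false;
            return order[i];
        }
    }
    *spilled = true;                    /* second pass: spill a victim */
    return order[0];
}
#endif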
 */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
        tcg_out_movi(s, ts->type, reg, ts->val);
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        tcg_abort();
    }
    ts->reg = reg;
    ts->val_type = TEMP_VAL_REG;
    s->reg_to_temp[reg] = ts;
}

/* Save a temporary to memory.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.  */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory.  Keep a tcg_debug_assert for safety.  */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
}

/* save globals to their canonical location and assume they can be
   modified by the following code.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.  */
static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        temp_save(s, &s->temps[i], allocated_regs);
    }
}

/* sync globals to their canonical location and assume they can be
   read by the following code.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.  */
static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
                         || ts->fixed_reg
                         || ts->mem_coherent);
    }
}

/* at the end of a basic block, we assume all temporaries are dead and
   all globals are stored at their canonical location.  */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    for (i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];
        if (ts->temp_local) {
            temp_save(s, ts, allocated_regs);
        } else {
            /* The liveness analysis already ensures that temps are dead.
               Keep a tcg_debug_assert for safety.  */
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
        }
    }

    save_globals(s, allocated_regs);
}

static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
                                  tcg_target_ulong val, TCGLifeData arg_life)
{
    if (ots->fixed_reg) {
        /* For fixed registers, we do not do any constant propagation.  */
        tcg_out_movi(s, ots->type, ots->reg, val);
        return;
    }

    /* The movi is not explicitly generated here.
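/* A standalone toy (not part of the build) of this lazy-constant idea:
   recording the value is enough, and a host move-immediate is only emitted
   once a later use really needs the value in a register (as temp_load does
   above).  All names below are invented for illustration.  */
#if 0
enum toy_kind { TOY_DEAD, TOY_CONST, TOY_REG };
struct toy_temp { enum toy_kind kind; long val; int reg; };

static void toy_movi(struct toy_temp *t, long val)
{
    t->kind = TOY_CONST;            /* no host code emitted yet */
    t->val = val;
}

static int toy_use_in_reg(struct toy_temp *t)
{
    if (t->kind == TOY_DEAD) {
        return -1;                  /* caller error in this toy */
    }
    if (t->kind == TOY_CONST) {
        t->reg = 0;                 /* pretend host register 0 is free */
        /* a real allocator emits the move here (cf. tcg_out_movi) */
        t->kind = TOY_REG;
    }
    return t->reg;
}
#endif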
*/ 2683 if (ots->val_type == TEMP_VAL_REG) { 2684 s->reg_to_temp[ots->reg] = NULL; 2685 } 2686 ots->val_type = TEMP_VAL_CONST; 2687 ots->val = val; 2688 ots->mem_coherent = 0; 2689 if (NEED_SYNC_ARG(0)) { 2690 temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0)); 2691 } else if (IS_DEAD_ARG(0)) { 2692 temp_dead(s, ots); 2693 } 2694 } 2695 2696 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op) 2697 { 2698 TCGTemp *ots = arg_temp(op->args[0]); 2699 tcg_target_ulong val = op->args[1]; 2700 2701 tcg_reg_alloc_do_movi(s, ots, val, op->life); 2702 } 2703 2704 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 2705 { 2706 const TCGLifeData arg_life = op->life; 2707 TCGRegSet allocated_regs; 2708 TCGTemp *ts, *ots; 2709 TCGType otype, itype; 2710 2711 allocated_regs = s->reserved_regs; 2712 ots = arg_temp(op->args[0]); 2713 ts = arg_temp(op->args[1]); 2714 2715 /* Note that otype != itype for no-op truncation. */ 2716 otype = ots->type; 2717 itype = ts->type; 2718 2719 if (ts->val_type == TEMP_VAL_CONST) { 2720 /* propagate constant or generate sti */ 2721 tcg_target_ulong val = ts->val; 2722 if (IS_DEAD_ARG(1)) { 2723 temp_dead(s, ts); 2724 } 2725 tcg_reg_alloc_do_movi(s, ots, val, arg_life); 2726 return; 2727 } 2728 2729 /* If the source value is in memory we're going to be forced 2730 to have it in a register in order to perform the copy. Copy 2731 the SOURCE value into its own register first, that way we 2732 don't have to reload SOURCE the next time it is used. */ 2733 if (ts->val_type == TEMP_VAL_MEM) { 2734 temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs); 2735 } 2736 2737 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 2738 if (IS_DEAD_ARG(0) && !ots->fixed_reg) { 2739 /* mov to a non-saved dead register makes no sense (even with 2740 liveness analysis disabled). */ 2741 tcg_debug_assert(NEED_SYNC_ARG(0)); 2742 if (!ots->mem_allocated) { 2743 temp_allocate_frame(s, ots); 2744 } 2745 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); 2746 if (IS_DEAD_ARG(1)) { 2747 temp_dead(s, ts); 2748 } 2749 temp_dead(s, ots); 2750 } else { 2751 if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) { 2752 /* the mov can be suppressed */ 2753 if (ots->val_type == TEMP_VAL_REG) { 2754 s->reg_to_temp[ots->reg] = NULL; 2755 } 2756 ots->reg = ts->reg; 2757 temp_dead(s, ts); 2758 } else { 2759 if (ots->val_type != TEMP_VAL_REG) { 2760 /* When allocating a new register, make sure to not spill the 2761 input one. 
*/ 2762 tcg_regset_set_reg(allocated_regs, ts->reg); 2763 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 2764 allocated_regs, ots->indirect_base); 2765 } 2766 tcg_out_mov(s, otype, ots->reg, ts->reg); 2767 } 2768 ots->val_type = TEMP_VAL_REG; 2769 ots->mem_coherent = 0; 2770 s->reg_to_temp[ots->reg] = ots; 2771 if (NEED_SYNC_ARG(0)) { 2772 temp_sync(s, ots, allocated_regs, 0); 2773 } 2774 } 2775 } 2776 2777 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 2778 { 2779 const TCGLifeData arg_life = op->life; 2780 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 2781 TCGRegSet i_allocated_regs; 2782 TCGRegSet o_allocated_regs; 2783 int i, k, nb_iargs, nb_oargs; 2784 TCGReg reg; 2785 TCGArg arg; 2786 const TCGArgConstraint *arg_ct; 2787 TCGTemp *ts; 2788 TCGArg new_args[TCG_MAX_OP_ARGS]; 2789 int const_args[TCG_MAX_OP_ARGS]; 2790 2791 nb_oargs = def->nb_oargs; 2792 nb_iargs = def->nb_iargs; 2793 2794 /* copy constants */ 2795 memcpy(new_args + nb_oargs + nb_iargs, 2796 op->args + nb_oargs + nb_iargs, 2797 sizeof(TCGArg) * def->nb_cargs); 2798 2799 i_allocated_regs = s->reserved_regs; 2800 o_allocated_regs = s->reserved_regs; 2801 2802 /* satisfy input constraints */ 2803 for (k = 0; k < nb_iargs; k++) { 2804 i = def->sorted_args[nb_oargs + k]; 2805 arg = op->args[i]; 2806 arg_ct = &def->args_ct[i]; 2807 ts = arg_temp(arg); 2808 2809 if (ts->val_type == TEMP_VAL_CONST 2810 && tcg_target_const_match(ts->val, ts->type, arg_ct)) { 2811 /* constant is OK for instruction */ 2812 const_args[i] = 1; 2813 new_args[i] = ts->val; 2814 goto iarg_end; 2815 } 2816 2817 temp_load(s, ts, arg_ct->u.regs, i_allocated_regs); 2818 2819 if (arg_ct->ct & TCG_CT_IALIAS) { 2820 if (ts->fixed_reg) { 2821 /* if fixed register, we must allocate a new register 2822 if the alias is not the same register */ 2823 if (arg != op->args[arg_ct->alias_index]) 2824 goto allocate_in_reg; 2825 } else { 2826 /* if the input is aliased to an output and if it is 2827 not dead after the instruction, we must allocate 2828 a new register and move it */ 2829 if (!IS_DEAD_ARG(i)) { 2830 goto allocate_in_reg; 2831 } 2832 /* check if the current register has already been allocated 2833 for another input aliased to an output */ 2834 int k2, i2; 2835 for (k2 = 0 ; k2 < k ; k2++) { 2836 i2 = def->sorted_args[nb_oargs + k2]; 2837 if ((def->args_ct[i2].ct & TCG_CT_IALIAS) && 2838 (new_args[i2] == ts->reg)) { 2839 goto allocate_in_reg; 2840 } 2841 } 2842 } 2843 } 2844 reg = ts->reg; 2845 if (tcg_regset_test_reg(arg_ct->u.regs, reg)) { 2846 /* nothing to do : the constraint is satisfied */ 2847 } else { 2848 allocate_in_reg: 2849 /* allocate a new register matching the constraint 2850 and move the temporary register into it */ 2851 reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs, 2852 ts->indirect_base); 2853 tcg_out_mov(s, ts->type, reg, ts->reg); 2854 } 2855 new_args[i] = reg; 2856 const_args[i] = 0; 2857 tcg_regset_set_reg(i_allocated_regs, reg); 2858 iarg_end: ; 2859 } 2860 2861 /* mark dead temporaries and free the associated registers */ 2862 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2863 if (IS_DEAD_ARG(i)) { 2864 temp_dead(s, arg_temp(op->args[i])); 2865 } 2866 } 2867 2868 if (def->flags & TCG_OPF_BB_END) { 2869 tcg_reg_alloc_bb_end(s, i_allocated_regs); 2870 } else { 2871 if (def->flags & TCG_OPF_CALL_CLOBBER) { 2872 /* XXX: permit generic clobber register list ? 
*/ 2873 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 2874 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 2875 tcg_reg_free(s, i, i_allocated_regs); 2876 } 2877 } 2878 } 2879 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2880 /* sync globals if the op has side effects and might trigger 2881 an exception. */ 2882 sync_globals(s, i_allocated_regs); 2883 } 2884 2885 /* satisfy the output constraints */ 2886 for(k = 0; k < nb_oargs; k++) { 2887 i = def->sorted_args[k]; 2888 arg = op->args[i]; 2889 arg_ct = &def->args_ct[i]; 2890 ts = arg_temp(arg); 2891 if ((arg_ct->ct & TCG_CT_ALIAS) 2892 && !const_args[arg_ct->alias_index]) { 2893 reg = new_args[arg_ct->alias_index]; 2894 } else if (arg_ct->ct & TCG_CT_NEWREG) { 2895 reg = tcg_reg_alloc(s, arg_ct->u.regs, 2896 i_allocated_regs | o_allocated_regs, 2897 ts->indirect_base); 2898 } else { 2899 /* if fixed register, we try to use it */ 2900 reg = ts->reg; 2901 if (ts->fixed_reg && 2902 tcg_regset_test_reg(arg_ct->u.regs, reg)) { 2903 goto oarg_end; 2904 } 2905 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs, 2906 ts->indirect_base); 2907 } 2908 tcg_regset_set_reg(o_allocated_regs, reg); 2909 /* if a fixed register is used, then a move will be done afterwards */ 2910 if (!ts->fixed_reg) { 2911 if (ts->val_type == TEMP_VAL_REG) { 2912 s->reg_to_temp[ts->reg] = NULL; 2913 } 2914 ts->val_type = TEMP_VAL_REG; 2915 ts->reg = reg; 2916 /* temp value is modified, so the value kept in memory is 2917 potentially not the same */ 2918 ts->mem_coherent = 0; 2919 s->reg_to_temp[reg] = ts; 2920 } 2921 oarg_end: 2922 new_args[i] = reg; 2923 } 2924 } 2925 2926 /* emit instruction */ 2927 tcg_out_op(s, op->opc, new_args, const_args); 2928 2929 /* move the outputs in the correct register if needed */ 2930 for(i = 0; i < nb_oargs; i++) { 2931 ts = arg_temp(op->args[i]); 2932 reg = new_args[i]; 2933 if (ts->fixed_reg && ts->reg != reg) { 2934 tcg_out_mov(s, ts->type, ts->reg, reg); 2935 } 2936 if (NEED_SYNC_ARG(i)) { 2937 temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i)); 2938 } else if (IS_DEAD_ARG(i)) { 2939 temp_dead(s, ts); 2940 } 2941 } 2942 } 2943 2944 #ifdef TCG_TARGET_STACK_GROWSUP 2945 #define STACK_DIR(x) (-(x)) 2946 #else 2947 #define STACK_DIR(x) (x) 2948 #endif 2949 2950 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 2951 { 2952 const int nb_oargs = op->callo; 2953 const int nb_iargs = op->calli; 2954 const TCGLifeData arg_life = op->life; 2955 int flags, nb_regs, i; 2956 TCGReg reg; 2957 TCGArg arg; 2958 TCGTemp *ts; 2959 intptr_t stack_offset; 2960 size_t call_stack_size; 2961 tcg_insn_unit *func_addr; 2962 int allocate_args; 2963 TCGRegSet allocated_regs; 2964 2965 func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs]; 2966 flags = op->args[nb_oargs + nb_iargs + 1]; 2967 2968 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 2969 if (nb_regs > nb_iargs) { 2970 nb_regs = nb_iargs; 2971 } 2972 2973 /* assign stack slots first */ 2974 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); 2975 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 2976 ~(TCG_TARGET_STACK_ALIGN - 1); 2977 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); 2978 if (allocate_args) { 2979 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed, 2980 preallocate call stack */ 2981 tcg_abort(); 2982 } 2983 2984 stack_offset = TCG_TARGET_CALL_STACK_OFFSET; 2985 for (i = nb_regs; i < nb_iargs; i++) { 2986 arg = op->args[nb_oargs + i]; 2987 #ifdef TCG_TARGET_STACK_GROWSUP 2988 stack_offset -= 
sizeof(tcg_target_long); 2989 #endif 2990 if (arg != TCG_CALL_DUMMY_ARG) { 2991 ts = arg_temp(arg); 2992 temp_load(s, ts, tcg_target_available_regs[ts->type], 2993 s->reserved_regs); 2994 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); 2995 } 2996 #ifndef TCG_TARGET_STACK_GROWSUP 2997 stack_offset += sizeof(tcg_target_long); 2998 #endif 2999 } 3000 3001 /* assign input registers */ 3002 allocated_regs = s->reserved_regs; 3003 for (i = 0; i < nb_regs; i++) { 3004 arg = op->args[nb_oargs + i]; 3005 if (arg != TCG_CALL_DUMMY_ARG) { 3006 ts = arg_temp(arg); 3007 reg = tcg_target_call_iarg_regs[i]; 3008 tcg_reg_free(s, reg, allocated_regs); 3009 3010 if (ts->val_type == TEMP_VAL_REG) { 3011 if (ts->reg != reg) { 3012 tcg_out_mov(s, ts->type, reg, ts->reg); 3013 } 3014 } else { 3015 TCGRegSet arg_set = 0; 3016 3017 tcg_regset_set_reg(arg_set, reg); 3018 temp_load(s, ts, arg_set, allocated_regs); 3019 } 3020 3021 tcg_regset_set_reg(allocated_regs, reg); 3022 } 3023 } 3024 3025 /* mark dead temporaries and free the associated registers */ 3026 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3027 if (IS_DEAD_ARG(i)) { 3028 temp_dead(s, arg_temp(op->args[i])); 3029 } 3030 } 3031 3032 /* clobber call registers */ 3033 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 3034 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 3035 tcg_reg_free(s, i, allocated_regs); 3036 } 3037 } 3038 3039 /* Save globals if they might be written by the helper, sync them if 3040 they might be read. */ 3041 if (flags & TCG_CALL_NO_READ_GLOBALS) { 3042 /* Nothing to do */ 3043 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) { 3044 sync_globals(s, allocated_regs); 3045 } else { 3046 save_globals(s, allocated_regs); 3047 } 3048 3049 tcg_out_call(s, func_addr); 3050 3051 /* assign output registers and emit moves if needed */ 3052 for(i = 0; i < nb_oargs; i++) { 3053 arg = op->args[i]; 3054 ts = arg_temp(arg); 3055 reg = tcg_target_call_oarg_regs[i]; 3056 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 3057 3058 if (ts->fixed_reg) { 3059 if (ts->reg != reg) { 3060 tcg_out_mov(s, ts->type, ts->reg, reg); 3061 } 3062 } else { 3063 if (ts->val_type == TEMP_VAL_REG) { 3064 s->reg_to_temp[ts->reg] = NULL; 3065 } 3066 ts->val_type = TEMP_VAL_REG; 3067 ts->reg = reg; 3068 ts->mem_coherent = 0; 3069 s->reg_to_temp[reg] = ts; 3070 if (NEED_SYNC_ARG(i)) { 3071 temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i)); 3072 } else if (IS_DEAD_ARG(i)) { 3073 temp_dead(s, ts); 3074 } 3075 } 3076 } 3077 } 3078 3079 #ifdef CONFIG_PROFILER 3080 3081 /* avoid copy/paste errors */ 3082 #define PROF_ADD(to, from, field) \ 3083 do { \ 3084 (to)->field += atomic_read(&((from)->field)); \ 3085 } while (0) 3086 3087 #define PROF_MAX(to, from, field) \ 3088 do { \ 3089 typeof((from)->field) val__ = atomic_read(&((from)->field)); \ 3090 if (val__ > (to)->field) { \ 3091 (to)->field = val__; \ 3092 } \ 3093 } while (0) 3094 3095 /* Pass in a zero'ed @prof */ 3096 static inline 3097 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) 3098 { 3099 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); 3100 unsigned int i; 3101 3102 for (i = 0; i < n_ctxs; i++) { 3103 TCGContext *s = atomic_read(&tcg_ctxs[i]); 3104 const TCGProfile *orig = &s->prof; 3105 3106 if (counters) { 3107 PROF_ADD(prof, orig, tb_count1); 3108 PROF_ADD(prof, orig, tb_count); 3109 PROF_ADD(prof, orig, op_count); 3110 PROF_MAX(prof, orig, op_count_max); 3111 PROF_ADD(prof, orig, temp_count); 3112 PROF_MAX(prof, orig, temp_count_max); 3113 PROF_ADD(prof, orig, 
del_op_count); 3114 PROF_ADD(prof, orig, code_in_len); 3115 PROF_ADD(prof, orig, code_out_len); 3116 PROF_ADD(prof, orig, search_out_len); 3117 PROF_ADD(prof, orig, interm_time); 3118 PROF_ADD(prof, orig, code_time); 3119 PROF_ADD(prof, orig, la_time); 3120 PROF_ADD(prof, orig, opt_time); 3121 PROF_ADD(prof, orig, restore_count); 3122 PROF_ADD(prof, orig, restore_time); 3123 } 3124 if (table) { 3125 int i; 3126 3127 for (i = 0; i < NB_OPS; i++) { 3128 PROF_ADD(prof, orig, table_op_count[i]); 3129 } 3130 } 3131 } 3132 } 3133 3134 #undef PROF_ADD 3135 #undef PROF_MAX 3136 3137 static void tcg_profile_snapshot_counters(TCGProfile *prof) 3138 { 3139 tcg_profile_snapshot(prof, true, false); 3140 } 3141 3142 static void tcg_profile_snapshot_table(TCGProfile *prof) 3143 { 3144 tcg_profile_snapshot(prof, false, true); 3145 } 3146 3147 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) 3148 { 3149 TCGProfile prof = {}; 3150 int i; 3151 3152 tcg_profile_snapshot_table(&prof); 3153 for (i = 0; i < NB_OPS; i++) { 3154 cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name, 3155 prof.table_op_count[i]); 3156 } 3157 } 3158 #else 3159 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) 3160 { 3161 cpu_fprintf(f, "[TCG profiler not compiled]\n"); 3162 } 3163 #endif 3164 3165 3166 int tcg_gen_code(TCGContext *s, TranslationBlock *tb) 3167 { 3168 #ifdef CONFIG_PROFILER 3169 TCGProfile *prof = &s->prof; 3170 #endif 3171 int i, oi, oi_next, num_insns; 3172 3173 #ifdef CONFIG_PROFILER 3174 { 3175 int n; 3176 3177 n = s->gen_op_buf[0].prev + 1; 3178 atomic_set(&prof->op_count, prof->op_count + n); 3179 if (n > prof->op_count_max) { 3180 atomic_set(&prof->op_count_max, n); 3181 } 3182 3183 n = s->nb_temps; 3184 atomic_set(&prof->temp_count, prof->temp_count + n); 3185 if (n > prof->temp_count_max) { 3186 atomic_set(&prof->temp_count_max, n); 3187 } 3188 } 3189 #endif 3190 3191 #ifdef DEBUG_DISAS 3192 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 3193 && qemu_log_in_addr_range(tb->pc))) { 3194 qemu_log_lock(); 3195 qemu_log("OP:\n"); 3196 tcg_dump_ops(s); 3197 qemu_log("\n"); 3198 qemu_log_unlock(); 3199 } 3200 #endif 3201 3202 #ifdef CONFIG_PROFILER 3203 atomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); 3204 #endif 3205 3206 #ifdef USE_TCG_OPTIMIZATIONS 3207 tcg_optimize(s); 3208 #endif 3209 3210 #ifdef CONFIG_PROFILER 3211 atomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); 3212 atomic_set(&prof->la_time, prof->la_time - profile_getclock()); 3213 #endif 3214 3215 liveness_pass_1(s); 3216 3217 if (s->nb_indirects > 0) { 3218 #ifdef DEBUG_DISAS 3219 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 3220 && qemu_log_in_addr_range(tb->pc))) { 3221 qemu_log_lock(); 3222 qemu_log("OP before indirect lowering:\n"); 3223 tcg_dump_ops(s); 3224 qemu_log("\n"); 3225 qemu_log_unlock(); 3226 } 3227 #endif 3228 /* Replace indirect temps with direct temps. */ 3229 if (liveness_pass_2(s)) { 3230 /* If changes were made, re-run liveness. 
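/* The CONFIG_PROFILER timings above rely on a small idiom: subtract the
   clock before the work and add it back afterwards, so the counter
   accumulates only the elapsed cycles.  Sketch with a plain variable
   instead of the atomically updated TCGProfile field (do_the_work is an
   invented placeholder):  */
#if 0
    int64_t spent = 0;

    spent -= profile_getclock();
    do_the_work();                   /* e.g. tcg_optimize(s) */
    spent += profile_getclock();     /* spent == cycles inside the work */
#endif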
*/ 3231 liveness_pass_1(s); 3232 } 3233 } 3234 3235 #ifdef CONFIG_PROFILER 3236 atomic_set(&prof->la_time, prof->la_time + profile_getclock()); 3237 #endif 3238 3239 #ifdef DEBUG_DISAS 3240 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 3241 && qemu_log_in_addr_range(tb->pc))) { 3242 qemu_log_lock(); 3243 qemu_log("OP after optimization and liveness analysis:\n"); 3244 tcg_dump_ops(s); 3245 qemu_log("\n"); 3246 qemu_log_unlock(); 3247 } 3248 #endif 3249 3250 tcg_reg_alloc_start(s); 3251 3252 s->code_buf = tb->tc.ptr; 3253 s->code_ptr = tb->tc.ptr; 3254 3255 #ifdef TCG_TARGET_NEED_LDST_LABELS 3256 s->ldst_labels = NULL; 3257 #endif 3258 #ifdef TCG_TARGET_NEED_POOL_LABELS 3259 s->pool_labels = NULL; 3260 #endif 3261 3262 num_insns = -1; 3263 for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) { 3264 TCGOp * const op = &s->gen_op_buf[oi]; 3265 TCGOpcode opc = op->opc; 3266 3267 oi_next = op->next; 3268 #ifdef CONFIG_PROFILER 3269 atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1); 3270 #endif 3271 3272 switch (opc) { 3273 case INDEX_op_mov_i32: 3274 case INDEX_op_mov_i64: 3275 tcg_reg_alloc_mov(s, op); 3276 break; 3277 case INDEX_op_movi_i32: 3278 case INDEX_op_movi_i64: 3279 tcg_reg_alloc_movi(s, op); 3280 break; 3281 case INDEX_op_insn_start: 3282 if (num_insns >= 0) { 3283 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 3284 } 3285 num_insns++; 3286 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 3287 target_ulong a; 3288 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 3289 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 3290 #else 3291 a = op->args[i]; 3292 #endif 3293 s->gen_insn_data[num_insns][i] = a; 3294 } 3295 break; 3296 case INDEX_op_discard: 3297 temp_dead(s, arg_temp(op->args[0])); 3298 break; 3299 case INDEX_op_set_label: 3300 tcg_reg_alloc_bb_end(s, s->reserved_regs); 3301 tcg_out_label(s, arg_label(op->args[0]), s->code_ptr); 3302 break; 3303 case INDEX_op_call: 3304 tcg_reg_alloc_call(s, op); 3305 break; 3306 default: 3307 /* Sanity check that we've not introduced any unhandled opcodes. */ 3308 tcg_debug_assert(tcg_op_supported(opc)); 3309 /* Note: in order to speed up the code, it would be much 3310 faster to have specialized register allocator functions for 3311 some common argument patterns */ 3312 tcg_reg_alloc_op(s, op); 3313 break; 3314 } 3315 #ifdef CONFIG_DEBUG_TCG 3316 check_regs(s); 3317 #endif 3318 /* Test for (pending) buffer overflow. The assumption is that any 3319 one operation beginning below the high water mark cannot overrun 3320 the buffer completely. Thus we can test for overflow after 3321 generating code without having to check during generation. 
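/* Numerically: the high-water mark is set TCG_HIGHWATER (1024) bytes before
   the true end of the region, so the single post-op test below is safe as
   long as no single op can emit more than that.  Sketch of the invariant
   (buffer_end is an invented name for this illustration):  */
#if 0
    s->code_gen_highwater = buffer_end - TCG_HIGHWATER;
    /* Any op that starts emitting while code_ptr <= code_gen_highwater
       cannot run past buffer_end, so checking once after each op suffices. */
#endif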
*/ 3322 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 3323 return -1; 3324 } 3325 } 3326 tcg_debug_assert(num_insns >= 0); 3327 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 3328 3329 /* Generate TB finalization at the end of block */ 3330 #ifdef TCG_TARGET_NEED_LDST_LABELS 3331 if (!tcg_out_ldst_finalize(s)) { 3332 return -1; 3333 } 3334 #endif 3335 #ifdef TCG_TARGET_NEED_POOL_LABELS 3336 if (!tcg_out_pool_finalize(s)) { 3337 return -1; 3338 } 3339 #endif 3340 3341 /* flush instruction cache */ 3342 flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); 3343 3344 return tcg_current_code_size(s); 3345 } 3346 3347 #ifdef CONFIG_PROFILER 3348 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) 3349 { 3350 TCGProfile prof = {}; 3351 const TCGProfile *s; 3352 int64_t tb_count; 3353 int64_t tb_div_count; 3354 int64_t tot; 3355 3356 tcg_profile_snapshot_counters(&prof); 3357 s = &prof; 3358 tb_count = s->tb_count; 3359 tb_div_count = tb_count ? tb_count : 1; 3360 tot = s->interm_time + s->code_time; 3361 3362 cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n", 3363 tot, tot / 2.4e9); 3364 cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n", 3365 tb_count, s->tb_count1 - tb_count, 3366 (double)(s->tb_count1 - s->tb_count) 3367 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0); 3368 cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n", 3369 (double)s->op_count / tb_div_count, s->op_count_max); 3370 cpu_fprintf(f, "deleted ops/TB %0.2f\n", 3371 (double)s->del_op_count / tb_div_count); 3372 cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n", 3373 (double)s->temp_count / tb_div_count, s->temp_count_max); 3374 cpu_fprintf(f, "avg host code/TB %0.1f\n", 3375 (double)s->code_out_len / tb_div_count); 3376 cpu_fprintf(f, "avg search data/TB %0.1f\n", 3377 (double)s->search_out_len / tb_div_count); 3378 3379 cpu_fprintf(f, "cycles/op %0.1f\n", 3380 s->op_count ? (double)tot / s->op_count : 0); 3381 cpu_fprintf(f, "cycles/in byte %0.1f\n", 3382 s->code_in_len ? (double)tot / s->code_in_len : 0); 3383 cpu_fprintf(f, "cycles/out byte %0.1f\n", 3384 s->code_out_len ? (double)tot / s->code_out_len : 0); 3385 cpu_fprintf(f, "cycles/search byte %0.1f\n", 3386 s->search_out_len ? (double)tot / s->search_out_len : 0); 3387 if (tot == 0) { 3388 tot = 1; 3389 } 3390 cpu_fprintf(f, " gen_interm time %0.1f%%\n", 3391 (double)s->interm_time / tot * 100.0); 3392 cpu_fprintf(f, " gen_code time %0.1f%%\n", 3393 (double)s->code_time / tot * 100.0); 3394 cpu_fprintf(f, "optim./code time %0.1f%%\n", 3395 (double)s->opt_time / (s->code_time ? s->code_time : 1) 3396 * 100.0); 3397 cpu_fprintf(f, "liveness/code time %0.1f%%\n", 3398 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0); 3399 cpu_fprintf(f, "cpu_restore count %" PRId64 "\n", 3400 s->restore_count); 3401 cpu_fprintf(f, " avg cycles %0.1f\n", 3402 s->restore_count ? (double)s->restore_time / s->restore_count : 0); 3403 } 3404 #else 3405 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) 3406 { 3407 cpu_fprintf(f, "[TCG profiler not compiled]\n"); 3408 } 3409 #endif 3410 3411 #ifdef ELF_HOST_MACHINE 3412 /* In order to use this feature, the backend needs to do three things: 3413 3414 (1) Define ELF_HOST_MACHINE to indicate both what value to 3415 put into the ELF image and to indicate support for the feature. 3416 3417 (2) Define tcg_register_jit. 
This should create a buffer containing 3418 the contents of a .debug_frame section that describes the post- 3419 prologue unwind info for the tcg machine. 3420 3421 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 3422 */ 3423 3424 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */ 3425 typedef enum { 3426 JIT_NOACTION = 0, 3427 JIT_REGISTER_FN, 3428 JIT_UNREGISTER_FN 3429 } jit_actions_t; 3430 3431 struct jit_code_entry { 3432 struct jit_code_entry *next_entry; 3433 struct jit_code_entry *prev_entry; 3434 const void *symfile_addr; 3435 uint64_t symfile_size; 3436 }; 3437 3438 struct jit_descriptor { 3439 uint32_t version; 3440 uint32_t action_flag; 3441 struct jit_code_entry *relevant_entry; 3442 struct jit_code_entry *first_entry; 3443 }; 3444 3445 void __jit_debug_register_code(void) __attribute__((noinline)); 3446 void __jit_debug_register_code(void) 3447 { 3448 asm(""); 3449 } 3450 3451 /* Must statically initialize the version, because GDB may check 3452 the version before we can set it. */ 3453 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 3454 3455 /* End GDB interface. */ 3456 3457 static int find_string(const char *strtab, const char *str) 3458 { 3459 const char *p = strtab + 1; 3460 3461 while (1) { 3462 if (strcmp(p, str) == 0) { 3463 return p - strtab; 3464 } 3465 p += strlen(p) + 1; 3466 } 3467 } 3468 3469 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size, 3470 const void *debug_frame, 3471 size_t debug_frame_size) 3472 { 3473 struct __attribute__((packed)) DebugInfo { 3474 uint32_t len; 3475 uint16_t version; 3476 uint32_t abbrev; 3477 uint8_t ptr_size; 3478 uint8_t cu_die; 3479 uint16_t cu_lang; 3480 uintptr_t cu_low_pc; 3481 uintptr_t cu_high_pc; 3482 uint8_t fn_die; 3483 char fn_name[16]; 3484 uintptr_t fn_low_pc; 3485 uintptr_t fn_high_pc; 3486 uint8_t cu_eoc; 3487 }; 3488 3489 struct ElfImage { 3490 ElfW(Ehdr) ehdr; 3491 ElfW(Phdr) phdr; 3492 ElfW(Shdr) shdr[7]; 3493 ElfW(Sym) sym[2]; 3494 struct DebugInfo di; 3495 uint8_t da[24]; 3496 char str[80]; 3497 }; 3498 3499 struct ElfImage *img; 3500 3501 static const struct ElfImage img_template = { 3502 .ehdr = { 3503 .e_ident[EI_MAG0] = ELFMAG0, 3504 .e_ident[EI_MAG1] = ELFMAG1, 3505 .e_ident[EI_MAG2] = ELFMAG2, 3506 .e_ident[EI_MAG3] = ELFMAG3, 3507 .e_ident[EI_CLASS] = ELF_CLASS, 3508 .e_ident[EI_DATA] = ELF_DATA, 3509 .e_ident[EI_VERSION] = EV_CURRENT, 3510 .e_type = ET_EXEC, 3511 .e_machine = ELF_HOST_MACHINE, 3512 .e_version = EV_CURRENT, 3513 .e_phoff = offsetof(struct ElfImage, phdr), 3514 .e_shoff = offsetof(struct ElfImage, shdr), 3515 .e_ehsize = sizeof(ElfW(Shdr)), 3516 .e_phentsize = sizeof(ElfW(Phdr)), 3517 .e_phnum = 1, 3518 .e_shentsize = sizeof(ElfW(Shdr)), 3519 .e_shnum = ARRAY_SIZE(img->shdr), 3520 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 3521 #ifdef ELF_HOST_FLAGS 3522 .e_flags = ELF_HOST_FLAGS, 3523 #endif 3524 #ifdef ELF_OSABI 3525 .e_ident[EI_OSABI] = ELF_OSABI, 3526 #endif 3527 }, 3528 .phdr = { 3529 .p_type = PT_LOAD, 3530 .p_flags = PF_X, 3531 }, 3532 .shdr = { 3533 [0] = { .sh_type = SHT_NULL }, 3534 /* Trick: The contents of code_gen_buffer are not present in 3535 this fake ELF file; that got allocated elsewhere. Therefore 3536 we mark .text as SHT_NOBITS (similar to .bss) so that readers 3537 will not look for contents. We can record any address. 
*/ 3538 [1] = { /* .text */ 3539 .sh_type = SHT_NOBITS, 3540 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 3541 }, 3542 [2] = { /* .debug_info */ 3543 .sh_type = SHT_PROGBITS, 3544 .sh_offset = offsetof(struct ElfImage, di), 3545 .sh_size = sizeof(struct DebugInfo), 3546 }, 3547 [3] = { /* .debug_abbrev */ 3548 .sh_type = SHT_PROGBITS, 3549 .sh_offset = offsetof(struct ElfImage, da), 3550 .sh_size = sizeof(img->da), 3551 }, 3552 [4] = { /* .debug_frame */ 3553 .sh_type = SHT_PROGBITS, 3554 .sh_offset = sizeof(struct ElfImage), 3555 }, 3556 [5] = { /* .symtab */ 3557 .sh_type = SHT_SYMTAB, 3558 .sh_offset = offsetof(struct ElfImage, sym), 3559 .sh_size = sizeof(img->sym), 3560 .sh_info = 1, 3561 .sh_link = ARRAY_SIZE(img->shdr) - 1, 3562 .sh_entsize = sizeof(ElfW(Sym)), 3563 }, 3564 [6] = { /* .strtab */ 3565 .sh_type = SHT_STRTAB, 3566 .sh_offset = offsetof(struct ElfImage, str), 3567 .sh_size = sizeof(img->str), 3568 } 3569 }, 3570 .sym = { 3571 [1] = { /* code_gen_buffer */ 3572 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 3573 .st_shndx = 1, 3574 } 3575 }, 3576 .di = { 3577 .len = sizeof(struct DebugInfo) - 4, 3578 .version = 2, 3579 .ptr_size = sizeof(void *), 3580 .cu_die = 1, 3581 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 3582 .fn_die = 2, 3583 .fn_name = "code_gen_buffer" 3584 }, 3585 .da = { 3586 1, /* abbrev number (the cu) */ 3587 0x11, 1, /* DW_TAG_compile_unit, has children */ 3588 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 3589 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 3590 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 3591 0, 0, /* end of abbrev */ 3592 2, /* abbrev number (the fn) */ 3593 0x2e, 0, /* DW_TAG_subprogram, no children */ 3594 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 3595 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 3596 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 3597 0, 0, /* end of abbrev */ 3598 0 /* no more abbrev */ 3599 }, 3600 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 3601 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 3602 }; 3603 3604 /* We only need a single jit entry; statically allocate it. */ 3605 static struct jit_code_entry one_entry; 3606 3607 uintptr_t buf = (uintptr_t)buf_ptr; 3608 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 3609 DebugFrameHeader *dfh; 3610 3611 img = g_malloc(img_size); 3612 *img = img_template; 3613 3614 img->phdr.p_vaddr = buf; 3615 img->phdr.p_paddr = buf; 3616 img->phdr.p_memsz = buf_size; 3617 3618 img->shdr[1].sh_name = find_string(img->str, ".text"); 3619 img->shdr[1].sh_addr = buf; 3620 img->shdr[1].sh_size = buf_size; 3621 3622 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 3623 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 3624 3625 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 3626 img->shdr[4].sh_size = debug_frame_size; 3627 3628 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 3629 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 3630 3631 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 3632 img->sym[1].st_value = buf; 3633 img->sym[1].st_size = buf_size; 3634 3635 img->di.cu_low_pc = buf; 3636 img->di.cu_high_pc = buf + buf_size; 3637 img->di.fn_low_pc = buf; 3638 img->di.fn_high_pc = buf + buf_size; 3639 3640 dfh = (DebugFrameHeader *)(img + 1); 3641 memcpy(dfh, debug_frame, debug_frame_size); 3642 dfh->fde.func_start = buf; 3643 dfh->fde.func_len = buf_size; 3644 3645 #ifdef DEBUG_JIT 3646 /* Enable this block to be able to debug the ELF image file creation. 
   One can use readelf, objdump, or other inspection utilities.  */
    {
        FILE *f = fopen("/tmp/qemu.jit", "w+b");
        if (f) {
            if (fwrite(img, img_size, 1, f) != 1) {
                /* Avoid stupid unused return value warning for fwrite.  */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */
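
/* For reference, the per-host half of the interface described above lives
   in each tcg-target.inc.c.  A much-simplified, hypothetical sketch of what
   a backend provides (the machine value and the unwind data are placeholders;
   real backends fill in a complete CIE/FDE with per-register rules):  */
#if 0
#define ELF_HOST_MACHINE  EM_NONE   /* step (1): the backend's real EM_* value */

/* step (2): normally a full DebugFrame built around DebugFrameHeader */
static const DebugFrameHeader debug_frame = { 0 };

/* step (3): the entry point called with the code buffer once it is allocated */
void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif
/* With DEBUG_JIT defined above, the generated image can additionally be
   inspected on disk, e.g. with "readelf -a /tmp/qemu.jit".  */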